From 62ea5d3069f6f4b3f37585f841ba6f2088a15bbc Mon Sep 17 00:00:00 2001 From: Tiemo Bang Date: Mon, 13 May 2024 17:38:33 -0700 Subject: [PATCH] TPCH similar to InkFuse --- Cargo.lock | 1628 ++++++++++++++++++------ analytics_plans.ipynb | 916 +++++++++++++ base/Cargo.toml | 4 +- base/src/lib.rs | 3 +- base/src/tpch/initialize.rs | 22 + base/src/tpch/mod.rs | 5 + base/src/tpch/query_1.rs | 265 ++++ base/src/tpch/query_19.rs | 257 ++++ base/src/tpch/query_4.rs | 247 ++++ base/src/tpch/util.rs | 29 + flow/examples/kmeans_baseline.rs | 23 - flow/examples/query_4.rs | 40 + flow/examples/query_4_distributed.rs | 77 ++ flow/src/bin/first_ten_distributed.rs | 4 +- flow/src/bin/query_4.rs | 18 + flow/src/bin/query_4_distributed.rs | 16 + flow/src/first_ten.rs | 8 +- flow/src/first_ten_distributed.rs | 10 +- flow/src/lib.rs | 11 + flow/src/tpch/mod.rs | 2 + flow/src/tpch/query_4.rs | 109 ++ flow/src/tpch/query_4_distributed.rs | 210 +++ flow_macro/Cargo.toml | 5 +- flow_macro/src/lib.rs | 1 - hydro_local_benchmarks/Cargo.toml | 4 + hydro_local_benchmarks/benches/tpch.rs | 111 ++ hydroflow_base/src/lib.rs | 3 +- hydroflow_base/src/tpch/mod.rs | 3 + hydroflow_base/src/tpch/query_1.rs | 98 ++ hydroflow_base/src/tpch/query_19.rs | 97 ++ hydroflow_base/src/tpch/query_4.rs | 84 ++ requirements.txt | 5 + 32 files changed, 3922 insertions(+), 393 deletions(-) create mode 100644 analytics_plans.ipynb create mode 100644 base/src/tpch/initialize.rs create mode 100644 base/src/tpch/mod.rs create mode 100644 base/src/tpch/query_1.rs create mode 100644 base/src/tpch/query_19.rs create mode 100644 base/src/tpch/query_4.rs create mode 100644 base/src/tpch/util.rs delete mode 100644 flow/examples/kmeans_baseline.rs create mode 100644 flow/examples/query_4.rs create mode 100644 flow/examples/query_4_distributed.rs create mode 100644 flow/src/bin/query_4.rs create mode 100644 flow/src/bin/query_4_distributed.rs create mode 100644 flow/src/tpch/mod.rs create mode 100644 flow/src/tpch/query_4.rs 
create mode 100644 flow/src/tpch/query_4_distributed.rs delete mode 100644 flow_macro/src/lib.rs create mode 100644 hydro_local_benchmarks/benches/tpch.rs create mode 100644 hydroflow_base/src/tpch/mod.rs create mode 100644 hydroflow_base/src/tpch/query_1.rs create mode 100644 hydroflow_base/src/tpch/query_19.rs create mode 100644 hydroflow_base/src/tpch/query_4.rs create mode 100644 requirements.txt diff --git a/Cargo.lock b/Cargo.lock index 178d0fa..d5e6f51 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -17,6 +17,31 @@ version = "1.0.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "f26201604c87b1e01bd3d98f8d5d9a8fcbb815e8cedb41ffccbeb4bf593a35fe" +[[package]] +name = "ahash" +version = "0.7.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "891477e0c6a8957309ee5c45a6368af3ae14bb510732d2684ffa19af310920f9" +dependencies = [ + "getrandom", + "once_cell", + "version_check", +] + +[[package]] +name = "ahash" +version = "0.8.11" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e89da841a80418a9b391ebaea17f5c112ffaaa96f621d2c285b5174da76b9011" +dependencies = [ + "cfg-if", + "const-random", + "getrandom", + "once_cell", + "version_check", + "zerocopy", +] + [[package]] name = "aho-corasick" version = "1.1.3" @@ -26,6 +51,12 @@ dependencies = [ "memchr", ] +[[package]] +name = "allocator-api2" +version = "0.2.18" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5c6cb57a04249c6480766f7f7cef5467412af1490f8d1e243141daddada3264f" + [[package]] name = "android-tzdata" version = "0.1.1" @@ -58,47 +89,48 @@ dependencies = [ [[package]] name = "anstream" -version = "0.6.13" +version = "0.6.14" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d96bd03f33fe50a863e394ee9718a706f988b9079b20c3784fb726e7678b62fb" +checksum = "418c75fa768af9c03be99d17643f93f79bbba589895012a80e3452a19ddda15b" dependencies = [ "anstyle", "anstyle-parse", 
"anstyle-query", "anstyle-wincon", "colorchoice", + "is_terminal_polyfill", "utf8parse", ] [[package]] name = "anstyle" -version = "1.0.6" +version = "1.0.7" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8901269c6307e8d93993578286ac0edf7f195079ffff5ebdeea6a59ffb7e36bc" +checksum = "038dfcf04a5feb68e9c60b21c9625a54c2c0616e79b72b0fd87075a056ae1d1b" [[package]] name = "anstyle-parse" -version = "0.2.3" +version = "0.2.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c75ac65da39e5fe5ab759307499ddad880d724eed2f6ce5b5e8a26f4f387928c" +checksum = "c03a11a9034d92058ceb6ee011ce58af4a9bf61491aa7e1e59ecd24bd40d22d4" dependencies = [ "utf8parse", ] [[package]] name = "anstyle-query" -version = "1.0.2" +version = "1.0.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e28923312444cdd728e4738b3f9c9cac739500909bb3d3c94b43551b16517648" +checksum = "a64c907d4e79225ac72e2a354c9ce84d50ebb4586dee56c82b3ee73004f537f5" dependencies = [ "windows-sys 0.52.0", ] [[package]] name = "anstyle-wincon" -version = "3.0.2" +version = "3.0.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1cd54b81ec8d6180e24654d0b371ad22fc3dd083b6ff8ba325b72e00c87660a7" +checksum = "61a38449feb7068f52bb06c12759005cf459ee52bb4adc1d5a7c4322d716fb19" dependencies = [ "anstyle", "windows-sys 0.52.0", @@ -106,13 +138,182 @@ dependencies = [ [[package]] name = "anyhow" -version = "1.0.81" +version = "1.0.83" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0952808a6c2afd1aa8947271f3a60f1a6763c7b912d210184c5149b5cf147247" +checksum = "25bdb32cbbdce2b519a9cd7df3a678443100e265d5e25ca763b7572a5104f5f3" dependencies = [ "backtrace", ] +[[package]] +name = "arrayvec" +version = "0.7.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "96d30a06541fbafbc7f82ed10c06164cfbd2c401138f6addd8404629c4b16711" + +[[package]] +name = "arrow" +version = 
"51.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "219d05930b81663fd3b32e3bde8ce5bff3c4d23052a99f11a8fa50a3b47b2658" +dependencies = [ + "arrow-arith", + "arrow-array", + "arrow-buffer", + "arrow-cast", + "arrow-data", + "arrow-ord", + "arrow-row", + "arrow-schema", + "arrow-select", + "arrow-string", +] + +[[package]] +name = "arrow-arith" +version = "51.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0272150200c07a86a390be651abdd320a2d12e84535f0837566ca87ecd8f95e0" +dependencies = [ + "arrow-array", + "arrow-buffer", + "arrow-data", + "arrow-schema", + "chrono", + "half", + "num", +] + +[[package]] +name = "arrow-array" +version = "51.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8010572cf8c745e242d1b632bd97bd6d4f40fefed5ed1290a8f433abaa686fea" +dependencies = [ + "ahash 0.8.11", + "arrow-buffer", + "arrow-data", + "arrow-schema", + "chrono", + "half", + "hashbrown 0.14.5", + "num", +] + +[[package]] +name = "arrow-buffer" +version = "51.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0d0a2432f0cba5692bf4cb757469c66791394bac9ec7ce63c1afe74744c37b27" +dependencies = [ + "bytes", + "half", + "num", +] + +[[package]] +name = "arrow-cast" +version = "51.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9abc10cd7995e83505cc290df9384d6e5412b207b79ce6bdff89a10505ed2cba" +dependencies = [ + "arrow-array", + "arrow-buffer", + "arrow-data", + "arrow-schema", + "arrow-select", + "atoi", + "base64", + "chrono", + "comfy-table", + "half", + "lexical-core", + "num", + "ryu", +] + +[[package]] +name = "arrow-data" +version = "51.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2742ac1f6650696ab08c88f6dd3f0eb68ce10f8c253958a18c943a68cd04aec5" +dependencies = [ + "arrow-buffer", + "arrow-schema", + "half", + "num", +] + +[[package]] +name = "arrow-ord" +version = 
"51.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e3e6b61e3dc468f503181dccc2fc705bdcc5f2f146755fa5b56d0a6c5943f412" +dependencies = [ + "arrow-array", + "arrow-buffer", + "arrow-data", + "arrow-schema", + "arrow-select", + "half", + "num", +] + +[[package]] +name = "arrow-row" +version = "51.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "848ee52bb92eb459b811fb471175ea3afcf620157674c8794f539838920f9228" +dependencies = [ + "ahash 0.8.11", + "arrow-array", + "arrow-buffer", + "arrow-data", + "arrow-schema", + "half", + "hashbrown 0.14.5", +] + +[[package]] +name = "arrow-schema" +version = "51.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "02d9483aaabe910c4781153ae1b6ae0393f72d9ef757d38d09d450070cf2e528" +dependencies = [ + "bitflags 2.5.0", +] + +[[package]] +name = "arrow-select" +version = "51.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "849524fa70e0e3c5ab58394c770cb8f514d0122d20de08475f7b472ed8075830" +dependencies = [ + "ahash 0.8.11", + "arrow-array", + "arrow-buffer", + "arrow-data", + "arrow-schema", + "num", +] + +[[package]] +name = "arrow-string" +version = "51.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9373cb5a021aee58863498c37eb484998ef13377f69989c6c5ccfbd258236cdb" +dependencies = [ + "arrow-array", + "arrow-buffer", + "arrow-data", + "arrow-schema", + "arrow-select", + "memchr", + "num", + "regex", + "regex-syntax", +] + [[package]] name = "ascii" version = "1.1.0" @@ -132,13 +333,13 @@ dependencies = [ [[package]] name = "async-channel" -version = "2.2.0" +version = "2.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f28243a43d821d11341ab73c80bed182dc015c514b951616cf79bd4af39af0c3" +checksum = "9f2776ead772134d55b62dd45e59a79e21612d85d0af729b8b7d3967d601a62a" dependencies = [ "concurrent-queue", - "event-listener 5.2.0", - 
"event-listener-strategy 0.5.0", + "event-listener 5.3.0", + "event-listener-strategy 0.5.2", "futures-core", "pin-project-lite", ] @@ -175,8 +376,8 @@ dependencies = [ "futures-io", "futures-lite 2.3.0", "parking", - "polling 3.5.0", - "rustix 0.38.32", + "polling 3.7.0", + "rustix 0.38.34", "slab", "tracing", "windows-sys 0.52.0", @@ -221,37 +422,37 @@ dependencies = [ "cfg-if", "event-listener 3.1.0", "futures-lite 1.13.0", - "rustix 0.38.32", + "rustix 0.38.34", "windows-sys 0.48.0", ] [[package]] name = "async-recursion" -version = "1.1.0" +version = "1.1.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "30c5ef0ede93efbf733c1a727f3b6b5a1060bbedd5600183e66f6e4be4af0ec5" +checksum = "3b43422f69d8ff38f95f1b2bb76517c91589a924d1559a0e935d7c8ce0274c11" dependencies = [ "proc-macro2", "quote", - "syn 2.0.53", + "syn 2.0.63", ] [[package]] name = "async-signal" -version = "0.2.5" +version = "0.2.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9e47d90f65a225c4527103a8d747001fc56e375203592b25ad103e1ca13124c5" +checksum = "afe66191c335039c7bb78f99dc7520b0cbb166b3a1cb33a03f53d8a1c6f2afda" dependencies = [ "async-io 2.3.2", - "async-lock 2.8.0", + "async-lock 3.3.0", "atomic-waker", "cfg-if", "futures-core", "futures-io", - "rustix 0.38.32", + "rustix 0.38.34", "signal-hook-registry", "slab", - "windows-sys 0.48.0", + "windows-sys 0.52.0", ] [[package]] @@ -269,19 +470,28 @@ dependencies = [ [[package]] name = "async-task" -version = "4.7.0" +version = "4.7.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fbb36e985947064623dbd357f727af08ffd077f93d696782f3c56365fa2e2799" +checksum = "8b75356056920673b02621b35afd0f7dda9306d03c79a30f5c56c44cf256e3de" [[package]] name = "async-trait" -version = "0.1.78" +version = "0.1.80" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "461abc97219de0eaaf81fe3ef974a540158f3d079c2ab200f891f1a2ef201e85" +checksum = 
"c6fa2087f2753a7da8cc1c0dbfcf89579dd57458e36769de5ac750b4671737ca" dependencies = [ "proc-macro2", "quote", - "syn 2.0.53", + "syn 2.0.63", +] + +[[package]] +name = "atoi" +version = "2.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f28d99ec8bfea296261ca1af174f24225171fea9664ba9003cbebee704810528" +dependencies = [ + "num-traits", ] [[package]] @@ -309,20 +519,20 @@ checksum = "3c87f3f15e7794432337fc718554eaa4dc8f04c9677a950ffe366f20a162ae42" dependencies = [ "proc-macro2", "quote", - "syn 2.0.53", + "syn 2.0.63", ] [[package]] name = "autocfg" -version = "1.1.0" +version = "1.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d468802bab17cbc0cc575e9b053f41e72aa36bfa6b7f55e3529ffa43161b97fa" +checksum = "0c4b4d0bd25bd0b74681c0ad21497610ce1b7c91b1022cd21c80c6fbdd9476b0" [[package]] name = "backtrace" -version = "0.3.69" +version = "0.3.71" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2089b7e3f35b9dd2d0ed921ead4f6d318c27680d4a5bd167b3ee120edb105837" +checksum = "26b05800d2e817c8b3b4b54abd461726265fa9789ae34330622f2db9ee696f9d" dependencies = [ "addr2line", "cc", @@ -337,9 +547,17 @@ dependencies = [ name = "base" version = "0.0.0" dependencies = [ + "chrono", + "duckdb", "itertools 0.12.1", ] +[[package]] +name = "base64" +version = "0.22.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "72b3254f16251a8381aa12e40e3c4d2f0199f8c6508fbecb9d91f575e0fbb8c6" + [[package]] name = "bincode" version = "1.3.3" @@ -361,6 +579,18 @@ version = "2.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "cf4b9d6a944f767f8e5e0db018570623c85f3d925ac718db4e06d0187adb21c1" +[[package]] +name = "bitvec" +version = "1.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1bc2832c24239b0141d5674bb9174f9d68a8b5b3f2753311927c172ca46f7e9c" +dependencies = [ + "funty", + "radium", + "tap", + "wyz", +] + 
[[package]] name = "block-buffer" version = "0.10.4" @@ -370,27 +600,80 @@ dependencies = [ "generic-array", ] +[[package]] +name = "block2" +version = "0.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "43ff7d91d3c1d568065b06c899777d1e48dcf76103a672a0adbc238a7f247f1e" +dependencies = [ + "objc2", +] + [[package]] name = "blocking" -version = "1.5.1" +version = "1.6.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6a37913e8dc4ddcc604f0c6d3bf2887c995153af3611de9e23c352b44c1b9118" +checksum = "495f7104e962b7356f0aeb34247aca1fe7d2e783b346582db7f2904cb5717e88" dependencies = [ - "async-channel 2.2.0", + "async-channel 2.3.0", "async-lock 3.3.0", "async-task", - "fastrand 2.0.1", "futures-io", "futures-lite 2.3.0", "piper", - "tracing", +] + +[[package]] +name = "borsh" +version = "1.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "dbe5b10e214954177fb1dc9fbd20a1a2608fe99e6c832033bdc7cea287a20d77" +dependencies = [ + "borsh-derive", + "cfg_aliases", +] + +[[package]] +name = "borsh-derive" +version = "1.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d7a8646f94ab393e43e8b35a2558b1624bed28b97ee09c5d15456e3c9463f46d" +dependencies = [ + "once_cell", + "proc-macro-crate 3.1.0", + "proc-macro2", + "quote", + "syn 2.0.63", + "syn_derive", ] [[package]] name = "bumpalo" -version = "3.15.4" +version = "3.16.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7ff69b9dd49fd426c69a0db9fc04dd934cdb6645ff000864d98f7e2af8830eaa" +checksum = "79296716171880943b8470b5f8d03aa55eb2e645a4874bdbb28adb49162e012c" + +[[package]] +name = "bytecheck" +version = "0.6.12" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "23cdc57ce23ac53c931e88a43d06d070a6fd142f2617be5855eb75efc9beb1c2" +dependencies = [ + "bytecheck_derive", + "ptr_meta", + "simdutf8", +] + +[[package]] +name = "bytecheck_derive" 
+version = "0.6.12" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3db406d29fbcd95542e92559bed4d8ad92636d1ca8b3b72ede10b4bcc010e659" +dependencies = [ + "proc-macro2", + "quote", + "syn 1.0.109", +] [[package]] name = "byteorder" @@ -400,9 +683,9 @@ checksum = "1fd0f2584146f6f2ef48085050886acf353beff7305ebd1ae69500e27c67f64b" [[package]] name = "bytes" -version = "1.5.0" +version = "1.6.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a2bd12c1caf447e69cd4528f47f94d203fd2582878ecb9e9465484c4148a8223" +checksum = "514de17de45fdb8dc022b1a7975556c53c86f9f0aa5f534b98977b171857c2c9" [[package]] name = "c2rust-bitfields" @@ -435,9 +718,9 @@ dependencies = [ [[package]] name = "cargo-platform" -version = "0.1.7" +version = "0.1.8" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "694c8807f2ae16faecc43dc17d74b3eb042482789fd0eb64b39a2e04e087053f" +checksum = "24b1f0365a6c6bb4020cd05806fd0d33c44d38046b8bd7f0e40814b9763cabfc" dependencies = [ "serde", ] @@ -464,9 +747,14 @@ checksum = "37b2a672a2cb129a2e41c10b1224bb368f9f37a2b16b612598138befd7b37eb5" [[package]] name = "cc" -version = "1.0.90" +version = "1.0.97" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8cd6604a82acf3039f1144f54b8eb34e91ffba622051189e71b781822d5ee1f5" +checksum = "099a5357d84c4c61eb35fc8eafa9a79a902c2f76911e5747ced4e032edd8d9b4" +dependencies = [ + "jobserver", + "libc", + "once_cell", +] [[package]] name = "cc-traits" @@ -486,11 +774,17 @@ version = "1.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd" +[[package]] +name = "cfg_aliases" +version = "0.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fd16c4719339c4530435d38e511904438d07cce7950afa3718a84ac36c10e89e" + [[package]] name = "chrono" -version = "0.4.35" +version = "0.4.38" source = 
"registry+https://github.com/rust-lang/crates.io-index" -checksum = "8eaf5903dcbc0a39312feb77df2ff4c76387d591b9fc7b04a238dcf8bb62639a" +checksum = "a21f936df1771bf62b77f047b726c4625ff2e8aa607c01ec06e5a05bd8463401" dependencies = [ "android-tzdata", "iana-time-zone", @@ -498,7 +792,7 @@ dependencies = [ "num-traits", "serde", "wasm-bindgen", - "windows-targets 0.52.4", + "windows-targets 0.52.5", ] [[package]] @@ -534,21 +828,6 @@ dependencies = [ "half", ] -[[package]] -name = "clap" -version = "2.34.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a0610544180c38b88101fecf2dd634b174a62eef6946f84dfc6a7127512b381c" -dependencies = [ - "ansi_term", - "atty", - "bitflags 1.3.2", - "strsim 0.8.0", - "textwrap 0.11.0", - "unicode-width", - "vec_map", -] - [[package]] name = "clap" version = "3.2.25" @@ -558,7 +837,7 @@ dependencies = [ "bitflags 1.3.2", "clap_lex 0.2.4", "indexmap 1.9.3", - "textwrap 0.16.1", + "textwrap", ] [[package]] @@ -580,7 +859,7 @@ dependencies = [ "anstream", "anstyle", "clap_lex 0.7.0", - "strsim 0.11.0", + "strsim", ] [[package]] @@ -592,7 +871,7 @@ dependencies = [ "heck 0.5.0", "proc-macro2", "quote", - "syn 2.0.53", + "syn 2.0.63", ] [[package]] @@ -612,25 +891,36 @@ checksum = "98cc8fbded0c607b7ba9dd60cd98df59af97e84d24e49c8557331cfc26d301ce" [[package]] name = "colorchoice" -version = "1.0.0" +version = "1.0.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "acbf1af155f9b9ef647e42cdc158db4b64a1b61f743629225fde6f3e0be2a7c7" +checksum = "0b6a852b24ab71dffc585bcb46eaf7959d175cb865a7152e35b348d1b2960422" [[package]] name = "combine" -version = "4.6.6" +version = "4.6.7" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "35ed6e9d84f0b51a7f52daf1c7d71dd136fd7a3f41a8462b8cdb8c78d920fad4" +checksum = "ba5a308b75df32fe02788e748662718f03fde005016435c444eea572398219fd" dependencies = [ "bytes", "memchr", ] +[[package]] +name = "comfy-table" +version = "7.1.1" 
+source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b34115915337defe99b2aff5c2ce6771e5fbc4079f4b506301f5cf394c8452f7" +dependencies = [ + "strum 0.26.2", + "strum_macros 0.26.2", + "unicode-width", +] + [[package]] name = "concurrent-queue" -version = "2.4.0" +version = "2.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d16048cd947b08fa32c24458a22f5dc5e835264f689f4f5653210c69fd107363" +checksum = "4ca0197aee26d1ae37445ee532fefce43251d24cc7c166799f4d46817f1d3973" dependencies = [ "crossbeam-utils", ] @@ -648,6 +938,26 @@ dependencies = [ "windows-sys 0.52.0", ] +[[package]] +name = "const-random" +version = "0.1.18" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "87e00182fe74b066627d63b85fd550ac2998d4b0bd86bfed477a0ae4c7c71359" +dependencies = [ + "const-random-macro", +] + +[[package]] +name = "const-random-macro" +version = "0.1.16" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f9d839f2a20b0aee515dc581a6172f2321f96cab76c1a38a4c584a194955390e" +dependencies = [ + "getrandom", + "once_cell", + "tiny-keccak", +] + [[package]] name = "core-foundation" version = "0.9.4" @@ -673,6 +983,15 @@ dependencies = [ "libc", ] +[[package]] +name = "crc32fast" +version = "1.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b3855a8a784b474f333699ef2bbca9db2c4a1f6d9088a90a2d25b1eb53111eaa" +dependencies = [ + "cfg-if", +] + [[package]] name = "criterion" version = "0.4.0" @@ -750,11 +1069,21 @@ dependencies = [ "typenum", ] +[[package]] +name = "ctrlc" +version = "3.4.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "672465ae37dc1bc6380a6547a8883d5dd397b0f1faaad4f265726cc7042a5345" +dependencies = [ + "nix 0.28.0", + "windows-sys 0.52.0", +] + [[package]] name = "data-encoding" -version = "2.5.0" +version = "2.6.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"7e962a19be5cfc3f3bf6dd8f61eb50107f356ad6270fbb3ed41476571db78be5" +checksum = "e8566979429cf69b49a5c740c60791108e86440e8be149bbea4fe54d2c32d6e2" [[package]] name = "difference" @@ -774,22 +1103,41 @@ dependencies = [ [[package]] name = "dirs" -version = "3.0.2" +version = "5.0.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "30baa043103c9d0c2a57cf537cc2f35623889dc0d405e6c3cccfadbc81c71309" +checksum = "44c45a9d03d6676652bcb5e724c7e988de1acad23a711b5217ab9cbecbec2225" dependencies = [ "dirs-sys", ] [[package]] name = "dirs-sys" -version = "0.3.7" +version = "0.4.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1b1d1d91c932ef41c0f2663aa8b0ca0342d444d842c06914aa0a7e352d0bada6" +checksum = "520f05a5cbd335fae5a99ff7a6ab8627577660ee5cfd6a94a6a929b52ff0321c" dependencies = [ "libc", + "option-ext", "redox_users", - "winapi", + "windows-sys 0.48.0", +] + +[[package]] +name = "duckdb" +version = "0.10.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "424ede399a5d1084e65c0888fda71e407e5809400c92ff2cf510bfd1697b9c76" +dependencies = [ + "arrow", + "cast", + "fallible-iterator", + "fallible-streaming-iterator", + "hashlink", + "libduckdb-sys", + "memchr", + "rust_decimal", + "smallvec", + "strum 0.25.0", ] [[package]] @@ -806,9 +1154,9 @@ checksum = "0d6ef0072f8a535281e4876be788938b528e9a1d43900b82c2569af7da799125" [[package]] name = "either" -version = "1.10.0" +version = "1.11.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "11157ac094ffbdde99aa67b23417ebdd801842852b500e395a45a9c0aac03e4a" +checksum = "a47c1c47d2f5964e29c61246e81db715514cd532db6b5116a25ea3c03d6780a2" [[package]] name = "encode_unicode" @@ -824,9 +1172,9 @@ checksum = "5443807d6dff69373d433ab9ef5378ad8df50ca6298caf15de6e52e24aaf54d5" [[package]] name = "errno" -version = "0.3.8" +version = "0.3.9" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"a258e46cdc063eb8519c00b9fc845fc47bcfca4130e2f08e88665ceda8474245" +checksum = "534c5cf6194dfab3db3242765c03bbe257cf92f22b38f6bc0c58d59108a820ba" dependencies = [ "libc", "windows-sys 0.52.0", @@ -862,9 +1210,9 @@ dependencies = [ [[package]] name = "event-listener" -version = "5.2.0" +version = "5.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2b5fb89194fa3cad959b833185b3063ba881dbfc7030680b314250779fb4cc91" +checksum = "6d9944b8ca13534cdfb2800775f8dd4902ff3fc75a50101466decadfdf322a24" dependencies = [ "concurrent-queue", "parking", @@ -883,14 +1231,26 @@ dependencies = [ [[package]] name = "event-listener-strategy" -version = "0.5.0" +version = "0.5.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "feedafcaa9b749175d5ac357452a9d41ea2911da598fde46ce1fe02c37751291" +checksum = "0f214dc438f977e6d4e3500aaa277f5ad94ca83fbbd9b1a15713ce2344ccc5a1" dependencies = [ - "event-listener 5.2.0", + "event-listener 5.3.0", "pin-project-lite", ] +[[package]] +name = "fallible-iterator" +version = "0.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2acce4a10f12dc2fb14a218589d4f1f62ef011b2d0cc4b3cb1bba8e94da14649" + +[[package]] +name = "fallible-streaming-iterator" +version = "0.1.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7360491ce676a36bf9bb3c56c1aa791658183a54d2744120f27285738d90465a" + [[package]] name = "fastrand" version = "1.9.0" @@ -902,9 +1262,31 @@ dependencies = [ [[package]] name = "fastrand" -version = "2.0.1" +version = "2.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9fc0510504f03c51ada170672ac806f1f105a88aa97a5281117e1ddc3368e51a" + +[[package]] +name = "filetime" +version = "0.2.23" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "25cbce373ec4653f1a01a31e8a5e5ec0c622dc27ff9c4e6606eefef5cbbed4a5" +checksum = 
"1ee447700ac8aa0b2f2bd7bc4462ad686ba06baa6727ac149a2d6277f0d240fd" +dependencies = [ + "cfg-if", + "libc", + "redox_syscall 0.4.1", + "windows-sys 0.52.0", +] + +[[package]] +name = "flate2" +version = "1.0.30" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5f54427cfd1c7829e2a139fcefea601bf088ebca651d2bf53ebc600eac295dae" +dependencies = [ + "crc32fast", + "miniz_oxide", +] [[package]] name = "flow" @@ -931,6 +1313,8 @@ dependencies = [ name = "flow_macro" version = "0.0.0" dependencies = [ + "base", + "chrono", "hydroflow_plus", "hydroflow_plus_cli_integration", "stageleft", @@ -947,6 +1331,22 @@ dependencies = [ "percent-encoding", ] +[[package]] +name = "fs4" +version = "0.8.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "73969b81e8bc90a3828d913dd3973d80771bfb9d7fbe1a78a79122aad456af15" +dependencies = [ + "rustix 0.38.34", + "windows-sys 0.52.0", +] + +[[package]] +name = "funty" +version = "2.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e6d5a32815ae3f33302d95fdcb2ce17862f8c65363dcfd29360480ba1001fc9c" + [[package]] name = "futures" version = "0.3.30" @@ -1028,7 +1428,7 @@ checksum = "87750cf4b7a4c0625b1529e4c543c2182106e4dedc60a2a6455e00d212c489ac" dependencies = [ "proc-macro2", "quote", - "syn 2.0.53", + "syn 2.0.63", ] [[package]] @@ -1073,9 +1473,9 @@ dependencies = [ [[package]] name = "getrandom" -version = "0.2.12" +version = "0.2.15" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "190092ea657667030ac6a35e305e62fc4dd69fd98ac98631e5d3a2b1575a12b5" +checksum = "c4567c8db10ae91089c99af84c68c38da3ec2f087c3f82960bcdbf3656b6f4d7" dependencies = [ "cfg-if", "js-sys", @@ -1098,12 +1498,13 @@ checksum = "d2fabcfbdc87f4758337ca535fb41a6d701b65693ce38287d856d1674551ec9b" [[package]] name = "half" -version = "2.4.0" +version = "2.4.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"b5eceaaeec696539ddaf7b333340f1af35a5aa87ae3e4f3ead0532f72affab2e" +checksum = "6dd08c532ae367adf81c312a4580bc67f1d0fe8bc9c460520283f4c0ff277888" dependencies = [ "cfg-if", "crunchy", + "num-traits", ] [[package]] @@ -1111,12 +1512,28 @@ name = "hashbrown" version = "0.12.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "8a9ee70c43aaf417c914396645a0fa852624801b24ebb7ae78fe8272889ac888" +dependencies = [ + "ahash 0.7.8", +] [[package]] name = "hashbrown" -version = "0.14.3" +version = "0.14.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e5274423e17b7c9fc20b6e7e208532f9b19825d82dfd615708b70edd83df41f1" +dependencies = [ + "ahash 0.8.11", + "allocator-api2", +] + +[[package]] +name = "hashlink" +version = "0.8.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "290f1a1d9242c78d09ce40a5e87e7554ee637af1351968159f4952f028f75604" +checksum = "e8094feaf31ff591f651a2664fb9cfd92bba7a60ce3197265e9482ebe753c8f7" +dependencies = [ + "hashbrown 0.14.5", +] [[package]] name = "heck" @@ -1177,8 +1594,8 @@ checksum = "df3b46402a9d5adb4c86a0cf463f42e19994e3ee891101b1841f30a545cb49a9" [[package]] name = "hydro_deploy" -version = "0.6.0" -source = "git+https://github.com/hydro-project/hydroflow#70d420a870a7977a444236d3912da406448cdc1b" +version = "0.6.1" +source = "git+https://github.com/hydro-project/hydroflow#c2f6c9578127a71c879752d52e115df75659e2b0" dependencies = [ "anyhow", "async-channel 1.9.0", @@ -1196,7 +1613,7 @@ dependencies = [ "hydroflow_cli_integration", "indicatif", "nanoid", - "nix", + "nix 0.26.4", "once_cell", "serde", "serde_json", @@ -1218,8 +1635,8 @@ dependencies = [ [[package]] name = "hydroflow" -version = "0.6.0" -source = "git+https://github.com/hydro-project/hydroflow#70d420a870a7977a444236d3912da406448cdc1b" +version = "0.6.2" +source = "git+https://github.com/hydro-project/hydroflow#c2f6c9578127a71c879752d52e115df75659e2b0" dependencies = [ "bincode", "byteorder", 
@@ -1263,8 +1680,8 @@ dependencies = [ [[package]] name = "hydroflow_cli_integration" -version = "0.5.1" -source = "git+https://github.com/hydro-project/hydroflow#70d420a870a7977a444236d3912da406448cdc1b" +version = "0.5.2" +source = "git+https://github.com/hydro-project/hydroflow#c2f6c9578127a71c879752d52e115df75659e2b0" dependencies = [ "async-recursion", "async-trait", @@ -1274,40 +1691,41 @@ dependencies = [ "serde", "tempfile", "tokio", + "tokio-stream", "tokio-util", ] [[package]] name = "hydroflow_datalog" version = "0.6.0" -source = "git+https://github.com/hydro-project/hydroflow#70d420a870a7977a444236d3912da406448cdc1b" +source = "git+https://github.com/hydro-project/hydroflow#c2f6c9578127a71c879752d52e115df75659e2b0" dependencies = [ "hydroflow_datalog_core", - "proc-macro-crate", + "proc-macro-crate 1.3.1", "proc-macro2", "quote", - "syn 2.0.53", + "syn 2.0.63", ] [[package]] name = "hydroflow_datalog_core" -version = "0.6.0" -source = "git+https://github.com/hydro-project/hydroflow#70d420a870a7977a444236d3912da406448cdc1b" +version = "0.6.1" +source = "git+https://github.com/hydro-project/hydroflow#c2f6c9578127a71c879752d52e115df75659e2b0" dependencies = [ "hydroflow_lang", - "proc-macro-crate", + "proc-macro-crate 1.3.1", "proc-macro2", "quote", "rust-sitter", "rust-sitter-tool", "slotmap", - "syn 2.0.53", + "syn 2.0.63", ] [[package]] name = "hydroflow_lang" -version = "0.6.0" -source = "git+https://github.com/hydro-project/hydroflow#70d420a870a7977a444236d3912da406448cdc1b" +version = "0.6.2" +source = "git+https://github.com/hydro-project/hydroflow#c2f6c9578127a71c879752d52e115df75659e2b0" dependencies = [ "auto_impl", "clap 4.5.4", @@ -1320,44 +1738,45 @@ dependencies = [ "serde", "serde_json", "slotmap", - "syn 2.0.53", - "webbrowser", + "syn 2.0.63", + "webbrowser 0.8.15", ] [[package]] name = "hydroflow_macro" version = "0.6.0" -source = "git+https://github.com/hydro-project/hydroflow#70d420a870a7977a444236d3912da406448cdc1b" +source = 
"git+https://github.com/hydro-project/hydroflow#c2f6c9578127a71c879752d52e115df75659e2b0" dependencies = [ "hydroflow_lang", "itertools 0.10.5", - "proc-macro-crate", + "proc-macro-crate 1.3.1", "proc-macro2", "quote", - "syn 2.0.53", + "syn 2.0.63", ] [[package]] name = "hydroflow_plus" -version = "0.6.0" -source = "git+https://github.com/hydro-project/hydroflow#70d420a870a7977a444236d3912da406448cdc1b" +version = "0.6.1" +source = "git+https://github.com/hydro-project/hydroflow#c2f6c9578127a71c879752d52e115df75659e2b0" dependencies = [ "bincode", + "dyn-clone", "hydroflow", "hydroflow_lang", - "proc-macro-crate", + "proc-macro-crate 1.3.1", "proc-macro2", "quote", "serde", "stageleft", "stageleft_tool", - "syn 2.0.53", + "syn 2.0.63", ] [[package]] name = "hydroflow_plus_cli_integration" -version = "0.6.0" -source = "git+https://github.com/hydro-project/hydroflow#70d420a870a7977a444236d3912da406448cdc1b" +version = "0.6.1" +source = "git+https://github.com/hydro-project/hydroflow#c2f6c9578127a71c879752d52e115df75659e2b0" dependencies = [ "async-channel 1.9.0", "hydro_deploy", @@ -1365,7 +1784,7 @@ dependencies = [ "serde", "stageleft", "stageleft_tool", - "syn 2.0.53", + "syn 2.0.63", "tokio", ] @@ -1414,12 +1833,12 @@ dependencies = [ [[package]] name = "indexmap" -version = "2.2.5" +version = "2.2.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7b0b929d511467233429c45a44ac1dcaa21ba0f5ba11e4879e6ed28ddb4f9df4" +checksum = "168fb715dda47215e360912c096649d23d58bf392ac62f73919e831745e40f26" dependencies = [ "equivalent", - "hashbrown 0.14.3", + "hashbrown 0.14.5", ] [[package]] @@ -1435,6 +1854,12 @@ dependencies = [ "unicode-width", ] +[[package]] +name = "indoc" +version = "2.0.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b248f5224d1d606005e02c97f5aa4e88eeb230488bcc03bc9ca4d7991399f2b5" + [[package]] name = "instant" version = "0.1.12" @@ -1458,6 +1883,12 @@ dependencies = [ "windows-sys 
0.48.0", ] +[[package]] +name = "is_terminal_polyfill" +version = "1.70.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f8478577c03552c21db0e2724ffb8986a5ce7af88107e6be5d2ee6e158c12800" + [[package]] name = "itertools" version = "0.10.5" @@ -1478,9 +1909,9 @@ dependencies = [ [[package]] name = "itoa" -version = "1.0.10" +version = "1.0.11" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b1a46d1a171d865aa5f83f92695765caa047a9b4cbae2cbf37dbd613a793fd4c" +checksum = "49f1f14873335454500d59611f1cf4a4b0f786f9ac11f4312a78e4cf2566695b" [[package]] name = "jni" @@ -1504,6 +1935,15 @@ version = "0.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "8eaf4bc02d17cbdd7ff4c7438cafcdf7fb9a4613313ad11b4f8fefe7d3fa0130" +[[package]] +name = "jobserver" +version = "0.1.31" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d2b099aaa34a9751c5bf0878add70444e1ed2dd73f347be99003d4577277de6e" +dependencies = [ + "libc", +] + [[package]] name = "js-sys" version = "0.3.69" @@ -1515,8 +1955,8 @@ dependencies = [ [[package]] name = "lattices" -version = "0.5.3" -source = "git+https://github.com/hydro-project/hydroflow#70d420a870a7977a444236d3912da406448cdc1b" +version = "0.5.4" +source = "git+https://github.com/hydro-project/hydroflow#c2f6c9578127a71c879752d52e115df75659e2b0" dependencies = [ "cc-traits", "sealed", @@ -1529,31 +1969,116 @@ version = "1.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "e2abad23fbc42b3700f2f279844dc832adb2b2eb069b2df918f455c4e18cc646" +[[package]] +name = "lexical-core" +version = "0.8.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2cde5de06e8d4c2faabc400238f9ae1c74d5412d03a7bd067645ccbc47070e46" +dependencies = [ + "lexical-parse-float", + "lexical-parse-integer", + "lexical-util", + "lexical-write-float", + "lexical-write-integer", +] + +[[package]] +name = 
"lexical-parse-float" +version = "0.8.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "683b3a5ebd0130b8fb52ba0bdc718cc56815b6a097e28ae5a6997d0ad17dc05f" +dependencies = [ + "lexical-parse-integer", + "lexical-util", + "static_assertions", +] + +[[package]] +name = "lexical-parse-integer" +version = "0.8.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6d0994485ed0c312f6d965766754ea177d07f9c00c9b82a5ee62ed5b47945ee9" +dependencies = [ + "lexical-util", + "static_assertions", +] + +[[package]] +name = "lexical-util" +version = "0.8.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5255b9ff16ff898710eb9eb63cb39248ea8a5bb036bea8085b1a767ff6c4e3fc" +dependencies = [ + "static_assertions", +] + +[[package]] +name = "lexical-write-float" +version = "0.8.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "accabaa1c4581f05a3923d1b4cfd124c329352288b7b9da09e766b0668116862" +dependencies = [ + "lexical-util", + "lexical-write-integer", + "static_assertions", +] + +[[package]] +name = "lexical-write-integer" +version = "0.8.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e1b6f3d1f4422866b68192d62f77bc5c700bee84f3069f2469d7bc8c77852446" +dependencies = [ + "lexical-util", + "static_assertions", +] + [[package]] name = "libc" -version = "0.2.153" +version = "0.2.154" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9c198f91728a82281a64e1f4f9eeb25d82cb32a5de251c6bd1b5154d63a8e7bd" +checksum = "ae743338b92ff9146ce83992f766a31066a91a8c84a45e0e9f21e7cf6de6d346" + +[[package]] +name = "libduckdb-sys" +version = "0.10.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "51b3f02cecc430f61561bde538d42af4be2d9d5a8b058f74883e460bc1055461" +dependencies = [ + "autocfg", + "cc", + "flate2", + "pkg-config", + "serde", + "serde_json", + "tar", + "vcpkg", +] [[package]] name = 
"libloading" -version = "0.7.4" +version = "0.8.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b67380fd3b2fbe7527a606e18729d21c6f3951633d0500574c4dc22d2d638b9f" +checksum = "0c2a198fb6b0eada2a8df47933734e6d35d350665a33a3593d7164fa52c75c19" dependencies = [ "cfg-if", - "winapi", + "windows-targets 0.52.5", ] +[[package]] +name = "libm" +version = "0.2.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4ec2a862134d2a7d32d7983ddcdd1c4923530833c9f2ea1a44fc5fa473989058" + [[package]] name = "libredox" -version = "0.0.1" +version = "0.1.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "85c833ca1e66078851dba29046874e38f08b2c883700aa29a03ddd3b23814ee8" +checksum = "c0ff37bd590ca25063e35af745c343cb7a0271906fb7b37e4813e8f79f00268d" dependencies = [ "bitflags 2.5.0", "libc", - "redox_syscall 0.4.1", ] [[package]] @@ -1596,9 +2121,9 @@ checksum = "01cda141df6706de531b6c46c3a33ecca755538219bd484262fa09410c13539c" [[package]] name = "lock_api" -version = "0.4.11" +version = "0.4.12" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3c168f8615b12bc01f9c17e2eb0cc07dcae1940121185446edc3744920e8ef45" +checksum = "07af8b9cdd281b7915f413fa73f29ebd5d55d0d3f0155584dade1ff18cea1b17" dependencies = [ "autocfg", "scopeguard", @@ -1621,9 +2146,9 @@ dependencies = [ [[package]] name = "memchr" -version = "2.7.1" +version = "2.7.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "523dc4f511e55ab87b694dc30d0f820d60906ef06413f93d4d7a1385599cc149" +checksum = "6c8640c5d730cb13ebd907d8d04b52f55ac9a2eec55b440c8892f40d56c76c1d" [[package]] name = "memoffset" @@ -1682,13 +2207,90 @@ dependencies = [ "pin-utils", ] +[[package]] +name = "nix" +version = "0.28.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ab2156c4fce2f8df6c499cc1c763e4394b7482525bf2a9701c9d79d215f519e4" +dependencies = [ + "bitflags 2.5.0", + 
"cfg-if", + "cfg_aliases", + "libc", +] + +[[package]] +name = "num" +version = "0.4.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "35bd024e8b2ff75562e5f34e7f4905839deb4b22955ef5e73d2fea1b9813cb23" +dependencies = [ + "num-bigint", + "num-complex", + "num-integer", + "num-iter", + "num-rational", + "num-traits", +] + +[[package]] +name = "num-bigint" +version = "0.4.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c165a9ab64cf766f73521c0dd2cfdff64f488b8f0b3e621face3462d3db536d7" +dependencies = [ + "num-integer", + "num-traits", +] + +[[package]] +name = "num-complex" +version = "0.4.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "73f88a1307638156682bada9d7604135552957b7818057dcef22705b4d509495" +dependencies = [ + "num-traits", +] + +[[package]] +name = "num-integer" +version = "0.1.46" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7969661fd2958a5cb096e56c8e1ad0444ac2bbcd0061bd28660485a44879858f" +dependencies = [ + "num-traits", +] + +[[package]] +name = "num-iter" +version = "0.1.45" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1429034a0490724d0075ebb2bc9e875d6503c3cf69e235a8941aa757d83ef5bf" +dependencies = [ + "autocfg", + "num-integer", + "num-traits", +] + +[[package]] +name = "num-rational" +version = "0.4.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f83d14da390562dca69fc84082e73e548e1ad308d24accdedd2720017cb37824" +dependencies = [ + "num-bigint", + "num-integer", + "num-traits", +] + [[package]] name = "num-traits" -version = "0.2.18" +version = "0.2.19" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "da0df0e5185db44f69b44f26786fe401b6c293d1907744beaa7fa62b2e5a517a" +checksum = "071dfc062690e90b734c0b2273ce72ad0ffa95f0c74596bc250dcfd960262841" dependencies = [ "autocfg", + "libm", ] [[package]] @@ -1705,15 +2307,47 @@ 
dependencies = [ name = "number_prefix" version = "0.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "830b246a0e5f20af87141b25c173cd1b609bd7779a4617d6ec582abaf90870f3" +checksum = "830b246a0e5f20af87141b25c173cd1b609bd7779a4617d6ec582abaf90870f3" + +[[package]] +name = "objc" +version = "0.2.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "915b1b472bc21c53464d6c8461c9d3af805ba1ef837e1cac254428f4a77177b1" +dependencies = [ + "malloc_buf", +] + +[[package]] +name = "objc-sys" +version = "0.3.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "da284c198fb9b7b0603f8635185e85fbd5b64ee154b1ed406d489077de2d6d60" + +[[package]] +name = "objc2" +version = "0.5.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b4b25e1034d0e636cd84707ccdaa9f81243d399196b8a773946dcffec0401659" +dependencies = [ + "objc-sys", + "objc2-encode", +] + +[[package]] +name = "objc2-encode" +version = "4.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "88658da63e4cc2c8adb1262902cd6af51094df0488b760d6fd27194269c0950a" [[package]] -name = "objc" -version = "0.2.7" +name = "objc2-foundation" +version = "0.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "915b1b472bc21c53464d6c8461c9d3af805ba1ef837e1cac254428f4a77177b1" +checksum = "cfaefe14254871ea16c7d88968c0ff14ba554712a20d76421eec52f0a7fb8904" dependencies = [ - "malloc_buf", + "block2", + "objc2", ] [[package]] @@ -1748,9 +2382,9 @@ dependencies = [ [[package]] name = "openssl-sys" -version = "0.9.101" +version = "0.9.102" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "dda2b0f344e78efc2facf7d195d098df0dd72151b26ab98da807afc26c198dff" +checksum = "c597637d56fbc83893a35eb0dd04b2b8e7a50c91e64e9493e398b5df4fb45fa2" dependencies = [ "cc", "libc", @@ -1759,6 +2393,12 @@ dependencies = [ "vcpkg", ] +[[package]] +name = "option-ext" 
+version = "0.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "04744f49eae99ab78e0d5c0b603ab218f515ea8cfe5a456d7629ad883a3b6e7d" + [[package]] name = "os_str_bytes" version = "6.6.1" @@ -1784,12 +2424,12 @@ dependencies = [ [[package]] name = "parking_lot" -version = "0.12.1" +version = "0.12.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3742b2c103b9f06bc9fff0a37ff4912935851bee6d36f3c02bcc755bcfec228f" +checksum = "7e4af0ca4f6caed20e900d564c242b8e5d4903fdacf31d3daf527b66fe6f42fb" dependencies = [ "lock_api", - "parking_lot_core 0.9.9", + "parking_lot_core 0.9.10", ] [[package]] @@ -1808,15 +2448,15 @@ dependencies = [ [[package]] name = "parking_lot_core" -version = "0.9.9" +version = "0.9.10" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4c42a9226546d68acdd9c0a280d17ce19bfe27a46bf68784e4066115788d008e" +checksum = "1e401f977ab385c9e4e3ab30627d6f26d00e2c73eef317493c4ec6d468726cf8" dependencies = [ "cfg-if", "libc", - "redox_syscall 0.4.1", + "redox_syscall 0.5.1", "smallvec", - "windows-targets 0.48.5", + "windows-targets 0.52.5", ] [[package]] @@ -1842,14 +2482,14 @@ checksum = "2f38a4412a78282e09a2cf38d195ea5420d15ba0602cb375210efbc877243965" dependencies = [ "proc-macro2", "quote", - "syn 2.0.53", + "syn 2.0.63", ] [[package]] name = "pin-project-lite" -version = "0.2.13" +version = "0.2.14" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8afb450f006bf6385ca15ef45d71d2288452bc3683ce2e2cacc0d18e4be60b58" +checksum = "bda66fc9667c18cb2758a2ac84d1167245054bcf85d5d1aaa6923f45801bdd02" [[package]] name = "pin-utils" @@ -1859,12 +2499,12 @@ checksum = "8b870d8c151b6f2fb93e84a13146138f05d02ed11c7e7c54f8826aaaf7c9f184" [[package]] name = "piper" -version = "0.2.1" +version = "0.2.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "668d31b1c4eba19242f2088b2bf3316b82ca31082a8335764db4e083db7485d4" +checksum = 
"464db0c665917b13ebb5d453ccdec4add5658ee1adc7affc7677615356a8afaf" dependencies = [ "atomic-waker", - "fastrand 2.0.1", + "fastrand 2.1.0", "futures-io", ] @@ -1920,14 +2560,15 @@ dependencies = [ [[package]] name = "polling" -version = "3.5.0" +version = "3.7.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "24f040dee2588b4963afb4e420540439d126f73fdacf4a9c486a96d840bac3c9" +checksum = "645493cf344456ef24219d02a768cf1fb92ddf8c92161679ae3d91b91a637be3" dependencies = [ "cfg-if", "concurrent-queue", + "hermit-abi 0.3.9", "pin-project-lite", - "rustix 0.38.32", + "rustix 0.38.34", "tracing", "windows-sys 0.52.0", ] @@ -1946,12 +2587,12 @@ checksum = "5b40af805b3121feab8a3c29f04d8ad262fa8e0561883e7653e024ae4479e6de" [[package]] name = "prettyplease" -version = "0.2.16" +version = "0.2.20" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a41cf62165e97c7f814d2221421dbb9afcbcdb0a88068e5ea206e19951c2cbb5" +checksum = "5f12335488a2f3b0a83b14edad48dca9879ce89b2edd10e80237e4e852dd645e" dependencies = [ "proc-macro2", - "syn 2.0.53", + "syn 2.0.63", ] [[package]] @@ -1961,22 +2602,74 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "7f4c021e1093a56626774e81216a4ce732a735e5bad4868a03f3ed65ca0c3919" dependencies = [ "once_cell", - "toml_edit", + "toml_edit 0.19.15", +] + +[[package]] +name = "proc-macro-crate" +version = "3.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6d37c51ca738a55da99dc0c4a34860fd675453b8b36209178c2249bb13651284" +dependencies = [ + "toml_edit 0.21.1", +] + +[[package]] +name = "proc-macro-error" +version = "1.0.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "da25490ff9892aab3fcf7c36f08cfb902dd3e71ca0f9f9517bea02a73a5ce38c" +dependencies = [ + "proc-macro-error-attr", + "proc-macro2", + "quote", + "version_check", +] + +[[package]] +name = "proc-macro-error-attr" +version = "1.0.4" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "a1be40180e52ecc98ad80b184934baf3d0d29f979574e439af5a55274b35f869" +dependencies = [ + "proc-macro2", + "quote", + "version_check", ] [[package]] name = "proc-macro2" -version = "1.0.79" +version = "1.0.82" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e835ff2298f5721608eb1a980ecaee1aef2c132bf95ecc026a11b7bf3c01c02e" +checksum = "8ad3d49ab951a01fbaafe34f2ec74122942fe18a3f9814c3268f1bb72042131b" dependencies = [ "unicode-ident", ] +[[package]] +name = "ptr_meta" +version = "0.1.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0738ccf7ea06b608c10564b31debd4f5bc5e197fc8bfe088f68ae5ce81e7a4f1" +dependencies = [ + "ptr_meta_derive", +] + +[[package]] +name = "ptr_meta_derive" +version = "0.1.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "16b845dbfca988fa33db069c0e230574d15a3088f147a87b64c7589eb662c9ac" +dependencies = [ + "proc-macro2", + "quote", + "syn 1.0.109", +] + [[package]] name = "pusherator" version = "0.0.5" -source = "git+https://github.com/hydro-project/hydroflow#70d420a870a7977a444236d3912da406448cdc1b" +source = "git+https://github.com/hydro-project/hydroflow#c2f6c9578127a71c879752d52e115df75659e2b0" dependencies = [ "either", "variadics", @@ -1984,13 +2677,19 @@ dependencies = [ [[package]] name = "quote" -version = "1.0.35" +version = "1.0.36" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "291ec9ab5efd934aaf503a6466c5d5251535d108ee747472c3977cc5acc868ef" +checksum = "0fa76aaf39101c457836aec0ce2316dbdc3ab723cdda1c6bd4e6ad4208acaca7" dependencies = [ "proc-macro2", ] +[[package]] +name = "radium" +version = "0.7.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "dc33ff2d4973d518d823d61aa239014831e521c75da58e3df4840d3f47749d09" + [[package]] name = "rand" version = "0.8.5" @@ -2029,9 +2728,9 @@ checksum = 
"f2ff9a1f06a88b01621b7ae906ef0211290d1c8a168a15542486a8f61c0833b9" [[package]] name = "rayon" -version = "1.9.0" +version = "1.10.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e4963ed1bc86e4f3ee217022bd855b297cef07fb9eac5dfa1f788b220b49b3bd" +checksum = "b418a60154510ca1a002a752ca9714984e21e4241e804d32555251faf8b78ffa" dependencies = [ "either", "rayon-core", @@ -2065,11 +2764,20 @@ dependencies = [ "bitflags 1.3.2", ] +[[package]] +name = "redox_syscall" +version = "0.5.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "469052894dcb553421e483e4209ee581a45100d31b4018de03e5a7ad86374a7e" +dependencies = [ + "bitflags 2.5.0", +] + [[package]] name = "redox_users" -version = "0.4.4" +version = "0.4.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a18479200779601e498ada4e8c1e1f50e3ee19deb0259c25825a98b5603b2cb4" +checksum = "bd283d9651eeda4b2a83a43c1c91b266c40fd76ecd39a50a8c630ae69dc72891" dependencies = [ "getrandom", "libredox", @@ -2078,34 +2786,34 @@ dependencies = [ [[package]] name = "ref-cast" -version = "1.0.22" +version = "1.0.23" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c4846d4c50d1721b1a3bef8af76924eef20d5e723647333798c1b519b3a9473f" +checksum = "ccf0a6f84d5f1d581da8b41b47ec8600871962f2a528115b542b362d4b744931" dependencies = [ "ref-cast-impl", ] [[package]] name = "ref-cast-impl" -version = "1.0.22" +version = "1.0.23" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5fddb4f8d99b0a2ebafc65a87a69a7b9875e4b1ae1f00db265d300ef7f28bccc" +checksum = "bcc303e793d3734489387d205e9b186fac9c6cfacedd98cbb2e8a5943595f3e6" dependencies = [ "proc-macro2", "quote", - "syn 2.0.53", + "syn 2.0.63", ] [[package]] name = "regex" -version = "1.10.3" +version = "1.10.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b62dbe01f0b06f9d8dc7d49e05a0785f153b00b2c227856282f671e0318c9b15" 
+checksum = "c117dbdfde9c8308975b6a18d71f3f385c89461f7b3fb054288ecf2a2058ba4c" dependencies = [ "aho-corasick", "memchr", "regex-automata", - "regex-syntax 0.8.2", + "regex-syntax", ] [[package]] @@ -2116,26 +2824,58 @@ checksum = "86b83b8b9847f9bf95ef68afb0b8e6cdb80f498442f5179a29fad448fcc1eaea" dependencies = [ "aho-corasick", "memchr", - "regex-syntax 0.8.2", + "regex-syntax", ] [[package]] name = "regex-syntax" -version = "0.6.29" +version = "0.8.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f162c6dd7b008981e4d40210aca20b4bd0f9b60ca9271061b07f78537722f2e1" +checksum = "adad44e29e4c806119491a7f06f03de4d1af22c3a680dd47f1e6e179439d1f56" [[package]] -name = "regex-syntax" -version = "0.8.2" +name = "rend" +version = "0.4.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "71fe3824f5629716b1589be05dacd749f6aa084c87e00e016714a8cdfccc997c" +dependencies = [ + "bytecheck", +] + +[[package]] +name = "rkyv" +version = "0.7.44" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5cba464629b3394fc4dbc6f940ff8f5b4ff5c7aef40f29166fd4ad12acbc99c0" +dependencies = [ + "bitvec", + "bytecheck", + "bytes", + "hashbrown 0.12.3", + "ptr_meta", + "rend", + "rkyv_derive", + "seahash", + "tinyvec", + "uuid", +] + +[[package]] +name = "rkyv_derive" +version = "0.7.44" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c08c74e62047bb2de4ff487b251e4a92e24f48745648451635cec7d591162d9f" +checksum = "a7dddfff8de25e6f62b9d64e6e432bf1c6736c57d20323e15ee10435fbda7c65" +dependencies = [ + "proc-macro2", + "quote", + "syn 1.0.109", +] [[package]] name = "rust-sitter" -version = "0.3.4" +version = "0.4.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1a0f365b4eb9591dd3e685791389a932041b0dc6ccf5db1ec3d8913f67279365" +checksum = "f69b9a5d53b74db5166799a0024c2849e144c652dd6253c5bf58dfe086798cbc" dependencies = [ "rust-sitter-macro", 
"tree-sitter-c2rust", @@ -2143,9 +2883,9 @@ dependencies = [ [[package]] name = "rust-sitter-common" -version = "0.3.4" +version = "0.4.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c3c0a0b1da7317031274502b7c52cbb7cf529e7d1e1f3e23876519372b173a94" +checksum = "b559ebfd4114d398a36dfe25d7221bf84839fc3ef1309a6b7f4d1eece78dc690" dependencies = [ "quote", "syn 1.0.109", @@ -2153,9 +2893,9 @@ dependencies = [ [[package]] name = "rust-sitter-macro" -version = "0.3.4" +version = "0.4.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d25e213e40efa00713547cc0f3529694aca547cfceb0839bbc9406632e14d410" +checksum = "8238447de92f7104ddbda8b5fd38a9be055229373283ef42b774b340d8117def" dependencies = [ "proc-macro2", "quote", @@ -2165,9 +2905,9 @@ dependencies = [ [[package]] name = "rust-sitter-tool" -version = "0.3.4" +version = "0.4.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "803c6596476a188a4dd18106eb927a926a202e00077cdaa5648dd620262af158" +checksum = "b840052f42d08fb67d13f68b72f1c41f99865d83239f4edff8fa1c6fd6fa0a12" dependencies = [ "cc", "rust-sitter-common", @@ -2180,11 +2920,27 @@ dependencies = [ "tree-sitter-cli", ] +[[package]] +name = "rust_decimal" +version = "1.35.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1790d1c4c0ca81211399e0e0af16333276f375209e71a37b67698a373db5b47a" +dependencies = [ + "arrayvec", + "borsh", + "bytes", + "num-traits", + "rand", + "rkyv", + "serde", + "serde_json", +] + [[package]] name = "rustc-demangle" -version = "0.1.23" +version = "0.1.24" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d626bb9dae77e28219937af045c257c28bfd3f69333c512553507f5f9798cb76" +checksum = "719b953e2095829ee67db738b3bfa9fa368c94900df327b3f07fe6e794d2fe1f" [[package]] name = "rustc-hash" @@ -2208,9 +2964,9 @@ dependencies = [ [[package]] name = "rustix" -version = "0.38.32" +version = "0.38.34" source = 
"registry+https://github.com/rust-lang/crates.io-index" -checksum = "65e04861e65f21776e67888bfbea442b3642beaa0138fdb1dd7a84a52dffdb89" +checksum = "70dc5ec042f7a43c4a73241207cecc9873a06d45debb38b329f8541d85c2730f" dependencies = [ "bitflags 2.5.0", "errno", @@ -2219,11 +2975,17 @@ dependencies = [ "windows-sys 0.52.0", ] +[[package]] +name = "rustversion" +version = "1.0.16" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "092474d1a01ea8278f69e6a358998405fae5b8b963ddaeb2b0b04a128bf1dfb0" + [[package]] name = "ryu" -version = "1.0.17" +version = "1.0.18" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e86697c916019a8588c99b5fac3cead74ec0b4b819707a682fd4d23fa0ce1ba1" +checksum = "f3cb5ba0dc43242ce17de99c180e96db90b235b8a9fdc9543c96d2209116bd9f" [[package]] name = "same-file" @@ -2240,6 +3002,12 @@ version = "1.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "94143f37725109f92c262ed2cf5e59bce7498c01bcc1502d7b9afe439a4e9f49" +[[package]] +name = "seahash" +version = "4.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1c107b6f4780854c8b126e228ea8869f4d7b71260f962fefb57b996b8959ba6b" + [[package]] name = "sealed" version = "0.5.0" @@ -2249,45 +3017,45 @@ dependencies = [ "heck 0.4.1", "proc-macro2", "quote", - "syn 2.0.53", + "syn 2.0.63", ] [[package]] name = "semver" -version = "1.0.22" +version = "1.0.23" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "92d43fe69e652f3df9bdc2b85b2854a0825b86e4fb76bc44d945137d053639ca" +checksum = "61697e0a1c7e512e84a621326239844a24d8207b4669b41bc18b32ea5cbf988b" dependencies = [ "serde", ] [[package]] name = "serde" -version = "1.0.197" +version = "1.0.201" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3fb1c873e1b9b056a4dc4c0c198b24c3ffa059243875552b2bd0933b1aee4ce2" +checksum = "780f1cebed1629e4753a1a38a3c72d30b97ec044f0aef68cb26650a3c5cf363c" 
dependencies = [ "serde_derive", ] [[package]] name = "serde_derive" -version = "1.0.197" +version = "1.0.201" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7eb0b34b42edc17f6b7cac84a52a1c5f0e1bb2227e997ca9011ea3dd34e8610b" +checksum = "c5e405930b9796f1c00bee880d03fc7e0bb4b9a11afc776885ffe84320da2865" dependencies = [ "proc-macro2", "quote", - "syn 2.0.53", + "syn 2.0.63", ] [[package]] name = "serde_json" -version = "1.0.114" +version = "1.0.117" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c5f09b1bd632ef549eaa9f60a1f8de742bdbc698e6cee2095fc84dde5f549ae0" +checksum = "455182ea6142b14f93f4bc5320a2b31c1f266b66a4a5c858b013302a5d8cbfc3" dependencies = [ - "indexmap 2.2.5", + "indexmap 2.2.6", "itoa", "ryu", "serde", @@ -2325,13 +3093,19 @@ checksum = "45bb67a18fa91266cc7807181f62f9178a6873bfad7dc788c42e6430db40184f" [[package]] name = "signal-hook-registry" -version = "1.4.1" +version = "1.4.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d8229b473baa5980ac72ef434c4415e70c4b5e71b423043adb4ba059f89c99a1" +checksum = "a9e9e0b4211b72e7b8b6e85c807d36c212bdb33ea8587f7569562a84df5465b1" dependencies = [ "libc", ] +[[package]] +name = "simdutf8" +version = "0.1.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f27f6278552951f1f2b8cf9da965d10969b2efdea95a6ec47987ab46edfe263a" + [[package]] name = "slab" version = "0.4.9" @@ -2375,9 +3149,9 @@ dependencies = [ [[package]] name = "socket2" -version = "0.5.6" +version = "0.5.7" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "05ffd9c0a93b7543e062e759284fcf5f5e3b098501104bfbdde4d404db792871" +checksum = "ce305eb0b4296696835b71df73eb912e0f1ffd2556a501fcede6e0c50349191c" dependencies = [ "libc", "windows-sys 0.52.0", @@ -2397,52 +3171,93 @@ dependencies = [ [[package]] name = "stageleft" -version = "0.2.0" -source = 
"git+https://github.com/hydro-project/hydroflow#70d420a870a7977a444236d3912da406448cdc1b" +version = "0.2.1" +source = "git+https://github.com/hydro-project/hydroflow#c2f6c9578127a71c879752d52e115df75659e2b0" dependencies = [ - "proc-macro-crate", + "proc-macro-crate 1.3.1", "proc-macro2", "quote", "stageleft_macro", - "syn 2.0.53", + "syn 2.0.63", ] [[package]] name = "stageleft_macro" -version = "0.1.0" -source = "git+https://github.com/hydro-project/hydroflow#70d420a870a7977a444236d3912da406448cdc1b" +version = "0.1.1" +source = "git+https://github.com/hydro-project/hydroflow#c2f6c9578127a71c879752d52e115df75659e2b0" dependencies = [ "lazy_static", - "proc-macro-crate", + "proc-macro-crate 1.3.1", "proc-macro2", "quote", "sha256", - "syn 2.0.53", + "syn 2.0.63", ] [[package]] name = "stageleft_tool" -version = "0.1.0" -source = "git+https://github.com/hydro-project/hydroflow#70d420a870a7977a444236d3912da406448cdc1b" +version = "0.1.1" +source = "git+https://github.com/hydro-project/hydroflow#c2f6c9578127a71c879752d52e115df75659e2b0" dependencies = [ "proc-macro2", "quote", "sha256", - "syn 2.0.53", + "syn 2.0.63", "syn-inline-mod 0.6.0", ] [[package]] -name = "strsim" -version = "0.8.0" +name = "static_assertions" +version = "1.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8ea5119cdb4c55b55d432abb513a0429384878c15dde60cc77b1c99de1a95a6a" +checksum = "a2eb9349b6444b326872e140eb1cf5e7c522154d69e7a0ffb0fb81c06b37543f" [[package]] name = "strsim" -version = "0.11.0" +version = "0.11.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7da8b5736845d9f2fcb837ea5d9e2628564b3b043a70948a3f0b778838c5fb4f" + +[[package]] +name = "strum" +version = "0.25.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "290d54ea6f91c969195bdbcd7442c8c2a2ba87da8bf60a7ee86a235d4bc1e125" +dependencies = [ + "strum_macros 0.25.3", +] + +[[package]] +name = "strum" +version = "0.26.2" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "5d8cec3501a5194c432b2b7976db6b7d10ec95c253208b45f83f7136aa985e29" + +[[package]] +name = "strum_macros" +version = "0.25.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "23dc1fa9ac9c169a78ba62f0b841814b7abae11bdd047b9c58f893439e309ea0" +dependencies = [ + "heck 0.4.1", + "proc-macro2", + "quote", + "rustversion", + "syn 2.0.63", +] + +[[package]] +name = "strum_macros" +version = "0.26.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5ee073c9e4cd00e28217186dbe12796d692868f432bf2e97ee73bed0c56dfa01" +checksum = "c6cf59daf282c0a494ba14fd21610a0325f9f90ec9d1231dea26bcb1d696c946" +dependencies = [ + "heck 0.4.1", + "proc-macro2", + "quote", + "rustversion", + "syn 2.0.63", +] [[package]] name = "syn" @@ -2457,9 +3272,9 @@ dependencies = [ [[package]] name = "syn" -version = "2.0.53" +version = "2.0.63" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7383cd0e49fff4b6b90ca5670bfd3e9d6a733b3f90c686605aa7eec8c4996032" +checksum = "bf5be731623ca1a1fb7d8be6f261a3be6d3e2337b8a1f97be944d020c8fcb704" dependencies = [ "proc-macro2", "quote", @@ -2483,28 +3298,48 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "2fa6dca1fdb7b2ed46dd534a326725419d4fb10f23d8c85a8b2860e5eb25d0f9" dependencies = [ "proc-macro2", - "syn 2.0.53", + "syn 2.0.63", ] [[package]] -name = "tempfile" -version = "3.10.1" +name = "syn_derive" +version = "0.1.8" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "85b77fafb263dd9d05cbeac119526425676db3784113aa9295c88498cbf8bff1" +checksum = "1329189c02ff984e9736652b1631330da25eaa6bc639089ed4915d25446cbe7b" dependencies = [ - "cfg-if", - "fastrand 2.0.1", - "rustix 0.38.32", - "windows-sys 0.52.0", + "proc-macro-error", + "proc-macro2", + "quote", + "syn 2.0.63", ] [[package]] -name = "textwrap" -version = "0.11.0" +name = "tap" +version = "1.0.1" 
source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d326610f408c7a4eb6f51c37c330e496b08506c9457c9d34287ecc38809fb060" +checksum = "55937e1799185b12863d447f42597ed69d9928686b8d88a1df17376a097d8369" + +[[package]] +name = "tar" +version = "0.4.40" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b16afcea1f22891c49a00c751c7b63b2233284064f11a200fc624137c51e2ddb" dependencies = [ - "unicode-width", + "filetime", + "libc", + "xattr", +] + +[[package]] +name = "tempfile" +version = "3.10.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "85b77fafb263dd9d05cbeac119526425676db3784113aa9295c88498cbf8bff1" +dependencies = [ + "cfg-if", + "fastrand 2.1.0", + "rustix 0.38.34", + "windows-sys 0.52.0", ] [[package]] @@ -2515,22 +3350,31 @@ checksum = "23d434d3f8967a09480fb04132ebe0a3e088c173e6d0ee7897abbdf4eab0f8b9" [[package]] name = "thiserror" -version = "1.0.58" +version = "1.0.60" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "03468839009160513471e86a034bb2c5c0e4baae3b43f79ffc55c4a5427b3297" +checksum = "579e9083ca58dd9dcf91a9923bb9054071b9ebbd800b342194c9feb0ee89fc18" dependencies = [ "thiserror-impl", ] [[package]] name = "thiserror-impl" -version = "1.0.58" +version = "1.0.60" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c61f3ba182994efc43764a46c018c347bc492c79f024e705f46567b418f6d4f7" +checksum = "e2470041c06ec3ac1ab38d0356a6119054dedaea53e12fbefc0de730a1c08524" dependencies = [ "proc-macro2", "quote", - "syn 2.0.53", + "syn 2.0.63", +] + +[[package]] +name = "tiny-keccak" +version = "2.0.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2c9d3793400a45f954c52e73d068316d76b6f4e36977e3fcebb13a2721e80237" +dependencies = [ + "crunchy", ] [[package]] @@ -2572,19 +3416,19 @@ checksum = "1f3ccbac311fea05f86f61904b462b55fb3df8837a366dfc601a0161d0532f20" [[package]] name = "tokio" -version = 
"1.36.0" +version = "1.37.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "61285f6515fa018fb2d1e46eb21223fff441ee8db5d0f1435e8ab4f5cdb80931" +checksum = "1adbebffeca75fcfd058afa480fb6c0b81e165a0323f9c9d39c9697e37c46787" dependencies = [ "backtrace", "bytes", "libc", "mio", "num_cpus", - "parking_lot 0.12.1", + "parking_lot 0.12.2", "pin-project-lite", "signal-hook-registry", - "socket2 0.5.6", + "socket2 0.5.7", "tokio-macros", "windows-sys 0.48.0", ] @@ -2597,7 +3441,7 @@ checksum = "5b8a1e28f2deaa14e508979454cb3a223b10b938b45af148bc0986de36f1923b" dependencies = [ "proc-macro2", "quote", - "syn 2.0.53", + "syn 2.0.63", ] [[package]] @@ -2614,9 +3458,9 @@ dependencies = [ [[package]] name = "tokio-util" -version = "0.7.10" +version = "0.7.11" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5419f34732d9eb6ee4c3578b7989078579b7f039cbbb9ca2c4da015749371e15" +checksum = "9cf6b47b3771c49ac75ad09a6162f53ad4b8088b76ac60e8ec1455b31a189fe1" dependencies = [ "bytes", "futures-core", @@ -2624,16 +3468,6 @@ dependencies = [ "futures-sink", "pin-project-lite", "tokio", - "tracing", -] - -[[package]] -name = "toml" -version = "0.5.11" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f4f7f0dd8d50a853a531c426359045b1998f04219d88799810762cd4ad314234" -dependencies = [ - "serde", ] [[package]] @@ -2648,7 +3482,18 @@ version = "0.19.15" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "1b5bb770da30e5cbfde35a2d7b9b8a2c4b8ef89548a7a6aeab5c9a576e3e7421" dependencies = [ - "indexmap 2.2.5", + "indexmap 2.2.6", + "toml_datetime", + "winnow", +] + +[[package]] +name = "toml_edit" +version = "0.21.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6a8534fd7f78b5405e860340ad6575217ce99f38d4d5c8f2442cb5ecb50090e1" +dependencies = [ + "indexmap 2.2.6", "toml_datetime", "winnow", ] @@ -2672,7 +3517,7 @@ checksum = 
"34704c8d6ebcbc939824180af020566b01a7c01f80641264eba0999f6c2b6be7" dependencies = [ "proc-macro2", "quote", - "syn 2.0.53", + "syn 2.0.63", ] [[package]] @@ -2686,9 +3531,9 @@ dependencies = [ [[package]] name = "tree-sitter" -version = "0.20.10" +version = "0.22.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e747b1f9b7b931ed39a548c1fae149101497de3c1fc8d9e18c62c1a66c683d3d" +checksum = "df7cc499ceadd4dcdf7ec6d4cbc34ece92c3fa07821e287aedecd4416c516dca" dependencies = [ "cc", "regex", @@ -2696,9 +3541,9 @@ dependencies = [ [[package]] name = "tree-sitter-c2rust" -version = "0.20.10" +version = "0.22.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ee40a4d9cf5a30c199935f346887588239daceae4d1418d81b789276fffb8d91" +checksum = "7b6691846f063193072c546a0c1722d3ccce95ecf8adbb31ab371553e250629a" dependencies = [ "c2rust-bitfields", "once_cell", @@ -2707,45 +3552,50 @@ dependencies = [ [[package]] name = "tree-sitter-cli" -version = "0.20.8" +version = "0.22.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ae7e9d844d4d38e511a7b93fe8ced79f2a364c32fdea10d04546f1c8317d5a0c" +checksum = "f7437ac48e37e5014007527ed9281c00c333c9ad0731e1c8489c0eff667b99d5" dependencies = [ "ansi_term", + "anstyle", "anyhow", - "atty", - "clap 2.34.0", + "clap 4.5.4", + "ctrlc", "difference", "dirs", + "filetime", "glob", + "heck 0.5.0", "html-escape", - "indexmap 1.9.3", + "indexmap 2.2.6", + "indoc", "lazy_static", "log", + "memchr", "regex", - "regex-syntax 0.6.29", + "regex-syntax", "rustc-hash", "semver", "serde", + "serde_derive", "serde_json", "smallbitvec", "tiny_http", - "toml", "tree-sitter", "tree-sitter-config", "tree-sitter-highlight", "tree-sitter-loader", "tree-sitter-tags", "walkdir", - "webbrowser", - "which", + "wasmparser", + "webbrowser 1.0.1", ] [[package]] name = "tree-sitter-config" -version = "0.19.0" +version = "0.22.6" source = 
"registry+https://github.com/rust-lang/crates.io-index" -checksum = "f5fec4cb27f052ead2246631b332dba0cb6af9a54ce012badee59c4b0ded5e03" +checksum = "5d64b4608a1d822f56e3afcecabfa4915a768ea92bc44abad1ae32cd4c607ebd" dependencies = [ "anyhow", "dirs", @@ -2755,10 +3605,11 @@ dependencies = [ [[package]] name = "tree-sitter-highlight" -version = "0.20.1" +version = "0.22.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "042342584c5a7a0b833d9fc4e2bdab3f9868ddc6c4b339a1e01451c6720868bc" +checksum = "eaca0fe34fa96eec6aaa8e63308dbe1bafe65a6317487c287f93938959b21907" dependencies = [ + "lazy_static", "regex", "thiserror", "tree-sitter", @@ -2766,18 +3617,21 @@ dependencies = [ [[package]] name = "tree-sitter-loader" -version = "0.20.0" +version = "0.22.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d0b17eef4833c7c139abed66d562dfa23228e97e647597baf246fd56c21bbfaf" +checksum = "73c9b13749644fbe22ec25c79861dc1e637ef4ab9e469fd820fcb30b10091293" dependencies = [ "anyhow", "cc", "dirs", + "fs4", + "indoc", "libloading", "once_cell", "regex", "serde", "serde_json", + "tempfile", "tree-sitter", "tree-sitter-highlight", "tree-sitter-tags", @@ -2785,9 +3639,9 @@ dependencies = [ [[package]] name = "tree-sitter-tags" -version = "0.20.2" +version = "0.22.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ccb3f1376219530a37a809751ecf65aa35fd8b9c1c4ab6d4faf5f6a9eeda2c05" +checksum = "34380416097ab36d1b4cd83f887d9e150ea4feaeb6ee9a5ecfe53d26839acc69" dependencies = [ "memchr", "regex", @@ -2824,9 +3678,9 @@ dependencies = [ [[package]] name = "unicode-width" -version = "0.1.11" +version = "0.1.12" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e51733f11c9c4f72aa0c160008246859e340b00807569a0da0e7a1079b27ba85" +checksum = "68f5e5f3158ecfd4b8ff6fe086db7c8467a2dfdac97fe420f2b7c4aa97af66d6" [[package]] name = "url" @@ -2851,10 +3705,16 @@ version = "0.2.1" source = 
"registry+https://github.com/rust-lang/crates.io-index" checksum = "711b9620af191e0cdc7468a8d14e709c3dcdb115b36f838e601583af800a370a" +[[package]] +name = "uuid" +version = "1.8.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a183cf7feeba97b4dd1c0d46788634f6221d87fa961b305bed08c851829efcc0" + [[package]] name = "variadics" version = "0.0.4" -source = "git+https://github.com/hydro-project/hydroflow#70d420a870a7977a444236d3912da406448cdc1b" +source = "git+https://github.com/hydro-project/hydroflow#c2f6c9578127a71c879752d52e115df75659e2b0" dependencies = [ "sealed", ] @@ -2865,12 +3725,6 @@ version = "0.2.15" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "accd4ea62f7bb7a82fe23066fb0957d48ef677f6eeb8215f372f52e48bb32426" -[[package]] -name = "vec_map" -version = "0.8.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f1bddf1187be692e79c5ffeab891132dfb0f236ed36a43c7ed39f1165ee20191" - [[package]] name = "version_check" version = "0.9.4" @@ -2879,9 +3733,9 @@ checksum = "49874b5167b65d7193b8aba1567f5c7d93d001cafc34600cee003eda787e483f" [[package]] name = "waker-fn" -version = "1.1.1" +version = "1.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f3c4517f54858c779bbcbf228f4fca63d121bf85fbecb2dc578cdf4a39395690" +checksum = "317211a0dc0ceedd78fb2ca9a44aed3d7b9b26f81870d485c07122b4350673b7" [[package]] name = "walkdir" @@ -2920,7 +3774,7 @@ dependencies = [ "once_cell", "proc-macro2", "quote", - "syn 2.0.53", + "syn 2.0.63", "wasm-bindgen-shared", ] @@ -2942,7 +3796,7 @@ checksum = "e94f17b526d0a461a191c78ea52bbce64071ed5c04c9ffe424dcb38f74171bb7" dependencies = [ "proc-macro2", "quote", - "syn 2.0.53", + "syn 2.0.63", "wasm-bindgen-backend", "wasm-bindgen-shared", ] @@ -2953,6 +3807,19 @@ version = "0.2.92" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = 
"af190c94f2773fdb3729c55b007a722abb5384da03bc0986df4c289bf5567e96" +[[package]] +name = "wasmparser" +version = "0.206.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "39192edb55d55b41963db40fd49b0b542156f04447b5b512744a91d38567bdbc" +dependencies = [ + "ahash 0.8.11", + "bitflags 2.5.0", + "hashbrown 0.14.5", + "indexmap 2.2.6", + "semver", +] + [[package]] name = "web-sys" version = "0.3.69" @@ -2965,9 +3832,9 @@ dependencies = [ [[package]] name = "webbrowser" -version = "0.8.13" +version = "0.8.15" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d1b04c569c83a9bb971dd47ec6fd48753315f4bf989b9b04a2e7ca4d7f0dc950" +checksum = "db67ae75a9405634f5882791678772c94ff5f16a66535aae186e26aa0841fc8b" dependencies = [ "core-foundation", "home", @@ -2981,15 +3848,21 @@ dependencies = [ ] [[package]] -name = "which" -version = "4.4.2" +name = "webbrowser" +version = "1.0.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "87ba24419a2078cd2b0f2ede2691b6c66d8e47836da3b6db8265ebad47afbfc7" +checksum = "425ba64c1e13b1c6e8c5d2541c8fac10022ca584f33da781db01b5756aef1f4e" dependencies = [ - "either", + "block2", + "core-foundation", "home", - "once_cell", - "rustix 0.38.32", + "jni", + "log", + "ndk-context", + "objc2", + "objc2-foundation", + "url", + "web-sys", ] [[package]] @@ -3010,11 +3883,11 @@ checksum = "ac3b87c63620426dd9b991e5ce0329eff545bccbbb34f3be09ff6fb6ab51b7b6" [[package]] name = "winapi-util" -version = "0.1.6" +version = "0.1.8" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f29e6f9198ba0d26b4c9f07dbe6f9ed633e1f3d5b8b414090084349e46a52596" +checksum = "4d4cc384e1e73b93bafa6fb4f1df8c41695c8a91cf9c4c64358067d15a7b6c6b" dependencies = [ - "winapi", + "windows-sys 0.52.0", ] [[package]] @@ -3029,7 +3902,7 @@ version = "0.52.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = 
"33ab640c8d7e35bf8ba19b884ba838ceb4fba93a4e8c65a9059d08afcfc683d9" dependencies = [ - "windows-targets 0.52.4", + "windows-targets 0.52.5", ] [[package]] @@ -3056,7 +3929,7 @@ version = "0.52.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "282be5f36a8ce781fad8c8ae18fa3f9beff57ec1b52cb3de0789201425d9a33d" dependencies = [ - "windows-targets 0.52.4", + "windows-targets 0.52.5", ] [[package]] @@ -3091,17 +3964,18 @@ dependencies = [ [[package]] name = "windows-targets" -version = "0.52.4" +version = "0.52.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7dd37b7e5ab9018759f893a1952c9420d060016fc19a472b4bb20d1bdd694d1b" +checksum = "6f0713a46559409d202e70e28227288446bf7841d3211583a4b53e3f6d96e7eb" dependencies = [ - "windows_aarch64_gnullvm 0.52.4", - "windows_aarch64_msvc 0.52.4", - "windows_i686_gnu 0.52.4", - "windows_i686_msvc 0.52.4", - "windows_x86_64_gnu 0.52.4", - "windows_x86_64_gnullvm 0.52.4", - "windows_x86_64_msvc 0.52.4", + "windows_aarch64_gnullvm 0.52.5", + "windows_aarch64_msvc 0.52.5", + "windows_i686_gnu 0.52.5", + "windows_i686_gnullvm", + "windows_i686_msvc 0.52.5", + "windows_x86_64_gnu 0.52.5", + "windows_x86_64_gnullvm 0.52.5", + "windows_x86_64_msvc 0.52.5", ] [[package]] @@ -3118,9 +3992,9 @@ checksum = "2b38e32f0abccf9987a4e3079dfb67dcd799fb61361e53e2882c3cbaf0d905d8" [[package]] name = "windows_aarch64_gnullvm" -version = "0.52.4" +version = "0.52.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bcf46cf4c365c6f2d1cc93ce535f2c8b244591df96ceee75d8e83deb70a9cac9" +checksum = "7088eed71e8b8dda258ecc8bac5fb1153c5cffaf2578fc8ff5d61e23578d3263" [[package]] name = "windows_aarch64_msvc" @@ -3136,9 +4010,9 @@ checksum = "dc35310971f3b2dbbf3f0690a219f40e2d9afcf64f9ab7cc1be722937c26b4bc" [[package]] name = "windows_aarch64_msvc" -version = "0.52.4" +version = "0.52.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"da9f259dd3bcf6990b55bffd094c4f7235817ba4ceebde8e6d11cd0c5633b675" +checksum = "9985fd1504e250c615ca5f281c3f7a6da76213ebd5ccc9561496568a2752afb6" [[package]] name = "windows_i686_gnu" @@ -3154,9 +4028,15 @@ checksum = "a75915e7def60c94dcef72200b9a8e58e5091744960da64ec734a6c6e9b3743e" [[package]] name = "windows_i686_gnu" -version = "0.52.4" +version = "0.52.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "88ba073cf16d5372720ec942a8ccbf61626074c6d4dd2e745299726ce8b89670" + +[[package]] +name = "windows_i686_gnullvm" +version = "0.52.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b474d8268f99e0995f25b9f095bc7434632601028cf86590aea5c8a5cb7801d3" +checksum = "87f4261229030a858f36b459e748ae97545d6f1ec60e5e0d6a3d32e0dc232ee9" [[package]] name = "windows_i686_msvc" @@ -3172,9 +4052,9 @@ checksum = "8f55c233f70c4b27f66c523580f78f1004e8b5a8b659e05a4eb49d4166cca406" [[package]] name = "windows_i686_msvc" -version = "0.52.4" +version = "0.52.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1515e9a29e5bed743cb4415a9ecf5dfca648ce85ee42e15873c3cd8610ff8e02" +checksum = "db3c2bf3d13d5b658be73463284eaf12830ac9a26a90c717b7f771dfe97487bf" [[package]] name = "windows_x86_64_gnu" @@ -3190,9 +4070,9 @@ checksum = "53d40abd2583d23e4718fddf1ebec84dbff8381c07cae67ff7768bbf19c6718e" [[package]] name = "windows_x86_64_gnu" -version = "0.52.4" +version = "0.52.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5eee091590e89cc02ad514ffe3ead9eb6b660aedca2183455434b93546371a03" +checksum = "4e4246f76bdeff09eb48875a0fd3e2af6aada79d409d33011886d3e1581517d9" [[package]] name = "windows_x86_64_gnullvm" @@ -3208,9 +4088,9 @@ checksum = "0b7b52767868a23d5bab768e390dc5f5c55825b6d30b86c844ff2dc7414044cc" [[package]] name = "windows_x86_64_gnullvm" -version = "0.52.4" +version = "0.52.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"77ca79f2451b49fa9e2af39f0747fe999fcda4f5e241b2898624dca97a1f2177" +checksum = "852298e482cd67c356ddd9570386e2862b5673c85bd5f88df9ab6802b334c596" [[package]] name = "windows_x86_64_msvc" @@ -3226,9 +4106,9 @@ checksum = "ed94fce61571a4006852b7389a063ab983c02eb1bb37b47f8272ce92d06d9538" [[package]] name = "windows_x86_64_msvc" -version = "0.52.4" +version = "0.52.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "32b752e52a2da0ddfbdbcc6fceadfeede4c939ed16d13e648833a61dfb611ed8" +checksum = "bec47e5bfd1bff0eeaf6d8b485cc1074891a197ab4225d504cb7a1ab88b02bf0" [[package]] name = "winnow" @@ -3238,3 +4118,43 @@ checksum = "f593a95398737aeed53e489c785df13f3618e41dbcd6718c6addbf1395aa6876" dependencies = [ "memchr", ] + +[[package]] +name = "wyz" +version = "0.5.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "05f360fc0b24296329c78fda852a1e9ae82de9cf7b27dae4b7f62f118f77b9ed" +dependencies = [ + "tap", +] + +[[package]] +name = "xattr" +version = "1.3.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8da84f1a25939b27f6820d92aed108f83ff920fdf11a7b19366c27c4cda81d4f" +dependencies = [ + "libc", + "linux-raw-sys 0.4.13", + "rustix 0.38.34", +] + +[[package]] +name = "zerocopy" +version = "0.7.34" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ae87e3fcd617500e5d106f0380cf7b77f3c6092aae37191433159dda23cfb087" +dependencies = [ + "zerocopy-derive", +] + +[[package]] +name = "zerocopy-derive" +version = "0.7.34" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "15e934569e47891f7d9411f1a451d947a60e000ab3bd24fbb970f000387d1b3b" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.63", +] diff --git a/analytics_plans.ipynb b/analytics_plans.ipynb new file mode 100644 index 0000000..18de141 --- /dev/null +++ b/analytics_plans.ipynb @@ -0,0 +1,916 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 1, + "metadata": 
{}, + "outputs": [], + "source": [ + "import duckdb\n", + "import pandas as pd\n", + "\n", + "%load_ext sql\n", + "# Connect to an in-memory database\n", + "conn = duckdb.connect()\n", + "%sql conn --alias duckdb" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "Running query in 'duckdb'" + ], + "text/plain": [ + "Running query in 'duckdb'" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "Running query in 'duckdb'" + ], + "text/plain": [ + "Running query in 'duckdb'" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "Running query in 'duckdb'" + ], + "text/plain": [ + "Running query in 'duckdb'" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
Success
" + ], + "text/plain": [ + "+---------+\n", + "| Success |\n", + "+---------+\n", + "+---------+" + ] + }, + "execution_count": 2, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# Initialize TPCH extension (it should come with the duckdb installation)\n", + "%sql CREATE OR REPLACE SCHEMA SF_1;\n", + "%sql USE SF_1;\n", + "%sql CALL dbgen(sf =1)" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "Running query in 'duckdb'" + ], + "text/plain": [ + "Running query in 'duckdb'" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
name
customer
lineitem
nation
orders
part
partsupp
region
supplier
" + ], + "text/plain": [ + "+----------+\n", + "| name |\n", + "+----------+\n", + "| customer |\n", + "| lineitem |\n", + "| nation |\n", + "| orders |\n", + "| part |\n", + "| partsupp |\n", + "| region |\n", + "| supplier |\n", + "+----------+" + ] + }, + "execution_count": 3, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "%sql show tables;" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "SELECT\n", + " sum(l_extendedprice * (1 - l_discount)) AS revenue\n", + "FROM\n", + " lineitem,\n", + " part\n", + "WHERE (p_partkey = l_partkey\n", + " AND p_brand = 'Brand#12'\n", + " AND p_container IN ('SM CASE', 'SM BOX', 'SM PACK', 'SM PKG')\n", + " AND l_quantity >= 1\n", + " AND l_quantity <= 1 + 10\n", + " AND p_size BETWEEN 1 AND 5\n", + " AND l_shipmode IN ('AIR', 'AIR REG')\n", + " AND l_shipinstruct = 'DELIVER IN PERSON')\n", + " OR (p_partkey = l_partkey\n", + " AND p_brand = 'Brand#23'\n", + " AND p_container IN ('MED BAG', 'MED BOX', 'MED PKG', 'MED PACK')\n", + " AND l_quantity >= 10\n", + " AND l_quantity <= 10 + 10\n", + " AND p_size BETWEEN 1 AND 10\n", + " AND l_shipmode IN ('AIR', 'AIR REG')\n", + " AND l_shipinstruct = 'DELIVER IN PERSON')\n", + " OR (p_partkey = l_partkey\n", + " AND p_brand = 'Brand#34'\n", + " AND p_container IN ('LG CASE', 'LG BOX', 'LG PACK', 'LG PKG')\n", + " AND l_quantity >= 20\n", + " AND l_quantity <= 20 + 10\n", + " AND p_size BETWEEN 1 AND 15\n", + " AND l_shipmode IN ('AIR', 'AIR REG')\n", + " AND l_shipinstruct = 'DELIVER IN PERSON');\n", + "\n" + ] + } + ], + "source": [ + "#%sql FROM tpch_queries();\n", + "res = conn.sql('FROM tpch_queries()').df()\n", + "query = res.iloc[18]['query']\n", + "print(query)" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + 
" \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
column_namecolumn_typenullkeydefaultextra
l_orderkeyBIGINTNONoneNoneNone
l_partkeyBIGINTNONoneNoneNone
l_suppkeyBIGINTNONoneNoneNone
l_linenumberBIGINTNONoneNoneNone
l_quantityDECIMAL(15,2)NONoneNoneNone
l_extendedpriceDECIMAL(15,2)NONoneNoneNone
l_discountDECIMAL(15,2)NONoneNoneNone
l_taxDECIMAL(15,2)NONoneNoneNone
l_returnflagVARCHARNONoneNoneNone
l_linestatusVARCHARNONoneNoneNone
l_shipdateDATENONoneNoneNone
l_commitdateDATENONoneNoneNone
l_receiptdateDATENONoneNoneNone
l_shipinstructVARCHARNONoneNoneNone
l_shipmodeVARCHARNONoneNoneNone
l_commentVARCHARNONoneNoneNone
" + ], + "text/plain": [ + "+-----------------+---------------+------+------+---------+-------+\n", + "| column_name | column_type | null | key | default | extra |\n", + "+-----------------+---------------+------+------+---------+-------+\n", + "| l_orderkey | BIGINT | NO | None | None | None |\n", + "| l_partkey | BIGINT | NO | None | None | None |\n", + "| l_suppkey | BIGINT | NO | None | None | None |\n", + "| l_linenumber | BIGINT | NO | None | None | None |\n", + "| l_quantity | DECIMAL(15,2) | NO | None | None | None |\n", + "| l_extendedprice | DECIMAL(15,2) | NO | None | None | None |\n", + "| l_discount | DECIMAL(15,2) | NO | None | None | None |\n", + "| l_tax | DECIMAL(15,2) | NO | None | None | None |\n", + "| l_returnflag | VARCHAR | NO | None | None | None |\n", + "| l_linestatus | VARCHAR | NO | None | None | None |\n", + "| l_shipdate | DATE | NO | None | None | None |\n", + "| l_commitdate | DATE | NO | None | None | None |\n", + "| l_receiptdate | DATE | NO | None | None | None |\n", + "| l_shipinstruct | VARCHAR | NO | None | None | None |\n", + "| l_shipmode | VARCHAR | NO | None | None | None |\n", + "| l_comment | VARCHAR | NO | None | None | None |\n", + "+-----------------+---------------+------+------+---------+-------+" + ] + }, + "execution_count": 9, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "%sql DESCRIBE lineitem;" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " 
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
column_namecolumn_typenullkeydefaultextra
o_orderkeyBIGINTNONoneNoneNone
o_custkeyBIGINTNONoneNoneNone
o_orderstatusVARCHARNONoneNoneNone
o_totalpriceDECIMAL(15,2)NONoneNoneNone
o_orderdateDATENONoneNoneNone
o_orderpriorityVARCHARNONoneNoneNone
o_clerkVARCHARNONoneNoneNone
o_shippriorityINTEGERNONoneNoneNone
o_commentVARCHARNONoneNoneNone
" + ], + "text/plain": [ + "+-----------------+---------------+------+------+---------+-------+\n", + "| column_name | column_type | null | key | default | extra |\n", + "+-----------------+---------------+------+------+---------+-------+\n", + "| o_orderkey | BIGINT | NO | None | None | None |\n", + "| o_custkey | BIGINT | NO | None | None | None |\n", + "| o_orderstatus | VARCHAR | NO | None | None | None |\n", + "| o_totalprice | DECIMAL(15,2) | NO | None | None | None |\n", + "| o_orderdate | DATE | NO | None | None | None |\n", + "| o_orderpriority | VARCHAR | NO | None | None | None |\n", + "| o_clerk | VARCHAR | NO | None | None | None |\n", + "| o_shippriority | INTEGER | NO | None | None | None |\n", + "| o_comment | VARCHAR | NO | None | None | None |\n", + "+-----------------+---------------+------+------+---------+-------+" + ] + }, + "execution_count": 11, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "%sql DESCRIBE orders;" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "Running query in 'duckdb'" + ], + "text/plain": [ + "Running query in 'duckdb'" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", 
+ " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
column_namecolumn_typenullkeydefaultextra
p_partkeyBIGINTNONoneNoneNone
p_nameVARCHARNONoneNoneNone
p_mfgrVARCHARNONoneNoneNone
p_brandVARCHARNONoneNoneNone
p_typeVARCHARNONoneNoneNone
p_sizeINTEGERNONoneNoneNone
p_containerVARCHARNONoneNoneNone
p_retailpriceDECIMAL(15,2)NONoneNoneNone
p_commentVARCHARNONoneNoneNone
" + ], + "text/plain": [ + "+---------------+---------------+------+------+---------+-------+\n", + "| column_name | column_type | null | key | default | extra |\n", + "+---------------+---------------+------+------+---------+-------+\n", + "| p_partkey | BIGINT | NO | None | None | None |\n", + "| p_name | VARCHAR | NO | None | None | None |\n", + "| p_mfgr | VARCHAR | NO | None | None | None |\n", + "| p_brand | VARCHAR | NO | None | None | None |\n", + "| p_type | VARCHAR | NO | None | None | None |\n", + "| p_size | INTEGER | NO | None | None | None |\n", + "| p_container | VARCHAR | NO | None | None | None |\n", + "| p_retailprice | DECIMAL(15,2) | NO | None | None | None |\n", + "| p_comment | VARCHAR | NO | None | None | None |\n", + "+---------------+---------------+------+------+---------+-------+" + ] + }, + "execution_count": 5, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "%sql DESCRIBE part;" + ] + }, + { + "cell_type": "code", + "execution_count": 15, + "metadata": {}, + "outputs": [], + "source": [ + "# Configure cell to output plain text\n", + "%config SqlMagic.displaycon = True\n", + "%config SqlMagic.displaylimit = 100" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "Running query in 'duckdb'" + ], + "text/plain": [ + "Running query in 'duckdb'" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
date
1992-03-22
" + ], + "text/plain": [ + "+------------+\n", + "| date |\n", + "+------------+\n", + "| 1992-03-22 |\n", + "+------------+" + ] + }, + "execution_count": 4, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "%sql SELECT DATE '1992-03-22' as date;" + ] + }, + { + "cell_type": "code", + "execution_count": 17, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "Running query in 'duckdb'" + ], + "text/plain": [ + "Running query in 'duckdb'" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
explain_keyexplain_value
physical_plan┌───────────────────────────┐
│ PROJECTION │
│ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ │
│__internal_decompress_strin│
│ g(#0) │
│ #1 │
└─────────────┬─────────────┘
┌─────────────┴─────────────┐
│ ORDER_BY │
│ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ │
│ ORDERS: │
│ orders.o_orderpriority ASC│
└─────────────┬─────────────┘
┌─────────────┴─────────────┐
│ PROJECTION │
│ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ │
│__internal_compress_string_│
│ hugeint(#0) │
│ #1 │
└─────────────┬─────────────┘
┌─────────────┴─────────────┐
│ PROJECTION │
│ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ │
│__internal_decompress_strin│
│ g(#0) │
│ #1 │
└─────────────┬─────────────┘
┌─────────────┴─────────────┐
│ HASH_GROUP_BY │
│ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ │
│ #0 │
│ count_star() │
└─────────────┬─────────────┘
┌─────────────┴─────────────┐
│ PROJECTION │
│ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ │
│ o_orderpriority │
└─────────────┬─────────────┘
┌─────────────┴─────────────┐
│ PROJECTION │
│ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ │
│__internal_compress_integra│
│ l_uinteger(#0, 1) │
│__internal_compress_string_│
│ hugeint(#1) │
└─────────────┬─────────────┘
┌─────────────┴─────────────┐
│ RIGHT_DELIM_JOIN │
│ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ │
│ RIGHT_SEMI │
│ o_orderkey IS NOT DISTINCT├──────────────┐
│ FROM o_orderkey │ │
│ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ │ │
│ EC: 1200243 │ │
└─────────────┬─────────────┘ │
┌─────────────┴─────────────┐┌─────────────┴─────────────┐
│ SEQ_SCAN ││ HASH_JOIN │
│ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ││ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ │
│ orders ││ RIGHT_SEMI │
│ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ││ o_orderkey IS NOT DISTINCT│
│ o_orderkey ││ FROM o_orderkey │
│ o_orderpriority ││ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ │
│ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ││ EC: 1200243 ├───────────────────────────────────────────┐
│Filters: o_orderdate>='1993││ │ │
│ -07-01'::DATE AND ││ │ │
│ o_orderdate<'1993-10-... ││ │ │
│ o_orderdate IS NOT NULL ││ │ │
│ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ││ │ │
│ EC: 300000 ││ │ │
└───────────────────────────┘└─────────────┬─────────────┘ │
┌─────────────┴─────────────┐ ┌─────────────┴─────────────┐
│ PROJECTION │ │ DUMMY_SCAN │
│ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ │ │ │
│ o_orderkey │ │ │
└─────────────┬─────────────┘ └───────────────────────────┘
┌─────────────┴─────────────┐
│ HASH_JOIN │
│ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ │
│ INNER │
│ l_orderkey = o_orderkey ├──────────────┐
│ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ │ │
│ EC: 1200243 │ │
└─────────────┬─────────────┘ │
┌─────────────┴─────────────┐┌─────────────┴─────────────┐
│ FILTER ││ DELIM_SCAN │
│ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ││ │
│ (l_commitdate < ││ │
│ l_receiptdate) ││ │
│ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ││ │
│ EC: 6001215 ││ │
└─────────────┬─────────────┘└───────────────────────────┘
┌─────────────┴─────────────┐
│ SEQ_SCAN │
│ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ │
│ lineitem │
│ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ │
│ l_orderkey │
│ l_commitdate │
│ l_receiptdate │
│ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ │
│ EC: 6001215 │
└───────────────────────────┘
" + ], + "text/plain": [ + "+---------------+---------------------------------------------------------------------------------------------------------------------------------------------------+\n", + "| explain_key | explain_value |\n", + "+---------------+---------------------------------------------------------------------------------------------------------------------------------------------------+\n", + "| physical_plan | ┌───────────────────────────┐ |\n", + "| | │ PROJECTION │ |\n", + "| | │ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ │ |\n", + "| | │__internal_decompress_strin│ |\n", + "| | │ g(#0) │ |\n", + "| | │ #1 │ |\n", + "| | └─────────────┬─────────────┘ |\n", + "| | ┌─────────────┴─────────────┐ |\n", + "| | │ ORDER_BY │ |\n", + "| | │ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ │ |\n", + "| | │ ORDERS: │ |\n", + "| | │ orders.o_orderpriority ASC│ |\n", + "| | └─────────────┬─────────────┘ |\n", + "| | ┌─────────────┴─────────────┐ |\n", + "| | │ PROJECTION │ |\n", + "| | │ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ │ |\n", + "| | │__internal_compress_string_│ |\n", + "| | │ hugeint(#0) │ |\n", + "| | │ #1 │ |\n", + "| | └─────────────┬─────────────┘ |\n", + "| | ┌─────────────┴─────────────┐ |\n", + "| | │ PROJECTION │ |\n", + "| | │ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ │ |\n", + "| | │__internal_decompress_strin│ |\n", + "| | │ g(#0) │ |\n", + "| | │ #1 │ |\n", + "| | └─────────────┬─────────────┘ |\n", + "| | ┌─────────────┴─────────────┐ |\n", + "| | │ HASH_GROUP_BY │ |\n", + "| | │ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ │ |\n", + "| | │ #0 │ |\n", + "| | │ count_star() │ |\n", + "| | └─────────────┬─────────────┘ |\n", + "| | ┌─────────────┴─────────────┐ |\n", + "| | │ PROJECTION │ |\n", + "| | │ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ │ |\n", + "| | │ o_orderpriority │ |\n", + "| | └─────────────┬─────────────┘ |\n", + "| | ┌─────────────┴─────────────┐ |\n", + "| | │ PROJECTION │ |\n", + "| | │ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ │ |\n", + "| | │__internal_compress_integra│ |\n", + "| | │ l_uinteger(#0, 1) │ |\n", + "| | │__internal_compress_string_│ |\n", + "| | │ 
hugeint(#1) │ |\n", + "| | └─────────────┬─────────────┘ |\n", + "| | ┌─────────────┴─────────────┐ |\n", + "| | │ RIGHT_DELIM_JOIN │ |\n", + "| | │ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ │ |\n", + "| | │ RIGHT_SEMI │ |\n", + "| | │ o_orderkey IS NOT DISTINCT├──────────────┐ |\n", + "| | │ FROM o_orderkey │ │ |\n", + "| | │ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ │ │ |\n", + "| | │ EC: 1200243 │ │ |\n", + "| | └─────────────┬─────────────┘ │ |\n", + "| | ┌─────────────┴─────────────┐┌─────────────┴─────────────┐ |\n", + "| | │ SEQ_SCAN ││ HASH_JOIN │ |\n", + "| | │ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ││ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ │ |\n", + "| | │ orders ││ RIGHT_SEMI │ |\n", + "| | │ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ││ o_orderkey IS NOT DISTINCT│ |\n", + "| | │ o_orderkey ││ FROM o_orderkey │ |\n", + "| | │ o_orderpriority ││ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ │ |\n", + "| | │ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ││ EC: 1200243 ├───────────────────────────────────────────┐ |\n", + "| | │Filters: o_orderdate>='1993││ │ │ |\n", + "| | │ -07-01'::DATE AND ││ │ │ |\n", + "| | │ o_orderdate<'1993-10-... 
││ │ │ |\n", + "| | │ o_orderdate IS NOT NULL ││ │ │ |\n", + "| | │ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ││ │ │ |\n", + "| | │ EC: 300000 ││ │ │ |\n", + "| | └───────────────────────────┘└─────────────┬─────────────┘ │ |\n", + "| | ┌─────────────┴─────────────┐ ┌─────────────┴─────────────┐ |\n", + "| | │ PROJECTION │ │ DUMMY_SCAN │ |\n", + "| | │ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ │ │ │ |\n", + "| | │ o_orderkey │ │ │ |\n", + "| | └─────────────┬─────────────┘ └───────────────────────────┘ |\n", + "| | ┌─────────────┴─────────────┐ |\n", + "| | │ HASH_JOIN │ |\n", + "| | │ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ │ |\n", + "| | │ INNER │ |\n", + "| | │ l_orderkey = o_orderkey ├──────────────┐ |\n", + "| | │ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ │ │ |\n", + "| | │ EC: 1200243 │ │ |\n", + "| | └─────────────┬─────────────┘ │ |\n", + "| | ┌─────────────┴─────────────┐┌─────────────┴─────────────┐ |\n", + "| | │ FILTER ││ DELIM_SCAN │ |\n", + "| | │ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ││ │ |\n", + "| | │ (l_commitdate < ││ │ |\n", + "| | │ l_receiptdate) ││ │ |\n", + "| | │ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ││ │ |\n", + "| | │ EC: 6001215 ││ │ |\n", + "| | └─────────────┬─────────────┘└───────────────────────────┘ |\n", + "| | ┌─────────────┴─────────────┐ |\n", + "| | │ SEQ_SCAN │ |\n", + "| | │ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ │ |\n", + "| | │ lineitem │ |\n", + "| | │ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ │ |\n", + "| | │ l_orderkey │ |\n", + "| | │ l_commitdate │ |\n", + "| | │ l_receiptdate │ |\n", + "| | │ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ │ |\n", + "| | │ EC: 6001215 │ |\n", + "| | └───────────────────────────┘ |\n", + "| | |\n", + "+---------------+---------------------------------------------------------------------------------------------------------------------------------------------------+" + ] + }, + "execution_count": 17, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "%%sql explain SELECT\n", + "o_orderpriority,\n", + "count(*) AS order_count\n", + "FROM\n", + "orders\n", + "WHERE\n", + "o_orderdate >= CAST('1993-07-01' AS date)\n", + "AND 
o_orderdate < CAST('1993-10-01' AS date)\n", + "AND EXISTS (\n", + "SELECT\n", + "*\n", + "FROM\n", + "lineitem\n", + "WHERE\n", + "l_orderkey = o_orderkey\n", + "AND l_commitdate < l_receiptdate)\n", + "GROUP BY\n", + "o_orderpriority\n", + "ORDER BY\n", + "o_orderpriority;" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "base", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.9.16" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/base/Cargo.toml b/base/Cargo.toml index 6eb6ec6..afb0d6f 100644 --- a/base/Cargo.toml +++ b/base/Cargo.toml @@ -5,4 +5,6 @@ version = "0.0.0" edition = "2021" [dependencies] -itertools = "0.12.1" \ No newline at end of file +itertools = "0.12.1" +chrono = "0.4.38" +duckdb = {version = "0.10.2", features = ["bundled"]} \ No newline at end of file diff --git a/base/src/lib.rs b/base/src/lib.rs index 046279f..397a4a9 100644 --- a/base/src/lib.rs +++ b/base/src/lib.rs @@ -1,4 +1,5 @@ pub mod kmeans_baseline; pub mod point; pub mod matrix_vector_multiply; -pub mod vectorized_sum; \ No newline at end of file +pub mod vectorized_sum; +pub mod tpch; \ No newline at end of file diff --git a/base/src/tpch/initialize.rs b/base/src/tpch/initialize.rs new file mode 100644 index 0000000..a6b0b4d --- /dev/null +++ b/base/src/tpch/initialize.rs @@ -0,0 +1,22 @@ +use duckdb::Connection; + +/** + * Using DuckDB to load the data + * */ +pub fn initialize_database(scale_factor: u32) -> Connection { + + // Create a in-memory database + let conn = Connection::open_in_memory().expect("Error creating in-memory database"); + conn.execute( + &format!("CREATE OR REPLACE SCHEMA 
SF_{};", scale_factor), + [], + ) + .expect("Error creating schema"); + conn.execute(&format!("USE SF_{};", scale_factor), []) + .expect("Error using schema"); + // Load the data via TPCH extension + conn.execute(&format!("CALL dbgen(sf ={});", scale_factor), []) + .expect("Error loading data via TPCH extension"); + + conn +} \ No newline at end of file diff --git a/base/src/tpch/mod.rs b/base/src/tpch/mod.rs new file mode 100644 index 0000000..27f833c --- /dev/null +++ b/base/src/tpch/mod.rs @@ -0,0 +1,5 @@ +pub mod query_1; +pub mod query_4; +pub mod query_19; +pub mod initialize; +mod util; \ No newline at end of file diff --git a/base/src/tpch/query_1.rs b/base/src/tpch/query_1.rs new file mode 100644 index 0000000..8966df2 --- /dev/null +++ b/base/src/tpch/query_1.rs @@ -0,0 +1,265 @@ +use std::collections::HashMap; + +use chrono::NaiveDate; +use duckdb::Connection; +use itertools::Itertools; + +use super::util::to_date; + +pub struct LineItem { + pub l_returnflag: char, + pub l_linestatus: char, + pub l_quantity: f64, + pub l_extendedprice: f64, + pub l_discount: f64, + pub l_tax: f64, + pub l_shipdate: NaiveDate, +} + +impl LineItem { + pub fn load(conn: &Connection, limit: Option) -> Vec { + // Create iterator for LineItem + let query = match limit { + Some(limit) => format!("SELECT l_returnflag, l_linestatus, l_quantity, l_extendedprice, l_discount, l_tax, l_shipdate FROM lineitem LIMIT {};", limit), + None => "SELECT l_returnflag, l_linestatus, l_quantity, l_extendedprice, l_discount, l_tax, l_shipdate FROM lineitem;".to_string(), + }; + let mut stmt = conn + .prepare(&query) + .expect("Error preparing query for LineItem"); + let line_items = stmt + .query_map([], |row| { + Ok(LineItem { + l_returnflag: row.get::<_, String>(0)?.chars().next().unwrap(), + l_linestatus: row.get::<_, String>(1)?.chars().next().unwrap(), + l_quantity: row.get(2)?, + l_extendedprice: row.get(3)?, + l_discount: row.get(4)?, + l_tax: row.get(5)?, + l_shipdate: 
to_date(row.get(6)?), + }) + }) + .expect("Error querying LineItem"); + + let line_items = line_items.filter(|x| x.is_ok()).map(|x| x.unwrap()); + + line_items.collect() + } +} + +pub struct LineItem2 { + pub l_returnflag: char, + pub l_linestatus: char, + pub l_quantity: f64, + pub l_extendedprice: f64, + pub l_discount: f64, + pub l_tax: f64, +} + +impl Into for LineItem { + fn into(self) -> LineItem2 { + LineItem2 { + l_returnflag: self.l_returnflag, + l_linestatus: self.l_linestatus, + l_quantity: self.l_quantity, + l_extendedprice: self.l_extendedprice, + l_discount: self.l_discount, + l_tax: self.l_tax, + } + } +} + +pub struct LineItemAgg1 { + pub sum_qty: f64, + pub sum_base_price: f64, + pub sum_disc_price: f64, + pub sum_charge: f64, + pub count_order: u64, +} + +impl Default for LineItemAgg1 { + fn default() -> Self { + LineItemAgg1 { + sum_qty: 0.0, + sum_base_price: 0.0, + sum_disc_price: 0.0, + sum_charge: 0.0, + count_order: 0, + } + } +} + +#[derive(Debug)] +pub struct LineItemAgg2 { + pub sum_qty: f64, + pub sum_base_price: f64, + pub sum_disc_price: f64, + pub sum_charge: f64, + pub avg_qty: f64, + pub avg_price: f64, + pub avg_disc: f64, + pub count_order: u64, +} + +impl From for LineItemAgg2 { + fn from(x: LineItemAgg1) -> LineItemAgg2 { + LineItemAgg2 { + sum_qty: x.sum_qty, + sum_base_price: x.sum_base_price, + sum_disc_price: x.sum_disc_price, + sum_charge: x.sum_charge, + avg_qty: x.sum_qty / x.count_order as f64, + avg_price: x.sum_base_price / x.count_order as f64, + avg_disc: x.sum_disc_price / x.count_order as f64, + count_order: x.count_order, + } + } +} + +pub fn load(conn: &Connection) -> Vec { + LineItem::load(conn, None) +} + +pub fn query(line_items: Vec) { + + // 1. Scan from lineitem: "l_returnflag", "l_linestatus", "l_quantity", "l_extendedprice", "l_discount", "l_tax", "l_shipdate" + let line_items = line_items.into_iter(); + + // 2. Filter l_shipdate <= date '1998-12-01' - interval '90' day i.e., l_shipdate <= '1998-09-02'. 
+ // (2.1 Evaluate expression.) + // 2.2. Filter on the expression. Need everything apart from l_shipdate. + let line_items_filtered = line_items.filter(|x| x.l_shipdate <= NaiveDate::from_ymd_opt(1998, 9, 2).unwrap()); + + // 3. Evaluate expressions for the aggregations. + // XXX: Skipping projection of l_tax, cloud drop that column. + let line_items_proj = line_items_filtered.map(|x| { + // Project the fields. + let x: LineItem2 = x.into(); + // l_extendedprice * (1 - l_discount) AS disc_price, + let disc_price = x.l_extendedprice * (1.0 - x.l_discount); + // l_extendedprice * (1 - l_discount) * (1 + l_tax) AS charge, + let charge = disc_price * (1.0 + x.l_tax); + return (x, disc_price, charge) + }); + + // 4. Group by l_returnflag, l_linestatus & compute aggregates. + let agg = line_items_proj.map(|(x, disc_price, charge)| { + // Group by l_returnflag, l_linestatus + ((x.l_returnflag, x.l_linestatus), x.l_quantity, x.l_extendedprice, disc_price, charge) + }).fold(HashMap::new(), |mut acc, x| { + // Hash aggregate without average + let (key, l_quantity, l_extendedprice, disc_price, charge) = x; + let entry = acc.entry(key).or_insert(LineItemAgg1 { + sum_qty: 0.0, + sum_base_price: 0.0, + sum_disc_price: 0.0, + sum_charge: 0.0, + count_order: 0, + }); + entry.sum_qty += l_quantity; + entry.sum_base_price += l_extendedprice; + entry.sum_disc_price += disc_price; + entry.sum_charge += charge; + entry.count_order += 1; + acc + }).into_iter().map(|(key, value)| { + // Finalize aggregation with average + let (l_returnflag, l_linestatus) = key; + let value: LineItemAgg2 = value.into(); + ((l_returnflag, l_linestatus), value) + }); + + // 5. Sort by l_returnflag, l_linestatus. + // XXX: InkFuse is skipping this step. + let ordered = agg.sorted_by_key(|x| x.0); + + // Attach the sink for printing. 
+ ordered.for_each(|x| { + let ((l_returnflag, l_linestatus), x) = x; + println!("{}, {}, {}, {}, {}, {}, {}, {}, {}, {}", + l_returnflag, + l_linestatus, + x.sum_qty, + x.sum_base_price, + x.sum_disc_price, + x.sum_charge, + x.avg_qty, + x.avg_price, + x.avg_disc, + x.count_order, + ); + }); +} + +pub fn query_duckdb(conn: &Connection, limit: Option) { + let table = match limit { + Some(limit) => format!("(SELECT * FROM lineitem LIMIT {})", limit), + None => "lineitem".to_string(), + }; + let mut stmt = conn.prepare(&format!(r#" + SELECT + l_returnflag, + l_linestatus, + sum(l_quantity) AS sum_qty, + sum(l_extendedprice) AS sum_base_price, + sum(l_extendedprice * (1 - l_discount)) AS sum_disc_price, + sum(l_extendedprice * (1 - l_discount) * (1 + l_tax)) AS sum_charge, + avg(l_quantity) AS avg_qty, + avg(l_extendedprice) AS avg_price, + avg(l_discount) AS avg_disc, + count(*) AS count_order + FROM + {} + WHERE + l_shipdate <= CAST('1998-09-02' AS date) + GROUP BY + l_returnflag, + l_linestatus + ORDER BY + l_returnflag, + l_linestatus; + "#, table)).expect("Error preparing query for LineItem"); + let mut rows = stmt.query([]).expect("Error executing Query 1"); + while let Some(row) = rows.next().unwrap() { + let l_returnflag: char = row.get::<_, String>(0).unwrap().chars().next().unwrap(); + let l_linestatus: char = row.get::<_, String>(1).unwrap().chars().next().unwrap(); + let sum_qty: f64 = row.get(2).unwrap(); + let sum_base_price: f64 = row.get(3).unwrap(); + let sum_disc_price: f64 = row.get(4).unwrap(); + let sum_charge: f64 = row.get(5).unwrap(); + let avg_qty: f64 = row.get(6).unwrap(); + let avg_price: f64 = row.get(7).unwrap(); + let avg_disc: f64 = row.get(8).unwrap(); + let count_order: u64 = row.get(9).unwrap(); + println!("{}, {}, {}, {}, {}, {}, {}, {}, {}, {}", + l_returnflag, + l_linestatus, + sum_qty, + sum_base_price, + sum_disc_price, + sum_charge, + avg_qty, + avg_price, + avg_disc, + count_order, + ); + } +} + +#[cfg(test)] +mod tests { + 
use super::*; + use crate::tpch::initialize::initialize_database; + + #[test] + fn test_query_1() { + let conn = initialize_database(1); + let line_items = LineItem::load(&conn, Some(1000)); + query(line_items); + } + + #[test] + fn test_query_1_duckdb() { + let conn = initialize_database(1); + query_duckdb(&conn, Some(1000)); + } +} \ No newline at end of file diff --git a/base/src/tpch/query_19.rs b/base/src/tpch/query_19.rs new file mode 100644 index 0000000..e5d8520 --- /dev/null +++ b/base/src/tpch/query_19.rs @@ -0,0 +1,257 @@ +use std::collections::HashMap; +use duckdb::Connection; + +pub struct Part { + pub p_partkey: i64, + pub p_brand: String, + pub p_container: String, + pub p_size: i32, +} + +impl Part { + pub fn load(conn: &Connection, limit: Option) -> Vec { + // Create iterator for Part + let query = match limit { + Some(limit) => format!( + "SELECT p_partkey, p_brand, p_container, p_size FROM part LIMIT {};", + limit + ), + None => "SELECT p_partkey, p_brand, p_container, p_size FROM part;".to_string(), + }; + let mut stmt = conn + .prepare(&query) + .expect("Error preparing query for Part"); + let parts = stmt + .query_map([], |row| { + Ok(Part { + p_partkey: row.get(0)?, + p_brand: row.get(1)?, + p_container: row.get(2)?, + p_size: row.get(3)?, + }) + }) + .expect("Error querying Part"); + + let parts = parts.filter(|x| x.is_ok()).map(|x| x.unwrap()); + + parts.collect() + } + + fn filter(brand_pred: &str, brand_val: &str, size_between: &(i32, i32), size_val: &i32, container_list: &[&str], container_val: &str) -> bool { + return brand_pred == brand_val + && size_between.0 <= *size_val + && *size_val <= size_between.1 + && container_list.contains(&container_val); + } + + pub fn filter_1(brand_val: &str, size_val: &i32, container_val: &str) -> bool { + return Part::filter("Brand#12", brand_val, &(1, 5), size_val, &["SM CASE", "SM BOX", "SM PACK", "SM PKG"], container_val); + } + + pub fn filter_2(brand_val: &str, size_val: &i32, container_val: &str) 
-> bool { + return Part::filter("Brand#23", brand_val, &(1, 10), size_val, &["MED BAG", "MED BOX", "MED PKG", "MED PACK"], container_val); + } + + pub fn filter_3(brand_val: &str, size_val: &i32, container_val: &str) -> bool { + return Part::filter("Brand#34", brand_val, &(1, 15), size_val, &["LG CASE", "LG BOX", "LG PACK", "LG PKG"], container_val); + } +} + +pub struct LineItem { + pub l_partkey: i64, + pub l_shipmode: String, + pub l_quantity: f64, + pub l_shipinstruct: String, + pub l_discount: f64, + pub l_extendedprice: f64, +} + +impl LineItem { + pub fn load(conn: &Connection, limit: Option) -> Vec { + // Create iterator for LineItem + let query = match limit { + Some(limit) => format!( + "SELECT l_partkey, l_shipmode, l_quantity, l_shipinstruct, l_discount, l_extendedprice FROM lineitem LIMIT {};", + limit + ), + None => "SELECT l_partkey, l_shipmode, l_quantity, l_shipinstruct, l_discount, l_extendedprice FROM lineitem;".to_string(), + }; + let mut stmt = conn + .prepare(&query) + .expect("Error preparing query for LineItem"); + let line_items = stmt + .query_map([], |row| { + Ok(LineItem { + l_partkey: row.get(0)?, + l_shipmode: row.get(1)?, + l_quantity: row.get(2)?, + l_shipinstruct: row.get(3)?, + l_discount: row.get(4)?, + l_extendedprice: row.get(5)?, + }) + }) + .expect("Error querying LineItem"); + + let line_items = line_items.filter(|x| x.is_ok()).map(|x| x.unwrap()); + + line_items.collect() + } + + fn filter(quantity_between: &(f64, f64), quantity_val: &f64) -> bool { + return quantity_between.0 <= *quantity_val && *quantity_val <= quantity_between.1; + } + + pub fn filter_1(quantity_val: &f64) -> bool { + return LineItem::filter(&(1.0, 11.0), quantity_val); + } + + pub fn filter_2(quantity_val: &f64) -> bool { + return LineItem::filter(&(10.0, 20.0), quantity_val); + } + + pub fn filter_3(quantity_val: &f64) -> bool { + return LineItem::filter(&(20.0, 30.0), quantity_val); + } +} + +pub fn load(conn: &Connection) -> (Vec, Vec) { + 
(LineItem::load(conn, None), Part::load(conn, None)) +} + +pub fn query(line_items: Vec, part: Vec) { + // XXX: InkFuse adds an early filter before the join. DuckDB does not + + // 1. Scan part. + let part_filtered = part + .into_iter() + // 2. Pushed down filter on part. + .filter(|part| { + Part::filter_1(&part.p_brand, &part.p_size, &part.p_container) + || Part::filter_2(&part.p_brand, &part.p_size, &part.p_container) + || Part::filter_3(&part.p_brand, &part.p_size, &part.p_container) + }); + + // 3. Scan lineitem. + let lineitem_filtered = line_items.into_iter() + // 4. Pushed down lineitem filter. + // l_shipinstruct = "DELIVER IN PERSON" + // l_shipmode = "AIR" or "AIR REG" + .filter(|lineitem| lineitem.l_shipinstruct == "DELIVER IN PERSON" && (lineitem.l_shipmode == "AIR" || lineitem.l_shipmode == "AIR REG")) + .filter(|lineitem| LineItem::filter_1(&lineitem.l_quantity) || LineItem::filter_2(&lineitem.l_quantity) || LineItem::filter_3(&lineitem.l_quantity)); + + // 5. Join the two + // Build: Part? + // Probe: LineItem? + + // Keys left (p_partkey) + // Payload left (p_brand, p_container, p_size) + let join_build = part_filtered.map(|p| (p.p_partkey, (p.p_brand, p.p_container, p.p_size))) + .fold(HashMap::new(), |mut map, (key, value)| { + map.insert(key, value); + map + }); + + // Keys right (l_partkey) + // Payload right (l_quantity, l_discount, l_extendedprice) + let join_probe = lineitem_filtered.map(|l| (l.l_partkey, (l.l_quantity, l.l_discount, l.l_extendedprice))); + + let join = join_probe.filter_map(|(key, value)| { + if let Some((p_brand, p_container, p_size)) = join_build.get(&key) { + Some((p_brand, p_container, p_size, value.0, value.1, value.2)) + } else { + None + } + }); + + // 6. Filter again, we need to make sure the right tuples survived. 
+ let join_filtered = join.filter(|(p_brand, p_container, p_size, l_quantity, _l_discount, _l_extendedprice)| { + (Part::filter_1(&p_brand, &p_size, &p_container) && LineItem::filter_1(&l_quantity)) + || (Part::filter_2(&p_brand, &p_size, &p_container) && LineItem::filter_2(&l_quantity)) + || (Part::filter_3(&p_brand, &p_size, &p_container) && LineItem::filter_3(&l_quantity)) + }); + + // 7. Aggregate the result. + // 7.1 Compute (l_extendedprice * (1 - l_discount)) + // 7.2. Aggregate sum + let agg = join_filtered.map(|(_p_brand, _p_container, _p_size, _l_quantity, l_discount, l_extendedprice)| { + l_extendedprice * (1.0 - l_discount) + }).reduce(|a, b| a + b); + + + // 8. Print + println!("{}", agg.unwrap()); + +} + +pub fn query_duckdb(conn: &Connection, limit: Option) { + let lineitem_table = match limit { + Some(limit) => format!("(SELECT * FROM lineitem LIMIT {})", limit), + None => "lineitem".to_string(), + }; + let part_table = match limit { + Some(limit) => format!("(SELECT * FROM part LIMIT {})", limit), + None => "part".to_string(), + }; + let mut stmt = conn + .prepare(&format!( + r#" + SELECT + sum(l_extendedprice * (1 - l_discount)) AS revenue + FROM + {}, + {} + WHERE (p_partkey = l_partkey + AND p_brand = 'Brand#12' + AND p_container IN ('SM CASE', 'SM BOX', 'SM PACK', 'SM PKG') + AND l_quantity >= 1 + AND l_quantity <= 1 + 10 + AND p_size BETWEEN 1 AND 5 + AND l_shipmode IN ('AIR', 'AIR REG') + AND l_shipinstruct = 'DELIVER IN PERSON') + OR (p_partkey = l_partkey + AND p_brand = 'Brand#23' + AND p_container IN ('MED BAG', 'MED BOX', 'MED PKG', 'MED PACK') + AND l_quantity >= 10 + AND l_quantity <= 10 + 10 + AND p_size BETWEEN 1 AND 10 + AND l_shipmode IN ('AIR', 'AIR REG') + AND l_shipinstruct = 'DELIVER IN PERSON') + OR (p_partkey = l_partkey + AND p_brand = 'Brand#34' + AND p_container IN ('LG CASE', 'LG BOX', 'LG PACK', 'LG PKG') + AND l_quantity >= 20 + AND l_quantity <= 20 + 10 + AND p_size BETWEEN 1 AND 15 + AND l_shipmode IN ('AIR', 'AIR 
REG') + AND l_shipinstruct = 'DELIVER IN PERSON'); + "#, + lineitem_table, part_table + )) + .expect("Error preparing query for LineItem"); + let mut rows = stmt.query([]).expect("Error executing Query 1"); + while let Some(row) = rows.next().unwrap() { + let revenue: f64 = row.get(0).unwrap(); + println!("{}", revenue); + } +} + +#[cfg(test)] +mod tests { + use super::*; + use crate::tpch::initialize::initialize_database; + + #[test] + fn test_query_19() { + let limit = None; + let conn = initialize_database(1); + let line_items = LineItem::load(&conn, limit); + let parts = Part::load(&conn, limit); + query(line_items, parts); + } + + #[test] + fn test_query_19_duckdb() { + let conn = initialize_database(1); + query_duckdb(&conn, None); + } +} \ No newline at end of file diff --git a/base/src/tpch/query_4.rs b/base/src/tpch/query_4.rs new file mode 100644 index 0000000..bd61909 --- /dev/null +++ b/base/src/tpch/query_4.rs @@ -0,0 +1,247 @@ +use std::collections::{HashMap, HashSet}; + +use chrono::NaiveDate; +use duckdb::Connection; + +use super::util::to_date; + +#[derive(Debug, Clone)] +pub struct LineItem { + pub order_key: i32, // BIGINT + pub receiptdate: NaiveDate, // DATE + pub commit_date: NaiveDate, // DATE +} + +impl LineItem { + pub fn load(conn: &Connection, limit: Option) -> Vec { + // Create iterator for LineItem + let query = match limit { + Some(limit) => format!( + "SELECT l_orderkey, l_receiptdate, l_commitdate FROM lineitem LIMIT {};", + limit + ), + None => "SELECT l_orderkey, l_receiptdate, l_commitdate FROM lineitem;".to_string(), + }; + let mut stmt = conn + .prepare(&query) + .expect("Error preparing query for LineItem"); + let line_items = stmt + .query_map([], |row| { + Ok(LineItem { + order_key: row.get(0)?, + receiptdate: to_date(row.get(1)?), + commit_date: to_date(row.get(2)?), + }) + }) + .expect("Error querying LineItem"); + + let line_items = line_items.filter(|x| x.is_ok()).map(|x| x.unwrap()); + + line_items.collect() + } +} + 
+#[derive(Debug, Clone)] +pub struct Order { + pub order_key: i32, + pub order_date: NaiveDate, + pub order_priority: String, +} + +impl Order { + pub fn load(conn: &Connection, limit: Option) -> Vec { + // Create iterator for Orders + let query = match limit { + Some(limit) => format!( + "SELECT o_orderkey, o_orderdate, o_orderpriority FROM orders LIMIT {};", + limit + ), + None => "SELECT o_orderkey, o_orderdate, o_orderpriority FROM orders;".to_string(), + }; + let mut stmt = conn + .prepare(&query) + .expect("Error preparing query for LineItem"); + let orders = stmt + .query_map([], |row| { + Ok(Self { + order_key: row.get(0)?, + order_date: to_date(row.get(1)?), + order_priority: row.get(2)?, + }) + }) + .expect("Error querying LineItem"); + + let orders = orders.filter_map(|x| x.ok()); + + orders.collect() + } +} + +pub fn load(conn: &Connection) -> (Vec, Vec) { + let line_items = LineItem::load(&conn, None); + let orders = Order::load(&conn, None); + + (line_items, orders) +} + +pub fn query(line_items: Vec, orders: Vec) { + + // 1. Scan orders + let orders_filtered = orders + .into_iter() + // 1.2 Filter orders on o_orderdate >= '1993-07-01' and < '1993-10-01' + .filter(|order| { + order.order_date >= NaiveDate::from_ymd_opt(1993, 7, 1).unwrap() + && order.order_date < NaiveDate::from_ymd_opt(1993, 10, 1).unwrap() + }); + + // 2. Scan from lineitem. + let line_items_filtered = line_items + .into_iter() + // 2.2 Filter lineitem on l_commitdate < l_receiptdate + .filter(|line_item| line_item.commit_date < line_item.receiptdate); + + // 3. Join the two. o_orderkey = l_orderkey, payload: o_orderpriority + // Build side: Orders? + // Probe side: LineItem? + // HashJoin? + let join_build = line_items_filtered.fold(HashSet::new(), |mut acc, e| { + acc.insert(e.order_key); + acc + }); + let joined = + orders_filtered.filter_map(|o| join_build.get(&o.order_key).map(|_| o.order_priority)); + + // 4. Aggregate. 
+ // Hash aggregation in DuckDB + // 4.1 Group by: "o_orderpriority" + // 4.2 Count + let agg = joined.fold(HashMap::new(), |mut acc, e| { + let count = acc.entry(e).or_insert(0); + *count += 1; + acc + }); + + // 5. Print: "o_orderpriority", "order_count" + agg.into_iter().for_each(|x| println!("{:?}", x)); +} + +pub fn query_duckdb(conn: &Connection, limit: Option) { + let orders_table = match limit { + Some(limit) => format!("(SELECT * FROM orders LIMIT {})", limit), + None => "orders".to_string(), + }; + let lineitem_table = match limit { + Some(limit) => format!("(SELECT * FROM lineitem LIMIT {})", limit), + None => "lineitem".to_string(), + }; + + let mut stmt = conn + .prepare(&format!( + r#" + SELECT + o_orderpriority, + count(*) AS order_count + FROM + {} + WHERE + o_orderdate >= CAST('1993-07-01' AS date) + AND o_orderdate < CAST('1993-10-01' AS date) + AND EXISTS ( + SELECT + * + FROM + {} + WHERE + l_orderkey = o_orderkey + AND l_commitdate < l_receiptdate) + GROUP BY + o_orderpriority + ORDER BY + o_orderpriority; +"#, + orders_table, lineitem_table + )) + .expect("Error preparing query for DuckDB"); + + let mut rows = stmt.query([]).expect("Error executing Query 4"); + while let Some(row) = rows.next().unwrap() { + let order_priority: String = row.get(0).unwrap(); + let order_count: i64 = row.get(1).unwrap(); + println!("{}, {}", order_priority, order_count); + } +} + +#[cfg(test)] +mod tests { + use super::*; + use crate::tpch::initialize::initialize_database; + + #[test] + fn test_query_4() { + let limit = None; + + let conn = initialize_database(1); + let line_items = LineItem::load(&conn, limit); + let orders = Order::load(&conn, limit); + + // Call the query function + query(line_items, orders); + } + + #[test] + fn test_query_4_duckdb() { + let conn = initialize_database(1); + + let res = conn + .query_row( + r#" + SELECT + count(*) AS order_count + FROM + orders + WHERE + o_orderdate >= CAST('1993-07-01' AS date) + AND o_orderdate < 
CAST('1993-10-01' AS date) + "#, + [], + |row| { + let order_count: i64 = row.get(0)?; + Ok(order_count) + }, + ) + .unwrap(); + println!("Orders filtered: {:?}", res); + + let res = conn + .query_row( + r#" + SELECT + count(*) + FROM + lineitem + WHERE + l_commitdate < l_receiptdate + "#, + [], + |row| { + let order_count: i64 = row.get(0)?; + Ok(order_count) + }, + ) + .unwrap(); + println!("LineItem filtered: {:?}", res); + + query_duckdb(&conn, None); + } + + #[test] + fn test_load() { + let conn = initialize_database(1); + let line_items = LineItem::load(&conn, Some(1)); + let orders = Order::load(&conn, Some(1)); + + assert_eq!(line_items.len(), 1); + assert_eq!(orders.len(), 1); + } +} diff --git a/base/src/tpch/util.rs b/base/src/tpch/util.rs new file mode 100644 index 0000000..1d50146 --- /dev/null +++ b/base/src/tpch/util.rs @@ -0,0 +1,29 @@ +use chrono::{DateTime, NaiveDate}; + +// Conversion taken from DuckDB's main branch: https://github.com/duckdb/duckdb-rs/blob/a1aa55aff22b75e149e9cf7cface6464b3dc0ccc/src/types/chrono.rs#L71C39-L71C111 +pub fn to_date(value: i32) -> NaiveDate { + DateTime::from_timestamp(24 * 3600 * (value as i64), 0) + .unwrap() + .naive_utc() + .date() +} + +#[cfg(test)] +mod tests { + use super::*; + use chrono::NaiveDate; + use duckdb::Connection; + + + #[test] + fn test_to_date() { + let conn = Connection::open_in_memory().expect("Error creating in-memory database"); + let raw: i32 = conn + .query_row("SELECT DATE '1992-03-22' as date;", [], |row| { + row.get::<_, i32>(0) + }) + .unwrap(); + let date = to_date(raw); + assert_eq!(date, NaiveDate::from_ymd_opt(1992, 3, 22).unwrap()); + } +} \ No newline at end of file diff --git a/flow/examples/kmeans_baseline.rs b/flow/examples/kmeans_baseline.rs deleted file mode 100644 index 0ab32c8..0000000 --- a/flow/examples/kmeans_baseline.rs +++ /dev/null @@ -1,23 +0,0 @@ -use flow::kmeans_baseline::kmeans; - -fn main() { - // Example usage - let points = vec![ - vec![1.0, 2.0], - 
vec![2.0, 1.0], - vec![3.0, 4.0], - vec![4.0, 3.0], - vec![5.0, 6.0], - vec![6.0, 5.0], - ]; - - let k = 2; - let max_iterations = 100; - let tolerance = 0.0001; - - let clusters = kmeans(&points, k, max_iterations, tolerance); - - for (i, cluster) in clusters.iter().enumerate() { - println!("Point {:?} belongs to cluster {}", points[i], cluster); - } -} \ No newline at end of file diff --git a/flow/examples/query_4.rs b/flow/examples/query_4.rs new file mode 100644 index 0000000..cc90212 --- /dev/null +++ b/flow/examples/query_4.rs @@ -0,0 +1,40 @@ +use flow::tpch::query_4::query_4; +use hydro_deploy::{Deployment, HydroflowCrate}; +use hydroflow::futures::StreamExt; +use hydroflow_plus_cli_integration::{DeployCrateWrapper, DeployProcessSpec}; + +#[tokio::main] +async fn main() { + let profile = "dev"; + let mut deployment = Deployment::new(); + let localhost = deployment.Localhost(); + + let flow = hydroflow_plus::FlowBuilder::new(); + let orders = stageleft::RuntimeData::new(&"FAKE"); + let lineitem = stageleft::RuntimeData::new(&"FAKE"); + let second_process = query_4( + &flow, + &DeployProcessSpec::new(|| { + deployment.add_service( + HydroflowCrate::new(".", localhost.clone()) + .bin("query_4") + .profile(profile), + ) + }), + lineitem, + orders, + ); + + println!("Deploying"); + deployment.deploy().await.unwrap(); + + println!("Getting stdout"); + let second_process_stdout = second_process.stdout().await; + + println!("Starting"); + deployment.start().await.unwrap(); + + println!("Collecting"); + let res = second_process_stdout.take(5).collect::>().await; + println!("{:?}", res); +} diff --git a/flow/examples/query_4_distributed.rs b/flow/examples/query_4_distributed.rs new file mode 100644 index 0000000..6dfe21f --- /dev/null +++ b/flow/examples/query_4_distributed.rs @@ -0,0 +1,77 @@ +use std::cell::RefCell; + +use flow::tpch::query_4_distributed::query_4_distributed; +use hydro_deploy::{Deployment, HydroflowCrate}; +use hydroflow::futures::StreamExt; +use 
hydroflow_plus_cli_integration::{DeployClusterSpec, DeployCrateWrapper, DeployProcessSpec}; + +#[tokio::main] +async fn main() { + let intermediate_aggregation = true; + let cluster_size = 2; + let profile = "dev"; + + let deployment = RefCell::new(Deployment::new()); + let localhost = deployment.borrow_mut().Localhost(); + + let flow = hydroflow_plus::FlowBuilder::new(); + let orders = stageleft::RuntimeData::new(&"FAKE"); + let lineitem = stageleft::RuntimeData::new(&"FAKE"); + let second_process = query_4_distributed( + &flow, + &DeployProcessSpec::new(|| { + deployment.borrow_mut().add_service( + HydroflowCrate::new(".", localhost.clone()) + .bin("query_4_distributed") + //.perf("~/query_4_perf.dat".into()).profile("profile"), + .profile(profile), + ) + }), + &DeployClusterSpec::new(|| { + (0..cluster_size) + .map(|_| { + deployment.borrow_mut().add_service( + HydroflowCrate::new(".", localhost.clone()) + .bin("query_4_distributed") + //.perf("~/query_4_perf.dat".into()).profile("profile"), + .profile(profile), + ) + }) + .collect() + }), + lineitem, + orders, + intermediate_aggregation, + ); + + let mut deployment = deployment.into_inner(); + + println!("Deploying"); + deployment.deploy().await.unwrap(); + + println!("Getting stdout"); + let mut second_process_stdout = second_process.stdout().await; + + println!("Starting"); + deployment.start().await.unwrap(); + + println!("Collecting"); + + let expected = vec![ + ("1-URGENT", 10594), + ("2-HIGH", 10476), + ("3-MEDIUM", 10410), + ("4-NOT SPECIFIED", 10556), + ("5-LOW", 10487), + ]; + let expected = format!("{:?}", expected); + println!("Expected: {:?}", expected); + while let Some(res) = second_process_stdout.next().await { + println!("{:?}", res); + + // Check if fixed point reached then break + if res == expected { + break; + } + } +} diff --git a/flow/src/bin/first_ten_distributed.rs b/flow/src/bin/first_ten_distributed.rs index ab70f18..d448b63 100644 --- a/flow/src/bin/first_ten_distributed.rs +++ 
b/flow/src/bin/first_ten_distributed.rs @@ -1,7 +1,7 @@ #[tokio::main] async fn main() { - hydroflow_plus::util::cli::launch(|ports| { - flow::first_ten_distributed::first_ten_distributed_runtime!(&ports) + hydroflow_plus::util::cli::launch!(|ports| { + flow::first_ten_distributed::first_ten_distributed_runtime!(ports) }) .await; } diff --git a/flow/src/bin/query_4.rs b/flow/src/bin/query_4.rs new file mode 100644 index 0000000..18f1fce --- /dev/null +++ b/flow/src/bin/query_4.rs @@ -0,0 +1,18 @@ +use base::tpch::initialize::initialize_database; + +#[tokio::main] +async fn main() { + //flow::tpch::query_4::query_4_runtime!().run_async().await; + hydroflow_plus::util::cli::launch!(|ports| { + //let orders: Vec = vec![]; + + // Load query data + let scale_factor = 1; + let conn = initialize_database(scale_factor); + let (lineitem, orders) = base::tpch::query_4::load(&conn); + //let orders = Order::load(&conn, Some(1000)); + + flow::tpch::query_4::query_4_runtime!(ports, lineitem, orders) + }) + .await; +} diff --git a/flow/src/bin/query_4_distributed.rs b/flow/src/bin/query_4_distributed.rs new file mode 100644 index 0000000..4996aa5 --- /dev/null +++ b/flow/src/bin/query_4_distributed.rs @@ -0,0 +1,16 @@ +use base::tpch::initialize::initialize_database; + +#[tokio::main] +async fn main() { + + hydroflow_plus::util::cli::launch!(|ports| { + + // Load query data + let scale_factor = 1; + let conn = initialize_database(scale_factor); + let (lineitem, orders) = base::tpch::query_4::load(&conn); + + flow::tpch::query_4_distributed::query_4_distributed_runtime!(ports, lineitem, orders, false) + }) + .await; +} diff --git a/flow/src/first_ten.rs b/flow/src/first_ten.rs index 53160db..1fe52cd 100644 --- a/flow/src/first_ten.rs +++ b/flow/src/first_ten.rs @@ -2,19 +2,19 @@ use hydroflow_plus::*; use stageleft::*; pub fn first_ten<'a, D: LocalDeploy<'a>>( - flow: &'a FlowBuilder<'a, D>, + flow: &FlowBuilder<'a, D>, process_spec: &impl ProcessSpec<'a, D>, ) { let process = 
flow.process(process_spec); - let numbers = process.source_iter(q!(0..10)); + let numbers = flow.source_iter(&process, q!(0..10)); numbers.for_each(q!(|n| println!("{}", n))); } #[stageleft::entry] pub fn first_ten_runtime<'a>( - flow: &'a FlowBuilder<'a, SingleProcessGraph>, + flow: FlowBuilder<'a, SingleProcessGraph>, ) -> impl Quoted<'a, Hydroflow<'a>> { - first_ten(flow, &()); + first_ten(&flow, &()); flow.extract().optimize_default() } diff --git a/flow/src/first_ten_distributed.rs b/flow/src/first_ten_distributed.rs index 5c84250..0db9f7b 100644 --- a/flow/src/first_ten_distributed.rs +++ b/flow/src/first_ten_distributed.rs @@ -1,14 +1,14 @@ -use hydroflow_plus::{util::cli, *}; +use hydroflow_plus::*; use stageleft::*; pub fn first_ten_distributed<'a, D: Deploy<'a>>( - flow: &'a FlowBuilder<'a, D>, + flow: &FlowBuilder<'a, D>, process_spec: &impl ProcessSpec<'a, D>, ) -> D::Process { let process = flow.process(process_spec); let second_process = flow.process(process_spec); - let numbers = process.source_iter(q!(0..10)); + let numbers = flow.source_iter(&process, q!(0..10)); numbers .send_bincode(&second_process) .for_each(q!(|n| println!("{}", n))); @@ -21,10 +21,10 @@ use hydroflow_plus_cli_integration::{CLIRuntime, HydroflowPlusMeta}; #[stageleft::entry] pub fn first_ten_distributed_runtime<'a>( - flow: &'a FlowBuilder<'a, CLIRuntime>, + flow: FlowBuilder<'a, CLIRuntime>, cli: RuntimeData<&'a HydroCLI>, ) -> impl Quoted<'a, Hydroflow<'a>> { - let _ = first_ten_distributed(flow, &cli); + let _ = first_ten_distributed(&flow, &cli); flow.extract().optimize_default().with_dynamic_id(q!(cli.meta.subgraph_id)) } diff --git a/flow/src/lib.rs b/flow/src/lib.rs index 40627cb..50e4335 100644 --- a/flow/src/lib.rs +++ b/flow/src/lib.rs @@ -1,8 +1,19 @@ stageleft::stageleft_crate!(flow_macro); +#[cfg(stageleft_macro)] +pub(crate) mod first_ten; +#[cfg(not(stageleft_macro))] pub mod first_ten; +#[cfg(stageleft_macro)] +pub(crate) mod first_ten_distributed; 
+#[cfg(not(stageleft_macro))] pub mod first_ten_distributed; +#[cfg(stageleft_macro)] +pub(crate) mod tpch; +#[cfg(not(stageleft_macro))] +pub mod tpch; + //pub mod kmeans_shallow_hfp; //pub mod multiply_MM_hfp; \ No newline at end of file diff --git a/flow/src/tpch/mod.rs b/flow/src/tpch/mod.rs new file mode 100644 index 0000000..903f22f --- /dev/null +++ b/flow/src/tpch/mod.rs @@ -0,0 +1,2 @@ +pub mod query_4; +pub mod query_4_distributed; \ No newline at end of file diff --git a/flow/src/tpch/query_4.rs b/flow/src/tpch/query_4.rs new file mode 100644 index 0000000..bc5d78c --- /dev/null +++ b/flow/src/tpch/query_4.rs @@ -0,0 +1,109 @@ +use base::tpch::query_4::{LineItem, Order}; +use chrono::NaiveDate; +use hydroflow_plus::*; +use hydroflow_plus::util::cli::HydroCLI; +use hydroflow_plus_cli_integration::HydroflowPlusMeta; +use stageleft::*; + +pub fn query_4<'a, D: LocalDeploy<'a>>( + flow: &FlowBuilder<'a, D>, + process_spec: &impl ProcessSpec<'a, D>, + lineitem: RuntimeData>, + orders: RuntimeData> +) -> D::Process { + let process = flow.process(process_spec); + + // 1. Scan orders + let orders_filtered = flow.source_iter(&process, orders) + //let orders_filtered = process.source_iter(q!(vec![Order{order_key: 0, order_date: NaiveDate::from_ymd_opt(1993, 7, 1).unwrap(), order_priority: "prior".to_string()}])) + // 1.2 Filter orders on o_orderdate >= '1993-07-01' and < '1993-10-01' + .filter(q!(|order: &Order| { + order.order_date >= NaiveDate::from_ymd_opt(1993, 7, 1).unwrap() + && order.order_date < NaiveDate::from_ymd_opt(1993, 10, 1).unwrap() + })); + + // 2. Scan from lineitem. 
+ //let line_items_filtered = process.source_iter(q!(vec![LineItem{order_key: 0, receiptdate: NaiveDate::from_ymd_opt(1993, 7, 1).unwrap(), commit_date: NaiveDate::from_ymd_opt(1993, 10, 1).unwrap()}])) + let line_items_filtered = flow.source_iter(&process, lineitem) + // 2.2 Filter lineitem on l_commitdate < l_receiptdate + .filter(q!(|line_item: &LineItem| line_item.commit_date < line_item.receiptdate)); + + // 3. Join the two. o_orderkey = l_orderkey, payload: o_orderpriority + // TODO: How to define the build side? + // TODO: Multiset join in HF+? + // XXX: Semijoin? + // Build side: Orders? + // Probe side: LineItem? + let line_items_join = line_items_filtered.map(q!(|l|(l.order_key, None::))); + let joined = orders_filtered.map(q!(|e|(e.order_key, e.order_priority))).join(line_items_join); + // XXX: Join only outputs unique values? Where did we take these semantics? + + // 4. Aggregate. + // Hash aggregation in DuckDB + // 4.1 Group by: "o_orderpriority" + // 4.2 Count + // XXX: Why is it legal to return a value when it is not used afterwards? + let agg = joined.map(q!(|x|(x.1.0, 1))).reduce_keyed(q!(|acc, x| *acc = *acc + x)); + + // 5. 
Print: "o_orderpriority", "order_count" + agg.for_each(q!(|x| println!("{:?}", x))); + + process +} + +#[stageleft::entry] +pub fn query_4_runtime<'a>( + flow: FlowBuilder<'a, SingleProcessGraph>, + _cli: RuntimeData<&'a HydroCLI>, + lineitem: RuntimeData>, + orders: RuntimeData>, +) -> impl Quoted<'a, Hydroflow<'a>> { + query_4(&flow, &(), lineitem, orders); + flow.extract().optimize_default() +} + +#[stageleft::runtime] +#[cfg(test)] +mod tests { + use hydro_deploy::{Deployment, HydroflowCrate}; + use hydroflow_plus::futures::StreamExt; + use hydroflow_plus_cli_integration::{DeployCrateWrapper, DeployProcessSpec}; + + #[tokio::test] + async fn test_query_4() { + let mut deployment = Deployment::new(); + let localhost = deployment.Localhost(); + + let flow = hydroflow_plus::FlowBuilder::new(); + let orders = stageleft::RuntimeData::new(&"FAKE"); + let lineitem = stageleft::RuntimeData::new(&"FAKE"); + let second_process = super::query_4( + &flow, + &DeployProcessSpec::new(|| { + deployment.add_service( + HydroflowCrate::new(".", localhost.clone()) + .bin("query_4") + .profile("dev"), + ) + }), + lineitem, + orders + ); + + println!("Deploying"); + deployment.deploy().await.unwrap(); + + println!("Getting stdout"); + let second_process_stdout = second_process.stdout().await; + + println!("Starting"); + deployment.start().await.unwrap(); + + println!("Collecting"); + let res = second_process_stdout.take(5).collect::>().await; + println!("{:?}", res); + /* assert_eq!( + vec!["0", "1", "2", "3", "4", "5", "6", "7", "8", "9"] + ); */ + } +} \ No newline at end of file diff --git a/flow/src/tpch/query_4_distributed.rs b/flow/src/tpch/query_4_distributed.rs new file mode 100644 index 0000000..ca677c0 --- /dev/null +++ b/flow/src/tpch/query_4_distributed.rs @@ -0,0 +1,210 @@ +use std::hash::{Hash, Hasher}; + +use __staged::stream::Windowed; +use serde::{de::DeserializeOwned, Serialize}; +use base::tpch::query_4::{LineItem, Order}; +use chrono::NaiveDate; +use 
hydroflow_plus::*; +use hydroflow_plus::util::cli::HydroCLI; +use hydroflow_plus_cli_integration::{CLIRuntime, HydroflowPlusMeta}; +use stageleft::*; + +/* Inputs on single machine + */ +fn distributed_join<'a, Key: Hash + Eq + Serialize + DeserializeOwned, T: Serialize + DeserializeOwned, U: Serialize + DeserializeOwned, D: Deploy<'a, ClusterId = u32>>(build: Stream<'a, (Key, T), Windowed, D::Process>, probe: Stream<'a, (Key, U), Windowed, D::Process>, cluster: &D::Cluster) -> Stream<'a, (Key, (T, U)), Windowed, D::Cluster> { + + // Note: Let compiler move the persist after the broadcast when user puts it before the broadcast + let build = build.broadcast_bincode(cluster).all_ticks(); + + let all_ids_vec = cluster.ids(); + + let probe = probe.map(q!(|(key, value)| { + let mut hasher = std::collections::hash_map::DefaultHasher::new(); + key.hash(&mut hasher); + let id = (hasher.finish() % all_ids_vec.len() as u64) as u32; + + (id, (key, value)) + + })).send_bincode(cluster); + + let joined = build.join(probe.tick_batch()); + + joined +} + +pub fn query_4_distributed<'a, D: Deploy<'a, ClusterId = u32>>( + flow: &FlowBuilder<'a, D>, + process_spec: &impl ProcessSpec<'a, D>, + cluster_spec: &impl ClusterSpec<'a, D>, + lineitem: RuntimeData>, + orders: RuntimeData>, + intermediate_aggregation: bool, +) -> D::Process { + let process = flow.process(process_spec); + let cluster = flow.cluster(cluster_spec); + + println!("Query 4 distributed with intermediate aggregation: {}", intermediate_aggregation); + + // 1. Scan orders + let orders_filtered = flow.source_iter(&process, orders) + // 1.2 Filter orders on o_orderdate >= '1993-07-01' and < '1993-10-01' + .filter(q!(|order: &Order| { + order.order_date >= NaiveDate::from_ymd_opt(1993, 7, 1).unwrap() + && order.order_date < NaiveDate::from_ymd_opt(1993, 10, 1).unwrap() + })); + + // 2. Scan from lineitem. 
+ let line_items_filtered = flow.source_iter(&process, lineitem) + // 2.2 Filter lineitem on l_commitdate < l_receiptdate + .filter(q!(|line_item: &LineItem| line_item.commit_date < line_item.receiptdate)); + + // 3. Join the two. o_orderkey = l_orderkey, payload: o_orderpriority + // TODO: How to define the build side? + // TODO: Multiset join in HF+? + // XXX: Semijoin? + // Build side: Orders? + // Probe side: LineItem? + let line_items_join = line_items_filtered.map(q!(|l|(l.order_key, None::))); + let orders_join = orders_filtered.map(q!(|e|(e.order_key, e.order_priority))); + + // Distributed join + //let joined = orders_join.join(line_items_join); + let joined = distributed_join::<_, _, _, D>(orders_join, line_items_join, &cluster); + + // 4. Aggregate. + // Hash aggregation in DuckDB + // 4.1 Group by: "o_orderpriority" + // 4.2 Count + let agg = joined.map(q!(|x|(x.1.0, 1))); + + // Example of alternate code generation + // Note: Let compiler replicate reduction before the send when commutative + let agg = if intermediate_aggregation { + // Intermediate aggregation + agg.reduce_keyed(q!(|acc, x| *acc = *acc + x)) + } else { + agg + }; + + // Collect the results and reduce again over all ticks + let agg = agg.send_bincode_interleaved(&process).all_ticks().reduce_keyed(q!(|acc, x| *acc = *acc + x)); + + // 5. 
Print: "o_orderpriority", "order_count" + // Reduce to single output and then print all at once + agg.fold(q!(|| vec![]), q!(|acc, x| {acc.push(x);})) + .for_each(q!(|x: Vec<(String, i32)>| { + let mut x = x; + x.sort_by(|a, b| a.0.cmp(&b.0)); + println!("{:?}", x); + })); + + process +} + +pub fn query_4_distributed_partitioned<'a, D: Deploy<'a, ClusterId = u32>>( + flow: &FlowBuilder<'a, D>, + process_spec: &impl ProcessSpec<'a, D>, + cluster_spec: &impl ClusterSpec<'a, D>, + lineitem: RuntimeData>, + orders: RuntimeData>, + intermediate_aggregation: bool, +) -> D::Process { + let process = flow.process(process_spec); + let cluster = flow.cluster(cluster_spec); + + println!("Query 4 distributed with intermediate aggregation: {}", intermediate_aggregation); + + let all_ids_vec = cluster.ids(); + + // 1. Scan orders + let orders_filtered = flow.source_iter(&cluster, orders) + // 1.2 Filter orders on o_orderdate >= '1993-07-01' and < '1993-10-01' + .filter(q!(|order: &Order| { + order.order_date >= NaiveDate::from_ymd_opt(1993, 7, 1).unwrap() + && order.order_date < NaiveDate::from_ymd_opt(1993, 10, 1).unwrap() + })); + + // 2. Scan from lineitem. + let line_items_filtered = flow.source_iter(&cluster, lineitem) + // Filter out hash partition of this cluster node + .filter(q!(|lineitem: &LineItem| { + let mut hasher = std::collections::hash_map::DefaultHasher::new(); + lineitem.order_key.hash(&mut hasher); + let id = (hasher.finish() % all_ids_vec.len() as u64) as u32; + // TODO: Get ID of this cluster node + let my_id = 32; + //let my_id = cluster.id() as u32; + + id == my_id + })) + // 2.2 Filter lineitem on l_commitdate < l_receiptdate + .filter(q!(|line_item: &LineItem| line_item.commit_date < line_item.receiptdate)); + + // 3. Join the two. o_orderkey = l_orderkey, payload: o_orderpriority + // XXX: Semijoin? + // Build side: Orders? + // Probe side: LineItem? 
+ let line_items_join = line_items_filtered.map(q!(|l|(l.order_key, None::))); + let orders_join = orders_filtered.map(q!(|e|(e.order_key, e.order_priority))); + + // Local join + let joined = orders_join.all_ticks().join(line_items_join); + //let joined = distributed_join::<_, _, _, D>(orders_join, line_items_join, &cluster); + + // 4. Aggregate. + // Hash aggregation in DuckDB + // 4.1 Group by: "o_orderpriority" + // 4.2 Count + let agg = joined.map(q!(|x|(x.1.0, 1))); + + // Example of alternate code generation + // Note: Let compiler replicate reduction before the send when commutative + let agg = if intermediate_aggregation { + // Intermediate aggregation + agg.reduce_keyed(q!(|acc, x| *acc = *acc + x)) + } else { + agg + }; + + // Collect the results and reduce again over all ticks + let agg = agg.send_bincode_interleaved(&process).all_ticks().reduce_keyed(q!(|acc, x| *acc = *acc + x)); + + // 5. Print: "o_orderpriority", "order_count" + // Reduce to single output and then print all at once + agg.fold(q!(|| vec![]), q!(|acc, x| {acc.push(x);})) + .for_each(q!(|x: Vec<(String, i32)>| { + let mut x = x; + x.sort_by(|a, b| a.0.cmp(&b.0)); + println!("{:?}", x); + })); + + process +} + +#[stageleft::entry] +pub fn query_4_distributed_runtime<'a>( + flow: FlowBuilder<'a, CLIRuntime>, + cli: RuntimeData<&'a HydroCLI>, + lineitem: RuntimeData>, + orders: RuntimeData>, + intermediate_aggregation: bool, +) -> impl Quoted<'a, Hydroflow<'a>> { + query_4_distributed(&flow, &cli, &cli, lineitem, orders, intermediate_aggregation); + flow.extract() + .optimize_default() + .with_dynamic_id(q!(cli.meta.subgraph_id)) +} + +#[stageleft::entry] +pub fn query_4_distributed_partitioned_runtime<'a>( + flow: FlowBuilder<'a, CLIRuntime>, + cli: RuntimeData<&'a HydroCLI>, + lineitem: RuntimeData>, + orders: RuntimeData>, + intermediate_aggregation: bool, +) -> impl Quoted<'a, Hydroflow<'a>> { + query_4_distributed_partitioned(&flow, &cli, &cli, lineitem, orders, 
intermediate_aggregation); + flow.extract() + .optimize_default() + .with_dynamic_id(q!(cli.meta.subgraph_id)) +} \ No newline at end of file diff --git a/flow_macro/Cargo.toml b/flow_macro/Cargo.toml index 49ea9f9..ba1113f 100644 --- a/flow_macro/Cargo.toml +++ b/flow_macro/Cargo.toml @@ -6,7 +6,7 @@ edition = "2021" [lib] proc-macro = true -path = "src/lib.rs" +path = "../flow/src/lib.rs" [features] default = ["macro"] @@ -17,6 +17,9 @@ hydroflow_plus = { git = "https://github.com/hydro-project/hydroflow.git" } tokio = { version = "1.16", features = [ "full" ] } stageleft = { git = "https://github.com/hydro-project/hydroflow.git" } hydroflow_plus_cli_integration = { git = "https://github.com/hydro-project/hydroflow.git" } +# XXX: Need to add dependencies here as well? +chrono = { version = "0.4.20", features = [ "serde" ], default-features = true } +base = {path = "../base"} [build-dependencies] stageleft_tool = { git = "https://github.com/hydro-project/hydroflow.git" } diff --git a/flow_macro/src/lib.rs b/flow_macro/src/lib.rs deleted file mode 100644 index da4d796..0000000 --- a/flow_macro/src/lib.rs +++ /dev/null @@ -1 +0,0 @@ -stageleft::stageleft_macro_crate!(); diff --git a/hydro_local_benchmarks/Cargo.toml b/hydro_local_benchmarks/Cargo.toml index e0bb965..cc7f4f9 100644 --- a/hydro_local_benchmarks/Cargo.toml +++ b/hydro_local_benchmarks/Cargo.toml @@ -27,4 +27,8 @@ harness = false [[bench]] name = "vectorized_sum" +harness = false + +[[bench]] +name = "tpch" harness = false \ No newline at end of file diff --git a/hydro_local_benchmarks/benches/tpch.rs b/hydro_local_benchmarks/benches/tpch.rs new file mode 100644 index 0000000..6922383 --- /dev/null +++ b/hydro_local_benchmarks/benches/tpch.rs @@ -0,0 +1,111 @@ +use base::tpch::{ + initialize::initialize_database, query_1::load as load_q1, query_19::load as load_q19, + query_4::load as load_q4, +}; +//use base::tpch::query_1::query as query_1_base; +use base::tpch::query_1::query_duckdb as 
query_1_duckdb; +use base::tpch::query_19::query_duckdb as query_19_duckdb; +use hydroflow_base::tpch::query_1::query as query_1_hf; +use hydroflow_base::tpch::query_1::query_base as query_1_base; +use hydroflow_base::tpch::query_19::query as query_19_hf; +use hydroflow_base::tpch::query_19::query_base as query_19_base; +//use base::tpch::query_4::query as query_4_base; +use base::tpch::query_4::query_duckdb as query_4_duckdb; +use criterion::{criterion_group, criterion_main, Criterion}; +use hydroflow_base::tpch::query_4::query as query_4_hf; +use hydroflow_base::tpch::query_4::query_base as query_4_base; + +/** +* Query 1 is a straight pipeline that is well suited for compiling. +* We expect HF to be faster than DuckDB. +*/ +fn tpch_sf1_query_1(c: &mut Criterion) { + let scale_factor = 1; + let conn = initialize_database(scale_factor); + + c.bench_function("query_1_baseline", |b| { + b.iter_batched( + || load_q1(&conn), + |line_items| query_1_base(line_items), + criterion::BatchSize::SmallInput, + ) + }); + + c.bench_function("query_1_hf", |b| { + b.iter_batched( + || load_q1(&conn), + |line_items| query_1_hf(line_items), + criterion::BatchSize::SmallInput, + ) + }); + + // Set duckdb for benchmarking to single thread + let _ = conn.execute("SET threads = 1;", []); + c.bench_function("query_1_duckdb", |b| b.iter(|| query_1_duckdb(&conn, None))); +} + +/** + * Query 4 is medium complex. All implementations should be on par. 
+ */ +fn tpch_sf1_query_4(c: &mut Criterion) { + let scale_factor = 1; + let conn = initialize_database(scale_factor); + + c.bench_function("query_4_baseline", |b| { + b.iter_batched( + || load_q4(&conn), + |(line_items, orders)| query_4_base(line_items, orders), + criterion::BatchSize::SmallInput, + ) + }); + + c.bench_function("query_4_hf", |b| { + b.iter_batched( + || load_q4(&conn), + |(line_items, orders)| query_4_hf(line_items, orders), + criterion::BatchSize::SmallInput, + ) + }); + + // Set duckdb for benchmarking to single thread + let _ = conn.execute("SET threads = 1;", []); + c.bench_function("query_4_duckdb", |b| b.iter(|| query_4_duckdb(&conn, None))); +} + +/** + * Query 19 is well suited for vectorization and should be faster with DuckDB. + */ +fn tpch_sf1_query_19(c: &mut Criterion) { + let scale_factor = 1; + let conn = initialize_database(scale_factor); + + c.bench_function("query_19_baseline", |b| { + b.iter_batched( + || load_q19(&conn), + |(line_items, part)| query_19_base(line_items, part), + criterion::BatchSize::SmallInput, + ) + }); + + c.bench_function("query_19_hf", |b| { + b.iter_batched( + || load_q19(&conn), + |(line_items, part)| query_19_hf(line_items, part), + criterion::BatchSize::SmallInput, + ) + }); + + // Set duckdb for benchmarking to single thread + let _ = conn.execute("SET threads = 1;", []); + c.bench_function("query_19_duckdb", |b| { + b.iter(|| query_19_duckdb(&conn, None)) + }); +} + +criterion_group!( + benches, + tpch_sf1_query_1, + tpch_sf1_query_4, + tpch_sf1_query_19, +); +criterion_main!(benches); diff --git a/hydroflow_base/src/lib.rs b/hydroflow_base/src/lib.rs index 46f5815..e67938b 100644 --- a/hydroflow_base/src/lib.rs +++ b/hydroflow_base/src/lib.rs @@ -1,3 +1,4 @@ pub mod kmeans_hf; pub mod matrix_vector_multiply; -pub mod vectorized_sum; \ No newline at end of file +pub mod vectorized_sum; +pub mod tpch; \ No newline at end of file diff --git a/hydroflow_base/src/tpch/mod.rs 
b/hydroflow_base/src/tpch/mod.rs new file mode 100644 index 0000000..46161f9 --- /dev/null +++ b/hydroflow_base/src/tpch/mod.rs @@ -0,0 +1,3 @@ +pub mod query_1; +pub mod query_4; +pub mod query_19; \ No newline at end of file diff --git a/hydroflow_base/src/tpch/query_1.rs b/hydroflow_base/src/tpch/query_1.rs new file mode 100644 index 0000000..fdd07e6 --- /dev/null +++ b/hydroflow_base/src/tpch/query_1.rs @@ -0,0 +1,98 @@ +use chrono::NaiveDate; +use hydroflow::hydroflow_syntax; + +use base::tpch::query_1::{LineItem, LineItem2, LineItemAgg1, LineItemAgg2, query as query_base_original}; + +pub fn query(line_items: Vec) { + + let mut flow = hydroflow_syntax! { + // 1. Scan from lineitem: "l_returnflag", "l_linestatus", "l_quantity", "l_extendedprice", "l_discount", "l_tax", "l_shipdate" + line_items = source_iter(line_items); + + // 2. Filter l_shipdate <= date '1998-12-01' - interval '90' day i.e., l_shipdate <= '1998-09-02'. + // (2.1 Evaluate expression.) + // 2.2. Filter on the expression. Need everything apart from l_shipdate. + line_items_filtered = line_items -> filter(|x| x.l_shipdate <= NaiveDate::from_ymd_opt(1998, 9, 2).unwrap()); + + // 3. Evaluate expressions for the aggregations. + // XXX: Skipping projection of l_tax, could drop that column. + line_items_proj = line_items_filtered -> map(|x| { + // Project the fields. + let x: LineItem2 = x.into(); + // l_extendedprice * (1 - l_discount) AS disc_price, + let disc_price = x.l_extendedprice * (1.0 - x.l_discount); + // l_extendedprice * (1 - l_discount) * (1 + l_tax) AS charge, + let charge = disc_price * (1.0 + x.l_tax); + return (x, disc_price, charge) + }); + + // 4. Group by l_returnflag, l_linestatus & compute aggregates. 
+ agg = line_items_proj -> map(|(x, disc_price, charge)| { + // Group by l_returnflag, l_linestatus + ((x.l_returnflag, x.l_linestatus), (x.l_quantity, x.l_extendedprice, disc_price, charge)) + }) + -> fold_keyed(Default::default, |acc: &mut LineItemAgg1, x| { + // Hash aggregate without average + let (l_quantity, l_extendedprice, disc_price, charge) = x; + acc.sum_qty += l_quantity; + acc.sum_base_price += l_extendedprice; + acc.sum_disc_price += disc_price; + acc.sum_charge += charge; + acc.count_order += 1; + }) + -> map(|(key, value)| { + // Finalize aggregation with average + let (l_returnflag, l_linestatus) = key; + let value: LineItemAgg2 = value.into(); + ((l_returnflag, l_linestatus), value) + }); + + // 5. Sort by l_returnflag, l_linestatus. + // XXX: InkFuse is skipping this step. + ordered = agg -> sort_by_key(|x| &x.0); + + // Attach the sink for printing. + ordered -> for_each(|((l_returnflag, l_linestatus), x): ((char, char), LineItemAgg2)| { + println!("{}, {}, {}, {}, {}, {}, {}, {}, {}, {}", + l_returnflag, + l_linestatus, + x.sum_qty, + x.sum_base_price, + x.sum_disc_price, + x.sum_charge, + x.avg_qty, + x.avg_price, + x.avg_disc, + x.count_order, + ); + }); + }; + + flow.run_available(); +} + +pub fn query_base(line_items: Vec) { + + let mut flow = hydroflow_syntax! 
{ + source_iter([line_items]) -> for_each(|line_items|{ + query_base_original(line_items); + }); + }; + + flow.run_available(); +} + +#[cfg(test)] +mod tests { + use super::*; + use base::tpch::initialize::initialize_database; + + #[test] + fn test_query() { + let conn = initialize_database(1); + + let line_items = LineItem::load(&conn, Some(1000)); + + super::query(line_items); + } +} \ No newline at end of file diff --git a/hydroflow_base/src/tpch/query_19.rs b/hydroflow_base/src/tpch/query_19.rs new file mode 100644 index 0000000..13ca145 --- /dev/null +++ b/hydroflow_base/src/tpch/query_19.rs @@ -0,0 +1,97 @@ +use hydroflow::hydroflow_syntax; + +use base::tpch::query_19::{Part, LineItem, query as query_base_original}; + +pub fn query(line_items: Vec, part: Vec) { + + let mut flow = hydroflow_syntax! { + // 1. Scan part. + part_filtered = source_iter(part) + // 2. Pushed down filter on part. + -> filter(|part| { + Part::filter_1(&part.p_brand, &part.p_size, &part.p_container) + || Part::filter_2(&part.p_brand, &part.p_size, &part.p_container) + || Part::filter_3(&part.p_brand, &part.p_size, &part.p_container) + }); + + // 3. Scan lineitem. + lineitem_filtered = source_iter(line_items) + // 4. Pushed down lineitem filter. + // l_shipinstruct = "DELIVER IN PERSON" + // l_shipmode = "AIR" or "AIR REG" + -> filter(|lineitem| lineitem.l_shipinstruct == "DELIVER IN PERSON" && (lineitem.l_shipmode == "AIR" || lineitem.l_shipmode == "AIR REG")) + -> filter(|lineitem| LineItem::filter_1(&lineitem.l_quantity) || LineItem::filter_2(&lineitem.l_quantity) || LineItem::filter_3(&lineitem.l_quantity)); + + // 5. Join the two + // Build: Part? + // Probe: LineItem? 
+ + // Keys left (p_partkey) + // Payload left (p_brand, p_container, p_size) + part_filtered -> map(|p| (p.p_partkey, (p.p_brand, p.p_container, p.p_size))) -> [0]joined; + /* .fold(HashMap::new(), |mut map, (key, value)| { + map.insert(key, value); + map + }); */ + + // Keys right (l_partkey) + // Payload right (l_quantity, l_discount, l_extendedprice) + lineitem_filtered -> map(|l| (l.l_partkey, (l.l_quantity, l.l_discount, l.l_extendedprice))) -> [1]joined; + + joined = join_multiset() -> map(|(_key, (part_payload, lineitem_payload))| { + let (p_brand, p_container, p_size) = part_payload; + let (l_quantity, l_discount, l_extendedprice) = lineitem_payload; + (p_brand, p_container, p_size, l_quantity, l_discount, l_extendedprice) + }); + + // 6. Filter again, we need to make sure the right tuples survived. + join_filtered = joined -> filter(|(p_brand, p_container, p_size, l_quantity, _l_discount, _l_extendedprice)| { + (Part::filter_1(&p_brand, &p_size, &p_container) && LineItem::filter_1(&l_quantity)) + || (Part::filter_2(&p_brand, &p_size, &p_container) && LineItem::filter_2(&l_quantity)) + || (Part::filter_3(&p_brand, &p_size, &p_container) && LineItem::filter_3(&l_quantity)) + }); + + // 7. Aggregate the result. + // 7.1 Compute (l_extendedprice * (1 - l_discount)) + // 7.2. Aggregate sum + agg = join_filtered -> map(|(_p_brand, _p_container, _p_size, _l_quantity, l_discount, l_extendedprice)| { + l_extendedprice * (1.0 - l_discount) + }) -> reduce(|a, b| *a += b); + + + // 8. Print + agg -> for_each(|x| { + println!("{:?}", x); + }); + }; + + flow.run_available(); +} + +pub fn query_base(line_items: Vec, part: Vec) { + + let mut flow = hydroflow_syntax! 
{ + source_iter([(line_items, part)]) -> for_each(|(line_items, part)|{ + query_base_original(line_items, part); + }); + }; + + flow.run_available(); +} + +#[cfg(test)] +mod tests { + use super::*; + use base::tpch::initialize::initialize_database; + + #[test] + fn test_query() { + let limit = None; + let conn = initialize_database(1); + + let line_items = LineItem::load(&conn, limit); + let part = Part::load(&conn, limit); + + super::query(line_items, part); + } +} \ No newline at end of file diff --git a/hydroflow_base/src/tpch/query_4.rs b/hydroflow_base/src/tpch/query_4.rs new file mode 100644 index 0000000..b92bf95 --- /dev/null +++ b/hydroflow_base/src/tpch/query_4.rs @@ -0,0 +1,84 @@ +use chrono::NaiveDate; +use hydroflow::hydroflow_syntax; + +use base::tpch::query_4::{LineItem, Order, query as query_base_original}; + +pub fn query(line_items: Vec, orders: Vec) { + + let mut flow = hydroflow_syntax! { + // 1. Scan orders + orders_filtered = source_iter(orders) + // 1.2 Filter orders on o_orderdate >= '1993-07-01' and < '1993-10-01' + -> filter(|order| { + order.order_date >= NaiveDate::from_ymd_opt(1993, 7, 1).unwrap() + && order.order_date < NaiveDate::from_ymd_opt(1993, 10, 1).unwrap() + }); + + // 2. Scan from lineitem. + line_items_filtered = source_iter(line_items) + // 2.2 Filter lineitem on l_commitdate < l_receiptdate + -> filter(|line_item| line_item.commit_date < line_item.receiptdate); + + // 3. Join the two. o_orderkey = l_orderkey, payload: o_orderpriority + // XXX: Semijoin? + // Build side: Orders? + // Probe side: LineItem? + orders_filtered -> map(|e|(e.order_key, e.order_priority)) -> [0]joined; + line_items_filtered -> map(|l|(l.order_key, None::)) -> [1]joined; + // Note: Implementing a semijoin using a hash join with unique output. + joined = join() -> map(|x| x.1.0); + + // 4. Aggregate. 
+ // Hash aggregation in DuckDB + // 4.1 Group by: "o_orderpriority" + // 4.2 Count + // XXX: Why is it legal to return a value when it is not used afterwards? + agg = joined -> map(|x: String| (x, 1)) -> reduce_keyed(|acc, x| *acc = *acc + x); + + // 5. Print: "o_orderpriority", "order_count" + agg -> for_each(|x| println!("{:?}", x)); + }; + + flow.run_available(); +} + +pub fn query_base(line_items: Vec, orders: Vec) { + + let mut flow = hydroflow_syntax! { + source_iter([(line_items, orders)]) -> for_each(|(line_items, orders)|{ + query_base_original(line_items, orders); + }); + }; + + flow.run_available(); +} + +#[cfg(test)] +mod tests { + use super::*; + use base::tpch::initialize::initialize_database; + + #[test] + fn test_query() { + let conn = initialize_database(1); + + let limit = Some(1000); + + let line_items = LineItem::load(&conn, limit); + let orders = Order::load(&conn, limit); + + super::query(line_items, orders); + } + + #[test] + fn test_query_base() { + let conn = initialize_database(1); + + let limit = Some(1000); + + let line_items = LineItem::load(&conn, limit); + let orders = Order::load(&conn, limit); + + super::query_base(line_items, orders); + } +} \ No newline at end of file diff --git a/requirements.txt b/requirements.txt new file mode 100644 index 0000000..e87f558 --- /dev/null +++ b/requirements.txt @@ -0,0 +1,5 @@ +duckdb +jupysql +pandas +matplotlib +duckdb-engine \ No newline at end of file