Skip to content

Commit

Permalink
Streaming analytics benchmark initial commit
Browse files Browse the repository at this point in the history
  • Loading branch information
Tiemo Bang committed May 15, 2024
1 parent 62ea5d3 commit 142d4ae
Show file tree
Hide file tree
Showing 16 changed files with 298 additions and 22 deletions.
38 changes: 19 additions & 19 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

3 changes: 2 additions & 1 deletion base/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2,4 +2,5 @@ pub mod kmeans_baseline;
pub mod point;
pub mod matrix_vector_multiply;
pub mod vectorized_sum;
pub mod tpch;
pub mod tpch;
pub mod nexmark;
2 changes: 2 additions & 0 deletions base/src/nexmark/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
Original benchmark: https://datalab.cs.pdx.edu/niagara/pstream/nexmark.pdf
Baseline: https://hpi.de/rabl/publications/Document/puma-rabl/mod0513-grulichA-hm.pdf/981eadf59e5f3a27c40c54228c90b7e0.html?tx_extbibsonomycsl_publicationlist%5Baction%5D=view&tx_extbibsonomycsl_publicationlist%5Bpreview%5D=large&cHash=4caa993397abe8edfc23166e187545d0
8 changes: 8 additions & 0 deletions base/src/nexmark/mod.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
pub mod query_1;
pub mod query_2;
pub mod query_3;
pub mod query_4;
pub mod query_5;
pub mod query_6;
pub mod query_7;
pub mod query_8;
5 changes: 5 additions & 0 deletions base/src/nexmark/query_1.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
/*
SELECT itemid, DOLTOEUR(price),
bidderId, bidTime
FROM bid;
*/
9 changes: 9 additions & 0 deletions base/src/nexmark/query_2.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
/*
SELECT itemid, price
FROM bid
WHERE itemid = 1007 OR
itemid = 1020 OR
itemid = 2001 OR
itemid = 2019 OR
itemid = 1087;
*/
9 changes: 9 additions & 0 deletions base/src/nexmark/query_3.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
/*
SELECT person.name, person.city,
person.state, open auction.id
FROM open auction, person, item
WHERE open auction.sellerId = person.id
AND person.state = ‘OR’
AND open auction.itemid = item.id
AND item.categoryId = 10;
*/
7 changes: 7 additions & 0 deletions base/src/nexmark/query_4.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
/*
SELECT C.id, AVG(CA.price)
FROM category C, item I, closed auction CA
WHERE C.id = I.categoryId
AND I.id = CA.itemid
GROUP BY C.id;
*/
90 changes: 90 additions & 0 deletions base/src/nexmark/query_5.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,90 @@
/* Original nexmark query
SELECT bid.itemid
FROM bid [RANGE 60 MINUTES PRECEDING]
WHERE (SELECT COUNT(bid.itemid)
FROM bid [PARTITION BY bid.itemid
RANGE 60 MINUTES PRECEDING])
>= ALL (SELECT COUNT(bid.itemid)
FROM bid [PARTITION BY bid.itemid
RANGE 60 MINUTES PRECEDING];
*/

/* Apache Flink query
-- -------------------------------------------------------------------------------------------------
-- Query 5: Hot Items
-- -------------------------------------------------------------------------------------------------
-- Which auctions have seen the most bids in the last period?
-- Illustrates sliding windows and combiners.
--
-- The original Nexmark Query5 calculate the hot items in the last hour (updated every minute).
-- To make things a bit more dynamic and easier to test we use much shorter windows,
-- i.e. in the last 10 seconds and update every 2 seconds.
-- -------------------------------------------------------------------------------------------------
CREATE TABLE nexmark_q5 (
auction BIGINT,
num BIGINT
) WITH (
'connector' = 'blackhole'
);
INSERT INTO nexmark_q5
SELECT AuctionBids.auction, AuctionBids.num
FROM (
SELECT
auction,
count(*) AS num,
window_start AS starttime,
window_end AS endtime
FROM TABLE(
HOP(TABLE bid, DESCRIPTOR(dateTime), INTERVAL '2' SECOND, INTERVAL '10' SECOND))
GROUP BY auction, window_start, window_end
) AS AuctionBids
JOIN (
SELECT
max(CountBids.num) AS maxn,
CountBids.starttime,
CountBids.endtime
FROM (
SELECT
count(*) AS num,
window_start AS starttime,
window_end AS endtime
FROM TABLE(
HOP(TABLE bid, DESCRIPTOR(dateTime), INTERVAL '2' SECOND, INTERVAL '10' SECOND))
GROUP BY auction, window_start, window_end
) AS CountBids
GROUP BY CountBids.starttime, CountBids.endtime
) AS MaxBids
ON AuctionBids.starttime = MaxBids.starttime AND
AuctionBids.endtime = MaxBids.endtime AND
AuctionBids.num >= MaxBids.maxn;
*/

/*
Grizzly parameters: 10s windows every 1s, sum aggregation
*/

use chrono::{Duration, NaiveDateTime};

struct SlidingWindow<T> {
window_size: Duration,
slide_size: Duration,
window: Vec<T>,
window_start: NaiveDateTime,
window_end: NaiveDateTime,
}

impl<T> SlidingWindow::<T> {
fn new(window_size: Duration, slide_size: Duration) -> Self {
SlidingWindow {
window_size,
slide_size,
window: Vec::new(),
window_start: NaiveDateTime::from_timestamp(0, 0),
window_end: NaiveDateTime::from_timestamp(0, 0),
}
}


}
6 changes: 6 additions & 0 deletions base/src/nexmark/query_6.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
/*
SELECT AVG(CA.price), CA.sellerId
FROM closed auction CA
[PARTITION BY CA.sellerId
ROWS 10 PRECEDING];
*/
7 changes: 7 additions & 0 deletions base/src/nexmark/query_7.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
/*
SELECT bid.price, bid.itemid
FROM bid where bid.price =
(SELECT MAX(bid.price)
FROM bid [FIXEDRANGE
10 MINUTES PRECEDING]);
*/
6 changes: 6 additions & 0 deletions base/src/nexmark/query_8.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
/*
SELECT person.id, person.name
FROM person [RANGE 12 HOURS PRECEDING],
open auction [RANGE 12 HOURS PRECEDING]
WHERE person.id = open auction.sellerId;
*/
3 changes: 2 additions & 1 deletion hydroflow_base/src/lib.rs
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
pub mod kmeans_hf;
pub mod matrix_vector_multiply;
pub mod vectorized_sum;
pub mod tpch;
pub mod tpch;
pub mod nexmark;
1 change: 1 addition & 0 deletions hydroflow_base/src/nexmark/mod.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
pub mod sliding_window;
Loading

0 comments on commit 142d4ae

Please sign in to comment.