Skip to content

Commit

Permalink
update query perf tests
Browse files Browse the repository at this point in the history
  • Loading branch information
negrel committed Jun 11, 2024
1 parent ab839b5 commit 70d7d36
Show file tree
Hide file tree
Showing 12 changed files with 182 additions and 166 deletions.
2 changes: 1 addition & 1 deletion cmd/addevents/config.go
Original file line number Diff line number Diff line change
Expand Up @@ -51,7 +51,7 @@ func ProvideConfig() Config {
}

// Generate extra paths
for i := 0; i < 10_000; i++ {
for i := 0; i < 1000; i++ {
part := 1 + rand.Intn(8)
var path []string
for j := 0; j < part; j++ {
Expand Down
3 changes: 3 additions & 0 deletions tests/perf/clickhouse/query/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,9 @@ test:
&& for query in ./queries/*.sh; do\
echo "$$query" \
&& bash "$$query" \
&& $(DOCKER) exec ${COMPOSE_PROJECT_NAME}-clickhouse-1 clickhouse-client \
--format Pretty --database "$$PRISME_CLICKHOUSE_DB" \
-q "$$(bash "$$query") LIMIT 10" \
&& $(DOCKER) exec ${COMPOSE_PROJECT_NAME}-clickhouse-1 clickhouse-benchmark \
--database "$$PRISME_CLICKHOUSE_DB" -i 10 -q "$$(bash "$$query")"; \
done
Expand Down
Original file line number Diff line number Diff line change
@@ -1,21 +1,27 @@
interval=43200 # seconds -> 12H
timestamp=$(($(date '+%s') - 7257600)) # 3 months ago
domain="'localhost', 'foo.mywebsite.localhost'"
path="'/', '/foo', '/blog'"
operating_system="'Windows', 'Linux', 'Mac OS X', 'iOS', 'Android'"
browser_family="'Firefox', 'Chrome', 'Edge', 'Opera', 'Safari'"
referrer_domain="'direct', 'twitter.com', 'facebook.com'"
country_code="'FR', 'BG', 'US'"
domains="'localhost', 'foo.mywebsite.localhost'"
locations="'FR', 'BG', 'US'"
paths="'/foo', '/foo/bar', '/blog', '/blog/misc/a-nice-post'"

cat <<EOF
SELECT toStartOfInterval(timestamp, INTERVAL 60 second) AS time, COUNT(*)
FROM events_pageviews
WHERE timestamp >= $timestamp
AND domain IN ($domain)
AND path IN ($path)
AND operating_system IN ($operating_system)
AND browser_family IN ($browser_family)
AND referrer_domain IN ($referrer_domain)
AND country_code IN ($country_code)
-- Pageview counts per time bucket, limited to pageviews that belong to
-- sessions matching the domain and country filters.
SELECT
    toStartOfInterval(timestamp, INTERVAL ${interval} second) AS time,
    COUNT(*)
FROM pageviews
WHERE timestamp >= toDateTime(${timestamp})
    AND timestamp <= now()
    AND domain IN (${domains})
    AND path IN (${paths})
    AND session_uuid IN (
        -- Sessions that started or ended inside the queried window.
        SELECT session_uuid
        FROM sessions
        WHERE (
                (session_timestamp >= toDateTime(${timestamp}) AND session_timestamp <= now())
                OR (exit_timestamp >= toDateTime(${timestamp}) AND exit_timestamp <= now())
            )
            AND domain IN (${domains})
            AND country_code IN (${locations})
        GROUP BY session_uuid
    )
GROUP BY time
ORDER BY time
EOF
35 changes: 19 additions & 16 deletions tests/perf/clickhouse/query/queries/02_select_sessions_timeserie.sh
Original file line number Diff line number Diff line change
@@ -1,22 +1,25 @@
interval=43200 # seconds -> 12H
timestamp=$(($(date '+%s') - 7257600)) # 3 months ago
domain="'localhost', 'foo.mywebsite.localhost'"
path="'/', '/foo', '/blog'"
operating_system="'Windows', 'Linux', 'Mac OS X', 'iOS', 'Android'"
browser_family="'Firefox', 'Chrome', 'Edge', 'Opera', 'Safari'"
referrer_domain="'direct', 'twitter.com', 'facebook.com'"
country_code="'FR', 'BG', 'US'"
domains="'localhost', 'foo.mywebsite.localhost'"
locations="'FR', 'BG', 'US'"

cat <<EOF
SELECT toStartOfInterval(timestamp, INTERVAL 60 second) AS time, COUNT(*)
FROM events_pageviews
WHERE timestamp >= $timestamp
AND is_entry = true
AND domain IN ($domain)
AND path IN ($path)
AND operating_system IN ($operating_system)
AND browser_family IN ($browser_family)
AND referrer_domain IN ($referrer_domain)
AND country_code IN ($country_code)
-- Session counts per time bucket, bucketed on the exit timestamp of each session.
WITH session_exits AS (
    -- One row per session_uuid; each value comes from the row with the
    -- highest pageviews value in that group.
    SELECT
        argMax(exit_timestamp, pageviews) AS timestamp,
        argMax(visitor_id, pageviews) AS visitor_id
    FROM sessions
    WHERE (
            (session_timestamp >= toDateTime(${timestamp}) AND session_timestamp <= now())
            OR (exit_timestamp >= toDateTime(${timestamp}) AND exit_timestamp <= now())
        )
        AND domain IN (${domains})
        AND country_code IN (${locations})
    GROUP BY session_uuid
)
SELECT
    toStartOfInterval(timestamp, INTERVAL ${interval} second) AS time,
    COUNT(*)
FROM session_exits
GROUP BY time
ORDER BY time
EOF
37 changes: 19 additions & 18 deletions tests/perf/clickhouse/query/queries/03_select_top_sources.sh
Original file line number Diff line number Diff line change
@@ -1,22 +1,23 @@
interval=43200 # seconds -> 12H
timestamp=$(($(date '+%s') - 7257600)) # 3 months ago
domain="'localhost', 'foo.mywebsite.localhost'"
path="'/', '/foo', '/blog'"
operating_system="'Windows', 'Linux', 'Mac OS X', 'iOS', 'Android'"
browser_family="'Firefox', 'Chrome', 'Edge', 'Opera', 'Safari'"
referrer_domain="'direct', 'twitter.com', 'facebook.com'"
country_code="'FR', 'BG', 'US'"
domains="'localhost', 'foo.mywebsite.localhost'"
operating_systems="'Windows', 'Linux', 'Mac OS X', 'iOS', 'Android'"

cat <<EOF
SELECT referrer_domain, COUNT(*) as count
FROM events_pageviews
WHERE timestamp >= $timestamp
AND is_entry = true
AND domain IN ($domain)
AND path IN ($path)
AND operating_system IN ($operating_system)
AND browser_family IN ($browser_family)
AND referrer_domain IN ($referrer_domain)
AND country_code IN ($country_code)
GROUP BY referrer_domain
ORDER BY count DESC
-- Session counts per referrer domain, most frequent referrer first.
WITH session_referrers AS (
    -- One row per session_uuid; the referrer comes from the row with the
    -- highest pageviews value in that group.
    SELECT argMax(referrer_domain, pageviews) AS referrer
    FROM sessions
    WHERE (
            (session_timestamp >= toDateTime(${timestamp}) AND session_timestamp <= now())
            OR (exit_timestamp >= toDateTime(${timestamp}) AND exit_timestamp <= now())
        )
        AND domain IN (${domains})
        AND operating_system IN (${operating_systems})
    GROUP BY session_uuid
)
SELECT
    referrer,
    COUNT(*) AS session_count
FROM session_referrers
GROUP BY referrer
ORDER BY session_count DESC
EOF
35 changes: 18 additions & 17 deletions tests/perf/clickhouse/query/queries/04_select_top_entry_pages.sh
Original file line number Diff line number Diff line change
@@ -1,22 +1,23 @@
interval=43200 # seconds -> 12H
timestamp=$(($(date '+%s') - 7257600)) # 3 months ago
domain="'localhost', 'foo.mywebsite.localhost'"
path="'/', '/foo', '/blog'"
operating_system="'Windows', 'Linux', 'Mac OS X', 'iOS', 'Android'"
browser_family="'Firefox', 'Chrome', 'Edge', 'Opera', 'Safari'"
referrer_domain="'direct', 'twitter.com', 'facebook.com'"
country_code="'FR', 'BG', 'US'"
domains="'localhost', 'foo.mywebsite.localhost'"
browsers="'Firefox', 'Chrome', 'Edge', 'Opera', 'Safari'"

cat <<EOF
SELECT path, COUNT(*) AS pageviews
FROM events_pageviews
WHERE timestamp >= $timestamp
AND is_entry = true
AND domain IN ($domain)
AND path IN ($path)
AND operating_system IN ($operating_system)
AND browser_family IN ($browser_family)
AND referrer_domain IN ($referrer_domain)
AND country_code IN ($country_code)
-- entry_pageviews: one row per session_uuid holding the entry path taken
-- from the row with the highest pageviews value in that group.
WITH entry_pageviews AS (
    SELECT argMax(entry_path, pageviews) AS path
    FROM sessions
    WHERE (
            (session_timestamp >= toDateTime(${timestamp}) AND session_timestamp <= now())
            OR (exit_timestamp >= toDateTime(${timestamp}) AND exit_timestamp <= now())
        )
        AND domain IN (${domains})
        AND browser_family IN (${browsers})
    GROUP BY session_uuid
)
SELECT path, COUNT(*) AS session_count
FROM entry_pageviews
GROUP BY path
ORDER BY pageviews DESC
ORDER BY session_count DESC
EOF
30 changes: 16 additions & 14 deletions tests/perf/clickhouse/query/queries/05_select_top_pages.sh
Original file line number Diff line number Diff line change
@@ -1,21 +1,23 @@
interval=43200 # seconds -> 12H
timestamp=$(($(date '+%s') - 7257600)) # 3 months ago
domain="'localhost', 'foo.mywebsite.localhost'"
path="'/', '/foo', '/blog'"
operating_system="'Windows', 'Linux', 'Mac OS X', 'iOS', 'Android'"
browser_family="'Firefox', 'Chrome', 'Edge', 'Opera', 'Safari'"
referrer_domain="'direct', 'twitter.com', 'facebook.com'"
country_code="'FR', 'BG', 'US'"
domains="'localhost', 'foo.mywebsite.localhost'"
entry_paths="'/foo', '/foo/bar', '/blog', '/blog/misc/a-nice-post'"

cat <<EOF
SELECT path, COUNT(*) AS pageviews
FROM events_pageviews
WHERE timestamp >= $timestamp
AND domain IN ($domain)
AND path IN ($path)
AND operating_system IN ($operating_system)
AND browser_family IN ($browser_family)
AND referrer_domain IN ($referrer_domain)
AND country_code IN ($country_code)
FROM pageviews
WHERE (timestamp >= toDateTime(${timestamp}) AND timestamp <= now())
-- Keep only pageviews whose session started or ended inside the queried
-- window and matches the domain and entry-path filters.
-- NOTE(review): argMax over the grouping key looks equivalent to selecting
-- session_uuid directly; confirm before simplifying.
AND session_uuid IN (
    SELECT argMax(session_uuid, pageviews)
    FROM sessions
    WHERE (
            (session_timestamp >= toDateTime(${timestamp}) AND session_timestamp <= now())
            OR (exit_timestamp >= toDateTime(${timestamp}) AND exit_timestamp <= now())
        )
        AND domain IN (${domains})
        AND entry_path IN (${entry_paths})
    GROUP BY session_uuid
)
GROUP BY path
ORDER BY pageviews DESC
EOF
42 changes: 26 additions & 16 deletions tests/perf/clickhouse/query/queries/06_select_top_exit_pages.sh
Original file line number Diff line number Diff line change
@@ -1,24 +1,34 @@
interval=43200 # seconds -> 12H
timestamp=$(($(date '+%s') - 7257600)) # 3 months ago
domain="'localhost', 'foo.mywebsite.localhost'"
path="'/', '/foo', '/blog'"
operating_system="'Windows', 'Linux', 'Mac OS X', 'iOS', 'Android'"
browser_family="'Firefox', 'Chrome', 'Edge', 'Opera', 'Safari'"
referrer_domain="'direct', 'twitter.com', 'facebook.com'"
country_code="'FR', 'BG', 'US'"
domains="'localhost', 'foo.mywebsite.localhost'"
entry_paths="'/foo', '/foo/bar', '/blog', '/blog/misc/a-nice-post'"
exit_paths="'/foo', '/foo/bar', '/blog', '/blog/misc/a-nice-post'"
paths="'/blog'"
operating_systems="'Windows', 'Linux', 'Mac OS X', 'iOS', 'Android'"
browsers="'Firefox', 'Chrome', 'Edge', 'Opera', 'Safari'"
referrals="'direct', 'twitter.com', 'facebook.com'"
locations="'FR', 'BG', 'US'"

cat <<EOF
WITH exit_pageviews AS (
SELECT max(timestamp) timestamp, session_id FROM events_pageviews GROUP BY session_id
-- One row per session_uuid: the exit path taken from the row with the
-- highest pageviews value among the rows that pass the filters below.
-- NOTE(review): exit_path is filtered per row before argMax runs; if a
-- session has several rows, verify this selects the intended exit page.
SELECT argMax(exit_path, pageviews) AS path
FROM sessions
WHERE (
        (session_timestamp >= toDateTime(${timestamp}) AND session_timestamp <= now())
        OR (exit_timestamp >= toDateTime(${timestamp}) AND exit_timestamp <= now())
    )
    AND domain IN (${domains})
    AND entry_path IN (${entry_paths})
    AND operating_system IN (${operating_systems})
    AND browser_family IN (${browsers})
    AND referrer_domain IN (${referrals})
    AND country_code IN (${locations})
    AND exit_path IN (${exit_paths})
GROUP BY session_uuid
)
SELECT session_id, path, COUNT(*) AS pageviews
FROM events_pageviews
WHERE timestamp >= $timestamp
AND domain IN ($domain)
AND path IN ($path)
AND operating_system IN ($operating_system)
AND browser_family IN ($browser_family)
AND referrer_domain IN ($referrer_domain)
AND country_code IN ($country_code)
SELECT path, COUNT(*) AS pageviews
FROM exit_pageviews
GROUP BY path
ORDER BY pageviews DESC
EOF
35 changes: 18 additions & 17 deletions tests/perf/clickhouse/query/queries/07_select_top_locations.sh
Original file line number Diff line number Diff line change
@@ -1,22 +1,23 @@
interval=43200 # seconds -> 12H
timestamp=$(($(date '+%s') - 7257600)) # 3 months ago
domain="'localhost', 'foo.mywebsite.localhost'"
path="'/', '/foo', '/blog'"
operating_system="'Windows', 'Linux', 'Mac OS X', 'iOS', 'Android'"
browser_family="'Firefox', 'Chrome', 'Edge', 'Opera', 'Safari'"
referrer_domain="'direct', 'twitter.com', 'facebook.com'"
country_code="'FR', 'BG', 'US'"
domains="'localhost', 'foo.mywebsite.localhost'"
referrals="'direct', 'twitter.com', 'facebook.com'"

cat <<EOF
SELECT DISTINCT(name) AS country, COUNT(*) AS pageview
FROM entry_pageviews
JOIN countries ON entry_pageviews.country_code = countries.code
WHERE timestamp >= $timestamp
AND domain IN ($domain)
AND path IN ($path)
AND operating_system IN ($operating_system)
AND browser_family IN ($browser_family)
AND referrer_domain IN ($referrer_domain)
AND country_code IN ($country_code)
-- sessions_locations: one row per session_uuid holding the country code
-- taken from the row with the highest pageviews value in that group.
WITH sessions_locations AS (
    SELECT argMax(country_code, pageviews) AS code
    FROM sessions
    WHERE (
            (session_timestamp >= toDateTime(${timestamp}) AND session_timestamp <= now())
            OR (exit_timestamp >= toDateTime(${timestamp}) AND exit_timestamp <= now())
        )
        AND referrer_domain IN (${referrals})
    GROUP BY session_uuid
)
SELECT name AS country, COUNT(*) AS session_count
FROM sessions_locations
JOIN countries ON sessions_locations.code = countries.code
GROUP BY country
ORDER BY pageview DESC
ORDER BY session_count DESC
EOF
Original file line number Diff line number Diff line change
@@ -1,24 +1,22 @@
interval=43200 # seconds -> 12H
timestamp=$(($(date '+%s') - 7257600)) # 3 months ago
domain="'localhost', 'foo.mywebsite.localhost'"
path="'/', '/foo', '/blog'"
operating_system="'Windows', 'Linux', 'Mac OS X', 'iOS', 'Android'"
browser_family="'Firefox', 'Chrome', 'Edge', 'Opera', 'Safari'"
referrer_domain="'direct', 'twitter.com', 'facebook.com'"
country_code="'FR', 'BG', 'US'"
domains="'localhost', 'foo.mywebsite.localhost'"
entry_paths="'/foo', '/foo/bar', '/blog', '/blog/misc/a-nice-post'"

cat <<EOF
WITH exit_pageviews AS (
SELECT max(timestamp) timestamp, session_id FROM events_pageviews GROUP BY session_id
-- sessions_duration: one row per session_uuid holding exit timestamp minus
-- session start timestamp, both taken from the row with the highest
-- pageviews value. Rows whose start and exit timestamps are equal are
-- excluded before aggregation.
WITH sessions_duration AS (
    SELECT
        argMax(exit_timestamp, pageviews) - argMax(session_timestamp, pageviews) AS duration
    FROM sessions
    WHERE (
            (session_timestamp >= toDateTime(${timestamp}) AND session_timestamp <= now())
            OR (exit_timestamp >= toDateTime(${timestamp}) AND exit_timestamp <= now())
        )
        AND domain IN (${domains})
        AND entry_path IN (${entry_paths})
        AND session_timestamp != exit_timestamp
    GROUP BY session_uuid
)
SELECT avg(timestamp - entry_timestamp) AS "Visit duration"
FROM events_pageviews
WHERE timestamp >= $timestamp
AND timestamp IN (SELECT timestamp FROM exit_pageviews WHERE exit_pageviews.session_id = events_pageviews.session_id )
AND domain IN ($domain)
AND path IN ($path)
AND operating_system IN ($operating_system)
AND browser_family IN ($browser_family)
AND referrer_domain IN ($referrer_domain)
AND country_code IN ($country_code)
SELECT avg(duration) AS "Average session duration"
FROM sessions_duration
EOF
Original file line number Diff line number Diff line change
@@ -1,23 +1,21 @@
interval=43200 # seconds -> 12H
timestamp=$(($(date '+%s') - 7257600)) # 3 months ago
domain="'localhost', 'foo.mywebsite.localhost'"
path="'/', '/foo', '/blog'"
operating_system="'Windows', 'Linux', 'Mac OS X', 'iOS', 'Android'"
browser_family="'Firefox', 'Chrome', 'Edge', 'Opera', 'Safari'"
referrer_domain="'direct', 'twitter.com', 'facebook.com'"
country_code="'FR', 'BG', 'US'"
domains="'localhost', 'foo.mywebsite.localhost'"
operating_systems="'Windows', 'Linux', 'Mac OS X', 'iOS', 'Android'"

cat <<EOF
WITH visitor_visits AS (
SELECT visitor_id, COUNT(*) AS visits
FROM pageviews
WHERE timestamp >= $timestamp
AND domain IN ($domain)
AND path IN ($path)
AND operating_system IN ($operating_system)
AND browser_family IN ($browser_family)
AND referrer_domain IN ($referrer_domain)
AND country_code IN ($country_code)
GROUP BY visitor_id
-- bounces: one row per session_uuid whose largest pageviews value is 1.
-- argMax(pageviews, pageviews) yields the maximum pageviews in the group.
WITH bounces AS (
    SELECT argMax(pageviews, pageviews) AS pageviews
    FROM sessions
    WHERE (
            (session_timestamp >= toDateTime(${timestamp}) AND session_timestamp <= now())
            OR (exit_timestamp >= toDateTime(${timestamp}) AND exit_timestamp <= now())
        )
        AND domain IN (${domains})
        AND operating_system IN (${operating_systems})
    GROUP BY session_uuid
    HAVING pageviews = 1
)
SELECT COUNT(*) FROM visitor_visits WHERE visits = 1
SELECT COUNT(*) AS bounces FROM bounces
EOF
Loading

0 comments on commit 70d7d36

Please sign in to comment.