diff --git a/cmd/addevents/config.go b/cmd/addevents/config.go index a92e2ce..50836cd 100644 --- a/cmd/addevents/config.go +++ b/cmd/addevents/config.go @@ -51,7 +51,7 @@ func ProvideConfig() Config { } // Generate extra paths - for i := 0; i < 10_000; i++ { + for i := 0; i < 1000; i++ { part := 1 + rand.Intn(8) var path []string for j := 0; j < part; j++ { diff --git a/tests/perf/clickhouse/query/Makefile b/tests/perf/clickhouse/query/Makefile index 354dc30..406cd88 100644 --- a/tests/perf/clickhouse/query/Makefile +++ b/tests/perf/clickhouse/query/Makefile @@ -30,6 +30,9 @@ test: && for query in ./queries/*.sh; do\ echo "$$query" \ && bash "$$query" \ + && $(DOCKER) exec ${COMPOSE_PROJECT_NAME}-clickhouse-1 clickhouse-client \ + --format Pretty --database "$$PRISME_CLICKHOUSE_DB" \ + -q "$$(bash "$$query") LIMIT 10" \ && $(DOCKER) exec ${COMPOSE_PROJECT_NAME}-clickhouse-1 clickhouse-benchmark \ --database "$$PRISME_CLICKHOUSE_DB" -i 10 -q "$$(bash "$$query")"; \ done diff --git a/tests/perf/clickhouse/query/queries/01_select_pageviews_timeserie.sh b/tests/perf/clickhouse/query/queries/01_select_pageviews_timeserie.sh index 7f49ff0..a402bdb 100644 --- a/tests/perf/clickhouse/query/queries/01_select_pageviews_timeserie.sh +++ b/tests/perf/clickhouse/query/queries/01_select_pageviews_timeserie.sh @@ -1,21 +1,27 @@ +interval=43200 # seconds -> 12H timestamp=$(($(date '+%s') - 7257600)) # 3 months ago -domain="'localhost', 'foo.mywebsite.localhost'" -path="'/', '/foo', '/blog'" -operating_system="'Windows', 'Linux', 'Mac OS X', 'iOS', 'Android'" -browser_family="'Firefox', 'Chrome', 'Edge', 'Opera', 'Safari'" -referrer_domain="'direct', 'twitter.com', 'facebook.com'" -country_code="'FR', 'BG', 'US'" +domains="'localhost', 'foo.mywebsite.localhost'" +locations="'FR', 'BG', 'US'" +paths="'/foo', '/foo/bar', '/blog', '/blog/misc/a-nice-post'" cat <= $timestamp - AND domain IN ($domain) - AND path IN ($path) - AND operating_system IN ($operating_system) - AND browser_family IN ($browser_family) - AND referrer_domain IN ($referrer_domain) - AND country_code IN ($country_code) +SELECT toStartOfInterval(timestamp, INTERVAL ${interval} second) AS time, COUNT(*) +FROM pageviews +WHERE timestamp >= toDateTime(${timestamp}) +AND timestamp <= now() +AND domain IN (${domains}) +AND path IN (${paths}) +AND session_uuid IN ( + SELECT session_uuid FROM sessions + WHERE ( + (session_timestamp >= toDateTime(${timestamp}) AND session_timestamp <= now()) + OR + (exit_timestamp >= toDateTime(${timestamp}) AND exit_timestamp <= now()) + ) + AND domain IN (${domains}) + AND country_code IN (${locations}) + GROUP BY session_uuid +) GROUP BY time ORDER BY time EOF diff --git a/tests/perf/clickhouse/query/queries/02_select_sessions_timeserie.sh b/tests/perf/clickhouse/query/queries/02_select_sessions_timeserie.sh index f2134da..a23de11 100644 --- a/tests/perf/clickhouse/query/queries/02_select_sessions_timeserie.sh +++ b/tests/perf/clickhouse/query/queries/02_select_sessions_timeserie.sh @@ -1,22 +1,25 @@ +interval=43200 # seconds -> 12H timestamp=$(($(date '+%s') - 7257600)) # 3 months ago -domain="'localhost', 'foo.mywebsite.localhost'" -path="'/', '/foo', '/blog'" -operating_system="'Windows', 'Linux', 'Mac OS X', 'iOS', 'Android'" -browser_family="'Firefox', 'Chrome', 'Edge', 'Opera', 'Safari'" -referrer_domain="'direct', 'twitter.com', 'facebook.com'" -country_code="'FR', 'BG', 'US'" +domains="'localhost', 'foo.mywebsite.localhost'" +locations="'FR', 'BG', 'US'" cat <= $timestamp - AND is_entry = true - AND domain IN ($domain) - AND path IN ($path) - AND operating_system IN ($operating_system) - AND browser_family IN ($browser_family) - AND referrer_domain IN ($referrer_domain) - AND country_code IN ($country_code) +WITH exit_timestamps AS ( + SELECT + argMax(exit_timestamp, pageviews) AS timestamp, + argMax(visitor_id, pageviews) AS visitor_id + FROM sessions + WHERE ( + (session_timestamp >= toDateTime(${timestamp}) AND session_timestamp <= now()) + OR + (exit_timestamp >= toDateTime(${timestamp}) AND exit_timestamp <= now()) + ) + AND domain IN (${domains}) + AND country_code IN (${locations}) + GROUP BY session_uuid +) +SELECT toStartOfInterval(timestamp, INTERVAL ${interval} second) AS time, COUNT(*) +FROM exit_timestamps GROUP BY time ORDER BY time EOF diff --git a/tests/perf/clickhouse/query/queries/03_select_top_sources.sh b/tests/perf/clickhouse/query/queries/03_select_top_sources.sh index 502c139..58b7b6c 100644 --- a/tests/perf/clickhouse/query/queries/03_select_top_sources.sh +++ b/tests/perf/clickhouse/query/queries/03_select_top_sources.sh @@ -1,22 +1,23 @@ +interval=43200 # seconds -> 12H timestamp=$(($(date '+%s') - 7257600)) # 3 months ago -domain="'localhost', 'foo.mywebsite.localhost'" -path="'/', '/foo', '/blog'" -operating_system="'Windows', 'Linux', 'Mac OS X', 'iOS', 'Android'" -browser_family="'Firefox', 'Chrome', 'Edge', 'Opera', 'Safari'" -referrer_domain="'direct', 'twitter.com', 'facebook.com'" -country_code="'FR', 'BG', 'US'" +domains="'localhost', 'foo.mywebsite.localhost'" +operating_systems="'Windows', 'Linux', 'Mac OS X', 'iOS', 'Android'" cat <= $timestamp - AND is_entry = true - AND domain IN ($domain) - AND path IN ($path) - AND operating_system IN ($operating_system) - AND browser_family IN ($browser_family) - AND referrer_domain IN ($referrer_domain) - AND country_code IN ($country_code) -GROUP BY referrer_domain -ORDER BY count DESC +WITH referrals AS ( + SELECT argMax(referrer_domain, pageviews) AS referrer + FROM sessions + WHERE ( + (session_timestamp >= toDateTime(${timestamp}) AND session_timestamp <= now()) + OR + (exit_timestamp >= toDateTime(${timestamp}) AND exit_timestamp <= now()) + ) + AND domain IN (${domains}) + AND operating_system IN (${operating_systems}) + GROUP BY session_uuid +) +SELECT referrer, COUNT(*) AS session_count +FROM referrals +GROUP BY referrer +ORDER BY session_count DESC EOF diff --git a/tests/perf/clickhouse/query/queries/04_select_top_entry_pages.sh b/tests/perf/clickhouse/query/queries/04_select_top_entry_pages.sh index 1c246b5..b3b951f 100644 --- a/tests/perf/clickhouse/query/queries/04_select_top_entry_pages.sh +++ b/tests/perf/clickhouse/query/queries/04_select_top_entry_pages.sh @@ -1,22 +1,23 @@ +interval=43200 # seconds -> 12H timestamp=$(($(date '+%s') - 7257600)) # 3 months ago -domain="'localhost', 'foo.mywebsite.localhost'" -path="'/', '/foo', '/blog'" -operating_system="'Windows', 'Linux', 'Mac OS X', 'iOS', 'Android'" -browser_family="'Firefox', 'Chrome', 'Edge', 'Opera', 'Safari'" -referrer_domain="'direct', 'twitter.com', 'facebook.com'" -country_code="'FR', 'BG', 'US'" +domains="'localhost', 'foo.mywebsite.localhost'" +browsers="'Firefox', 'Chrome', 'Edge', 'Opera', 'Safari'" cat <= $timestamp - AND is_entry = true - AND domain IN ($domain) - AND path IN ($path) - AND operating_system IN ($operating_system) - AND browser_family IN ($browser_family) - AND referrer_domain IN ($referrer_domain) - AND country_code IN ($country_code) +WITH entry_pageviews AS ( + SELECT argMax(entry_path, pageviews) AS path + FROM sessions + WHERE ( + (session_timestamp >= toDateTime(${timestamp}) AND session_timestamp <= now()) + OR + (exit_timestamp >= toDateTime(${timestamp}) AND exit_timestamp <= now()) + ) + AND domain IN (${domains}) + AND browser_family IN (${browsers}) + GROUP BY session_uuid +) +SELECT path, COUNT(*) AS session_count +FROM entry_pageviews GROUP BY path -ORDER BY pageviews DESC +ORDER BY session_count DESC EOF diff --git a/tests/perf/clickhouse/query/queries/05_select_top_pages.sh b/tests/perf/clickhouse/query/queries/05_select_top_pages.sh index 949d2c3..b1f8745 100644 --- a/tests/perf/clickhouse/query/queries/05_select_top_pages.sh +++ b/tests/perf/clickhouse/query/queries/05_select_top_pages.sh @@ -1,21 +1,23 @@ +interval=43200 # seconds -> 12H timestamp=$(($(date '+%s') - 7257600)) # 3 months ago -domain="'localhost', 'foo.mywebsite.localhost'" -path="'/', '/foo', '/blog'" -operating_system="'Windows', 'Linux', 'Mac OS X', 'iOS', 'Android'" -browser_family="'Firefox', 'Chrome', 'Edge', 'Opera', 'Safari'" -referrer_domain="'direct', 'twitter.com', 'facebook.com'" -country_code="'FR', 'BG', 'US'" +domains="'localhost', 'foo.mywebsite.localhost'" +entry_paths="'/foo', '/foo/bar', '/blog', '/blog/misc/a-nice-post'" cat <= $timestamp - AND domain IN ($domain) - AND path IN ($path) - AND operating_system IN ($operating_system) - AND browser_family IN ($browser_family) - AND referrer_domain IN ($referrer_domain) - AND country_code IN ($country_code) +FROM pageviews +WHERE (timestamp >= toDateTime(${timestamp}) AND timestamp <= now()) +AND session_uuid IN ( + SELECT argMax(session_uuid, pageviews) FROM sessions + WHERE ( + (session_timestamp >= toDateTime(${timestamp}) AND session_timestamp <= now()) + OR + (exit_timestamp >= toDateTime(${timestamp}) AND exit_timestamp <= now()) + ) + AND domain IN (${domains}) + AND entry_path IN (${entry_paths}) + GROUP BY session_uuid +) GROUP BY path ORDER BY pageviews DESC EOF diff --git a/tests/perf/clickhouse/query/queries/06_select_top_exit_pages.sh b/tests/perf/clickhouse/query/queries/06_select_top_exit_pages.sh index 9138fc8..ea13c0b 100644 --- a/tests/perf/clickhouse/query/queries/06_select_top_exit_pages.sh +++ b/tests/perf/clickhouse/query/queries/06_select_top_exit_pages.sh @@ -1,24 +1,34 @@ +interval=43200 # seconds -> 12H timestamp=$(($(date '+%s') - 7257600)) # 3 months ago -domain="'localhost', 'foo.mywebsite.localhost'" -path="'/', '/foo', '/blog'" -operating_system="'Windows', 'Linux', 'Mac OS X', 'iOS', 'Android'" -browser_family="'Firefox', 'Chrome', 'Edge', 'Opera', 'Safari'" -referrer_domain="'direct', 'twitter.com', 'facebook.com'" -country_code="'FR', 'BG', 'US'" +domains="'localhost', 'foo.mywebsite.localhost'" +entry_paths="'/foo', '/foo/bar', '/blog', '/blog/misc/a-nice-post'" +exit_paths="'/foo', '/foo/bar', '/blog', '/blog/misc/a-nice-post'" +paths="'/blog'" +operating_systems="'Windows', 'Linux', 'Mac OS X', 'iOS', 'Android'" +browsers="'Firefox', 'Chrome', 'Edge', 'Opera', 'Safari'" +referrals="'direct', 'twitter.com', 'facebook.com'" +locations="'FR', 'BG', 'US'" cat <= toDateTime(${timestamp}) AND session_timestamp <= now()) + OR + (exit_timestamp >= toDateTime(${timestamp}) AND exit_timestamp <= now()) + ) + AND domain IN (${domains}) + AND entry_path IN (${entry_paths}) + AND operating_system IN (${operating_systems}) + AND browser_family IN (${browsers}) + AND referrer_domain IN (${referrals}) + AND country_code IN (${locations}) + AND exit_path IN (${exit_paths}) + GROUP BY session_uuid ) -SELECT session_id, path, COUNT(*) AS pageviews -FROM events_pageviews -WHERE timestamp >= $timestamp - AND domain IN ($domain) - AND path IN ($path) - AND operating_system IN ($operating_system) - AND browser_family IN ($browser_family) - AND referrer_domain IN ($referrer_domain) - AND country_code IN ($country_code) +SELECT path, COUNT(*) AS pageviews +FROM exit_pageviews GROUP BY path ORDER BY pageviews DESC EOF diff --git a/tests/perf/clickhouse/query/queries/07_select_top_locations.sh b/tests/perf/clickhouse/query/queries/07_select_top_locations.sh index 53a7c04..53cfae6 100644 --- a/tests/perf/clickhouse/query/queries/07_select_top_locations.sh +++ b/tests/perf/clickhouse/query/queries/07_select_top_locations.sh @@ -1,22 +1,23 @@ +interval=43200 # seconds -> 12H timestamp=$(($(date '+%s') - 7257600)) # 3 months ago -domain="'localhost', 'foo.mywebsite.localhost'" -path="'/', '/foo', '/blog'" -operating_system="'Windows', 'Linux', 'Mac OS X', 'iOS', 'Android'" -browser_family="'Firefox', 'Chrome', 'Edge', 'Opera', 'Safari'" -referrer_domain="'direct', 'twitter.com', 'facebook.com'" -country_code="'FR', 'BG', 'US'" +domains="'localhost', 'foo.mywebsite.localhost'" +referrals="'direct', 'twitter.com', 'facebook.com'" cat <= $timestamp - AND domain IN ($domain) - AND path IN ($path) - AND operating_system IN ($operating_system) - AND browser_family IN ($browser_family) - AND referrer_domain IN ($referrer_domain) - AND country_code IN ($country_code) +WITH sessions_locations AS ( + SELECT argMax(country_code, pageviews) AS code + FROM sessions + WHERE ( + (session_timestamp >= toDateTime(${timestamp}) AND session_timestamp <= now()) + OR + (exit_timestamp >= toDateTime(${timestamp}) AND exit_timestamp <= now()) + ) + AND referrer_domain IN (${referrals}) + GROUP BY session_uuid +) +SELECT name AS country, COUNT(*) AS session_count +FROM sessions_locations +JOIN countries ON sessions_locations.code = countries.code GROUP BY country -ORDER BY pageview DESC +ORDER BY session_count DESC EOF diff --git a/tests/perf/clickhouse/query/queries/08_select_avg_session_duration.sh b/tests/perf/clickhouse/query/queries/08_select_avg_session_duration.sh index e290d20..44ddb53 100644 --- a/tests/perf/clickhouse/query/queries/08_select_avg_session_duration.sh +++ b/tests/perf/clickhouse/query/queries/08_select_avg_session_duration.sh @@ -1,24 +1,22 @@ +interval=43200 # seconds -> 12H timestamp=$(($(date '+%s') - 7257600)) # 3 months ago -domain="'localhost', 'foo.mywebsite.localhost'" -path="'/', '/foo', '/blog'" -operating_system="'Windows', 'Linux', 'Mac OS X', 'iOS', 'Android'" -browser_family="'Firefox', 'Chrome', 'Edge', 'Opera', 'Safari'" -referrer_domain="'direct', 'twitter.com', 'facebook.com'" -country_code="'FR', 'BG', 'US'" +domains="'localhost', 'foo.mywebsite.localhost'" +entry_paths="'/foo', '/foo/bar', '/blog', '/blog/misc/a-nice-post'" cat <= toDateTime(${timestamp}) AND session_timestamp <= now()) + OR + (exit_timestamp >= toDateTime(${timestamp}) AND exit_timestamp <= now()) + ) + AND domain IN (${domains}) + AND entry_path IN (${entry_paths}) + AND session_timestamp != exit_timestamp + GROUP BY session_uuid ) - -SELECT avg(timestamp - entry_timestamp) AS "Visit duration" -FROM events_pageviews -WHERE timestamp >= $timestamp - AND timestamp IN (SELECT timestamp FROM exit_pageviews WHERE exit_pageviews.session_id = events_pageviews.session_id ) - AND domain IN ($domain) - AND path IN ($path) - AND operating_system IN ($operating_system) - AND browser_family IN ($browser_family) - AND referrer_domain IN ($referrer_domain) - AND country_code IN ($country_code) +SELECT avg(duration) AS "Average session duration" +FROM sessions_duration EOF diff --git a/tests/perf/clickhouse/query/queries/09_select_single_pageview_sessions.sh b/tests/perf/clickhouse/query/queries/09_select_single_pageview_sessions.sh index 76de3c2..dc0ed11 100644 --- a/tests/perf/clickhouse/query/queries/09_select_single_pageview_sessions.sh +++ b/tests/perf/clickhouse/query/queries/09_select_single_pageview_sessions.sh @@ -1,23 +1,21 @@ +interval=43200 # seconds -> 12H timestamp=$(($(date '+%s') - 7257600)) # 3 months ago -domain="'localhost', 'foo.mywebsite.localhost'" -path="'/', '/foo', '/blog'" -operating_system="'Windows', 'Linux', 'Mac OS X', 'iOS', 'Android'" -browser_family="'Firefox', 'Chrome', 'Edge', 'Opera', 'Safari'" -referrer_domain="'direct', 'twitter.com', 'facebook.com'" -country_code="'FR', 'BG', 'US'" +domains="'localhost', 'foo.mywebsite.localhost'" +operating_systems="'Windows', 'Linux', 'Mac OS X', 'iOS', 'Android'" cat <= $timestamp - AND domain IN ($domain) - AND path IN ($path) - AND operating_system IN ($operating_system) - AND browser_family IN ($browser_family) - AND referrer_domain IN ($referrer_domain) - AND country_code IN ($country_code) - GROUP BY visitor_id +WITH bounces AS ( + SELECT argMax(pageviews, pageviews) AS pageviews + FROM sessions + WHERE ( + (session_timestamp >= toDateTime(${timestamp}) AND session_timestamp <= now()) + OR + (exit_timestamp >= toDateTime(${timestamp}) AND exit_timestamp <= now()) + ) + AND domain IN (${domains}) + AND operating_system IN (${operating_systems}) + GROUP BY session_uuid + HAVING pageviews = 1 ) -SELECT COUNT(*) FROM visitor_visits WHERE visits = 1 +SELECT COUNT(*) AS bounces FROM bounces EOF diff --git a/tests/perf/clickhouse/query/queries/10_select_visitors_count.sh b/tests/perf/clickhouse/query/queries/10_select_visitors_count.sh index bd11765..30d47bc 100644 --- a/tests/perf/clickhouse/query/queries/10_select_visitors_count.sh +++ b/tests/perf/clickhouse/query/queries/10_select_visitors_count.sh @@ -1,19 +1,12 @@ +interval=43200 # seconds -> 12H timestamp=$(($(date '+%s') - 7257600)) # 3 months ago -domain="'localhost', 'foo.mywebsite.localhost'" -path="'/', '/foo', '/blog'" -operating_system="'Windows', 'Linux', 'Mac OS X', 'iOS', 'Android'" -browser_family="'Firefox', 'Chrome', 'Edge', 'Opera', 'Safari'" -referrer_domain="'direct', 'twitter.com', 'facebook.com'" -country_code="'FR', 'BG', 'US'" +domains="'localhost', 'foo.mywebsite.localhost'" +referrals="'direct', 'twitter.com', 'facebook.com'" cat <= $timestamp - AND domain IN ($domain) - AND path IN ($path) - AND operating_system IN ($operating_system) - AND browser_family IN ($browser_family) - AND referrer_domain IN ($referrer_domain) - AND country_code IN ($country_code) +SELECT COUNT(DISTINCT(visitor_id)) AS "Live visitors" +FROM sessions +WHERE addMinutes(exit_timestamp, 15) > now() +AND domain IN (${domains}) +AND referrer_domain IN (${referrals}) EOF