From 823249772587a7dc4d96b7932a3894a254d0067a Mon Sep 17 00:00:00 2001 From: clee2000 <44682903+clee2000@users.noreply.github.com> Date: Tue, 19 Nov 2024 14:26:47 -0800 Subject: [PATCH] [CH] Remove more unused queries (#5940) Follow ups to https://github.com/pytorch/test-infra/pull/5824/files https://github.com/pytorch/test-infra/pull/5882/files Also somehow missed some in https://github.com/pytorch/test-infra/pull/5871/files? --- .../ci_wait_time/params.json | 5 - .../clickhouse_queries/ci_wait_time/query.sql | 75 --------- .../completed_pr_jobs_aggregate/params.json | 4 - .../completed_pr_jobs_aggregate/query.sql | 114 ------------- .../queue_times_historical_pct/query.sql | 1 - .../params.json | 6 - .../test_time_and_price_per_oncall/query.sql | 159 ------------------ .../test_time_per_oncall/params.json | 6 - .../test_time_per_oncall/query.sql | 61 ------- .../test_times_per_workflow_type/params.json | 4 - .../test_times_per_workflow_type/query.sql | 33 ---- .../commons/__sql/test_insights_overview.sql | 57 ------- .../__sql/test_time_and_price_per_oncall.sql | 158 ----------------- .../commons/__sql/test_time_per_oncall.sql | 60 ------- .../__sql/test_times_per_workflow_type.sql | 32 ---- ...test_time_and_price_per_oncall.lambda.json | 26 --- .../commons/test_time_per_oncall.lambda.json | 26 --- .../test_times_per_workflow_type.lambda.json | 16 -- torchci/rockset/prodVersions.json | 3 - 19 files changed, 846 deletions(-) delete mode 100644 torchci/clickhouse_queries/ci_wait_time/params.json delete mode 100644 torchci/clickhouse_queries/ci_wait_time/query.sql delete mode 100644 torchci/clickhouse_queries/completed_pr_jobs_aggregate/params.json delete mode 100644 torchci/clickhouse_queries/completed_pr_jobs_aggregate/query.sql delete mode 100644 torchci/clickhouse_queries/test_time_and_price_per_oncall/params.json delete mode 100644 torchci/clickhouse_queries/test_time_and_price_per_oncall/query.sql delete mode 100644 torchci/clickhouse_queries/test_time_per_oncall/params.json delete mode 100644 torchci/clickhouse_queries/test_time_per_oncall/query.sql delete mode 100644 torchci/clickhouse_queries/test_times_per_workflow_type/params.json delete mode 100644 torchci/clickhouse_queries/test_times_per_workflow_type/query.sql delete mode 100644 torchci/rockset/commons/__sql/test_insights_overview.sql delete mode 100644 torchci/rockset/commons/__sql/test_time_and_price_per_oncall.sql delete mode 100644 torchci/rockset/commons/__sql/test_time_per_oncall.sql delete mode 100644 torchci/rockset/commons/__sql/test_times_per_workflow_type.sql delete mode 100644 torchci/rockset/commons/test_time_and_price_per_oncall.lambda.json delete mode 100644 torchci/rockset/commons/test_time_per_oncall.lambda.json delete mode 100644 torchci/rockset/commons/test_times_per_workflow_type.lambda.json diff --git a/torchci/clickhouse_queries/ci_wait_time/params.json b/torchci/clickhouse_queries/ci_wait_time/params.json deleted file mode 100644 index 8a20197c2b..0000000000 --- a/torchci/clickhouse_queries/ci_wait_time/params.json +++ /dev/null @@ -1,5 +0,0 @@ -{ - "granularity": "String", - "startTime": "DateTime64(3)", - "stopTime": "DateTime64(3)" -} \ No newline at end of file diff --git a/torchci/clickhouse_queries/ci_wait_time/query.sql b/torchci/clickhouse_queries/ci_wait_time/query.sql deleted file mode 100644 index 5049ae113f..0000000000 --- a/torchci/clickhouse_queries/ci_wait_time/query.sql +++ /dev/null @@ -1,75 +0,0 @@ --- !!! Query is not converted to CH syntax yet. Delete this line when it gets converted -WITH percentiles_desired AS ( - -- All the percentiles that we want the query to return - SELECT 'p25' as percentile, 0.25 as percentile_num - UNION ALL - SELECT 'p50', 0.50 - UNION ALL - SELECT 'p75', 0.75 - UNION ALL - SELECT 'p90', 0.90 -), --- Set the bucket to the desired granularity -granular_pr_stats as ( - SELECT - DATE_TRUNC(:granularity, end_time) AS bucket, - * - FROM metrics.pr_stats -), --- Within each bucket, figure out what percentile duration and num_commits each PR falls under -percentiles as ( - SELECT - pr_number, - bucket, - duration_mins, - PERCENT_RANK() OVER( - PARTITION BY bucket - ORDER by duration_mins - ) as duration_percentile, - num_commits, - PERCENT_RANK() OVER( - PARTITION BY bucket - ORDER by num_commits - ) as num_commits_percentile - FROM - granular_pr_stats - WHERE 1 = 1 - AND PARSE_DATETIME_ISO8601(:startTime) <= bucket - AND DATE(PARSE_DATETIME_ISO8601(:stopTime)) >= bucket -), --- For each bucket, get just the durations corresponding to the desired percentiles -duration_percentile as ( - SELECT - p.bucket, - pd.percentile, - MIN(p.duration_mins) as duration_mins - FROM percentiles p - CROSS JOIN percentiles_desired pd - WHERE p.duration_percentile >= pd.percentile_num - GROUP BY - p.bucket, pd.percentile -), --- For each bucket, get just the number of commits corresponding to the desired percentiles -num_commits_percentile as ( - SELECT - p.bucket, - pd.percentile, - MIN(p.num_commits) as num_commits - FROM percentiles p - CROSS JOIN percentiles_desired pd - WHERE p.num_commits_percentile >= pd.percentile_num - GROUP BY - p.bucket, pd.percentile -) --- Keep the percentiles on the same row so that this one query can give the results for both types of data -SELECT - FORMAT_TIMESTAMP('%Y-%m-%d', d.bucket) as bucket, - d.percentile, - d.duration_mins, - c.num_commits -FROM - duration_percentile d - INNER JOIN num_commits_percentile c on d.bucket = c.bucket and d.percentile = c.percentile -WHERE - d.bucket < (SELECT max(bucket) from granular_pr_stats) -- discard the latest bucket, which will have noisy, partial data -ORDER BY bucket DESC, duration_mins diff --git a/torchci/clickhouse_queries/completed_pr_jobs_aggregate/params.json b/torchci/clickhouse_queries/completed_pr_jobs_aggregate/params.json deleted file mode 100644 index 28cd0df237..0000000000 --- a/torchci/clickhouse_queries/completed_pr_jobs_aggregate/params.json +++ /dev/null @@ -1,4 +0,0 @@ -{ - "from_days_ago": "Int64", - "to_days_ago": "Int64" -} \ No newline at end of file diff --git a/torchci/clickhouse_queries/completed_pr_jobs_aggregate/query.sql b/torchci/clickhouse_queries/completed_pr_jobs_aggregate/query.sql deleted file mode 100644 index f89d949e2a..0000000000 --- a/torchci/clickhouse_queries/completed_pr_jobs_aggregate/query.sql +++ /dev/null @@ -1,114 +0,0 @@ --- !!! Query is not converted to CH syntax yet. Delete this line when it gets converted --- This query is used to generate the CI Wait Time KPI for the pytorch/pytorch repo --- It's not the full kpi. Rather, this performs some early data processing and aggregation, which --- is then used by a python script to generate the final KPI, which gets uploaded back to rockset --- to be generally queryable -WITH - -- Get all PRs that were merged into master, and get all the SHAs for commits from that PR which CI jobs ran against - -- We need the shas because some jobs (like trunk) don't have a PR they explicitly ran against, but they _were_ run against - -- a commit from a PR - pr_shas AS ( - SELECT - r.pull_requests[1].number AS pr_number, - CONCAT( - 'https://github.com/pytorch/pytorch/pull/', - r.pull_requests[1].number - ) AS url, - j.head_sha AS sha, - FROM - commons.workflow_job j - INNER JOIN commons.workflow_run r on j.run_id = r.id - WHERE - 1 = 1 - AND j._event_time > ( - CURRENT_DATETIME() - DAYS(:from_days_ago) - ) - AND r._event_time > ( - CURRENT_DATETIME() - DAYS(:from_days_ago) - ) - AND j._event_time < (CURRENT_DATETIME() - DAYS(:to_days_ago)) - AND r._event_time < (CURRENT_DATETIME() - DAYS(:to_days_ago)) - AND LENGTH(r.pull_requests) = 1 - AND r.head_branch NOT IN ('master', 'main', 'nightly', 'viable/strict') - AND r.pull_requests[1].head.repo.name = 'pytorch' - AND r.name IN ('pull', 'trunk', 'Lint') - -- Ensure we don't pull in random PRs we don't care about - AND ( - r.pull_requests[1].base.ref = 'master' - OR r.pull_requests[1].base.ref = 'main' - OR r.pull_requests[1].base.ref like 'gh/%/base' - ) - GROUP BY - pr_number, - url, - sha - ), - -- Now filter the list to just closed PRs - merged_pr_shas AS ( - SELECT - DISTINCT s.pr_number, - s.url, - s.sha - FROM - pr_shas s - INNER JOIN commons.pull_request pr on s.pr_number = pr.number - WHERE - pr.closed_at IS NOT NULL - -- Ensure the PR was actaully merged - AND 'Merged' IN ( - SELECT - name - FROM - UNNEST(pr.labels) - ) - ), - -- Get all the workflows and partially aggregate the jobs run against each commit (based on the job's conclusion) - commit_job_durations AS ( - SELECT - s.pr_number, - r.name AS workflow_name, - s.sha, - j.conclusion AS conclusion, - j.conclusion = 'cancelled' AS was_cancelled, -- For convenience - j.run_attempt, -- the attemp # this job was run on - r.run_attempt AS total_attempts, - r.id AS workflow_run_id, - min(r.run_started_at) AS start_time, - max(PARSE_TIMESTAMP_ISO8601(j.completed_at)) AS end_time, - DATE_DIFF( - 'MINUTE', - min(j._event_time), - max(PARSE_TIMESTAMP_ISO8601(j.completed_at)) - ) AS duration_mins, - r.html_url AS workflow_url, -- for debugging - s.url, -- for debugging - FROM - commons.workflow_job j - INNER JOIN merged_pr_shas s on j.head_sha = s.sha - INNER JOIN commons.workflow_run r on j.run_id = r.id - WHERE - 1 = 1 - AND ( - r.name IN ('pull', 'trunk', 'Lint') - OR r.name like 'linux-binary%' - OR r.name like 'windows-binary%' - ) - -- skipped jobs are irrelevant to us - AND j.conclusion NOT IN ('skipped') - GROUP BY - pr_number, - workflow_name, - url, - sha, - run_attempt, - total_attempts, - conclusion, - was_cancelled, - workflow_run_id, - workflow_url - ) -SELECT - * -FROM - commit_job_durations -ORDER BY pr_number DESC \ No newline at end of file diff --git a/torchci/clickhouse_queries/queue_times_historical_pct/query.sql b/torchci/clickhouse_queries/queue_times_historical_pct/query.sql index 99bfbf7743..33952ae30f 100644 --- a/torchci/clickhouse_queries/queue_times_historical_pct/query.sql +++ b/torchci/clickhouse_queries/queue_times_historical_pct/query.sql @@ -1,4 +1,3 @@ --- !!! Query is not converted to CH syntax yet. Delete this line when it gets converted SELECT toStartOfHour( toDateTime(q.time, {timezone: String}) diff --git a/torchci/clickhouse_queries/test_time_and_price_per_oncall/params.json b/torchci/clickhouse_queries/test_time_and_price_per_oncall/params.json deleted file mode 100644 index 8e9ccd8f75..0000000000 --- a/torchci/clickhouse_queries/test_time_and_price_per_oncall/params.json +++ /dev/null @@ -1,6 +0,0 @@ -{ - "endDate": "String", - "oncall": "String", - "startDate": "String", - "workflow_type": "String" -} \ No newline at end of file diff --git a/torchci/clickhouse_queries/test_time_and_price_per_oncall/query.sql b/torchci/clickhouse_queries/test_time_and_price_per_oncall/query.sql deleted file mode 100644 index 0bf117e04c..0000000000 --- a/torchci/clickhouse_queries/test_time_and_price_per_oncall/query.sql +++ /dev/null @@ -1,159 +0,0 @@ --- !!! Query is not converted to CH syntax yet. Delete this line when it gets converted -With - workflow_summed_table AS ( - SELECT - workflow_id, - -- sum by job name to get total over all shards - SUM(sum_duration_in_second) as sum_duration_in_second, - oncalls, - date, - workflow_name, - test_class, - test_file, - config_job_name, - config_shard_name, - FROM - metrics.aggregated_test_metrics_with_preproc - WHERE - DATE_TRUNC('DAY', date) >= DATE_TRUNC('DAY', PARSE_DATETIME_ISO8601(: startDate)) - AND DATE_TRUNC('DAY', date) < DATE_TRUNC('DAY', PARSE_DATETIME_ISO8601(: endDate)) - AND workflow_name LIKE: workflow_type - GROUP BY - workflow_id, - workflow_name, - test_class, - test_file, - date, - oncalls, - config_job_name, - config_shard_name - ), - filtered_table AS ( - SELECT - AVG(sum_duration_in_second) as avg_duration_in_second, - COUNT(DISTINCT(workflow_id)) as workflow_occurences, - oncalls, - date, - workflow_name, - test_class, - test_file, - config_job_name, - config_shard_name, - FROM - workflow_summed_table - GROUP BY - workflow_name, - test_class, - test_file, - date, - oncalls, - config_job_name, - config_shard_name - ), - filtered_with_costs as ( - SELECT - t.avg_duration_in_second as avg_duration_in_second, - t.oncalls as oncalls, - t.date as date, - t.workflow_name as workflow_name, - t.test_class as test_class, - t.test_file as test_file, - t.config_job_name as config_job_name, - t.config_shard_name as config_shard_name, - t.workflow_occurences, - CASE - WHEN p.price IS NULL THEN 0 - ELSE CAST(p.price AS float) - END as price_per_hour - FROM - filtered_table t - LEFT JOIN commons.price_per_config p ON ( - t.config_job_name = p.job_name - AND t.config_shard_name = p.shard_name - ) - ), - total_table as ( - SELECT - date, - workflow_name, - SUM(avg_duration_in_second) as total_duration_per_workflow_per_run, - SUM(price_per_hour * avg_duration_in_second / 60 / 60) as total_price_per_workflow_per_run - FROM - filtered_with_costs - GROUP BY - date, - workflow_name - ), - filtered_with_oncalls as ( - SELECT - * - FROM - ( - filtered_with_costs - CROSS JOIN UNNEST(oncalls AS oncall) - ) - WHERE - REPLACE(REPLACE(oncall, 'module: ', ''), 'oncall: ', '') LIKE: oncall - ), - filtered_with_oncalls_and_totals as ( - SELECT - avg_duration_in_second, - REPLACE(REPLACE(oncall, 'module: ', ''), 'oncall: ', '') as oncall, - filtered_with_oncalls.date as date, - filtered_with_oncalls.workflow_name as workflow_name, - test_class, - test_file, - total_duration_per_workflow_per_run, - total_price_per_workflow_per_run, - workflow_occurences, - price_per_hour, - FROM - filtered_with_oncalls - INNER JOIN total_table ON filtered_with_oncalls.date = total_table.date - AND filtered_with_oncalls.workflow_name = total_table.workflow_name - WHERE - REPLACE(REPLACE(oncall, 'module: ', ''), 'oncall: ', '') LIKE: oncall - ), - t as ( - SELECT - oncall, - workflow_name as workflow_type, - SUM(avg_duration_in_second) as time_in_seconds, - TRUNC( - SUM(price_per_hour * avg_duration_in_second / 60 / 60), - 2 - ) as estimated_price_per_run_in_dollars, - TRUNC( - SUM( - price_per_hour * avg_duration_in_second * workflow_occurences / 60 / 60 - ), - 2 - ) as estimated_price_per_day_in_dollars, - CAST(date as STRING) as granularity_bucket, - TRUNC( - SUM(avg_duration_in_second) / ARBITRARY( - -- add noise to avoid divide by 0 - total_duration_per_workflow_per_run + 0.000001 - ) * 100, - 2 - ) as percentage_of_time, - TRUNC( - SUM(price_per_hour * avg_duration_in_second / 60 / 60) / ARBITRARY( - -- add noise to avoid divide by 0 - total_price_per_workflow_per_run + 0.000001 - ) * 100, - 2 - ) as percentage_of_cost - FROM - filtered_with_oncalls_and_totals as t - GROUP BY - t.oncall, - t.date, - t.workflow_name - ) -SELECT - *, -FROM - t -ORDER BY - time_in_seconds DESC diff --git a/torchci/clickhouse_queries/test_time_per_oncall/params.json b/torchci/clickhouse_queries/test_time_per_oncall/params.json deleted file mode 100644 index 8e9ccd8f75..0000000000 --- a/torchci/clickhouse_queries/test_time_per_oncall/params.json +++ /dev/null @@ -1,6 +0,0 @@ -{ - "endDate": "String", - "oncall": "String", - "startDate": "String", - "workflow_type": "String" -} \ No newline at end of file diff --git a/torchci/clickhouse_queries/test_time_per_oncall/query.sql b/torchci/clickhouse_queries/test_time_per_oncall/query.sql deleted file mode 100644 index cb2205af72..0000000000 --- a/torchci/clickhouse_queries/test_time_per_oncall/query.sql +++ /dev/null @@ -1,61 +0,0 @@ --- !!! Query is not converted to CH syntax yet. Delete this line when it gets converted -With - workflow_summed_table AS ( - SELECT - workflow_id, - -- sum by job name to get total over all shards - SUM(sum_duration_in_second) as sum_duration_in_second, - oncalls, - date, - workflow_name - FROM - metrics.aggregated_test_metrics_with_preproc - WHERE - DATE_TRUNC('DAY', date) >= DATE_TRUNC('DAY', PARSE_DATETIME_ISO8601(: startDate)) - AND DATE_TRUNC('DAY', date) < DATE_TRUNC('DAY', PARSE_DATETIME_ISO8601(: endDate)) - AND workflow_name LIKE: workflow_type - GROUP BY - workflow_id, - workflow_name, - date, - oncalls - ), - filtered_table AS ( - SELECT - AVG(sum_duration_in_second) as avg_duration_in_second, - oncalls, - date, - workflow_name, - FROM - workflow_summed_table - GROUP BY - workflow_name, - date, - oncalls - ), - filtered_with_oncalls as ( - SELECT - * - FROM - ( - filtered_table - CROSS JOIN UNNEST(oncalls AS oncall) - ) - WHERE - REPLACE(REPLACE(oncall, 'module: ', ''), 'oncall: ', '') LIKE: oncall - ), - t as ( - SELECT - avg_duration_in_second as time_in_seconds, - REPLACE(REPLACE(oncall, 'module: ', ''), 'oncall: ', '') as oncall, - CAST(filtered_with_oncalls.date AS STRING) as granularity_bucket, - filtered_with_oncalls.workflow_name as workflow_name, - FROM - filtered_with_oncalls - ) -SELECT - *, -FROM - t -ORDER BY - time_in_seconds DESC diff --git a/torchci/clickhouse_queries/test_times_per_workflow_type/params.json b/torchci/clickhouse_queries/test_times_per_workflow_type/params.json deleted file mode 100644 index 16e9729e37..0000000000 --- a/torchci/clickhouse_queries/test_times_per_workflow_type/params.json +++ /dev/null @@ -1,4 +0,0 @@ -{ - "startDate": "String", - "workflow_type": "String" -} \ No newline at end of file diff --git a/torchci/clickhouse_queries/test_times_per_workflow_type/query.sql b/torchci/clickhouse_queries/test_times_per_workflow_type/query.sql deleted file mode 100644 index fee2de7221..0000000000 --- a/torchci/clickhouse_queries/test_times_per_workflow_type/query.sql +++ /dev/null @@ -1,33 +0,0 @@ --- !!! Query is not converted to CH syntax yet. Delete this line when it gets converted -WITH sum_table AS (SELECT - t.workflow_name as workflow_type, - SUM( - t.sum_duration_in_second - ) as time_in_seconds, - t.date AS granularity_bucket, - t.workflow_id -FROM - metrics.aggregated_test_metrics AS t -WHERE - CAST( - PARSE_DATETIME_ISO8601(t.date) as date - ) >= PARSE_DATETIME_ISO8601(: startDate) - AND t.workflow_name != 'inductor' - AND t.workflow_name != 'unstable' - AND t.workflow_id IS NOT NULL -GROUP BY - t.date, - t.workflow_name, - t.workflow_id -) -SELECT - workflow_type, - AVG( - time_in_seconds - ) as time_in_seconds, - granularity_bucket -FROM - sum_table -GROUP BY - granularity_bucket, - workflow_type \ No newline at end of file diff --git a/torchci/rockset/commons/__sql/test_insights_overview.sql b/torchci/rockset/commons/__sql/test_insights_overview.sql deleted file mode 100644 index 820b634501..0000000000 --- a/torchci/rockset/commons/__sql/test_insights_overview.sql +++ /dev/null @@ -1,57 +0,0 @@ -WITH test_runs AS ( - SELECT - workflow_run.name AS workflow_name, - workflow_job.name AS job_name, - test_run_summary.invoking_file AS test_file, - test_run_summary.classname AS test_class, - test_run_summary.tests AS tests, - test_run_summary.errors AS errors, - test_run_summary.failures AS failures, - test_run_summary.skipped AS skipped, - test_run_summary.time AS duration_in_second, - FROM - commons.test_run_summary - JOIN commons.workflow_run on test_run_summary.workflow_id = CAST(workflow_run.id as string) - JOIN commons.workflow_job on test_run_summary.job_id = workflow_job.id - WHERE - test_run_summary._event_time >= PARSE_DATETIME_ISO8601(: startTime) - AND test_run_summary._event_time < PARSE_DATETIME_ISO8601(: stopTime) - AND test_run_summary.workflow_run_attempt = 1 - AND workflow_run.name = : workflowName - AND workflow_run.head_branch = 'main' - AND test_run_summary.invoking_file LIKE : testFile - AND test_run_summary.classname LIKE : testClass -), -aggregated_test_runs AS ( - SELECT - workflow_name, - job_name, - test_file, - test_class, - CAST( - AVG(duration_in_second) AS int - ) avg_duration_in_second, - CAST( - AVG(tests) AS int - ) AS avg_tests, - MAX(failures) AS max_failures, - MAX(errors) AS max_errors, - CAST( - AVG(skipped) AS int - ) AS avg_skipped, - FROM - test_runs - GROUP BY - workflow_name, - job_name, - test_file, - test_class -) -SELECT - * -FROM - aggregated_test_runs -WHERE - avg_duration_in_second >= : thresholdInSecond -ORDER BY - avg_duration_in_second DESC diff --git a/torchci/rockset/commons/__sql/test_time_and_price_per_oncall.sql b/torchci/rockset/commons/__sql/test_time_and_price_per_oncall.sql deleted file mode 100644 index b84fe361ca..0000000000 --- a/torchci/rockset/commons/__sql/test_time_and_price_per_oncall.sql +++ /dev/null @@ -1,158 +0,0 @@ -With - workflow_summed_table AS ( - SELECT - workflow_id, - -- sum by job name to get total over all shards - SUM(sum_duration_in_second) as sum_duration_in_second, - oncalls, - date, - workflow_name, - test_class, - test_file, - config_job_name, - config_shard_name, - FROM - metrics.aggregated_test_metrics_with_preproc - WHERE - DATE_TRUNC('DAY', date) >= DATE_TRUNC('DAY', PARSE_DATETIME_ISO8601(: startDate)) - AND DATE_TRUNC('DAY', date) < DATE_TRUNC('DAY', PARSE_DATETIME_ISO8601(: endDate)) - AND workflow_name LIKE: workflow_type - GROUP BY - workflow_id, - workflow_name, - test_class, - test_file, - date, - oncalls, - config_job_name, - config_shard_name - ), - filtered_table AS ( - SELECT - AVG(sum_duration_in_second) as avg_duration_in_second, - COUNT(DISTINCT(workflow_id)) as workflow_occurences, - oncalls, - date, - workflow_name, - test_class, - test_file, - config_job_name, - config_shard_name, - FROM - workflow_summed_table - GROUP BY - workflow_name, - test_class, - test_file, - date, - oncalls, - config_job_name, - config_shard_name - ), - filtered_with_costs as ( - SELECT - t.avg_duration_in_second as avg_duration_in_second, - t.oncalls as oncalls, - t.date as date, - t.workflow_name as workflow_name, - t.test_class as test_class, - t.test_file as test_file, - t.config_job_name as config_job_name, - t.config_shard_name as config_shard_name, - t.workflow_occurences, - CASE - WHEN p.price IS NULL THEN 0 - ELSE CAST(p.price AS float) - END as price_per_hour - FROM - filtered_table t - LEFT JOIN commons.price_per_config p ON ( - t.config_job_name = p.job_name - AND t.config_shard_name = p.shard_name - ) - ), - total_table as ( - SELECT - date, - workflow_name, - SUM(avg_duration_in_second) as total_duration_per_workflow_per_run, - SUM(price_per_hour * avg_duration_in_second / 60 / 60) as total_price_per_workflow_per_run - FROM - filtered_with_costs - GROUP BY - date, - workflow_name - ), - filtered_with_oncalls as ( - SELECT - * - FROM - ( - filtered_with_costs - CROSS JOIN UNNEST(oncalls AS oncall) - ) - WHERE - REPLACE(REPLACE(oncall, 'module: ', ''), 'oncall: ', '') LIKE: oncall - ), - filtered_with_oncalls_and_totals as ( - SELECT - avg_duration_in_second, - REPLACE(REPLACE(oncall, 'module: ', ''), 'oncall: ', '') as oncall, - filtered_with_oncalls.date as date, - filtered_with_oncalls.workflow_name as workflow_name, - test_class, - test_file, - total_duration_per_workflow_per_run, - total_price_per_workflow_per_run, - workflow_occurences, - price_per_hour, - FROM - filtered_with_oncalls - INNER JOIN total_table ON filtered_with_oncalls.date = total_table.date - AND filtered_with_oncalls.workflow_name = total_table.workflow_name - WHERE - REPLACE(REPLACE(oncall, 'module: ', ''), 'oncall: ', '') LIKE: oncall - ), - t as ( - SELECT - oncall, - workflow_name as workflow_type, - SUM(avg_duration_in_second) as time_in_seconds, - TRUNC( - SUM(price_per_hour * avg_duration_in_second / 60 / 60), - 2 - ) as estimated_price_per_run_in_dollars, - TRUNC( - SUM( - price_per_hour * avg_duration_in_second * workflow_occurences / 60 / 60 - ), - 2 - ) as estimated_price_per_day_in_dollars, - CAST(date as STRING) as granularity_bucket, - TRUNC( - SUM(avg_duration_in_second) / ARBITRARY( - -- add noise to avoid divide by 0 - total_duration_per_workflow_per_run + 0.000001 - ) * 100, - 2 - ) as percentage_of_time, - TRUNC( - SUM(price_per_hour * avg_duration_in_second / 60 / 60) / ARBITRARY( - -- add noise to avoid divide by 0 - total_price_per_workflow_per_run + 0.000001 - ) * 100, - 2 - ) as percentage_of_cost - FROM - filtered_with_oncalls_and_totals as t - GROUP BY - t.oncall, - t.date, - t.workflow_name - ) -SELECT - *, -FROM - t -ORDER BY - time_in_seconds DESC diff --git a/torchci/rockset/commons/__sql/test_time_per_oncall.sql b/torchci/rockset/commons/__sql/test_time_per_oncall.sql deleted file mode 100644 index 8775ed2c61..0000000000 --- a/torchci/rockset/commons/__sql/test_time_per_oncall.sql +++ /dev/null @@ -1,60 +0,0 @@ -With - workflow_summed_table AS ( - SELECT - workflow_id, - -- sum by job name to get total over all shards - SUM(sum_duration_in_second) as sum_duration_in_second, - oncalls, - date, - workflow_name - FROM - metrics.aggregated_test_metrics_with_preproc - WHERE - DATE_TRUNC('DAY', date) >= DATE_TRUNC('DAY', PARSE_DATETIME_ISO8601(: startDate)) - AND DATE_TRUNC('DAY', date) < DATE_TRUNC('DAY', PARSE_DATETIME_ISO8601(: endDate)) - AND workflow_name LIKE: workflow_type - GROUP BY - workflow_id, - workflow_name, - date, - oncalls - ), - filtered_table AS ( - SELECT - AVG(sum_duration_in_second) as avg_duration_in_second, - oncalls, - date, - workflow_name, - FROM - workflow_summed_table - GROUP BY - workflow_name, - date, - oncalls - ), - filtered_with_oncalls as ( - SELECT - * - FROM - ( - filtered_table - CROSS JOIN UNNEST(oncalls AS oncall) - ) - WHERE - REPLACE(REPLACE(oncall, 'module: ', ''), 'oncall: ', '') LIKE: oncall - ), - t as ( - SELECT - avg_duration_in_second as time_in_seconds, - REPLACE(REPLACE(oncall, 'module: ', ''), 'oncall: ', '') as oncall, - CAST(filtered_with_oncalls.date AS STRING) as granularity_bucket, - filtered_with_oncalls.workflow_name as workflow_name, - FROM - filtered_with_oncalls - ) -SELECT - *, -FROM - t -ORDER BY - time_in_seconds DESC diff --git a/torchci/rockset/commons/__sql/test_times_per_workflow_type.sql b/torchci/rockset/commons/__sql/test_times_per_workflow_type.sql deleted file mode 100644 index bce3a1a8b4..0000000000 --- a/torchci/rockset/commons/__sql/test_times_per_workflow_type.sql +++ /dev/null @@ -1,32 +0,0 @@ -WITH sum_table AS (SELECT - t.workflow_name as workflow_type, - SUM( - t.sum_duration_in_second - ) as time_in_seconds, - t.date AS granularity_bucket, - t.workflow_id -FROM - metrics.aggregated_test_metrics AS t -WHERE - CAST( - PARSE_DATETIME_ISO8601(t.date) as date - ) >= PARSE_DATETIME_ISO8601(: startDate) - AND t.workflow_name != 'inductor' - AND t.workflow_name != 'unstable' - AND t.workflow_id IS NOT NULL -GROUP BY - t.date, - t.workflow_name, - t.workflow_id -) -SELECT - workflow_type, - AVG( - time_in_seconds - ) as time_in_seconds, - granularity_bucket -FROM - sum_table -GROUP BY - granularity_bucket, - workflow_type \ No newline at end of file diff --git a/torchci/rockset/commons/test_time_and_price_per_oncall.lambda.json b/torchci/rockset/commons/test_time_and_price_per_oncall.lambda.json deleted file mode 100644 index 9374ffad65..0000000000 --- a/torchci/rockset/commons/test_time_and_price_per_oncall.lambda.json +++ /dev/null @@ -1,26 +0,0 @@ -{ - "sql_path": "__sql/test_time_and_price_per_oncall.sql", - "default_parameters": [ - { - "name": "endDate", - "type": "string", - "value": "2023-05-09T03:46:31.152Z" - }, - { - "name": "oncall", - "type": "string", - "value": "functorch" - }, - { - "name": "startDate", - "type": "string", - "value": "2023-04-09T03:46:31.152Z" - }, - { - "name": "workflow_type", - "type": "string", - "value": "%" - } - ], - "description": "" -} \ No newline at end of file diff --git a/torchci/rockset/commons/test_time_per_oncall.lambda.json b/torchci/rockset/commons/test_time_per_oncall.lambda.json deleted file mode 100644 index e7c1e970e8..0000000000 --- a/torchci/rockset/commons/test_time_per_oncall.lambda.json +++ /dev/null @@ -1,26 +0,0 @@ -{ - "sql_path": "__sql/test_time_per_oncall.sql", - "default_parameters": [ - { - "name": "endDate", - "type": "string", - "value": "2023-04-10T00:06:32.839Z" - }, - { - "name": "oncall", - "type": "string", - "value": "%" - }, - { - "name": "startDate", - "type": "string", - "value": "2023-04-09T00:06:32.839Z" - }, - { - "name": "workflow_type", - "type": "string", - "value": "pull" - } - ], - "description": "" -} \ No newline at end of file diff --git a/torchci/rockset/commons/test_times_per_workflow_type.lambda.json b/torchci/rockset/commons/test_times_per_workflow_type.lambda.json deleted file mode 100644 index d785f6b2d7..0000000000 --- a/torchci/rockset/commons/test_times_per_workflow_type.lambda.json +++ /dev/null @@ -1,16 +0,0 @@ -{ - "sql_path": "__sql/test_times_per_workflow_type.sql", - "default_parameters": [ - { - "name": "startDate", - "type": "string", - "value": "2023-03-29T00:06:32.839Z" - }, - { - "name": "workflow_type", - "type": "string", - "value": "pull" - } - ], - "description": "" -} \ No newline at end of file diff --git a/torchci/rockset/prodVersions.json b/torchci/rockset/prodVersions.json index a857ad71d2..76e97c7f9c 100644 --- a/torchci/rockset/prodVersions.json +++ b/torchci/rockset/prodVersions.json @@ -14,9 +14,6 @@ "get_workflow_jobs": "6ed2029b19691a4b", "test_time_per_file": "219d8bcff926d6c8", "test_time_per_file_periodic_jobs": "fd632fe67c910f3a", - "test_time_per_oncall": "a85f4d3243d90f51", - "test_time_and_price_per_oncall": "7af6d14035a19439", - "test_times_per_workflow_type": "3ab0de839b95d22c", "issue_query": "e4d338de89980044", "failure_samples_query": "7940a636284d0752", "num_commits_master": "e4a864147cf3bf44",