diff --git a/configs/public_api/http_endpoints/config.json b/configs/public_api/http_endpoints/config.json index 84e8570b291..816bc7075b3 100644 --- a/configs/public_api/http_endpoints/config.json +++ b/configs/public_api/http_endpoints/config.json @@ -1,9 +1,9 @@ [ { - "name": "/repos/pull_requests/open_to_mer_copy", + "name": "/repos/pull_requests/history/siz", "description": "", "method": "GET", - "endpoint": "/repos/pull_requests/open_to_merge_dump_uyQXjT", + "endpoint": "/repos/pull_requests/history/size", "data_source": { "cluster_id": 1379661944642684098 }, @@ -46,7 +46,7 @@ }, "tag": "Default", "batch_operation": 0, - "sql_file": "sql/GET-repos-pull_requests-open_to_merge_dump_uyQXjT.sql", + "sql_file": "sql/GET-repos-pull_requests-history-size.sql", "type": "sql_endpoint", "return_type": "json" }, diff --git a/configs/public_api/http_endpoints/sql/GET-repos-pull_requests-history-size.sql b/configs/public_api/http_endpoints/sql/GET-repos-pull_requests-history-size.sql new file mode 100644 index 00000000000..76367331ad0 --- /dev/null +++ b/configs/public_api/http_endpoints/sql/GET-repos-pull_requests-history-size.sql @@ -0,0 +1,33 @@ +USE gharchive_dev; + +SELECT + t_month AS event_month, xs, s, m, l, xl, xxl, all_size +FROM ( + SELECT + t_month, + SUM(CASE WHEN (additions + deletions) < 10 THEN 1 ELSE 0 END) OVER(PARTITION BY t_month) AS xs, + SUM(CASE WHEN (additions + deletions) >= 10 AND (additions + deletions) < 30 THEN 1 ELSE 0 END) OVER(PARTITION BY t_month) AS s, + SUM(CASE WHEN (additions + deletions) >= 30 AND (additions + deletions) < 100 THEN 1 ELSE 0 END) OVER(PARTITION BY t_month) AS m, + SUM(CASE WHEN (additions + deletions) >= 100 AND (additions + deletions) < 500 THEN 1 ELSE 0 END) OVER(PARTITION BY t_month) AS l, + SUM(CASE WHEN (additions + deletions) >= 500 AND (additions + deletions) < 1000 THEN 1 ELSE 0 END) OVER(PARTITION BY t_month) AS xl, + SUM(CASE WHEN (additions + deletions) >= 1000 THEN 1 ELSE 0 END) OVER (PARTITION BY t_month) AS xxl, + COUNT(*) OVER (PARTITION BY t_month) AS all_size, + ROW_NUMBER() OVER (PARTITION BY t_month) AS row_num + FROM ( + SELECT + DATE_FORMAT(created_at, '%Y-%m-01') as t_month, + additions, + deletions + FROM + github_events + WHERE + type = 'PullRequestEvent' + AND repo_id = (SELECT repo_id FROM github_repos WHERE repo_name = CONCAT(${owner}, '/', ${repo}) LIMIT 1) + AND action = 'opened' + AND created_at >= ${from} + AND created_at <= ${to} + ) sub +) sub +WHERE row_num = 1 +ORDER BY t_month +; \ No newline at end of file diff --git a/configs/public_api/http_endpoints/sql/GET-repos-pull_requests-open_to_merge_dump_uyQXjT.sql b/configs/public_api/http_endpoints/sql/GET-repos-pull_requests-open_to_merge_dump_uyQXjT.sql deleted file mode 100644 index c9a8819e47e..00000000000 --- a/configs/public_api/http_endpoints/sql/GET-repos-pull_requests-open_to_merge_dump_uyQXjT.sql +++ /dev/null @@ -1,87 +0,0 @@ -USE gharchive_dev; - -with repo AS ( - SELECT repo_id - FROM github_repos - WHERE repo_name = CONCAT(${owner}, '/', ${repo}) - LIMIT 1 -), pr_with_merged_at AS ( - SELECT - number, DATE_FORMAT(created_at, '%Y-%m-01') AS t_month, created_at AS merged_at - FROM - github_events ge - WHERE - type = 'PullRequestEvent' - -- Considering that some repositories accept the code of the contributor by closing the PR and push commit directly, - -- here is not distinguished whether it is the merged event. - -- See: https://github.com/mongodb/mongo/pulls?q=is%3Apr+is%3Aclosed - AND action = 'closed' - AND repo_id = (SELECT repo_id FROM repo) - AND created_at >= ${from} - AND created_at <= ${to} -), pr_with_opened_at AS ( - SELECT - number, created_at AS opened_at - FROM - github_events ge - WHERE - type = 'PullRequestEvent' - AND action = 'opened' - -- Exclude Bots - -- AND actor_login NOT LIKE '%bot%' - -- AND actor_login NOT IN (SELECT login FROM blacklist_users bu) - AND repo_id = (SELECT repo_id FROM repo) - AND created_at >= ${from} - AND created_at <= ${to} -), tdiff AS ( - SELECT - t_month, - (UNIX_TIMESTAMP(pwm.merged_at) - UNIX_TIMESTAMP(pwo.opened_at)) AS diff - FROM - pr_with_opened_at pwo - JOIN pr_with_merged_at pwm ON pwo.number = pwm.number AND pwm.merged_at > pwo.opened_at -), tdiff_with_rank AS ( - SELECT - tdiff.t_month, - diff / 60 / 60 AS diff, - ROW_NUMBER() OVER (PARTITION BY tdiff.t_month ORDER BY diff) AS r, - COUNT(*) OVER (PARTITION BY tdiff.t_month) AS cnt, - FIRST_VALUE(diff / 60 / 60) OVER (PARTITION BY tdiff.t_month ORDER BY diff) AS p0, - FIRST_VALUE(diff / 60 / 60) OVER (PARTITION BY tdiff.t_month ORDER BY diff DESC) AS p100 - FROM tdiff -), tdiff_p25 AS ( - SELECT - t_month, diff AS p25 - FROM - tdiff_with_rank tr - WHERE - r = ROUND(cnt * 0.25) -), tdiff_p50 AS ( - SELECT - t_month, diff AS p50 - FROM - tdiff_with_rank tr - WHERE - r = ROUND(cnt * 0.5) -), tdiff_p75 AS ( - SELECT - t_month, diff AS p75 - FROM - tdiff_with_rank tr - WHERE - r = ROUND(cnt * 0.75) -) -SELECT - tr.t_month AS event_month, - ROUND(p0, 2) AS p0, - ROUND(p25, 2) AS p25, - ROUND(p50, 2) AS p50, - ROUND(p75, 2) AS p75, - ROUND(p100, 2) AS p100 -FROM tdiff_with_rank tr -LEFT JOIN tdiff_p25 p25 ON tr.t_month = p25.t_month -LEFT JOIN tdiff_p50 p50 ON tr.t_month = p50.t_month -LEFT JOIN tdiff_p75 p75 ON tr.t_month = p75.t_month -WHERE r = 1 -ORDER BY event_month -; \ No newline at end of file