From 42f7019b9a3003f1191c53ce5f75f0a771b3820c Mon Sep 17 00:00:00 2001
From: Innocent Ndubuisi-Obi Jr
Date: Tue, 26 Mar 2024 16:34:40 -0700
Subject: [PATCH 1/2] DBT models for Protocol Labs churn analysis

---
 .../models/playground/protocollabs__churn.sql | 135 ++++++++++++++++++
 1 file changed, 135 insertions(+)
 create mode 100644 warehouse/dbt/models/playground/protocollabs__churn.sql

diff --git a/warehouse/dbt/models/playground/protocollabs__churn.sql b/warehouse/dbt/models/playground/protocollabs__churn.sql
new file mode 100644
index 000000000..ca8a649cd
--- /dev/null
+++ b/warehouse/dbt/models/playground/protocollabs__churn.sql
@@ -0,0 +1,135 @@
+-- Monthly contributor churn per project (`to_name`), derived from int_events.
+--
+-- Grain: one row per (to_name, year, month) that has at least one event with a
+-- non-NULL from_name.
+--
+-- Output columns:
+--   contributors, contributors_count
+--       distinct from_name values seen in the month
+--   lag_contributors, lag_contributors_count
+--       the same for the project's previous row, i.e. its most recent earlier
+--       month with events ('' and 0 when there is none)
+--   cum_contributors, cum_contributors_count
+--       distinct contributors seen so far in the calendar year, month included
+--   gained       contributors of this month that are absent from the previous row
+--   lost         contributors of the previous row that are absent from this month
+--   churn_prior  lost as a percentage of cum_contributors_count
+--   churn_total  percentage of cum_contributors that are not active this month
+--
+-- Contributor lists are carried as ', '-joined strings and split again on the
+-- same delimiter. NOTE(review): this assumes from_name never contains ', ';
+-- confirm against int_events.
+
+WITH events AS (
+    -- Calendar year and month of every event. The calendar year is used rather
+    -- than the ISO year so that it always agrees with the calendar month beside
+    -- it (2024-12-30, for example, has ISO year 2025 but calendar month 12).
+    SELECT
+        to_name,
+        from_name,
+        EXTRACT(YEAR FROM time) AS year,
+        EXTRACT(MONTH FROM time) AS month
+    FROM {{ ref("int_events") }}
+),
+
+monthly AS (
+    -- Distinct contributors per project and month. STRING_AGG skips NULLs, so
+    -- `contributors` is NULL for a month whose events all lack a from_name.
+    SELECT
+        to_name,
+        year,
+        month,
+        STRING_AGG(DISTINCT from_name, ', ' ORDER BY from_name) AS contributors
+    FROM events
+    GROUP BY to_name, year, month
+),
+
+windowed AS (
+    SELECT
+        to_name,
+        year,
+        month,
+        contributors,
+        -- Previous month row of the same project. Ordering by (year, month)
+        -- keeps the sequence chronological across year boundaries.
+        LAG(contributors) OVER (
+            PARTITION BY to_name
+            ORDER BY year, month
+        ) AS prev_contributors,
+        -- Every monthly list so far in the year, duplicates included; the
+        -- names are de-duplicated in `queries`.
+        STRING_AGG(contributors, ', ') OVER (
+            PARTITION BY to_name, year
+            ORDER BY month
+            ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW
+        ) AS cum_contributors_raw
+    FROM monthly
+),
+
+name_sets AS (
+    -- Split the joined strings into arrays so that membership is tested on
+    -- whole names instead of substrings.
+    SELECT
+        to_name,
+        year,
+        month,
+        contributors,
+        COALESCE(prev_contributors, '') AS lag_contributors,
+        SPLIT(contributors, ', ') AS current_names,
+        -- SPLIT(NULL) is NULL: fall back to an empty array when there is no
+        -- previous row, so the set comparisons and the count see zero names.
+        COALESCE(SPLIT(prev_contributors, ', '), ARRAY<STRING>[]) AS prev_names,
+        SPLIT(cum_contributors_raw, ', ') AS cum_names
+    FROM windowed
+    -- Months without any named contributor are not reported.
+    WHERE contributors IS NOT NULL
+),
+
+queries AS (
+    SELECT
+        to_name,
+        year,
+        month,
+        (
+            SELECT COUNT(*)
+            FROM UNNEST(prev_names) AS contributor
+            WHERE contributor NOT IN UNNEST(current_names)
+        ) AS lost,
+        (
+            SELECT COUNT(*)
+            FROM UNNEST(current_names) AS contributor
+            WHERE contributor NOT IN UNNEST(prev_names)
+        ) AS gained,
+        (
+            SELECT STRING_AGG(DISTINCT contributor, ', ' ORDER BY contributor)
+            FROM UNNEST(cum_names) AS contributor
+        ) AS cum_contributors,
+        (
+            SELECT COUNT(DISTINCT contributor)
+            FROM UNNEST(cum_names) AS contributor
+        ) AS cum_contributors_count,
+        lag_contributors,
+        ARRAY_LENGTH(prev_names) AS lag_contributors_count,
+        contributors,
+        ARRAY_LENGTH(current_names) AS contributors_count
+    FROM name_sets
+)
+
+SELECT
+    to_name,
+    year,
+    month,
+    lost,
+    gained,
+    cum_contributors,
+    cum_contributors_count,
+    lag_contributors,
+    lag_contributors_count,
+    contributors,
+    contributors_count,
+    (CAST(lost AS FLOAT64) / cum_contributors_count) * 100 AS churn_prior,
+    (
+        (CAST(cum_contributors_count AS FLOAT64) - CAST(contributors_count AS FLOAT64))
+        / CAST(cum_contributors_count AS FLOAT64)
+    ) * 100 AS churn_total
+FROM queries
From 65d5d04d44cf89f9b861bab2711932aa3ac5ab33 Mon Sep 17 00:00:00 2001
From: popcorny
Date: Mon, 27 May 2024 10:15:05 +0800
Subject: [PATCH 2/2]
Auto-sync by GithubPrSyncer

---
 .github/workflows-recce/hello.yml | 18 ++++++++++++++++++
 profiles.yml                      | 29 +++++++++++++++++++++++++++++
 2 files changed, 47 insertions(+)
 create mode 100644 .github/workflows-recce/hello.yml
 create mode 100644 profiles.yml

diff --git a/.github/workflows-recce/hello.yml b/.github/workflows-recce/hello.yml
new file mode 100644
index 000000000..37594b996
--- /dev/null
+++ b/.github/workflows-recce/hello.yml
@@ -0,0 +1,18 @@
+# Runs when a pull request is opened or updated: checks out the repository and
+# echoes a fixed message.
+name: Hello World Workflow
+
+on:
+  pull_request:
+    types: [opened, synchronize]
+
+jobs:
+  hello-world-job:
+    runs-on: ubuntu-latest
+
+    steps:
+      - name: Checkout repository
+        uses: actions/checkout@v4
+
+      - name: Print Hello, World!
+        run: echo "Hello, World! Test permissions."
diff --git a/profiles.yml b/profiles.yml
new file mode 100644
index 000000000..44f94cf68
--- /dev/null
+++ b/profiles.yml
@@ -0,0 +1,29 @@
+# dbt profile for BigQuery with two targets, `playground` and `dev`. The keyfile
+# path, project and dataset of each target come from environment variables.
+# `job_execution_timeout_seconds` is dbt-bigquery's per-job timeout setting.
+opensource_observer:
+  outputs:
+    playground:
+      type: bigquery
+      method: service-account
+      keyfile: "{{ env_var('DBT_GOOGLE_KEYFILE') }}"
+      project: "{{ env_var('DBT_GOOGLE_PROJECT') }}"
+      dataset: "{{ env_var('DBT_GOOGLE_DATASET') }}"
+      job_execution_timeout_seconds: 300
+      job_retries: 1
+      location: US
+      threads: 32
+    dev:
+      type: bigquery
+      method: service-account
+      keyfile: "{{ env_var('DBT_GOOGLE_KEYFILE') }}"
+      project: "{{ env_var('DBT_GOOGLE_PROJECT') }}"
+      dataset: "{{ env_var('DBT_GOOGLE_DEV_DATASET') }}"
+      job_execution_timeout_seconds: 300
+      job_retries: 1
+      location: US
+      threads: 32
+  # By default we target the playground. It's less costly and also safer to write
+  # there while developing.
+  target: playground
+