Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[PRSync] DBT models for Protocol Labs churn analysis #22

Closed
wants to merge 2 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
16 changes: 16 additions & 0 deletions .github/workflows-recce/hello.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
# Minimal smoke-test workflow: declared for pull_request "opened" and
# "synchronize" events; it checks out the repository and echoes a greeting.
name: Hello World Workflow

on:
  pull_request:
    types: [opened, synchronize]

jobs:
  hello-world-job:
    runs-on: ubuntu-latest

    steps:
      - name: Checkout repository
        # checkout@v2 targets the retired Node 12 action runtime; v4 is the
        # maintained major version and takes the same (default) inputs.
        uses: actions/checkout@v4

      - name: Print Hello, World!
        run: echo "Hello, World! Test permissions."
26 changes: 26 additions & 0 deletions profiles.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
# dbt connection profile for BigQuery. The keyfile path, GCP project and
# dataset names are all read from environment variables, so no credentials
# live in this file.
opensource_observer:
  outputs:
    # Default target; writes to the dataset named by DBT_GOOGLE_DATASET.
    playground:
      type: bigquery
      method: service-account
      keyfile: "{{ env_var('DBT_GOOGLE_KEYFILE') }}"
      project: "{{ env_var('DBT_GOOGLE_PROJECT') }}"
      dataset: "{{ env_var('DBT_GOOGLE_DATASET') }}"
      # dbt-bigquery's query timeout setting is `job_execution_timeout_seconds`.
      # The previous key (`job_execution_time_seconds`) is not a documented
      # setting, so the 300 s limit was presumably not being applied.
      job_execution_timeout_seconds: 300
      job_retries: 1
      location: US
      threads: 32
    # Same connection settings, but writes to DBT_GOOGLE_DEV_DATASET.
    dev:
      type: bigquery
      method: service-account
      keyfile: "{{ env_var('DBT_GOOGLE_KEYFILE') }}"
      project: "{{ env_var('DBT_GOOGLE_PROJECT') }}"
      dataset: "{{ env_var('DBT_GOOGLE_DEV_DATASET') }}"
      job_execution_timeout_seconds: 300
      job_retries: 1
      location: US
      threads: 32
  # By default we target the playground. It's less costly and also safer to
  # write there while developing.
  target: playground

113 changes: 113 additions & 0 deletions warehouse/dbt/models/playground/protocollabs__churn.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,113 @@
-- Monthly contributor churn per project (to_name), derived from int_events.
--
-- Output: one row per project and calendar month in which the project has at
-- least one event with a non-NULL from_name.
--   contributors / contributors_count
--       distinct contributors (from_name) active in the month
--   lag_contributors / lag_contributors_count
--       contributors of the project's previous *active* month, which is not
--       necessarily the previous calendar month ('' and 0 when there is none)
--   gained   contributors active this month but not in the previous active month
--   lost     contributors of the previous active month that are not active now
--   cum_contributors / cum_contributors_count
--       distinct contributors seen so far within the same calendar year
--   churn_prior   lost as a percentage of cum_contributors_count
--   churn_total   percentage of the year-to-date contributors not active now
--
-- Contributor lists are carried as ', '-delimited strings. This assumes that
-- from_name never contains ', ' -- TODO confirm against int_events.
WITH events_by_month AS (

    -- Tag every event with its calendar year and month. Calendar YEAR is used
    -- on purpose: ISOYEAR can differ from the calendar year for dates close to
    -- 1 January, which would pair an event with the wrong (year, month).
    SELECT
        to_name,
        from_name,
        EXTRACT(YEAR FROM time) AS year,
        EXTRACT(MONTH FROM time) AS month
    FROM {{ ref("int_events") }}
    -- Aggregates below ignore NULL names anyway; filtering here keeps months
    -- that have no named contributor from appearing as a "previous month".
    WHERE from_name IS NOT NULL

),

monthly_contributors AS (

    -- Distinct contributors per project and month, as a list and as a count.
    SELECT
        to_name,
        year,
        month,
        STRING_AGG(DISTINCT from_name, ', ') AS contributors,
        COUNT(DISTINCT from_name) AS contributors_count
    FROM events_by_month
    GROUP BY to_name, year, month

),

with_previous_month AS (

    -- Attach the contributor list of the project's previous active month.
    -- Ordering by (year, month) keeps the sequence chronological when the data
    -- spans several years; '' marks "no previous month".
    SELECT
        to_name,
        year,
        month,
        contributors,
        contributors_count,
        COALESCE(
            LAG(contributors, 1) OVER (
                PARTITION BY to_name
                ORDER BY year, month
            ),
            ''
        ) AS lag_contributors
    FROM monthly_contributors

),

monthly_gained AS (

    -- Explode the current list to one row per contributor and count those
    -- that are not an exact member of the previous month's list. Exact
    -- membership avoids substring false matches such as 'bob' in 'bobby'.
    SELECT
        m.to_name,
        m.year,
        m.month,
        m.contributors,
        m.contributors_count,
        m.lag_contributors,
        SUM(
            CASE
                WHEN contributor IN UNNEST(SPLIT(m.lag_contributors, ', ')) THEN 0
                ELSE 1
            END
        ) AS gained
    FROM with_previous_month AS m
    CROSS JOIN UNNEST(SPLIT(m.contributors, ', ')) AS contributor
    GROUP BY
        m.to_name,
        m.year,
        m.month,
        m.contributors,
        m.contributors_count,
        m.lag_contributors

),

monthly_lost AS (

    -- Explode the previous month's list and count the members that are
    -- missing from the current list. SPLIT('') yields a single '' element, so
    -- a project's first month still produces one row here; that placeholder
    -- is neither counted as a previous contributor nor as a loss.
    SELECT
        g.to_name,
        g.year,
        g.month,
        g.contributors,
        g.contributors_count,
        g.lag_contributors,
        g.gained,
        COUNTIF(previous_contributor != '') AS lag_contributors_count,
        SUM(
            CASE
                WHEN previous_contributor = '' THEN 0
                WHEN previous_contributor IN UNNEST(SPLIT(g.contributors, ', ')) THEN 0
                ELSE 1
            END
        ) AS lost
    FROM monthly_gained AS g
    CROSS JOIN UNNEST(SPLIT(g.lag_contributors, ', ')) AS previous_contributor
    GROUP BY
        g.to_name,
        g.year,
        g.month,
        g.contributors,
        g.contributors_count,
        g.lag_contributors,
        g.gained

),

monthly_cumulative AS (

    -- Running concatenation of the monthly lists within each project and
    -- year. It restarts every year and still contains repeated names; they
    -- are de-duplicated in the next step.
    SELECT
        to_name,
        year,
        month,
        contributors,
        contributors_count,
        lag_contributors,
        lag_contributors_count,
        gained,
        lost,
        STRING_AGG(contributors, ', ') OVER (
            PARTITION BY to_name, year
            ORDER BY month
            ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW
        ) AS cum_contributors_raw
    FROM monthly_lost

),

queries AS (

    -- De-duplicate the year-to-date list: explode it per contributor, then
    -- collapse back to one row per project and month.
    SELECT
        c.to_name,
        c.year,
        c.month,
        c.lost,
        c.gained,
        STRING_AGG(DISTINCT cum_contributor, ', ') AS cum_contributors,
        COUNT(DISTINCT cum_contributor) AS cum_contributors_count,
        c.lag_contributors,
        c.lag_contributors_count,
        c.contributors,
        c.contributors_count
    FROM monthly_cumulative AS c
    CROSS JOIN UNNEST(SPLIT(c.cum_contributors_raw, ', ')) AS cum_contributor
    GROUP BY
        c.to_name,
        c.year,
        c.month,
        c.lost,
        c.gained,
        c.lag_contributors,
        c.lag_contributors_count,
        c.contributors,
        c.contributors_count

)

-- SAFE_DIVIDE returns FLOAT64 for INT64 inputs and NULL (instead of failing
-- the whole model) should a denominator ever be zero.
SELECT
    to_name,
    year,
    month,
    lost,
    gained,
    cum_contributors,
    cum_contributors_count,
    lag_contributors,
    lag_contributors_count,
    contributors,
    contributors_count,
    SAFE_DIVIDE(lost, cum_contributors_count) * 100 AS churn_prior,
    SAFE_DIVIDE(cum_contributors_count - contributors_count, cum_contributors_count) * 100 AS churn_total
FROM queries