From 6c407cefdadaede139569836c6a93df6d168fe8c Mon Sep 17 00:00:00 2001
From: Carl Cervone <42869436+ccerv1@users.noreply.github.com>
Date: Fri, 19 Apr 2024 22:05:17 -0400
Subject: [PATCH] dbt: refactor projects mart to use int model (#1262)

* refactor: create int_projects

* update projects mart

* change comments
---
Review notes (text between the "---" line and the diffstat is not part of
the commit message):

- int_projects joins ref('int_project_owners') instead of carrying its own
  copy of the ranked_repos / project_owners CTEs, so the owner-ranking logic
  lives in one model.
- The ROW_NUMBER() ordering gets LOWER(owner) as a secondary key; with
  star_count alone, projects whose top repositories tie on stars could get a
  different primary_github_owner from run to run.
- count_github_owners counts DISTINCT LOWER(owner) so it agrees with the
  lowercased primary_github_owner emitted by the same model.
- The TODO in int_project_owners uses a Jinja comment, matching int_projects.
- "index" lines are omitted for the two files whose contents were revised.
- NOTE(review): the projects mart no longer selects user_namespace (it was
  `namespace AS user_namespace` before this patch) -- confirm no consumer of
  the mart still reads that column.

 .../directory/int_project_owners.sql          | 12 +++++++++---
 .../intermediate/directory/int_projects.sql   | 16 ++++++++++++++++
 .../dbt/models/marts/directory/projects.sql   | 17 ++++++++---------
 3 files changed, 33 insertions(+), 12 deletions(-)
 create mode 100644 warehouse/dbt/models/intermediate/directory/int_projects.sql

diff --git a/warehouse/dbt/models/intermediate/directory/int_project_owners.sql b/warehouse/dbt/models/intermediate/directory/int_project_owners.sql
--- a/warehouse/dbt/models/intermediate/directory/int_project_owners.sql
+++ b/warehouse/dbt/models/intermediate/directory/int_project_owners.sql
@@ -6,13 +6,19 @@ WITH ranked_repos AS (
     ROW_NUMBER() OVER (
-      PARTITION BY project_id ORDER BY star_count DESC
+      PARTITION BY project_id
+      {# LOWER(owner) breaks star_count ties so the result is repeatable #}
+      ORDER BY star_count DESC, LOWER(owner) ASC
     ) AS row_number,
-    COUNT(DISTINCT owner) OVER (PARTITION BY project_id) AS num_github_owners
+    {# case-insensitive count, consistent with LOWER(owner) in the output #}
+    COUNT(DISTINCT LOWER(owner)) OVER (
+      PARTITION BY project_id
+    ) AS count_github_owners
   FROM {{ ref('stg_ossd__repositories_by_project') }}
 )
 
 SELECT
   project_id,
-  num_github_owners,
+  count_github_owners,
   LOWER(owner) AS primary_github_owner
+  {# TODO: is_git_organization #}
 FROM ranked_repos
 WHERE row_number = 1
diff --git a/warehouse/dbt/models/intermediate/directory/int_projects.sql b/warehouse/dbt/models/intermediate/directory/int_projects.sql
new file mode 100644
--- /dev/null
+++ b/warehouse/dbt/models/intermediate/directory/int_projects.sql
@@ -0,0 +1,16 @@
+{# Project attributes joined to GitHub owner stats and artifact counts #}
+SELECT
+  p.id AS project_id,
+  p.slug AS project_slug,
+  {# TODO: description AS project_description #}
+  p.name AS project_name,
+  p.namespace AS namespace,
+  po.primary_github_owner,
+  po.count_github_owners,
+  ARRAY_LENGTH(JSON_EXTRACT_ARRAY(p.github)) AS count_github_artifacts,
+  ARRAY_LENGTH(JSON_EXTRACT_ARRAY(p.blockchain)) AS count_blockchain_artifacts,
+  ARRAY_LENGTH(JSON_EXTRACT_ARRAY(p.npm)) AS count_npm_artifacts
+FROM {{ ref('stg_ossd__current_projects') }} AS p
+{# LEFT JOIN: projects with no row in int_project_owners get NULL owner columns #}
+LEFT JOIN {{ ref('int_project_owners') }} AS po
+  ON p.id = po.project_id
diff --git a/warehouse/dbt/models/marts/directory/projects.sql b/warehouse/dbt/models/marts/directory/projects.sql
index 35526d240..68a388c00 100644
--- a/warehouse/dbt/models/marts/directory/projects.sql
+++ b/warehouse/dbt/models/marts/directory/projects.sql
@@ -5,12 +5,11 @@
 }}
 
 SELECT
-  id AS project_id,
-  slug AS project_slug,
-  -- description AS project_description,
-  name AS project_name,
-  namespace AS user_namespace,
-  ARRAY_LENGTH(JSON_EXTRACT_ARRAY(github)) AS count_github_artifacts,
-  ARRAY_LENGTH(JSON_EXTRACT_ARRAY(blockchain)) AS count_blockchain_artifacts,
-  ARRAY_LENGTH(JSON_EXTRACT_ARRAY(npm)) AS count_npm_artifacts
-FROM {{ ref('stg_ossd__current_projects') }}
+  project_id,
+  project_slug,
+  project_name,
+  count_github_owners,
+  count_github_artifacts,
+  count_blockchain_artifacts,
+  count_npm_artifacts
+FROM {{ ref('int_projects') }}