From 7264214ae69902293d8f9be050e09d74ed8d19ed Mon Sep 17 00:00:00 2001 From: Carl Cervone <42869436+ccerv1@users.noreply.github.com> Date: Tue, 7 May 2024 16:05:15 -0400 Subject: [PATCH] reconnect int models to new schema and naming conventions (#1363) * refactor: int models in directory * fix artifact_name references * fix: naming for repos by project table * big fixes to artifact source and namespace fields * fix: int events union * fix linting error * (broken) fixes to namespaces and event source in int models * fix invocation of macro * complete int model refactor * remove log file --------- Co-authored-by: Reuven V. Gonzales --- .gitignore | 1 + .../intermediate/directory/int_artifacts.sql | 67 ++-- .../directory/int_artifacts_history.sql | 35 ++ .../int_ossd__artifacts_by_project.sql | 178 ++++++++++ ...l => int_ossd__projects_by_collection.sql} | 2 +- ... => int_ossd__repositories_by_project.sql} | 2 +- .../directory/int_project_owners.sql | 2 +- .../intermediate/directory/int_projects.sql | 4 +- .../directory/int_projects_by_collection.sql | 4 +- .../directory/int_repos_by_project.sql | 42 ++- .../stg_ossd__artifacts_by_project.sql | 116 ------- .../events/int_event_totals_by_project.sql | 24 +- .../models/intermediate/events/int_events.sql | 306 ++++++++++-------- .../events/int_events_from_collection.sql | 10 - .../events/int_events_from_project.sql | 13 - .../events/int_events_to_collection.sql | 16 +- .../events/int_events_to_project.sql | 18 +- .../events/int_events_with_artifact_id.sql | 25 +- .../int_code_metrics_by_collection.sql | 193 ----------- .../metrics/int_code_metrics_by_project.sql | 67 ++-- ...int_onchain_metrics_by_collection_depr.sql | 220 ------------- .../int_onchain_metrics_by_project.sql | 142 ++++---- .../int_onchain_metrics_by_project_depr.sql | 211 ------------ .../metrics/int_pm_contributors.sql | 16 - .../metrics/int_pm_new_contribs.sql | 19 -- .../{ => pms}/int_pm_busfactor_by_project.sql | 22 +- .../{ => pms}/int_pm_contracts_deployed.sql | 7 +- .../metrics/pms/int_pm_contributors.sql | 14 + .../metrics/{ => pms}/int_pm_dev_months.sql | 2 - .../metrics/pms/int_pm_new_contribs.sql | 30 ++ .../{ => pms}/int_pm_trusted_transactions.sql | 5 +- .../int_active_addresses_daily_to_project.sql | 19 +- ...nt_active_addresses_monthly_to_project.sql | 28 +- .../int_active_devs_monthly_to_collection.sql | 4 +- .../int_active_devs_monthly_to_project.sql | 3 +- .../int_address_rfm_segments_by_project.sql | 37 ++- .../users/int_address_totals_by_project.sql | 62 ++-- .../intermediate/users/int_addresses.sql | 14 - .../users/int_addresses_daily_activity.sql | 27 +- .../users/int_addresses_to_project.sql | 20 ++ .../users/int_contributors_to_project.sql | 27 ++ .../models/intermediate/users/int_devs.sql | 24 -- .../int_first_contribution_to_project.sql | 13 - .../int_last_contribution_to_project.sql | 13 - .../intermediate/users/int_user_addresses.sql | 6 +- .../int_user_events_daily_to_collection.sql | 8 +- .../int_user_events_daily_to_project.sql | 8 +- .../int_user_events_monthly_to_collection.sql | 8 +- .../int_user_events_monthly_to_project.sql | 8 +- ...ser_events_to_project_by_time_interval.sql | 30 +- .../users/int_users_monthly_to_project.sql | 10 +- .../directory/artifacts_by_project_v1.sql | 5 +- .../models/marts/directory/artifacts_v1.sql | 2 +- .../daily/events_daily_from_artifact.sql | 16 - .../daily/events_daily_from_collection.sql | 14 - .../daily/events_daily_from_project.sql | 17 - .../events/daily/events_daily_to_artifact.sql | 15 +- .../events_daily_to_artifact_by_source.sql | 17 - .../daily/events_daily_to_collection.sql | 15 +- .../events_daily_to_collection_by_source.sql | 18 +- .../events/daily/events_daily_to_project.sql | 15 +- .../events_daily_to_project_by_source.sql | 18 +- .../event_indexing_status_by_project_v1.sql | 4 +- .../monthly/events_monthly_from_artifact.sql | 16 - .../events_monthly_from_collection.sql | 17 - .../monthly/events_monthly_from_project.sql | 17 - .../monthly/events_monthly_to_artifact.sql | 15 +- .../events_monthly_to_artifact_by_source.sql | 17 - .../monthly/events_monthly_to_collection.sql | 15 +- ...events_monthly_to_collection_by_source.sql | 18 +- .../monthly/events_monthly_to_project.sql | 15 +- .../events_monthly_to_project_by_source.sql | 18 +- .../weekly/events_weekly_from_artifact.sql | 16 - .../weekly/events_weekly_from_collection.sql | 17 - .../weekly/events_weekly_from_project.sql | 17 - .../weekly/events_weekly_to_artifact.sql | 15 +- .../events_weekly_to_artifact_by_source.sql | 17 - .../weekly/events_weekly_to_collection.sql | 15 +- .../events_weekly_to_collection_by_source.sql | 18 +- .../weekly/events_weekly_to_project.sql | 15 +- .../events_weekly_to_project_by_source.sql | 18 +- .../metrics/code_metrics_by_collection_v1.sql | 47 --- .../metrics/code_metrics_by_project_v1.sql | 1 - .../metrics/onchain_metrics_by_project_v1.sql | 5 +- .../oss-directory/stg_ossd__schema.yml | 6 +- 85 files changed, 999 insertions(+), 1694 deletions(-) create mode 100644 warehouse/dbt/models/intermediate/directory/int_artifacts_history.sql create mode 100644 warehouse/dbt/models/intermediate/directory/int_ossd__artifacts_by_project.sql rename warehouse/dbt/models/intermediate/directory/{stg_ossd__projects_by_collection.sql => int_ossd__projects_by_collection.sql} (87%) rename warehouse/dbt/models/intermediate/directory/{stg_ossd__repositories_by_project.sql => int_ossd__repositories_by_project.sql} (88%) delete mode 100644 warehouse/dbt/models/intermediate/directory/stg_ossd__artifacts_by_project.sql delete mode 100644 warehouse/dbt/models/intermediate/events/int_events_from_collection.sql delete mode 100644 warehouse/dbt/models/intermediate/events/int_events_from_project.sql delete mode 100644 warehouse/dbt/models/intermediate/metrics/int_code_metrics_by_collection.sql delete mode 100644 warehouse/dbt/models/intermediate/metrics/int_onchain_metrics_by_collection_depr.sql delete mode 100644 warehouse/dbt/models/intermediate/metrics/int_onchain_metrics_by_project_depr.sql delete mode 100644 warehouse/dbt/models/intermediate/metrics/int_pm_contributors.sql delete mode 100644 warehouse/dbt/models/intermediate/metrics/int_pm_new_contribs.sql rename warehouse/dbt/models/intermediate/metrics/{ => pms}/int_pm_busfactor_by_project.sql (93%) rename warehouse/dbt/models/intermediate/metrics/{ => pms}/int_pm_contracts_deployed.sql (85%) create mode 100644 warehouse/dbt/models/intermediate/metrics/pms/int_pm_contributors.sql rename warehouse/dbt/models/intermediate/metrics/{ => pms}/int_pm_dev_months.sql (92%) create mode 100644 warehouse/dbt/models/intermediate/metrics/pms/int_pm_new_contribs.sql rename warehouse/dbt/models/intermediate/metrics/{ => pms}/int_pm_trusted_transactions.sql (82%) delete mode 100644 warehouse/dbt/models/intermediate/users/int_addresses.sql create mode 100644 warehouse/dbt/models/intermediate/users/int_addresses_to_project.sql create mode 100644 warehouse/dbt/models/intermediate/users/int_contributors_to_project.sql delete mode 100644 warehouse/dbt/models/intermediate/users/int_devs.sql delete mode 100644 warehouse/dbt/models/intermediate/users/int_first_contribution_to_project.sql delete mode 100644 warehouse/dbt/models/intermediate/users/int_last_contribution_to_project.sql delete mode 100644 warehouse/dbt/models/marts/events/daily/events_daily_from_artifact.sql delete mode 100644 warehouse/dbt/models/marts/events/daily/events_daily_from_collection.sql delete mode 100644 warehouse/dbt/models/marts/events/daily/events_daily_from_project.sql delete mode 100644 warehouse/dbt/models/marts/events/daily/events_daily_to_artifact_by_source.sql delete mode 100644 warehouse/dbt/models/marts/events/monthly/events_monthly_from_artifact.sql delete mode 100644 warehouse/dbt/models/marts/events/monthly/events_monthly_from_collection.sql delete mode 100644 warehouse/dbt/models/marts/events/monthly/events_monthly_from_project.sql delete mode 100644 warehouse/dbt/models/marts/events/monthly/events_monthly_to_artifact_by_source.sql delete mode 100644 warehouse/dbt/models/marts/events/weekly/events_weekly_from_artifact.sql delete mode 100644 warehouse/dbt/models/marts/events/weekly/events_weekly_from_collection.sql delete mode 100644 warehouse/dbt/models/marts/events/weekly/events_weekly_from_project.sql delete mode 100644 warehouse/dbt/models/marts/events/weekly/events_weekly_to_artifact_by_source.sql delete mode 100644 warehouse/dbt/models/marts/metrics/code_metrics_by_collection_v1.sql diff --git a/.gitignore b/.gitignore index 93a503dd6..e78628001 100644 --- a/.gitignore +++ b/.gitignore @@ -34,6 +34,7 @@ data/ *.env .env*.local *.log +logs/ coverage.json # typescript diff --git a/warehouse/dbt/models/intermediate/directory/int_artifacts.sql b/warehouse/dbt/models/intermediate/directory/int_artifacts.sql index 837cb379e..7392a03e1 100644 --- a/warehouse/dbt/models/intermediate/directory/int_artifacts.sql +++ b/warehouse/dbt/models/intermediate/directory/int_artifacts.sql @@ -1,47 +1,40 @@ -with ossd_artifacts as ( - select distinct - artifact_source_id, - artifact_namespace, - artifact_type, - artifact_url, - LOWER(artifact_name) as artifact_name - from {{ ref('stg_ossd__artifacts_by_project') }} -), - -from_artifacts as ( - {# `from` actor artifacts derived from all events #} - select - from_source_id as artifact_source_id, - from_namespace as artifact_namespace, - from_type as artifact_type, - "" as artifact_url, {# for now this is blank #} - LOWER(from_name) as artifact_name, - MAX(e.time) as last_used - from {{ ref('int_events') }} as e - group by 1, 2, 3, 4, 5 -), - -all_artifacts as ( +with all_artifacts as ( {# The `last_used` value is later used in this query to determine what the most _current_ name is. However, oss-directory names are considered canonical so - we will use those by setting `last_used` to be the current timestamp. + `last_used` is only relevent for `git_user` artifacts. #} select - oa.*, - CURRENT_TIMESTAMP() as last_used - from ossd_artifacts as oa + artifact_source_id, + artifact_source, + artifact_type, + artifact_namespace, + artifact_url, + artifact_name + from {{ ref('int_ossd__artifacts_by_project') }} union all - select * from from_artifacts + select + artifact_source_id, + artifact_source, + artifact_type, + artifact_namespace, + artifact_url, + MAX_BY(artifact_name, last_used) as artifact_name + from {{ ref('int_artifacts_history') }} + group by + artifact_source_id, + artifact_source, + artifact_type, + artifact_namespace, + artifact_url ) -select +select distinct {{ oso_artifact_id("artifact") }} as artifact_id, - artifact_source_id as artifact_source_id, - artifact_namespace as artifact_namespace, - artifact_type as artifact_type, - artifact_url as artifact_url, - TO_JSON(ARRAY_AGG(distinct artifact_name)) as artifact_names, - MAX_BY(artifact_name, last_used) as artifact_latest_name + artifact_source_id, + artifact_source, + artifact_type, + artifact_namespace, + artifact_name, + artifact_url from all_artifacts -group by 1, 2, 3, 4, 5 diff --git a/warehouse/dbt/models/intermediate/directory/int_artifacts_history.sql b/warehouse/dbt/models/intermediate/directory/int_artifacts_history.sql new file mode 100644 index 000000000..12b547668 --- /dev/null +++ b/warehouse/dbt/models/intermediate/directory/int_artifacts_history.sql @@ -0,0 +1,35 @@ +{# + Currently this only captures the history of git_users. + It does not capture git_repo naming histories. +#} + +with git_user_events as ( + {# `from` actor artifacts derived from all events #} + select + event_source as artifact_source, + from_artifact_source_id as artifact_source_id, + from_artifact_type as artifact_type, + from_artifact_namespace as artifact_namespace, + from_artifact_name as artifact_name, + "" as artifact_url, + time + from {{ ref('int_events') }} +) + +select + artifact_source_id, + artifact_source, + artifact_type, + artifact_namespace, + artifact_url, + artifact_name, + MAX(time) as last_used, + MIN(time) as first_used +from git_user_events +group by + artifact_source_id, + artifact_source, + artifact_type, + artifact_namespace, + artifact_url, + artifact_name diff --git a/warehouse/dbt/models/intermediate/directory/int_ossd__artifacts_by_project.sql b/warehouse/dbt/models/intermediate/directory/int_ossd__artifacts_by_project.sql new file mode 100644 index 000000000..ecc14ecb4 --- /dev/null +++ b/warehouse/dbt/models/intermediate/directory/int_ossd__artifacts_by_project.sql @@ -0,0 +1,178 @@ +{# + This model is responsible for generating a list of all artifacts associated with a project. + This includes repositories, npm packages, blockchain addresses, and contracts. + + Currently, the source and namespace for blockchain artifacts are the same. This may change + in the future. +#} + +with all_repos as ( + select + "GITHUB" as artifact_source, + "REPOSITORY" as artifact_type, + repos.project_id as project_id, + repos.owner as artifact_namespace, + repos.name_with_owner as artifact_name, + repos.url as artifact_url, + CAST(repos.id as STRING) as artifact_source_id + from {{ ref('int_ossd__repositories_by_project') }} as repos +), + +all_npm_raw as ( + select + "NPM" as artifact_source, + "PACKAGE" as artifact_type, + projects.project_id, + JSON_VALUE(npm.url) as artifact_source_id, + case + when + JSON_VALUE(npm.url) like "https://npmjs.com/package/%" + then SUBSTR(JSON_VALUE(npm.url), 28) + when + JSON_VALUE(npm.url) like "https://www.npmjs.com/package/%" + then SUBSTR(JSON_VALUE(npm.url), 31) + end as artifact_name, + JSON_VALUE(npm.url) as artifact_url + from + {{ ref('stg_ossd__current_projects') }} as projects + cross join + UNNEST(JSON_QUERY_ARRAY(projects.npm)) as npm +), + +all_npm as ( + select + project_id, + artifact_source_id, + artifact_source, + artifact_type, + artifact_name, + artifact_url, + SPLIT(REPLACE(artifact_name, "@", ""), "/")[SAFE_OFFSET(0)] + as artifact_namespace + from all_npm_raw +), + +ossd_blockchain as ( + select + projects.project_id, + tag as artifact_type, + network as artifact_namespace, + network as artifact_source, + JSON_VALUE(blockchains.address) as artifact_source_id, + JSON_VALUE(blockchains.address) as artifact_name, + JSON_VALUE(blockchains.address) as artifact_url + from + {{ ref('stg_ossd__current_projects') }} as projects + cross join + UNNEST(JSON_QUERY_ARRAY(projects.blockchain)) as blockchains + cross join + UNNEST(JSON_VALUE_ARRAY(blockchains.networks)) as network + cross join + UNNEST(JSON_VALUE_ARRAY(blockchains.tags)) as tag +), + +all_deployers as ( + select + *, + "OPTIMISM" as artifact_namespace, + "OPTIMISM" as artifact_source + from {{ ref("stg_optimism__deployers") }} + union all + select + *, + "MAINNET" as artifact_namespace, + "MAINNET" as artifact_source + from {{ ref("stg_ethereum__deployers") }} + union all + select + *, + "ARBITRUM_ONE" as artifact_namespace, + "ARBITRUM_ONE" as artifact_source + from {{ ref("stg_arbitrum__deployers") }} +), + +discovered_contracts as ( + select + "CONTRACT" as artifact_type, + ob.project_id, + ad.contract_address as artifact_source_id, + ob.artifact_namespace, + ob.artifact_namespace as artifact_source, + ad.contract_address as artifact_name, + ad.contract_address as artifact_url + from ossd_blockchain as ob + inner join all_deployers as ad + on + ob.artifact_source_id = ad.deployer_address + and ob.artifact_namespace = ad.artifact_namespace + and ob.artifact_type in ("EOA", "DEPLOYER", "FACTORY") +), + +all_artifacts as ( + select + project_id, + artifact_source_id, + artifact_source, + artifact_type, + artifact_namespace, + artifact_name, + artifact_url + from + all_repos + union all + select + project_id, + artifact_source_id, + artifact_source, + artifact_type, + artifact_namespace, + artifact_name, + artifact_url + from + ossd_blockchain + union all + select + project_id, + artifact_source_id, + artifact_source, + artifact_type, + artifact_namespace, + artifact_name, + artifact_url + from + discovered_contracts + union all + select + project_id, + artifact_source_id, + artifact_source, + artifact_type, + artifact_namespace, + artifact_name, + artifact_url + from + all_npm +), + +all_unique_artifacts as ( + select distinct + project_id, + LOWER(artifact_source_id) as artifact_source_id, + UPPER(artifact_source) as artifact_source, + UPPER(artifact_type) as artifact_type, + UPPER(artifact_namespace) as artifact_namespace, + LOWER(artifact_name) as artifact_name, + LOWER(artifact_url) as artifact_url + from all_artifacts +) + +select + project_id, + artifact_source_id, + artifact_source, + artifact_type, + artifact_namespace, + artifact_name, + artifact_url, + {{ oso_artifact_id("artifact", "a") }} as `artifact_id` +from all_unique_artifacts as a diff --git a/warehouse/dbt/models/intermediate/directory/stg_ossd__projects_by_collection.sql b/warehouse/dbt/models/intermediate/directory/int_ossd__projects_by_collection.sql similarity index 87% rename from warehouse/dbt/models/intermediate/directory/stg_ossd__projects_by_collection.sql rename to warehouse/dbt/models/intermediate/directory/int_ossd__projects_by_collection.sql index 9975e7511..e6b42fbd6 100644 --- a/warehouse/dbt/models/intermediate/directory/stg_ossd__projects_by_collection.sql +++ b/warehouse/dbt/models/intermediate/directory/int_ossd__projects_by_collection.sql @@ -8,4 +8,4 @@ select from {{ ref('stg_ossd__current_collections') }} as collections cross join UNNEST(collections.projects) as project_name inner join {{ ref('stg_ossd__current_projects') }} as projects - on projects.name = project_name + on projects.project_name = project_name diff --git a/warehouse/dbt/models/intermediate/directory/stg_ossd__repositories_by_project.sql b/warehouse/dbt/models/intermediate/directory/int_ossd__repositories_by_project.sql similarity index 88% rename from warehouse/dbt/models/intermediate/directory/stg_ossd__repositories_by_project.sql rename to warehouse/dbt/models/intermediate/directory/int_ossd__repositories_by_project.sql index 32b55d2bd..a2748b463 100644 --- a/warehouse/dbt/models/intermediate/directory/stg_ossd__repositories_by_project.sql +++ b/warehouse/dbt/models/intermediate/directory/int_ossd__repositories_by_project.sql @@ -7,7 +7,7 @@ select repos.*, projects.project_id, "GITHUB" as repository_source, - {{ oso_id("'GITHUB'", "'GIT_REPOSITORY'", "CAST(repos.id AS STRING)") }} + {{ oso_id("'GITHUB'", "'REPOSITORY'", "CAST(repos.id AS STRING)") }} as artifact_id from {{ ref('stg_ossd__current_projects') }} as projects diff --git a/warehouse/dbt/models/intermediate/directory/int_project_owners.sql b/warehouse/dbt/models/intermediate/directory/int_project_owners.sql index f2b34ec5c..336258a71 100644 --- a/warehouse/dbt/models/intermediate/directory/int_project_owners.sql +++ b/warehouse/dbt/models/intermediate/directory/int_project_owners.sql @@ -9,7 +9,7 @@ with ranked_repos as ( COUNT(distinct owner) over (partition by project_id) as count_github_owners - from {{ ref('stg_ossd__repositories_by_project') }} + from {{ ref('int_ossd__repositories_by_project') }} ) select diff --git a/warehouse/dbt/models/intermediate/directory/int_projects.sql b/warehouse/dbt/models/intermediate/directory/int_projects.sql index a63dade31..ad854690f 100644 --- a/warehouse/dbt/models/intermediate/directory/int_projects.sql +++ b/warehouse/dbt/models/intermediate/directory/int_projects.sql @@ -9,7 +9,7 @@ with ranked_repos as ( COUNT(distinct owner) over (partition by project_id) as github_owners_count - from {{ ref('stg_ossd__repositories_by_project') }} + from {{ ref('int_ossd__repositories_by_project') }} ), project_owners as ( @@ -39,4 +39,4 @@ select as npm_artifact_count from {{ ref('stg_ossd__current_projects') }} as projects left join project_owners as project_owners - on projects.id = project_owners.project_id + on projects.project_id = project_owners.project_id diff --git a/warehouse/dbt/models/intermediate/directory/int_projects_by_collection.sql b/warehouse/dbt/models/intermediate/directory/int_projects_by_collection.sql index cf68bb442..06cfcb6c0 100644 --- a/warehouse/dbt/models/intermediate/directory/int_projects_by_collection.sql +++ b/warehouse/dbt/models/intermediate/directory/int_projects_by_collection.sql @@ -4,6 +4,6 @@ select projects.project_source, projects.project_namespace, projects.project_name -from {{ ref('stg_ossd__projects_by_collection') }} as projects_by_collection +from {{ ref('int_ossd__projects_by_collection') }} as projects_by_collection inner join {{ ref('stg_ossd__current_projects') }} as projects - on projects_by_collection.project_id = projects.id + on projects_by_collection.project_id = projects.project_id diff --git a/warehouse/dbt/models/intermediate/directory/int_repos_by_project.sql b/warehouse/dbt/models/intermediate/directory/int_repos_by_project.sql index d6ec700cf..49c18b1a4 100644 --- a/warehouse/dbt/models/intermediate/directory/int_repos_by_project.sql +++ b/warehouse/dbt/models/intermediate/directory/int_repos_by_project.sql @@ -1,33 +1,27 @@ with github_stats as ( select - to_id as artifact_id, + to_artifact_id as artifact_id, MIN(time) as first_commit_time, MAX(time) as last_commit_time, COUNT(distinct TIMESTAMP_TRUNC(time, day)) as days_with_commits_count, - COUNT(distinct from_id) as contributors_to_repo_count - from {{ ref('int_events_to_project') }} + COUNT(distinct from_artifact_id) as contributors_to_repo_count + from {{ ref('int_events_with_artifact_id') }} where event_type = 'COMMIT_CODE' - group by to_id + group by to_artifact_id ) select - p.project_id, - p.project_source, - p.project_namespace, - p.project_name, - r.repository_source, - r.artifact_id, - r.is_fork as repo_is_fork, - r.fork_count as repo_fork_count, - r.star_count as repo_star_count, - s.first_commit_time, - s.last_commit_time, - s.days_with_commits_count, - s.contributors_to_repo_count, - LOWER(r.name_with_owner) as repo_name_with_owner -from {{ ref('stg_ossd__repositories_by_project') }} as r -left join {{ ref('int_projects') }} as p - on r.project_id = p.project_id -left join github_stats as s - on r.artifact_id = s.artifact_id -where r.repository_source = 'GITHUB' + int_ossd__repositories_by_project.project_id, + int_ossd__repositories_by_project.artifact_id, + int_ossd__repositories_by_project.owner as repo_owner, + int_ossd__repositories_by_project.name as repo_name, + int_ossd__repositories_by_project.is_fork, + int_ossd__repositories_by_project.fork_count, + int_ossd__repositories_by_project.star_count, + github_stats.first_commit_time, + github_stats.last_commit_time, + github_stats.days_with_commits_count, + github_stats.contributors_to_repo_count +from {{ ref('int_ossd__repositories_by_project') }} +left join github_stats + on int_ossd__repositories_by_project.artifact_id = github_stats.artifact_id diff --git a/warehouse/dbt/models/intermediate/directory/stg_ossd__artifacts_by_project.sql b/warehouse/dbt/models/intermediate/directory/stg_ossd__artifacts_by_project.sql deleted file mode 100644 index 7ab619cc5..000000000 --- a/warehouse/dbt/models/intermediate/directory/stg_ossd__artifacts_by_project.sql +++ /dev/null @@ -1,116 +0,0 @@ -with all_repos as ( - select - repos.project_id as project_id, - 'GITHUB' as artifact_namespace, - 'GIT_REPOSITORY' as artifact_type, - LOWER(repos.name_with_owner) as artifact_name, - LOWER(repos.url) as artifact_url, - CAST(repos.id as STRING) as artifact_source_id - from {{ ref('stg_ossd__repositories_by_project') }} as repos - group by - 1, - 2, - 3, - 4, - 5, - 6 -), - -all_npm as ( - select - projects.project_id, - 'NPM' as artifact_namespace, - 'PACKAGE' as artifact_type, - case - when - LOWER(JSON_VALUE(npm.url)) like 'https://npmjs.com/package/%' - then SUBSTR(LOWER(JSON_VALUE(npm.url)), 28) - when - LOWER(JSON_VALUE(npm.url)) like 'https://www.npmjs.com/package/%' - then SUBSTR(LOWER(JSON_VALUE(npm.url)), 31) - end as artifact_name, - LOWER(JSON_VALUE(npm.url)) as artifact_url, - LOWER(JSON_VALUE(npm.url)) as artifact_source_id - from - {{ ref('stg_ossd__current_projects') }} as projects - cross join - UNNEST(JSON_QUERY_ARRAY(projects.npm)) as npm -), - -ossd_blockchain as ( - select - projects.project_id, - UPPER(network) as artifact_namespace, - UPPER(tag) as artifact_type, - JSON_VALUE(blockchains.address) as artifact_name, - JSON_VALUE(blockchains.address) as artifact_url, - JSON_VALUE(blockchains.address) as artifact_source_id - from - {{ ref('stg_ossd__current_projects') }} as projects - cross join - UNNEST(JSON_QUERY_ARRAY(projects.blockchain)) as blockchains - cross join - UNNEST(JSON_VALUE_ARRAY(blockchains.networks)) as network - cross join - UNNEST(JSON_VALUE_ARRAY(blockchains.tags)) as tag -), - -all_deployers as ( - select - *, - 'OPTIMISM' as network - from {{ ref("stg_optimism__deployers") }} - union all - select - *, - 'MAINNET' as network - from {{ ref("stg_ethereum__deployers") }} - union all - select - *, - 'ARBITRUM' as network - from {{ ref("stg_arbitrum__deployers") }} -), - -discovered_contracts as ( - select - ob.project_id, - ob.artifact_namespace, - 'CONTRACT' as artifact_type, - ad.contract_address as artifact_name, - ad.contract_address as artifact_url, - ad.contract_address as artifact_source_id - from ossd_blockchain as ob - inner join all_deployers as ad - on - ob.artifact_source_id = ad.deployer_address - and ob.artifact_namespace = ad.network - and ob.artifact_type in ('EOA', 'DEPLOYER', 'FACTORY') -), - -all_artifacts as ( - select * - from - all_repos - union all - select * - from - ossd_blockchain - union all - select * - from - discovered_contracts - union all - select * - from - all_npm -), - -all_unique_artifacts as ( - select * from all_artifacts group by 1, 2, 3, 4, 5, 6 -) - -select - a.*, - {{ oso_artifact_id("artifact", "a") }} as `artifact_id` -from all_unique_artifacts as a diff --git a/warehouse/dbt/models/intermediate/events/int_event_totals_by_project.sql b/warehouse/dbt/models/intermediate/events/int_event_totals_by_project.sql index 79724584d..fab80f025 100644 --- a/warehouse/dbt/models/intermediate/events/int_event_totals_by_project.sql +++ b/warehouse/dbt/models/intermediate/events/int_event_totals_by_project.sql @@ -1,6 +1,6 @@ {# This model calculates the total amount of events for each project and namespace - for different time intervals. The time intervals are defined in the `time_intervals` table. + for different time intervals. The time intervals are defined in the `time_intervals` tablint_events_to_project. The `aggregated_data` CTE calculates the total amount of events for each project and namespace for each time interval. The final select statement calculates the total amount of events for each project and namespace for each event type and time interval, creating a normalized @@ -8,12 +8,16 @@ #} select - e.project_id, - e.to_namespace as artifact_namespace, - t.time_interval, - CONCAT(e.event_type, '_TOTAL') as impact_metric, - SUM(e.amount) as amount -from {{ ref('int_events_to_project') }} as e -cross join {{ ref('int_time_intervals') }} as t -where DATE(e.time) >= t.start_date -group by e.project_id, e.to_namespace, t.time_interval, e.event_type + int_events_to_project.project_id, + int_time_intervals.time_interval, + int_events_to_project.event_source, + CONCAT(int_events_to_project.event_type, '_TOTAL') as impact_metric, + SUM(int_events_to_project.amount) as amount +from {{ ref('int_events_to_project') }} +cross join {{ ref('int_time_intervals') }} +where DATE(int_events_to_project.time) >= int_time_intervals.start_date +group by + int_events_to_project.project_id, + int_time_intervals.time_interval, + int_events_to_project.event_source, + int_events_to_project.event_type diff --git a/warehouse/dbt/models/intermediate/events/int_events.sql b/warehouse/dbt/models/intermediate/events/int_events.sql index 95f4a81e5..a3c71632d 100644 --- a/warehouse/dbt/models/intermediate/events/int_events.sql +++ b/warehouse/dbt/models/intermediate/events/int_events.sql @@ -4,19 +4,20 @@ SCHEMA time - type + event_type source_id + event_source - to_name - to_namespace - to_type - to_source_id + to_artifact_name + to_artifact_namespace + to_artifact_type + to_artifact_source_id - from_name - from_namespace - from_type - from_source_id + from_artifact_name + from_artifact_namespace + from_artifact_type + from_artifact_source_id amount #} @@ -32,23 +33,24 @@ }} with contract_invocation_daily_count as ( - select - cii.time, - "CONTRACT_INVOCATION_DAILY_COUNT" as `event_type`, - cii.source_id as event_source_id, - cii.to_name, - cii.to_namespace, - cii.to_type, - cii.to_source_id, - cii.from_name, - cii.from_namespace, - cii.from_type, - cii.from_source_id, - cii.tx_count as `amount` - from {{ ref('stg_dune__contract_invocation') }} as cii + select -- noqa: ST06 + time, + "CONTRACT_INVOCATION_DAILY_COUNT" as event_type, + CAST(source_id as STRING) as event_source_id, + from_namespace as event_source, + to_name, + to_namespace, + to_type, + CAST(to_source_id as STRING) as to_source_id, + from_name, + from_namespace, + from_type, + CAST(from_source_id as STRING) as from_source_id, + tx_count as amount + from {{ ref('stg_dune__contract_invocation') }} {# a bit of a hack for now to keep this table small for dev and playground #} {% if target.name in ['dev', 'playground'] %} - where cii.time >= TIMESTAMP_SUB( + where time >= TIMESTAMP_SUB( CURRENT_TIMESTAMP(), interval {{ env_var("PLAYGROUND_DAYS", '14') }} day ) @@ -56,22 +58,23 @@ with contract_invocation_daily_count as ( ), contract_invocation_daily_l2_gas_used as ( - select - cii.time, - "CONTRACT_INVOCATION_DAILY_L2_GAS_USED" as `event_type`, - cii.source_id as event_source_id, - cii.to_name, - cii.to_namespace, - cii.to_type, - cii.to_source_id, - cii.from_name, - cii.from_namespace, - cii.from_type, - cii.from_source_id, - cii.l2_gas as `amount` - from {{ ref('stg_dune__contract_invocation') }} as cii + select -- noqa: ST06 + time, + "CONTRACT_INVOCATION_DAILY_L2_GAS_USED" as event_type, + CAST(source_id as STRING) as event_source_id, + from_namespace as event_source, + to_name, + to_namespace, + to_type, + CAST(to_source_id as STRING) as to_source_id, + from_name, + from_namespace, + from_type, + CAST(from_source_id as STRING) as from_source_id, + l2_gas as amount + from {{ ref('stg_dune__contract_invocation') }} {% if target.name in ['dev', 'playground'] %} - where cii.time >= TIMESTAMP_SUB( + where time >= TIMESTAMP_SUB( CURRENT_TIMESTAMP(), interval {{ env_var("PLAYGROUND_DAYS", '14') }} day ) @@ -79,22 +82,23 @@ contract_invocation_daily_l2_gas_used as ( ), contract_invocation_daily_l1_gas_used as ( - select - cii.time, - "CONTRACT_INVOCATION_DAILY_L1_GAS_USED" as `event_type`, - cii.source_id as event_source_id, - cii.to_name, - cii.to_namespace, - cii.to_type, - cii.to_source_id, - cii.from_name, - cii.from_namespace, - cii.from_type, - cii.from_source_id, - cii.l1_gas as `amount` - from {{ ref('stg_dune__contract_invocation') }} as cii + select -- noqa: ST06 + time, + "CONTRACT_INVOCATION_DAILY_L1_GAS_USED" as event_type, + CAST(source_id as STRING) as event_source_id, + from_namespace as event_source, + to_name, + to_namespace, + to_type, + CAST(to_source_id as STRING) as to_source_id, + from_name, + from_namespace, + from_type, + CAST(from_source_id as STRING) as from_source_id, + l1_gas as amount + from {{ ref('stg_dune__contract_invocation') }} {% if target.name in ['dev', 'playground'] %} - where cii.time >= TIMESTAMP_SUB( + where time >= TIMESTAMP_SUB( CURRENT_TIMESTAMP(), interval {{ env_var("PLAYGROUND_DAYS", '14') }} day ) @@ -103,107 +107,135 @@ contract_invocation_daily_l1_gas_used as ( github_commits as ( select -- noqa: ST06 - gc.created_at as `time`, - "COMMIT_CODE" as `event_type`, - gc.push_id as `event_source_id`, - gc.repository_name as `to_name`, - "GITHUB" as `to_namespace`, - "GIT_REPOSITORY" as `to_type`, - CAST(gc.repository_id as STRING) as `to_source_id`, - COALESCE(gc.actor_login, gc.author_email) as `from_name`, - "GITHUB" as `from_namespace`, + created_at as `time`, + "COMMIT_CODE" as event_type, + CAST(push_id as STRING) as event_source_id, + "GITHUB" as event_source, + repository_name as to_name, + SPLIT(REPLACE(repository_name, "@", ""), "/")[SAFE_OFFSET(0)] + as to_namespace, + "REPOSITORY" as to_type, + CAST(repository_id as STRING) as to_source_id, + COALESCE(actor_login, author_email) as from_name, + COALESCE(actor_login, author_email) as from_namespace, case - when gc.actor_login is not null then "GIT_USER" + when actor_login is not null then "GIT_USER" else "GIT_EMAIL" - end as `from_type`, + end as from_type, case - when gc.actor_login is not null then CAST(gc.actor_id as STRING) - else gc.author_email - end as `from_source_id`, - CAST(1 as FLOAT64) as `amount` - from {{ ref('stg_github__distinct_commits_resolved_mergebot') }} as gc + when actor_login is not null then CAST(actor_id as STRING) + else author_email + end as from_source_id, + CAST(1 as FLOAT64) as amount + from {{ ref('stg_github__distinct_commits_resolved_mergebot') }} ), github_issues as ( select -- noqa: ST06 - gi.created_at as `time`, - gi.type as `event_type`, - CAST(gi.id as STRING) as `event_source_id`, - gi.repository_name as `to_name`, - "GITHUB" as `to_namespace`, - "GIT_REPOSITORY" as `to_type`, - CAST(gi.repository_id as STRING) as `to_source_id`, - gi.actor_login as `from_name`, - "GITHUB" as `from_namespace`, - "GIT_USER" as `from_type`, - CAST(gi.actor_id as STRING) as `from_source_id`, - CAST(1 as FLOAT64) as `amount` - from {{ ref('stg_github__issues') }} as gi + created_at as `time`, + type as event_type, + CAST(id as STRING) as event_source_id, + "GITHUB" as event_source, + repository_name as to_name, + SPLIT(REPLACE(repository_name, "@", ""), "/")[SAFE_OFFSET(0)] + as to_namespace, + "REPOSITORY" as to_type, + CAST(repository_id as STRING) as to_source_id, + actor_login as from_name, + actor_login as from_namespace, + "GIT_USER" as from_type, + CAST(actor_id as STRING) as from_source_id, + CAST(1 as FLOAT64) as amount + from {{ ref('stg_github__issues') }} ), github_pull_requests as ( select -- noqa: ST06 - gh.created_at as `time`, - gh.type as `event_type`, - CAST(gh.id as STRING) as `event_source_id`, - gh.repository_name as `to_name`, - "GITHUB" as `to_namespace`, - "GIT_REPOSITORY" as `to_type`, - CAST(gh.repository_id as STRING) as `to_source_id`, - gh.actor_login as `from_name`, - "GITHUB" as `from_namespace`, - "GIT_USER" as `from_type`, - CAST(gh.actor_id as STRING) as `from_source_id`, - CAST(1 as FLOAT64) as `amount` - from {{ ref('stg_github__pull_requests') }} as gh + created_at as `time`, + type as event_type, + CAST(id as STRING) as event_source_id, + "GITHUB" as event_source, + repository_name as to_name, + SPLIT(REPLACE(repository_name, "@", ""), "/")[SAFE_OFFSET(0)] + as to_namespace, + "REPOSITORY" as to_type, + CAST(repository_id as STRING) as to_source_id, + actor_login as from_name, + actor_login as from_namespace, + "GIT_USER" as from_type, + CAST(actor_id as STRING) as from_source_id, + CAST(1 as FLOAT64) as amount + from {{ ref('stg_github__pull_requests') }} ), github_pull_request_merge_events as ( select -- noqa: ST06 - gh.created_at as `time`, - gh.type as `event_type`, - CAST(gh.id as STRING) as `event_source_id`, - gh.repository_name as `to_name`, - "GITHUB" as `to_namespace`, - "GIT_REPOSITORY" as `to_type`, - CAST(gh.repository_id as STRING) as `to_source_id`, - gh.actor_login as `from_name`, - "GITHUB" as `from_namespace`, - "GIT_USER" as `from_type`, - CAST(gh.actor_id as STRING) as `from_source_id`, - CAST(1 as FLOAT64) as `amount` - from {{ ref('stg_github__pull_request_merge_events') }} as gh + created_at as `time`, + type as event_type, + CAST(id as STRING) as event_source_id, + "GITHUB" as event_source, + repository_name as to_name, + SPLIT(REPLACE(repository_name, "@", ""), "/")[SAFE_OFFSET(0)] + as to_namespace, + "REPOSITORY" as to_type, + CAST(repository_id as STRING) as to_source_id, + actor_login as from_name, + actor_login as from_namespace, + "GIT_USER" as from_type, + CAST(actor_id as STRING) as from_source_id, + CAST(1 as FLOAT64) as amount + from {{ ref('stg_github__pull_request_merge_events') }} ), github_stars_and_forks as ( select -- noqa: ST06 - gh.created_at as `time`, - gh.type as `event_type`, - CAST(gh.id as STRING) as `event_source_id`, - gh.repository_name as `to_name`, - "GITHUB" as `to_namespace`, - "GIT_REPOSITORY" as `to_type`, - CAST(gh.repository_id as STRING) as `to_source_id`, - gh.actor_login as `from_name`, - "GITHUB" as `from_namespace`, - "GIT_USER" as `from_type`, - CAST(gh.actor_id as STRING) as `from_source_id`, - CAST(1 as FLOAT64) as `amount` - from {{ ref('stg_github__stars_and_forks') }} as gh + created_at as `time`, + type as event_type, + CAST(id as STRING) as event_source_id, + "GITHUB" as event_source, + repository_name as to_name, + SPLIT(REPLACE(repository_name, "@", ""), "/")[SAFE_OFFSET(0)] + as to_namespace, + "REPOSITORY" as to_type, + CAST(repository_id as STRING) as to_source_id, + actor_login as from_name, + actor_login as from_namespace, + "GIT_USER" as from_type, + CAST(actor_id as STRING) as from_source_id, + CAST(1 as FLOAT64) as amount + from {{ ref('stg_github__stars_and_forks') }} +), + +all_events as ( + select * from contract_invocation_daily_count + union all + select * from contract_invocation_daily_l1_gas_used + union all + select * from contract_invocation_daily_l2_gas_used + union all + select * from github_commits + union all + select * from github_issues + union all + select * from github_pull_requests + union all + select * from github_pull_request_merge_events + union all + select * from github_stars_and_forks ) -select * from contract_invocation_daily_count -union all -select * from contract_invocation_daily_l1_gas_used -union all -select * from contract_invocation_daily_l2_gas_used -union all -select * from github_commits -union all -select * from github_issues -union all -select * from github_pull_requests -union all -select * from github_pull_request_merge_events -union all -select * from github_stars_and_forks +select + time, + UPPER(event_type) as event_type, + CAST(event_source_id as STRING) as event_source_id, + UPPER(event_source) as event_source, + LOWER(to_name) as to_artifact_name, + UPPER(to_namespace) as to_artifact_namespace, + UPPER(to_type) as to_artifact_type, + CAST(to_source_id as STRING) as to_artifact_source_id, + LOWER(from_name) as from_artifact_name, + UPPER(from_namespace) as from_artifact_namespace, + UPPER(from_type) as from_artifact_type, + CAST(from_source_id as STRING) as from_artifact_source_id, + CAST(amount as FLOAT64) as amount +from all_events diff --git a/warehouse/dbt/models/intermediate/events/int_events_from_collection.sql b/warehouse/dbt/models/intermediate/events/int_events_from_collection.sql deleted file mode 100644 index 5a37eb6be..000000000 --- a/warehouse/dbt/models/intermediate/events/int_events_from_collection.sql +++ /dev/null @@ -1,10 +0,0 @@ -{# - All events from a collection -#} - -select - e.*, - pbc.collection_id -from {{ ref('int_events_from_project') }} as e -left join {{ ref('int_projects_by_collection') }} as pbc - on e.project_id = pbc.project_id diff --git a/warehouse/dbt/models/intermediate/events/int_events_from_project.sql b/warehouse/dbt/models/intermediate/events/int_events_from_project.sql deleted file mode 100644 index 71e4ac2ab..000000000 --- a/warehouse/dbt/models/intermediate/events/int_events_from_project.sql +++ /dev/null @@ -1,13 +0,0 @@ -{# - All events from a project -#} - -select - e.*, - a.project_id -from {{ ref('int_events_with_artifact_id') }} as e -left join {{ ref('stg_ossd__artifacts_by_project') }} as a - on - e.from_source_id = a.artifact_source_id - and e.from_namespace = a.artifact_namespace - and e.from_type = a.artifact_type diff --git a/warehouse/dbt/models/intermediate/events/int_events_to_collection.sql b/warehouse/dbt/models/intermediate/events/int_events_to_collection.sql index feda6c28a..b14f63aa2 100644 --- a/warehouse/dbt/models/intermediate/events/int_events_to_collection.sql +++ b/warehouse/dbt/models/intermediate/events/int_events_to_collection.sql @@ -3,8 +3,14 @@ #} select - e.*, - pbc.collection_id -from {{ ref('int_events_to_project') }} as e -inner join {{ ref('int_projects_by_collection') }} as pbc - on e.project_id = pbc.project_id + int_projects_by_collection.collection_id, + int_events_to_project.project_id, + int_events_to_project.from_artifact_id, + int_events_to_project.to_artifact_id, + int_events_to_project.time, + int_events_to_project.event_source, + int_events_to_project.event_type, + int_events_to_project.amount +from {{ ref('int_events_to_project') }} +inner join {{ ref('int_projects_by_collection') }} + on int_events_to_project.project_id = int_projects_by_collection.project_id diff --git a/warehouse/dbt/models/intermediate/events/int_events_to_project.sql b/warehouse/dbt/models/intermediate/events/int_events_to_project.sql index b0ade50ce..92c34bc21 100644 --- a/warehouse/dbt/models/intermediate/events/int_events_to_project.sql +++ b/warehouse/dbt/models/intermediate/events/int_events_to_project.sql @@ -3,11 +3,15 @@ #} select - e.*, - a.project_id -from {{ ref('int_events_with_artifact_id') }} as e -inner join {{ ref('stg_ossd__artifacts_by_project') }} as a + int_ossd__artifacts_by_project.project_id, + int_events_with_artifact_id.from_artifact_id, + int_events_with_artifact_id.to_artifact_id, + int_events_with_artifact_id.time, + int_events_with_artifact_id.event_source, + int_events_with_artifact_id.event_type, + int_events_with_artifact_id.amount +from {{ ref('int_events_with_artifact_id') }} +inner join {{ ref('int_ossd__artifacts_by_project') }} on - e.to_source_id = a.artifact_source_id - and e.to_namespace = a.artifact_namespace - and e.to_type = a.artifact_type + int_events_with_artifact_id.to_artifact_id + = int_ossd__artifacts_by_project.artifact_id diff --git a/warehouse/dbt/models/intermediate/events/int_events_with_artifact_id.sql b/warehouse/dbt/models/intermediate/events/int_events_with_artifact_id.sql index 23d0666ad..3faf18aa7 100644 --- a/warehouse/dbt/models/intermediate/events/int_events_with_artifact_id.sql +++ b/warehouse/dbt/models/intermediate/events/int_events_with_artifact_id.sql @@ -1,21 +1,22 @@ -{{ +{# config( materialized='ephemeral', ) -}} +#} select time, event_type, event_source_id, - to_name, - to_namespace, - to_type, - to_source_id, - {{ oso_artifact_id("to") }} as to_id, - from_name, - from_namespace, - from_type, - from_source_id, - {{ oso_artifact_id("from") }} as from_id, + event_source, + to_artifact_name, + to_artifact_namespace, + to_artifact_type, + to_artifact_source_id, + {{ oso_artifact_id("to_artifact") }} as to_artifact_id, + from_artifact_name, + from_artifact_namespace, + from_artifact_type, + from_artifact_source_id, + {{ oso_artifact_id("from_artifact") }} as from_artifact_id, amount from {{ ref('int_events') }} diff --git a/warehouse/dbt/models/intermediate/metrics/int_code_metrics_by_collection.sql b/warehouse/dbt/models/intermediate/metrics/int_code_metrics_by_collection.sql deleted file mode 100644 index 28e8f65c6..000000000 --- a/warehouse/dbt/models/intermediate/metrics/int_code_metrics_by_collection.sql +++ /dev/null @@ -1,193 +0,0 @@ -{# - Summary GitHub metrics for a collection: - - first_commit_date: The date of the first commit to the collection - - last_commit_date: The date of the last commit to the collection - - repos: The number of repositories in the collection - - stars: The number of stars the collection has - - forks: The number of forks the collection has - - contributors: The number of contributors to the collection - - contributors_6_months: The number of contributors to the collection in the last 6 months - - new_contributors_6_months: The number of new contributors to the collection in the last 6 months - - avg_fulltime_devs_6_months: The number of full-time developers in the last 6 months - - avg_active_devs_6_months: The average number of active developers in the last 6 months - - commits_6_months: The number of commits to the collection in the last 6 months - - issues_opened_6_months: The number of issues opened in the collection in the last 6 months - - issues_closed_6_months: The number of issues closed in the collection in the last 6 months - - pull_requests_opened_6_months: The number of pull requests opened in the collection in the last 6 months - - pull_requests_merged_6_months: The number of pull requests merged in the collection in the last 6 months -#} - --- CTE for calculating the first and last commit date for each collection, --- ignoring forked repos -with collection_commit_dates as ( - select - pbc.collection_id, - r.repository_source, - MIN(e.time) as first_commit_date, - MAX(e.time) as last_commit_date - from {{ ref('int_events_to_project') }} as e - inner join - {{ ref('stg_ossd__repositories_by_project') }} as r - on e.project_id = r.project_id - inner join - {{ ref('stg_ossd__projects_by_collection') }} as pbc - on r.project_id = pbc.project_id - where - e.event_type = 'COMMIT_CODE' - and r.is_fork = false - group by pbc.collection_id, r.repository_source -), - --- CTE for aggregating stars, forks, and repository counts by collection -collection_repos_summary as ( - select - c.collection_id, - c.collection_source, - c.collection_namespace, - c.collection_name, - r.repository_source, - COUNT(distinct r.id) as repositories, - SUM(r.star_count) as stars, - SUM(r.fork_count) as forks - from {{ ref('stg_ossd__repositories_by_project') }} as r - inner join - {{ ref('stg_ossd__projects_by_collection') }} as pbc - on r.project_id = pbc.project_id - inner join {{ ref('collections_v1') }} as c - on pbc.collection_id = c.collection_id - where r.is_fork = false - group by - c.collection_id, - c.collection_source, - c.collection_namespace, - c.collection_name, - r.repository_source -), - --- CTE for calculating contributor counts and new contributors in the last 6 --- months at collection level -collection_contributors as ( - select - d.collection_id, - d.repository_source, - COUNT(distinct d.from_id) as contributors, - COUNT( - distinct case - when - d.bucket_month - >= CAST( - DATE_SUB(CURRENT_DATE(), interval 6 month) as TIMESTAMP - ) - then d.from_id - end - ) as contributors_6_months, - COUNT( - distinct case - when - d.bucket_month - >= CAST( - DATE_SUB(CURRENT_DATE(), interval 6 month) as TIMESTAMP - ) - and d.user_segment_type = 'FULL_TIME_DEV' - then CONCAT(d.from_id, '_', d.bucket_month) - end - ) - / 6 as avg_fulltime_devs_6_months, - COUNT( - distinct case - when - d.bucket_month - >= CAST( - DATE_SUB(CURRENT_DATE(), interval 6 month) as TIMESTAMP - ) - and d.user_segment_type in ( - 'FULL_TIME_DEV', 'PART_TIME_DEV' - ) - then CONCAT(d.from_id, '_', d.bucket_month) - end - ) - / 6 as avg_active_devs_6_months, - COUNT( - distinct case - when - d.first_contribution_date - >= CAST( - DATE_SUB(CURRENT_DATE(), interval 6 month) as TIMESTAMP - ) - then d.from_id - end - ) as new_contributors_6_months - from ( - select - from_id, - collection_id, - repository_source, - bucket_month, - user_segment_type, - MIN(bucket_month) - over (partition by from_id, collection_id) - as first_contribution_date - from {{ ref('int_active_devs_monthly_to_collection') }} - ) as d - group by d.collection_id, d.repository_source -), - --- CTE for summarizing collection activity metrics over the past 6 months -collection_activity as ( - select - pbc.collection_id, - e.to_namespace as repository_source, - SUM(case when e.event_type = 'COMMIT_CODE' then e.amount end) - as commits_6_months, - SUM(case when e.event_type = 'ISSUE_OPENED' then e.amount end) - as issues_opened_6_months, - SUM(case when e.event_type = 'ISSUE_CLOSED' then e.amount end) - as issues_closed_6_months, - SUM(case when e.event_type = 'PULL_REQUEST_OPENED' then e.amount end) - as pull_requests_opened_6_months, - SUM(case when e.event_type = 'PULL_REQUEST_MERGED' then e.amount end) - as pull_requests_merged_6_months - from {{ ref('int_events_to_project') }} as e - inner join - {{ ref('stg_ossd__projects_by_collection') }} as pbc - on e.project_id = pbc.project_id - where - e.time >= CAST(DATE_ADD(CURRENT_DATE(), interval -6 month) as TIMESTAMP) - group by pbc.collection_id, repository_source -) - --- Final query to join all the metrics together for collections -select - c.collection_id, - c.collection_source, - c.collection_namespace, - c.collection_name, - c.repository_source as `artifact_source`, - ccd.first_commit_date, - ccd.last_commit_date, - c.repositories, - c.stars, - c.forks, - cc.contributors, - cc.contributors_6_months, - cc.new_contributors_6_months, - cc.avg_fulltime_devs_6_months, - cc.avg_active_devs_6_months, - ca.commits_6_months, - ca.issues_opened_6_months, - ca.issues_closed_6_months, - ca.pull_requests_opened_6_months, - ca.pull_requests_merged_6_months -from collection_repos_summary as c -inner join collection_commit_dates as ccd - on - c.collection_id = ccd.collection_id - and c.repository_source = ccd.repository_source -inner join collection_contributors as cc - on - c.collection_id = cc.collection_id - and c.repository_source = cc.repository_source -inner join collection_activity as ca - on - c.collection_id = ca.collection_id - and c.repository_source = ca.repository_source diff --git a/warehouse/dbt/models/intermediate/metrics/int_code_metrics_by_project.sql b/warehouse/dbt/models/intermediate/metrics/int_code_metrics_by_project.sql index 54c162ca0..a3c6c70d7 100644 --- a/warehouse/dbt/models/intermediate/metrics/int_code_metrics_by_project.sql +++ b/warehouse/dbt/models/intermediate/metrics/int_code_metrics_by_project.sql @@ -17,57 +17,58 @@ - pull_requests_merged_6_months: The number of pull requests merged in the project in the last 6 months #} -with project_repos_summary as ( +with repos as ( select project_id, - project_source, - project_namespace, - project_name, - repository_source, MIN(first_commit_time) as first_commit_date, MAX(last_commit_time) as last_commit_date, COUNT(distinct artifact_id) as repositories, - SUM(repo_star_count) as stars, - SUM(repo_fork_count) as forks + SUM(star_count) as stars, + SUM(fork_count) as forks from {{ ref('int_repos_by_project') }} --WHERE r.is_fork = false - group by - project_id, - project_source, - project_namespace, - project_name, - repository_source + group by project_id +), + +project_repos_summary as ( + select + repos.project_id, + repos.first_commit_date, + repos.last_commit_date, + repos.repositories, + repos.stars, + repos.forks, + int_projects.project_source, + int_projects.project_namespace, + int_projects.project_name, + int_projects.display_name + from repos + left join {{ ref('int_projects') }} + on repos.project_id = int_projects.project_id ), n_cte as ( select project_id, - namespace as repository_source, SUM(case when time_interval = 'ALL' then amount end) as contributors, SUM(case when time_interval = '6M' then amount end) as new_contributors_6_months from {{ ref('int_pm_new_contribs') }} - group by - project_id, - namespace + group by project_id ), c_cte as ( select project_id, - namespace as repository_source, SUM(amount) as contributors_6_months from {{ ref('int_pm_contributors') }} where time_interval = '6M' - group by - project_id, - namespace + group by project_id ), d_cte as ( select project_id, - namespace as repository_source, SUM( case when impact_metric = 'FULL_TIME_DEV_TOTAL' then amount / 6 @@ -82,15 +83,12 @@ d_cte as ( ) as avg_pts_6_months from {{ ref('int_pm_dev_months') }} where time_interval = '6M' - group by - project_id, - namespace + group by project_id ), contribs_cte as ( select n.project_id, - n.repository_source, n.contributors, n.new_contributors_6_months, c.contributors_6_months, @@ -100,17 +98,14 @@ contribs_cte as ( left join c_cte as c on n.project_id = c.project_id - and n.repository_source = c.repository_source left join d_cte as d on n.project_id = d.project_id - and n.repository_source = d.repository_source ), activity_cte as ( select project_id, - artifact_namespace, SUM( case when impact_metric = 'COMMIT_CODE_TOTAL' then amount @@ -146,18 +141,14 @@ activity_cte as ( 'PULL_REQUEST_OPENED_TOTAL', 'PULL_REQUEST_MERGED_TOTAL' ) - group by - project_id, - artifact_namespace + group by project_id ) - select p.project_id, p.project_source, p.project_namespace, p.project_name, - p.repository_source as `artifact_source`, p.first_commit_date, p.last_commit_date, p.repositories, @@ -175,10 +166,6 @@ select a.pull_requests_merged_6_months from project_repos_summary as p left join contribs_cte as c - on - p.project_id = c.project_id - and p.repository_source = c.repository_source + on p.project_id = c.project_id left join activity_cte as a - on - p.project_id = a.project_id - and p.repository_source = a.artifact_namespace + on p.project_id = a.project_id diff --git a/warehouse/dbt/models/intermediate/metrics/int_onchain_metrics_by_collection_depr.sql b/warehouse/dbt/models/intermediate/metrics/int_onchain_metrics_by_collection_depr.sql deleted file mode 100644 index 70396e860..000000000 --- a/warehouse/dbt/models/intermediate/metrics/int_onchain_metrics_by_collection_depr.sql +++ /dev/null @@ -1,220 +0,0 @@ -{# - Arbitrum Onchain Metrics - Summary onchain metrics for a collection: - - num_projects: The number of projects in the collection - - num_contracts: The number of contracts in the collection - - first_txn_date: The date of the first transaction to the collection - - total_txns: The total number of transactions to the collection - - total_l2_gas: The total L2 gas used by the collection - - total_users: The number of unique users interacting with the collection - - txns_6_months: The total number of transactions to the collection in the last 6 months - - l2_gas_6_months: The total L2 gas used by the collection in the last 6 months - - users_6_months: The number of unique users interacting with the collection in the last 6 months - - new_users: The number of users interacting with the collection for the first time in the last 3 months - - active_users: The number of active users interacting with the collection in the last 3 months - - high_frequency_users: The number of users who have made 1000+ transactions with the collection in the last 3 months - - more_active_users: The number of users who have made 10-999 transactions with the collection in the last 3 months - - less_active_users: The number of users who have made 1-9 transactions with the collection in the last 3 months - - multi_project_users: The number of users who have interacted with 3+ projects in the last 3 months -#} -{{ - config(meta = { - 'sync_to_db': True - }) -}} - --- CTE for grabbing the onchain transaction data we care about, --- including project-collection mapping -with txns as ( - select - pbc.collection_id, - c.to_namespace as onchain_network, - a.project_id, - c.from_source_id as from_id, - c.l2_gas, - c.tx_count, - DATE(TIMESTAMP_TRUNC(c.time, month)) as bucket_month - from {{ ref('stg_dune__contract_invocation') }} as c - inner join {{ ref('stg_ossd__artifacts_by_project') }} as a - on c.to_source_id = a.artifact_source_id - inner join {{ ref('stg_ossd__projects_by_collection') }} as pbc - on a.project_id = pbc.project_id -), - --- CTEs for calculating all-time and 6-month collection metrics across all --- contracts -metrics_all_time as ( - select - collection_id, - onchain_network, - COUNT(distinct project_id) as total_projects, - MIN(bucket_month) as first_txn_date, - COUNT(distinct from_id) as total_users, - SUM(l2_gas) as total_l2_gas, - SUM(tx_count) as total_txns - from txns - group by collection_id, onchain_network -), - -metrics_6_months as ( - select - collection_id, - onchain_network, - COUNT(distinct from_id) as users_6_months, - SUM(l2_gas) as l2_gas_6_months, - SUM(tx_count) as txns_6_months - from txns - where bucket_month >= DATE_ADD(CURRENT_DATE(), interval -6 month) - group by collection_id, onchain_network -), - --- CTE for identifying new users to the collection in the last 3 months -new_users as ( - select - collection_id, - onchain_network, - SUM(is_new_user) as new_user_count - from ( - select - collection_id, - onchain_network, - from_id, - case - when - MIN(bucket_month) - >= DATE_ADD(CURRENT_DATE(), interval -3 month) - then 1 - else 0 - end as is_new_user - from txns - group by collection_id, onchain_network, from_id - ) - group by collection_id, onchain_network -), - --- CTEs for segmenting different types of active users based on txn volume at --- collection level -user_txns_aggregated as ( - select - collection_id, - onchain_network, - from_id, - SUM(tx_count) as total_tx_count - from txns - where bucket_month >= DATE_ADD(CURRENT_DATE(), interval -3 month) - group by collection_id, onchain_network, from_id -), - -multi_project_users as ( - select - onchain_network, - from_id, - COUNT(distinct project_id) as projects_transacted_on - from txns - where bucket_month >= DATE_ADD(CURRENT_DATE(), interval -3 month) - group by onchain_network, from_id -), - -user_segments as ( - select - collection_id, - onchain_network, - COUNT(distinct case - when user_segment = 'HIGH_FREQUENCY_USER' then from_id - end) as high_frequency_users, - COUNT(distinct case - when user_segment = 'MORE_ACTIVE_USER' then from_id - end) as more_active_users, - COUNT(distinct case - when user_segment = 'LESS_ACTIVE_USER' then from_id - end) as less_active_users, - COUNT(distinct case - when projects_transacted_on >= 3 then from_id - end) as multi_project_users - from ( - select - uta.collection_id, - uta.onchain_network, - uta.from_id, - mpu.projects_transacted_on, - case - when uta.total_tx_count >= 1000 then 'HIGH_FREQUENCY_USER' - when uta.total_tx_count >= 10 then 'MORE_ACTIVE_USER' - else 'LESS_ACTIVE_USER' - end as user_segment - from user_txns_aggregated as uta - inner join multi_project_users as mpu - on uta.from_id = mpu.from_id - ) - group by collection_id, onchain_network -), - --- CTE to count the number of contracts deployed by projects in a collection -contracts as ( - select - pbc.collection_id, - a.artifact_namespace as onchain_network, - COUNT(distinct a.artifact_source_id) as num_contracts - from {{ ref('stg_ossd__artifacts_by_project') }} as a - inner join {{ ref('stg_ossd__projects_by_collection') }} as pbc - on a.project_id = pbc.project_id - group by pbc.collection_id, onchain_network -), - -collection_by_network as ( - select - c.collection_id, - c.collection_source, - c.collection_namespace, - c.collection_name, - ctx.onchain_network - from {{ ref('collections_v1') }} as c - inner join contracts as ctx - on c.collection_id = ctx.collection_id -) - --- Final query to join all the metrics together for collections -select - c.collection_id, - c.collection_source, - c.collection_namespace, - c.collection_name, - c.onchain_network as `artifact_source`, - ma.total_projects as `total_project_count`, - co.num_contracts as `total_contract_count`, - ma.first_txn_date as `first_transaction_date`, - ma.total_txns as `total_transaction_count`, - m6.txns_6_months as `transaction_count_6_months`, - ma.total_l2_gas, - m6.l2_gas_6_months, - ma.total_users as `total_user_address_count`, - m6.users_6_months as `user_address_count_6_months`, - nu.new_user_count as `new_user_count_3_months`, - us.high_frequency_users as `high_frequency_address_count`, - us.more_active_users as `more_active_user_address_count`, - us.less_active_users as `less_active_user_address_count`, - us.multi_project_users as `multi_project_user_address_count`, - ( - us.high_frequency_users + us.more_active_users + us.less_active_users - ) as `total_active_user_address_count` -from collection_by_network as c -inner join metrics_all_time as ma - on - c.collection_id = ma.collection_id - and c.onchain_network = ma.onchain_network -inner join metrics_6_months as m6 - on - c.collection_id = m6.collection_id - and c.onchain_network = m6.onchain_network -inner join new_users as nu - on - c.collection_id = nu.collection_id - and c.onchain_network = nu.onchain_network -inner join user_segments as us - on - c.collection_id = us.collection_id - and c.onchain_network = us.onchain_network -inner join contracts as co - on - c.collection_id = co.collection_id - and c.onchain_network = co.onchain_network diff --git a/warehouse/dbt/models/intermediate/metrics/int_onchain_metrics_by_project.sql b/warehouse/dbt/models/intermediate/metrics/int_onchain_metrics_by_project.sql index b49eecae4..d10e732b6 100644 --- a/warehouse/dbt/models/intermediate/metrics/int_onchain_metrics_by_project.sql +++ b/warehouse/dbt/models/intermediate/metrics/int_onchain_metrics_by_project.sql @@ -7,144 +7,140 @@ with txns as ( select project_id, - artifact_namespace as network, - case + event_source, + SUM(case when impact_metric = 'CONTRACT_INVOCATION_DAILY_COUNT_TOTAL' and time_interval = 'ALL' then amount - end as total_txns, - case + end) as total_txns, + SUM(case when impact_metric = 'CONTRACT_INVOCATION_DAILY_L2_GAS_USED_TOTAL' and time_interval = 'ALL' then amount - end as total_l2_gas, - case + end) as total_l2_gas, + SUM(case when impact_metric = 'CONTRACT_INVOCATION_DAILY_COUNT_TOTAL' and time_interval = '6M' then amount - end as txns_6_months, - case + end) as txns_6_months, + SUM(case when impact_metric = 'CONTRACT_INVOCATION_DAILY_L2_GAS_USED_TOTAL' and time_interval = '6M' then amount - end as l2_gas_6_months + end) as l2_gas_6_months from {{ ref('int_event_totals_by_project') }} + group by + project_id, + event_source ), addresses as ( select project_id, - network, - case + event_source, + SUM(case when - impact_metric = 'NEW_ADDRESSES_TOTAL' + impact_metric = 'NEW_ADDRESSES' and time_interval = 'ALL' then amount - end as total_addresses, - case + end) as total_addresses, + SUM(case when - impact_metric = 'NEW_ADDRESSES_TOTAL' + impact_metric = 'NEW_ADDRESSES' and time_interval = '3M' then amount - end as new_addresses, - case + end) as new_addresses, + SUM(case when - impact_metric = 'RETURNING_ADDRESSES_TOTAL' + impact_metric = 'RETURNING_ADDRESSES' and time_interval = '3M' then amount - end as returning_addresses, - case + end) as returning_addresses, + SUM(case when - impact_metric = 'LOW_ACTIVITY_ADDRESSES_TOTAL' + impact_metric = 'LOW_ACTIVITY_ADDRESSES' and time_interval = '3M' then amount - end as low_activity_addresses, - case + end) as low_activity_addresses, + SUM(case when - impact_metric = 'MED_ACTIVITY_ADDRESSES_TOTAL' + impact_metric = 'MED_ACTIVITY_ADDRESSES' and time_interval = '3M' then amount - end as med_activity_addresses, - case + end) as med_activity_addresses, + SUM(case when - impact_metric = 'HIGH_ACTIVITY_ADDRESSES_TOTAL' + impact_metric = 'HIGH_ACTIVITY_ADDRESSES' and time_interval = '3M' then amount - end as high_activity_addresses + end) as high_activity_addresses from {{ ref('int_address_totals_by_project') }} + group by + project_id, + event_source ), first_txn as ( select project_id, - from_namespace as network, + event_source, MIN(bucket_day) as date_first_txn from {{ ref('int_addresses_daily_activity') }} - group by 1, 2 -), - -{# This needs to be refactored to use a new `artifact_types` table #} -{# -contracts AS ( - SELECT + group by project_id, - artifact_source AS network, - COUNT(DISTINCT artifact_name) AS num_contracts - FROM {{ ref('artifacts_by_project_v1') }} - WHERE artifact_type IN ('CONTRACT', 'FACTORY') - GROUP BY 1, 2 + event_source ), -#} -multi_project_addresses as ( +contracts as ( select project_id, - network, - COUNT( - distinct case - when - rfm_ecosystem > 2 - and rfm_recency > 3 - then from_id - end - ) as multi_project_addresses - from - {{ ref('int_address_rfm_segments_by_project') }} + artifact_namespace as event_source, + COUNT(distinct artifact_name) as num_contracts + from {{ ref('artifacts_by_project_v1') }} + where artifact_type in ('CONTRACT', 'FACTORY') group by - 1, 2 + project_id, + artifact_namespace ), metrics as ( select - f.*, - t.* except (project_id, network), - a.* except (project_id, network), - m.* except (project_id, network) + c.*, + f.* except (project_id, event_source), + t.* except (project_id, event_source), + a.* except (project_id, event_source) from + contracts as c + inner join txns as t + on + c.project_id = t.project_id + and c.event_source = t.event_source left join first_txn as f - on t.project_id = f.project_id and t.network = f.network + on + t.project_id = f.project_id + and t.event_source = f.event_source left join addresses as a - on t.project_id = a.project_id and t.network = a.network - left join - multi_project_addresses as m - on t.project_id = m.project_id and t.network = m.network + on + t.project_id = a.project_id + and t.event_source = a.event_source ) select - metrics.* except (project_id, network), - metrics.network as artifact_source, - projects.project_id, - projects.project_source, - projects.project_namespace, - projects.project_name + metrics.*, + p.project_source, + p.project_namespace, + p.project_name, + p.display_name from - {{ ref('int_projects') }} as projects -left join metrics - on projects.project_id = metrics.project_id + {{ ref('projects_v1') }} as p +left join + metrics on p.project_id = metrics.project_id +where + metrics.total_txns is not null diff --git a/warehouse/dbt/models/intermediate/metrics/int_onchain_metrics_by_project_depr.sql b/warehouse/dbt/models/intermediate/metrics/int_onchain_metrics_by_project_depr.sql deleted file mode 100644 index 00f7f1d3b..000000000 --- a/warehouse/dbt/models/intermediate/metrics/int_onchain_metrics_by_project_depr.sql +++ /dev/null @@ -1,211 +0,0 @@ -{# - Summary onchain metrics for a project: - - project_id: The unique identifier for the project - - network: The network the project is deployed on - - num_contracts: The number of contracts in the project - - first_txn_date: The date of the first transaction to the project - - total_txns: The total number of transactions to the project - - total_l2_gas: The total L2 gas used by the project - - total_users: The number of unique users interacting with the project - - txns_6_months: The total number of transactions to the project in the last 6 months - - l2_gas_6_months: The total L2 gas used by the project in the last 6 months - - users_6_months: The number of unique users interacting with the project in the last 6 months - - new_users: The number of users interacting with the project for the first time in the last 3 months - - active_users: The number of active users interacting with the project in the last 3 months - - high_frequency_users: The number of users who have made 1000+ transactions with the project in the last 3 months - - more_active_users: The number of users who have made 10-999 transactions with the project in the last 3 months - - less_active_users: The number of users who have made 1-9 transactions with the project in the last 3 months - - multi_project_users: The number of users who have interacted with 3+ projects in the last 3 months -#} -{{ - config(meta = { - 'sync_to_db': True - }) -}} - --- CTE for grabbing the onchain transaction data we care about -with txns as ( - select - a.project_id, - c.to_namespace as onchain_network, - c.from_source_id as from_id, - c.l2_gas, - c.tx_count, - DATE(TIMESTAMP_TRUNC(c.time, month)) as bucket_month - from {{ ref('stg_dune__contract_invocation') }} as c - inner join {{ ref('stg_ossd__artifacts_by_project') }} as a - on c.to_source_id = a.artifact_source_id -), - --- CTEs for calculating all time and 6 month project metrics across all --- contracts -metrics_all_time as ( - select - project_id, - onchain_network, - MIN(bucket_month) as first_txn_date, - COUNT(distinct from_id) as total_users, - SUM(l2_gas) as total_l2_gas, - SUM(tx_count) as total_txns - from txns - group by project_id, onchain_network -), - -metrics_6_months as ( - select - project_id, - onchain_network, - COUNT(distinct from_id) as users_6_months, - SUM(l2_gas) as l2_gas_6_months, - SUM(tx_count) as txns_6_months - from txns - where bucket_month >= DATE_ADD(CURRENT_DATE(), interval -6 month) - group by project_id, onchain_network -), - --- CTE for identifying new users to the project in the last 3 months -new_users as ( - select - project_id, - onchain_network, - SUM(is_new_user) as new_user_count - from ( - select - project_id, - onchain_network, - from_id, - case - when - MIN(bucket_month) - >= DATE_ADD(CURRENT_DATE(), interval -3 month) - then - 1 - end as is_new_user - from txns - group by project_id, onchain_network, from_id - ) - group by project_id, onchain_network -), - --- CTEs for segmenting different types of active users based on txn volume -user_txns_aggregated as ( - select - project_id, - onchain_network, - from_id, - SUM(tx_count) as total_tx_count - from txns - where bucket_month >= DATE_ADD(CURRENT_DATE(), interval -3 month) - group by project_id, onchain_network, from_id -), - -multi_project_users as ( - select - onchain_network, - from_id, - COUNT(distinct project_id) as projects_transacted_on - from user_txns_aggregated - group by onchain_network, from_id -), - -user_segments as ( - select - project_id, - onchain_network, - COUNT(distinct case - when user_segment = 'HIGH_FREQUENCY_USER' then from_id - end) as high_frequency_users, - COUNT(distinct case - when user_segment = 'MORE_ACTIVE_USER' then from_id - end) as more_active_users, - COUNT(distinct case - when user_segment = 'LESS_ACTIVE_USER' then from_id - end) as less_active_users, - COUNT(distinct case - when projects_transacted_on >= 3 then from_id - end) as multi_project_users - from ( - select - uta.project_id, - uta.onchain_network, - uta.from_id, - mpu.projects_transacted_on, - case - when uta.total_tx_count >= 1000 then 'HIGH_FREQUENCY_USER' - when uta.total_tx_count >= 10 then 'MORE_ACTIVE_USER' - else 'LESS_ACTIVE_USER' - end as user_segment - from user_txns_aggregated as uta - inner join multi_project_users as mpu - on uta.from_id = mpu.from_id - ) - group by project_id, onchain_network -), - --- CTE to count the number of contracts deployed by a project -contracts as ( - select - project_id, - artifact_namespace as onchain_network, - COUNT(artifact_source_id) as num_contracts - from {{ ref('stg_ossd__artifacts_by_project') }} - group by project_id, onchain_network -), - -project_by_network as ( - select - p.project_id, - p.project_source, - p.project_namespace, - p.project_name, - ctx.onchain_network - from {{ ref('projects_v1') }} as p - inner join contracts as ctx - on p.project_id = ctx.project_id -) - --- Final query to join all the metrics together -select - p.project_id, - p.project_source, - p.project_namespace, - p.project_name, - p.onchain_network as `artifact_source`, - -- TODO: add deployers owned by project - c.num_contracts as `total_contract_count`, - ma.first_txn_date as `first_transaction_date`, - ma.total_txns as `total_transaction_count`, - m6.txns_6_months as `transaction_count_6_months`, - ma.total_l2_gas, - m6.l2_gas_6_months, - ma.total_users as `total_user_address_count`, - m6.users_6_months as `user_address_count_6_months`, - nu.new_user_count as `new_user_address_count_3_months`, - us.high_frequency_users as `high_frequency_address_count`, - us.more_active_users as `more_active_user_address_count`, - us.less_active_users as `less_active_user_address_count`, - us.multi_project_users as `multi_project_user_address_count`, - ( - us.high_frequency_users + us.more_active_users + us.less_active_users - ) as `total_active_user_address_count` -from project_by_network as p -left join metrics_all_time as ma - on - p.project_id = ma.project_id - and p.onchain_network = ma.onchain_network -left join metrics_6_months as m6 - on - p.project_id = m6.project_id - and p.onchain_network = m6.onchain_network -left join new_users as nu - on - p.project_id = nu.project_id - and p.onchain_network = nu.onchain_network -left join user_segments as us - on - p.project_id = us.project_id - and p.onchain_network = us.onchain_network -left join contracts as c - on - p.project_id = c.project_id - and p.onchain_network = c.onchain_network diff --git a/warehouse/dbt/models/intermediate/metrics/int_pm_contributors.sql b/warehouse/dbt/models/intermediate/metrics/int_pm_contributors.sql deleted file mode 100644 index 8a9f9a12e..000000000 --- a/warehouse/dbt/models/intermediate/metrics/int_pm_contributors.sql +++ /dev/null @@ -1,16 +0,0 @@ -{# - -#} - -select - d.project_id, - d.repository_source as namespace, - t.time_interval, - CONCAT('CONTRIBUTORS_TOTAL') as impact_metric, - COUNT(distinct d.from_id) as amount -from {{ ref('int_devs') }} as d -cross join {{ ref('int_time_intervals') }} as t -group by - d.project_id, - d.repository_source, - t.time_interval diff --git a/warehouse/dbt/models/intermediate/metrics/int_pm_new_contribs.sql b/warehouse/dbt/models/intermediate/metrics/int_pm_new_contribs.sql deleted file mode 100644 index 57678da45..000000000 --- a/warehouse/dbt/models/intermediate/metrics/int_pm_new_contribs.sql +++ /dev/null @@ -1,19 +0,0 @@ -{# - -#} - -select - d.project_id, - d.repository_source as namespace, - t.time_interval, - CONCAT('NEW_CONTRIBUTORS_TOTAL') as impact_metric, - COUNT(distinct case - when DATE(d.date_first_contribution) >= DATE_TRUNC(t.start_date, month) - then d.from_id - end) as amount -from {{ ref('int_devs') }} as d -cross join {{ ref('int_time_intervals') }} as t -group by - d.project_id, - d.repository_source, - t.time_interval diff --git a/warehouse/dbt/models/intermediate/metrics/int_pm_busfactor_by_project.sql b/warehouse/dbt/models/intermediate/metrics/pms/int_pm_busfactor_by_project.sql similarity index 93% rename from warehouse/dbt/models/intermediate/metrics/int_pm_busfactor_by_project.sql rename to warehouse/dbt/models/intermediate/metrics/pms/int_pm_busfactor_by_project.sql index 4d94c0fab..8e4e95310 100644 --- a/warehouse/dbt/models/intermediate/metrics/int_pm_busfactor_by_project.sql +++ b/warehouse/dbt/models/intermediate/metrics/pms/int_pm_busfactor_by_project.sql @@ -20,14 +20,14 @@ with all_contributions as ( select project_id, - from_id, + from_artifact_id, SUM(amount) as total_amount, DATE_TRUNC(DATE(time), month) as contribution_month from {{ ref('int_events_to_project') }} where event_type = 'COMMIT_CODE' -- CONTRIBUTION FILTER group by project_id, - from_id, + from_artifact_id, DATE_TRUNC(DATE(time), month) ), @@ -49,7 +49,7 @@ project_periods as ( aggregated_contributions as ( select c.project_id, - c.from_id, + c.from_artifact_id, SUM(c.total_amount) as total_amount, '90D' as period from contributions as c @@ -57,11 +57,11 @@ aggregated_contributions as ( where c.contribution_month > DATE_SUB(p.end_month, interval 3 month) group by c.project_id, - c.from_id + c.from_artifact_id union all select c.project_id, - c.from_id, + c.from_artifact_id, SUM(c.total_amount) as total_amount, '6M' as period from contributions as c @@ -69,11 +69,11 @@ aggregated_contributions as ( where c.contribution_month > DATE_SUB(p.end_month, interval 6 month) group by c.project_id, - c.from_id + c.from_artifact_id union all select c.project_id, - c.from_id, + c.from_artifact_id, SUM(c.total_amount) as total_amount, '1Y' as period from contributions as c @@ -81,24 +81,24 @@ aggregated_contributions as ( where c.contribution_month > DATE_SUB(p.end_month, interval 12 month) group by c.project_id, - c.from_id + c.from_artifact_id union all select c.project_id, - c.from_id, + c.from_artifact_id, SUM(c.total_amount) as total_amount, 'ALL' as period from contributions as c group by c.project_id, - c.from_id + c.from_artifact_id ), ranked_contributions as ( select project_id, period, - from_id, + from_artifact_id, total_amount, RANK() over ( diff --git a/warehouse/dbt/models/intermediate/metrics/int_pm_contracts_deployed.sql b/warehouse/dbt/models/intermediate/metrics/pms/int_pm_contracts_deployed.sql similarity index 85% rename from warehouse/dbt/models/intermediate/metrics/int_pm_contracts_deployed.sql rename to warehouse/dbt/models/intermediate/metrics/pms/int_pm_contracts_deployed.sql index 52a257ca1..67d314200 100644 --- a/warehouse/dbt/models/intermediate/metrics/int_pm_contracts_deployed.sql +++ b/warehouse/dbt/models/intermediate/metrics/pms/int_pm_contracts_deployed.sql @@ -8,14 +8,13 @@ with users_by_contract as ( select - to_id as artifact_id, - COUNT(distinct from_id) as num_users + to_artifact_id as artifact_id, + COUNT(distinct from_artifact_id) as num_users from {{ ref('int_events_with_artifact_id') }} where - from_id in ( + from_artifact_id in ( select user_id from {{ ref('int_users') }} - where is_trusted = true ) and event_type = 'CONTRACT_INVOCATION_DAILY_COUNT' group by 1 diff --git a/warehouse/dbt/models/intermediate/metrics/pms/int_pm_contributors.sql b/warehouse/dbt/models/intermediate/metrics/pms/int_pm_contributors.sql new file mode 100644 index 000000000..2f73b2e52 --- /dev/null +++ b/warehouse/dbt/models/intermediate/metrics/pms/int_pm_contributors.sql @@ -0,0 +1,14 @@ +{# + Contributors to a project in a time interval +#} + +select + int_contributors_to_project.project_id, + int_time_intervals.time_interval, + CONCAT('CONTRIBUTORS_TOTAL') as impact_metric, + COUNT(distinct int_contributors_to_project.artifact_id) as amount +from {{ ref('int_contributors_to_project') }} +cross join {{ ref('int_time_intervals') }} +group by + int_contributors_to_project.project_id, + int_time_intervals.time_interval diff --git a/warehouse/dbt/models/intermediate/metrics/int_pm_dev_months.sql b/warehouse/dbt/models/intermediate/metrics/pms/int_pm_dev_months.sql similarity index 92% rename from warehouse/dbt/models/intermediate/metrics/int_pm_dev_months.sql rename to warehouse/dbt/models/intermediate/metrics/pms/int_pm_dev_months.sql index b4883abc2..9a901920d 100644 --- a/warehouse/dbt/models/intermediate/metrics/int_pm_dev_months.sql +++ b/warehouse/dbt/models/intermediate/metrics/pms/int_pm_dev_months.sql @@ -9,7 +9,6 @@ select e.project_id, - e.repository_source as namespace, t.time_interval, CONCAT(e.user_segment_type, '_TOTAL') as impact_metric, SUM(e.amount) as amount @@ -20,6 +19,5 @@ where and DATE(e.bucket_month) < DATE_TRUNC(CURRENT_DATE(), month) group by e.project_id, - e.repository_source, t.time_interval, e.user_segment_type diff --git a/warehouse/dbt/models/intermediate/metrics/pms/int_pm_new_contribs.sql b/warehouse/dbt/models/intermediate/metrics/pms/int_pm_new_contribs.sql new file mode 100644 index 000000000..0178859b6 --- /dev/null +++ b/warehouse/dbt/models/intermediate/metrics/pms/int_pm_new_contribs.sql @@ -0,0 +1,30 @@ +{# + New contributors to a project in a given time interval +#} + +with contributors as ( + select + project_id, + artifact_id, + MIN(first_contribution_time) as first_contribution_time + from {{ ref('int_contributors_to_project') }} + group by + project_id, + artifact_id +) + +select + contributors.project_id, + int_time_intervals.time_interval, + 'NEW_CONTRIBUTORS_TOTAL' as impact_metric, + COUNT(distinct case + when + DATE(contributors.first_contribution_time) + >= DATE_TRUNC(int_time_intervals.start_date, month) + then contributors.artifact_id + end) as amount +from contributors +cross join {{ ref('int_time_intervals') }} +group by + contributors.project_id, + int_time_intervals.time_interval diff --git a/warehouse/dbt/models/intermediate/metrics/int_pm_trusted_transactions.sql b/warehouse/dbt/models/intermediate/metrics/pms/int_pm_trusted_transactions.sql similarity index 82% rename from warehouse/dbt/models/intermediate/metrics/int_pm_trusted_transactions.sql rename to warehouse/dbt/models/intermediate/metrics/pms/int_pm_trusted_transactions.sql index cea7809da..629b586f9 100644 --- a/warehouse/dbt/models/intermediate/metrics/int_pm_trusted_transactions.sql +++ b/warehouse/dbt/models/intermediate/metrics/pms/int_pm_trusted_transactions.sql @@ -5,7 +5,6 @@ select e.project_id, - e.from_namespace as namespace, t.time_interval, 'TRUSTED_TRANSACTIONS_TOTAL' as impact_metric, SUM(e.amount) as amount @@ -13,13 +12,11 @@ from {{ ref('int_events_to_project') }} as e cross join {{ ref('int_time_intervals') }} as t where DATE(e.time) >= t.start_date - and e.from_id in ( + and e.from_artifact_id in ( select user_id from {{ ref('int_users') }} - where is_trusted = true ) and e.event_type = 'CONTRACT_INVOCATION_DAILY_COUNT' group by e.project_id, - e.from_namespace, t.time_interval diff --git a/warehouse/dbt/models/intermediate/users/int_active_addresses_daily_to_project.sql b/warehouse/dbt/models/intermediate/users/int_active_addresses_daily_to_project.sql index d71ca87dc..b1cb0554d 100644 --- a/warehouse/dbt/models/intermediate/users/int_active_addresses_daily_to_project.sql +++ b/warehouse/dbt/models/intermediate/users/int_active_addresses_daily_to_project.sql @@ -2,12 +2,17 @@ Daily active addresses to project by network #} - select - project_id, - from_namespace as network, - bucket_day, - address_type as user_type, - COUNT(distinct from_id) as amount + int_addresses_daily_activity.project_id, + int_artifacts.artifact_source, + int_addresses_daily_activity.bucket_day, + int_addresses_daily_activity.address_type, + COUNT(distinct int_addresses_daily_activity.from_artifact_id) as amount from {{ ref('int_addresses_daily_activity') }} -group by 1, 2, 3, 4 +left join {{ ref('int_artifacts') }} + on int_addresses_daily_activity.from_artifact_id = int_artifacts.artifact_id +group by + int_addresses_daily_activity.project_id, + int_artifacts.artifact_source, + int_addresses_daily_activity.bucket_day, + int_addresses_daily_activity.address_type diff --git a/warehouse/dbt/models/intermediate/users/int_active_addresses_monthly_to_project.sql b/warehouse/dbt/models/intermediate/users/int_active_addresses_monthly_to_project.sql index 7d0488456..6822f126d 100644 --- a/warehouse/dbt/models/intermediate/users/int_active_addresses_monthly_to_project.sql +++ b/warehouse/dbt/models/intermediate/users/int_active_addresses_monthly_to_project.sql @@ -6,8 +6,8 @@ with activity as ( select project_id, - from_namespace as network, - from_id, + event_source, + from_artifact_id, DATE_TRUNC(bucket_day, month) as bucket_month, (address_type = 'NEW') as is_new_user from {{ ref('int_addresses_daily_activity') }} @@ -16,20 +16,24 @@ with activity as ( activity_monthly as ( select project_id, - network, - from_id, + event_source, + from_artifact_id, bucket_month, MAX(is_new_user) as is_new_user from activity - group by 1, 2, 3, 4 + group by + project_id, + event_source, + from_artifact_id, + bucket_month ), user_classification as ( select project_id, - network, + event_source, bucket_month, - from_id, + from_artifact_id, case when is_new_user then 'NEW' else 'RETURNING' @@ -39,9 +43,13 @@ user_classification as ( select project_id, - network, + event_source, bucket_month, user_type, - COUNT(distinct from_id) as amount + COUNT(distinct from_artifact_id) as amount from user_classification -group by 1, 2, 3, 4 +group by + project_id, + event_source, + bucket_month, + user_type diff --git a/warehouse/dbt/models/intermediate/users/int_active_devs_monthly_to_collection.sql b/warehouse/dbt/models/intermediate/users/int_active_devs_monthly_to_collection.sql index 5b94ba61b..3704a2db9 100644 --- a/warehouse/dbt/models/intermediate/users/int_active_devs_monthly_to_collection.sql +++ b/warehouse/dbt/models/intermediate/users/int_active_devs_monthly_to_collection.sql @@ -11,8 +11,8 @@ select collection_id, - from_id, - from_namespace as repository_source, + from_artifact_id, + event_source, bucket_month, 1 as amount, case diff --git a/warehouse/dbt/models/intermediate/users/int_active_devs_monthly_to_project.sql b/warehouse/dbt/models/intermediate/users/int_active_devs_monthly_to_project.sql index 1c4c33607..83ffebe5b 100644 --- a/warehouse/dbt/models/intermediate/users/int_active_devs_monthly_to_project.sql +++ b/warehouse/dbt/models/intermediate/users/int_active_devs_monthly_to_project.sql @@ -11,8 +11,7 @@ select project_id, - from_id, - from_namespace as repository_source, + from_artifact_id, bucket_month, 1 as amount, case diff --git a/warehouse/dbt/models/intermediate/users/int_address_rfm_segments_by_project.sql b/warehouse/dbt/models/intermediate/users/int_address_rfm_segments_by_project.sql index 052060424..935ffe87a 100644 --- a/warehouse/dbt/models/intermediate/users/int_address_rfm_segments_by_project.sql +++ b/warehouse/dbt/models/intermediate/users/int_address_rfm_segments_by_project.sql @@ -16,31 +16,32 @@ with user_history as ( select - from_id, - network, + artifact_id, project_id, - count_events as total_activity, - DATE_DIFF(CURRENT_TIMESTAMP(), date_last_txn, day) + transaction_count, + DATE_DIFF(CURRENT_TIMESTAMP(), last_transaction_time, day) as days_since_last_activity - from {{ ref('int_addresses') }} + from {{ ref('int_addresses_to_project') }} ), user_stats as ( select - from_id, - network, + artifact_id, project_id, - total_activity, + transaction_count, days_since_last_activity, COUNT(distinct project_id) as project_count from user_history - group by 1, 2, 3, 4, 5 + group by + artifact_id, + project_id, + transaction_count, + days_since_last_activity ), rfm_components as ( select - from_id, - network, + artifact_id, project_id, case when days_since_last_activity < 7 then 5 @@ -50,10 +51,10 @@ rfm_components as ( else 1 end as rfm_recency, case - when total_activity = 1 then 1 - when total_activity < 10 then 2 - when total_activity < 100 then 3 - when total_activity < 1000 then 4 + when transaction_count = 1 then 1 + when transaction_count < 10 then 2 + when transaction_count < 100 then 3 + when transaction_count < 1000 then 4 else 5 end as rfm_frequency, case @@ -67,7 +68,11 @@ rfm_components as ( ) select - *, + artifact_id, + project_id, + rfm_recency, + rfm_frequency, + rfm_ecosystem, case when rfm_frequency = 5 then diff --git a/warehouse/dbt/models/intermediate/users/int_address_totals_by_project.sql b/warehouse/dbt/models/intermediate/users/int_address_totals_by_project.sql index 940a40e3f..69bb25840 100644 --- a/warehouse/dbt/models/intermediate/users/int_address_totals_by_project.sql +++ b/warehouse/dbt/models/intermediate/users/int_address_totals_by_project.sql @@ -12,24 +12,30 @@ with user_data as ( select - a.project_id, - a.from_namespace as network, - a.from_id, - a.address_type, - a.amount, - t.time_interval, - t.start_date, - DATE(a.bucket_day) as bucket_day - from {{ ref('int_addresses_daily_activity') }} as a - cross join {{ ref('int_time_intervals') }} as t + int_addresses_daily_activity.project_id, + int_addresses_daily_activity.event_source, + int_addresses_daily_activity.from_artifact_id, + int_addresses_daily_activity.address_type, + int_addresses_daily_activity.amount, + int_time_intervals.time_interval, + int_time_intervals.start_date, + DATE(int_addresses_daily_activity.bucket_day) as bucket_day + from {{ ref('int_addresses_daily_activity') }} + left join {{ ref('int_addresses_to_project') }} + on + int_addresses_daily_activity.from_artifact_id + = int_addresses_to_project.artifact_id + and int_addresses_daily_activity.project_id + = int_addresses_to_project.project_id + cross join {{ ref('int_time_intervals') }} ), user_status as ( select project_id, - network, + event_source, time_interval, - from_id, + from_artifact_id, amount, case when bucket_day >= start_date then address_type @@ -41,9 +47,9 @@ user_status as ( user_activity_levels as ( select project_id, - network, + event_source, time_interval, - from_id, + from_artifact_id, case when SUM(amount) >= {{ activity_thresh }} then 'HIGH_ACTIVITY' when @@ -54,32 +60,44 @@ user_activity_levels as ( end as activity_level from user_status where address_status != 'INACTIVE' - group by 1, 2, 3, 4 + group by + project_id, + event_source, + time_interval, + from_artifact_id ), final_users as ( select project_id, - network, + event_source, time_interval, CONCAT(address_status, '_ADDRESSES') as impact_metric, - COUNT(distinct from_id) as amount + COUNT(distinct from_artifact_id) as amount from user_status - group by 1, 2, 3, 4 + group by + project_id, + event_source, + time_interval, + address_status union all select project_id, - network, + event_source, time_interval, CONCAT(activity_level, '_ADDRESSES') as impact_metric, - COUNT(distinct from_id) as amount + COUNT(distinct from_artifact_id) as amount from user_activity_levels - group by 1, 2, 3, 4 + group by + project_id, + event_source, + time_interval, + activity_level ) select project_id, - network, + event_source, time_interval, impact_metric, amount diff --git a/warehouse/dbt/models/intermediate/users/int_addresses.sql b/warehouse/dbt/models/intermediate/users/int_addresses.sql deleted file mode 100644 index e074fd4f4..000000000 --- a/warehouse/dbt/models/intermediate/users/int_addresses.sql +++ /dev/null @@ -1,14 +0,0 @@ -{# - Address stats by project and network -#} - -select - from_id, - from_namespace as network, - project_id, - MIN(bucket_day) as date_first_txn, - MAX(bucket_day) as date_last_txn, - SUM(amount) as count_events -from {{ ref('int_user_events_daily_to_project') }} -where event_type = 'CONTRACT_INVOCATION_DAILY_COUNT' -group by 1, 2, 3 diff --git a/warehouse/dbt/models/intermediate/users/int_addresses_daily_activity.sql b/warehouse/dbt/models/intermediate/users/int_addresses_daily_activity.sql index a528a658b..c399c5b9c 100644 --- a/warehouse/dbt/models/intermediate/users/int_addresses_daily_activity.sql +++ b/warehouse/dbt/models/intermediate/users/int_addresses_daily_activity.sql @@ -3,20 +3,23 @@ #} select - e.project_id, - e.from_namespace, - e.from_id, - e.bucket_day, - e.amount, + int_events_to_project.project_id, + int_events_to_project.from_artifact_id, + int_events_to_project.event_source, + int_events_to_project.amount, + DATE_TRUNC(int_events_to_project.time, day) as bucket_day, case - when e.bucket_day = a.date_first_txn then 'NEW' + when + int_events_to_project.time + = int_addresses_to_project.first_transaction_time + then 'NEW' else 'RETURNING' end as address_type -from {{ ref('int_user_events_daily_to_project') }} as e -left join {{ ref('int_addresses') }} as a +from {{ ref('int_events_to_project') }} +left join {{ ref('int_addresses_to_project') }} on - e.from_id = a.from_id - and e.from_namespace = a.network - and e.project_id = a.project_id + int_events_to_project.from_artifact_id + = int_addresses_to_project.artifact_id + and int_events_to_project.project_id = int_addresses_to_project.project_id where - e.event_type = 'CONTRACT_INVOCATION_DAILY_COUNT' + int_events_to_project.event_type = 'CONTRACT_INVOCATION_DAILY_COUNT' diff --git a/warehouse/dbt/models/intermediate/users/int_addresses_to_project.sql b/warehouse/dbt/models/intermediate/users/int_addresses_to_project.sql new file mode 100644 index 000000000..37bdde10c --- /dev/null +++ b/warehouse/dbt/models/intermediate/users/int_addresses_to_project.sql @@ -0,0 +1,20 @@ +{# + Address stats by project +#} + +select + from_artifact_id as artifact_id, + project_id, + event_type, + MIN(time) as first_transaction_time, + MAX(time) as last_transaction_time, + SUM(amount) as transaction_count +from {{ ref('int_events_to_project') }} +where + event_type in ( + 'CONTRACT_INVOCATION_DAILY_COUNT' + ) +group by + from_artifact_id, + project_id, + event_type diff --git a/warehouse/dbt/models/intermediate/users/int_contributors_to_project.sql b/warehouse/dbt/models/intermediate/users/int_contributors_to_project.sql new file mode 100644 index 000000000..2c0edc5fd --- /dev/null +++ b/warehouse/dbt/models/intermediate/users/int_contributors_to_project.sql @@ -0,0 +1,27 @@ +{# + Contributor stats by project and event type +#} + +select + from_artifact_id as artifact_id, + project_id, + event_type, + MIN(time) as first_contribution_time, + MAX(time) as last_contribution_time, + SUM(amount) as contribution_count +from {{ ref('int_events_to_project') }} +where + event_type in ( + 'COMMIT_CODE', + 'PULL_REQUEST_OPENED', + 'PULL_REQUEST_REOPENED', + 'PULL_REQUEST_CLOSED', + 'PULL_REQUEST_MERGED', + 'ISSUE_CLOSED', + 'ISSUE_OPENED', + 'ISSUE_REOPENED' + ) +group by + from_artifact_id, + project_id, + event_type diff --git a/warehouse/dbt/models/intermediate/users/int_devs.sql b/warehouse/dbt/models/intermediate/users/int_devs.sql deleted file mode 100644 index 1797189ff..000000000 --- a/warehouse/dbt/models/intermediate/users/int_devs.sql +++ /dev/null @@ -1,24 +0,0 @@ -{# - Developer stats by project and repo source -#} - -select - from_id, - from_namespace as repository_source, - project_id, - MIN(bucket_day) as date_first_contribution, - MAX(bucket_day) as date_last_contribution, - SUM(amount) as count_events -from {{ ref('int_user_events_daily_to_project') }} -where - event_type in ( - 'COMMIT_CODE', - 'PULL_REQUEST_OPENED', - 'PULL_REQUEST_REOPENED', - 'PULL_REQUEST_CLOSED', - 'PULL_REQUEST_MERGED', - 'ISSUE_CLOSED', - 'ISSUE_OPENED', - 'ISSUE_REOPENED' - ) -group by 1, 2, 3 diff --git a/warehouse/dbt/models/intermediate/users/int_first_contribution_to_project.sql b/warehouse/dbt/models/intermediate/users/int_first_contribution_to_project.sql deleted file mode 100644 index bebe70dde..000000000 --- a/warehouse/dbt/models/intermediate/users/int_first_contribution_to_project.sql +++ /dev/null @@ -1,13 +0,0 @@ -{{ - config(meta = { - 'sync_to_db': True - }) -}} - -select - e.event_type, - e.project_id, - e.from_id as artifact_id, - MIN(e.time) as event_time -from {{ ref('int_events_to_project') }} as e -group by e.event_type, e.project_id, e.from_id diff --git a/warehouse/dbt/models/intermediate/users/int_last_contribution_to_project.sql b/warehouse/dbt/models/intermediate/users/int_last_contribution_to_project.sql deleted file mode 100644 index 795d3c8c7..000000000 --- a/warehouse/dbt/models/intermediate/users/int_last_contribution_to_project.sql +++ /dev/null @@ -1,13 +0,0 @@ -{{ - config(meta = { - 'sync_to_db': True - }) -}} - -select - e.event_type, - e.project_id, - e.from_id as artifact_id, - MAX(e.time) as event_time -from {{ ref('int_events_to_project') }} as e -group by e.event_type, e.project_id, e.from_id diff --git a/warehouse/dbt/models/intermediate/users/int_user_addresses.sql b/warehouse/dbt/models/intermediate/users/int_user_addresses.sql index cf96b70ea..4796a72ee 100644 --- a/warehouse/dbt/models/intermediate/users/int_user_addresses.sql +++ b/warehouse/dbt/models/intermediate/users/int_user_addresses.sql @@ -4,15 +4,15 @@ with user_data as ( select - from_id, + artifact_id, MAX(rfm_recency) as r, MAX(rfm_frequency) as f, MAX(rfm_ecosystem) as e from {{ ref('int_address_rfm_segments_by_project') }} - group by 1 + group by artifact_id ) select - from_id as user_id, + artifact_id as user_id, (r > 2 and f > 2 and e > 2) as is_trusted from user_data diff --git a/warehouse/dbt/models/intermediate/users/int_user_events_daily_to_collection.sql b/warehouse/dbt/models/intermediate/users/int_user_events_daily_to_collection.sql index ccd5467c0..f4da7abab 100644 --- a/warehouse/dbt/models/intermediate/users/int_user_events_daily_to_collection.sql +++ b/warehouse/dbt/models/intermediate/users/int_user_events_daily_to_collection.sql @@ -5,8 +5,8 @@ #} select - from_id, - from_namespace, + from_artifact_id, + event_source, collection_id, event_type, TIMESTAMP_TRUNC(time, day) as bucket_day, @@ -25,8 +25,8 @@ where 'CONTRACT_INVOCATION_DAILY_COUNT' ) group by - from_id, - from_namespace, + from_artifact_id, + event_source, collection_id, event_type, TIMESTAMP_TRUNC(time, day) diff --git a/warehouse/dbt/models/intermediate/users/int_user_events_daily_to_project.sql b/warehouse/dbt/models/intermediate/users/int_user_events_daily_to_project.sql index 71a9feb61..2ade1f736 100644 --- a/warehouse/dbt/models/intermediate/users/int_user_events_daily_to_project.sql +++ b/warehouse/dbt/models/intermediate/users/int_user_events_daily_to_project.sql @@ -5,8 +5,8 @@ #} select - from_id, - from_namespace, + from_artifact_id, + event_source, project_id, event_type, TIMESTAMP_TRUNC(time, day) as bucket_day, @@ -25,8 +25,8 @@ where 'CONTRACT_INVOCATION_DAILY_COUNT' ) group by - from_id, - from_namespace, + from_artifact_id, + event_source, project_id, event_type, TIMESTAMP_TRUNC(time, day) diff --git a/warehouse/dbt/models/intermediate/users/int_user_events_monthly_to_collection.sql b/warehouse/dbt/models/intermediate/users/int_user_events_monthly_to_collection.sql index ff0feefab..b424eb8da 100644 --- a/warehouse/dbt/models/intermediate/users/int_user_events_monthly_to_collection.sql +++ b/warehouse/dbt/models/intermediate/users/int_user_events_monthly_to_collection.sql @@ -5,8 +5,8 @@ #} select - from_id, - from_namespace, + from_artifact_id, + event_source, collection_id, event_type, TIMESTAMP_TRUNC(bucket_day, month) as bucket_month, @@ -14,8 +14,8 @@ select SUM(amount) as total_amount from {{ ref('int_user_events_daily_to_collection') }} group by - from_id, - from_namespace, + from_artifact_id, + event_source, collection_id, event_type, TIMESTAMP_TRUNC(bucket_day, month) diff --git a/warehouse/dbt/models/intermediate/users/int_user_events_monthly_to_project.sql b/warehouse/dbt/models/intermediate/users/int_user_events_monthly_to_project.sql index 17edbb468..44ac55f1b 100644 --- a/warehouse/dbt/models/intermediate/users/int_user_events_monthly_to_project.sql +++ b/warehouse/dbt/models/intermediate/users/int_user_events_monthly_to_project.sql @@ -5,8 +5,8 @@ #} select - from_id, - from_namespace, + from_artifact_id, + event_source, project_id, event_type, TIMESTAMP_TRUNC(bucket_day, month) as bucket_month, @@ -14,8 +14,8 @@ select SUM(amount) as total_amount from {{ ref('int_user_events_daily_to_project') }} group by - from_id, - from_namespace, + from_artifact_id, + event_source, project_id, event_type, TIMESTAMP_TRUNC(bucket_day, month) diff --git a/warehouse/dbt/models/intermediate/users/int_user_events_to_project_by_time_interval.sql b/warehouse/dbt/models/intermediate/users/int_user_events_to_project_by_time_interval.sql index 58800efb7..4f7555f42 100644 --- a/warehouse/dbt/models/intermediate/users/int_user_events_to_project_by_time_interval.sql +++ b/warehouse/dbt/models/intermediate/users/int_user_events_to_project_by_time_interval.sql @@ -4,18 +4,20 @@ #} select - e.from_id, - e.from_namespace, - e.project_id, - t.time_interval, - e.event_type, - SUM(e.amount) as amount -from {{ ref('int_user_events_daily_to_project') }} as e -cross join {{ ref('int_time_intervals') }} as t -where DATE(e.bucket_day) >= t.start_date + int_user_events_daily_to_project.from_artifact_id, + int_user_events_daily_to_project.event_source, + int_user_events_daily_to_project.project_id, + int_time_intervals.time_interval, + int_user_events_daily_to_project.event_type, + SUM(int_user_events_daily_to_project.amount) as amount +from {{ ref('int_user_events_daily_to_project') }} +cross join {{ ref('int_time_intervals') }} +where + DATE(int_user_events_daily_to_project.bucket_day) + >= int_time_intervals.start_date group by - e.from_id, - e.project_id, - e.from_namespace, - t.time_interval, - e.event_type + int_user_events_daily_to_project.from_artifact_id, + int_user_events_daily_to_project.project_id, + int_user_events_daily_to_project.event_source, + int_time_intervals.time_interval, + int_user_events_daily_to_project.event_type diff --git a/warehouse/dbt/models/intermediate/users/int_users_monthly_to_project.sql b/warehouse/dbt/models/intermediate/users/int_users_monthly_to_project.sql index 195b28b53..82cf2c31a 100644 --- a/warehouse/dbt/models/intermediate/users/int_users_monthly_to_project.sql +++ b/warehouse/dbt/models/intermediate/users/int_users_monthly_to_project.sql @@ -7,12 +7,16 @@ with users as ( select project_id, - from_namespace, + event_source, event_type, bucket_month, - COUNT(distinct from_id) as amount + COUNT(distinct from_artifact_id) as amount from {{ ref('int_user_events_monthly_to_project') }} - group by 1, 2, 3, 4 + group by + project_id, + event_source, + event_type, + bucket_month ) select diff --git a/warehouse/dbt/models/marts/directory/artifacts_by_project_v1.sql b/warehouse/dbt/models/marts/directory/artifacts_by_project_v1.sql index 4ca77c112..363c2707b 100644 --- a/warehouse/dbt/models/marts/directory/artifacts_by_project_v1.sql +++ b/warehouse/dbt/models/marts/directory/artifacts_by_project_v1.sql @@ -6,13 +6,14 @@ select artifacts_by_project.artifact_id, - artifacts_by_project.artifact_source, + artifacts_by_project.artifact_source_id, artifacts_by_project.artifact_namespace, artifacts_by_project.artifact_name, + artifacts_by_project.artifact_type, projects.project_id, projects.project_source, projects.project_namespace, projects.project_name -from {{ ref('stg_ossd__artifacts_by_project') }} as artifacts_by_project +from {{ ref('int_ossd__artifacts_by_project') }} as artifacts_by_project left join {{ ref('int_projects') }} as projects on artifacts_by_project.project_id = projects.project_id diff --git a/warehouse/dbt/models/marts/directory/artifacts_v1.sql b/warehouse/dbt/models/marts/directory/artifacts_v1.sql index b58ef983c..118d359ed 100644 --- a/warehouse/dbt/models/marts/directory/artifacts_v1.sql +++ b/warehouse/dbt/models/marts/directory/artifacts_v1.sql @@ -10,7 +10,7 @@ select artifact_id, artifact_source_id, artifact_source, - artifact_namespace, artifact_name, + artifact_type, artifact_url from {{ ref('int_artifacts') }} diff --git a/warehouse/dbt/models/marts/events/daily/events_daily_from_artifact.sql b/warehouse/dbt/models/marts/events/daily/events_daily_from_artifact.sql deleted file mode 100644 index 4c9743a15..000000000 --- a/warehouse/dbt/models/marts/events/daily/events_daily_from_artifact.sql +++ /dev/null @@ -1,16 +0,0 @@ -{# - All events daily from an artifact -#} -{{ - config(meta = { - 'sync_to_db': True - }) -}} - -select - e.from_id as artifact_id, - e.event_type, - TIMESTAMP_TRUNC(e.time, day) as bucket_day, - SUM(e.amount) as amount -from {{ ref('int_events_from_project') }} as e -group by 1, 2, 3 diff --git a/warehouse/dbt/models/marts/events/daily/events_daily_from_collection.sql b/warehouse/dbt/models/marts/events/daily/events_daily_from_collection.sql deleted file mode 100644 index 4ef894015..000000000 --- a/warehouse/dbt/models/marts/events/daily/events_daily_from_collection.sql +++ /dev/null @@ -1,14 +0,0 @@ -{{ - config(meta = { - 'sync_to_db': True - }) -}} - -select - e.collection_id, - e.event_type, - TIMESTAMP_TRUNC(e.time, day) as bucket_day, - SUM(e.amount) as amount -from {{ ref('int_events_from_collection') }} as e -where e.collection_id is not null -group by 1, 2, 3 diff --git a/warehouse/dbt/models/marts/events/daily/events_daily_from_project.sql b/warehouse/dbt/models/marts/events/daily/events_daily_from_project.sql deleted file mode 100644 index 0ac7c67e8..000000000 --- a/warehouse/dbt/models/marts/events/daily/events_daily_from_project.sql +++ /dev/null @@ -1,17 +0,0 @@ -{# - All events daily from a project -#} -{{ - config(meta = { - 'sync_to_db': True - }) -}} - -select - e.project_id, - e.event_type, - TIMESTAMP_TRUNC(e.time, day) as bucket_day, - SUM(e.amount) as amount -from {{ ref('int_events_from_project') }} as e -where e.project_id is not null -group by 1, 2, 3 diff --git a/warehouse/dbt/models/marts/events/daily/events_daily_to_artifact.sql b/warehouse/dbt/models/marts/events/daily/events_daily_to_artifact.sql index 16a665603..a12b7fdb4 100644 --- a/warehouse/dbt/models/marts/events/daily/events_daily_to_artifact.sql +++ b/warehouse/dbt/models/marts/events/daily/events_daily_to_artifact.sql @@ -8,9 +8,12 @@ }} select - e.to_id as artifact_id, - e.event_type, - TIMESTAMP_TRUNC(e.time, day) as bucket_day, - SUM(e.amount) as amount -from {{ ref('int_events_to_project') }} as e -group by 1, 2, 3 + to_artifact_id, + event_type, + TIMESTAMP_TRUNC(time, day) as bucket_day, + SUM(amount) as amount +from {{ ref('int_events_to_project') }} +group by + to_artifact_id, + event_type, + TIMESTAMP_TRUNC(time, day) diff --git a/warehouse/dbt/models/marts/events/daily/events_daily_to_artifact_by_source.sql b/warehouse/dbt/models/marts/events/daily/events_daily_to_artifact_by_source.sql deleted file mode 100644 index 7fdd05fdc..000000000 --- a/warehouse/dbt/models/marts/events/daily/events_daily_to_artifact_by_source.sql +++ /dev/null @@ -1,17 +0,0 @@ -{# - All events daily to an artifact by source -#} -{{ - config(meta = { - 'sync_to_db': False - }) -}} - -select - e.to_id as artifact_id, - e.from_namespace, - e.event_type, - TIMESTAMP_TRUNC(e.time, day) as bucket_day, - SUM(e.amount) as amount -from {{ ref('int_events_to_project') }} as e -group by 1, 2, 3, 4 diff --git a/warehouse/dbt/models/marts/events/daily/events_daily_to_collection.sql b/warehouse/dbt/models/marts/events/daily/events_daily_to_collection.sql index f96fd61a0..71ac09295 100644 --- a/warehouse/dbt/models/marts/events/daily/events_daily_to_collection.sql +++ b/warehouse/dbt/models/marts/events/daily/events_daily_to_collection.sql @@ -5,9 +5,12 @@ }} select - e.collection_id, - e.event_type, - TIMESTAMP_TRUNC(e.time, day) as bucket_day, - SUM(e.amount) as amount -from {{ ref('int_events_to_collection') }} as e -group by 1, 2, 3 + collection_id, + event_type, + TIMESTAMP_TRUNC(time, day) as bucket_day, + SUM(amount) as amount +from {{ ref('int_events_to_collection') }} +group by + collection_id, + event_type, + TIMESTAMP_TRUNC(time, day) diff --git a/warehouse/dbt/models/marts/events/daily/events_daily_to_collection_by_source.sql b/warehouse/dbt/models/marts/events/daily/events_daily_to_collection_by_source.sql index 1bd9360bf..a2d6de475 100644 --- a/warehouse/dbt/models/marts/events/daily/events_daily_to_collection_by_source.sql +++ b/warehouse/dbt/models/marts/events/daily/events_daily_to_collection_by_source.sql @@ -8,10 +8,14 @@ }} select - e.collection_id, - e.from_namespace, - e.event_type, - TIMESTAMP_TRUNC(e.time, day) as bucket_day, - SUM(e.amount) as amount -from {{ ref('int_events_to_collection') }} as e -group by 1, 2, 3, 4 + collection_id, + event_source, + event_type, + TIMESTAMP_TRUNC(time, day) as bucket_day, + SUM(amount) as amount +from {{ ref('int_events_to_collection') }} +group by + collection_id, + event_source, + event_type, + TIMESTAMP_TRUNC(time, day) diff --git a/warehouse/dbt/models/marts/events/daily/events_daily_to_project.sql b/warehouse/dbt/models/marts/events/daily/events_daily_to_project.sql index 60b580227..f4a049b01 100644 --- a/warehouse/dbt/models/marts/events/daily/events_daily_to_project.sql +++ b/warehouse/dbt/models/marts/events/daily/events_daily_to_project.sql @@ -8,9 +8,12 @@ }} select - e.project_id, - e.event_type, - TIMESTAMP_TRUNC(e.time, day) as bucket_day, - SUM(e.amount) as amount -from {{ ref('int_events_to_project') }} as e -group by 1, 2, 3 + project_id, + event_type, + TIMESTAMP_TRUNC(time, day) as bucket_day, + SUM(amount) as amount +from {{ ref('int_events_to_project') }} +group by + project_id, + event_type, + TIMESTAMP_TRUNC(time, day) diff --git a/warehouse/dbt/models/marts/events/daily/events_daily_to_project_by_source.sql b/warehouse/dbt/models/marts/events/daily/events_daily_to_project_by_source.sql index 55c67ecd9..7f7340abf 100644 --- a/warehouse/dbt/models/marts/events/daily/events_daily_to_project_by_source.sql +++ b/warehouse/dbt/models/marts/events/daily/events_daily_to_project_by_source.sql @@ -8,10 +8,14 @@ }} select - e.project_id, - e.from_namespace, - e.event_type, - TIMESTAMP_TRUNC(e.time, day) as bucket_day, - SUM(e.amount) as amount -from {{ ref('int_events_to_project') }} as e -group by 1, 2, 3, 4 + project_id, + event_source, + event_type, + TIMESTAMP_TRUNC(time, day) as bucket_day, + SUM(amount) as amount +from {{ ref('int_events_to_project') }} +group by + project_id, + event_source, + event_type, + TIMESTAMP_TRUNC(time, day) diff --git a/warehouse/dbt/models/marts/events/event_indexing_status_by_project_v1.sql b/warehouse/dbt/models/marts/events/event_indexing_status_by_project_v1.sql index 8dba10189..612936c4a 100644 --- a/warehouse/dbt/models/marts/events/event_indexing_status_by_project_v1.sql +++ b/warehouse/dbt/models/marts/events/event_indexing_status_by_project_v1.sql @@ -9,7 +9,7 @@ select projects.project_source, projects.project_namespace, projects.project_name, - events.from_namespace as `artifact_source`, + events.event_source, events.event_type, MIN(events.bucket_day) as first_event_date, MAX(events.bucket_day) as last_event_date, @@ -22,5 +22,5 @@ group by projects.project_source, projects.project_namespace, projects.project_name, - events.from_namespace, + events.event_source, events.event_type diff --git a/warehouse/dbt/models/marts/events/monthly/events_monthly_from_artifact.sql b/warehouse/dbt/models/marts/events/monthly/events_monthly_from_artifact.sql deleted file mode 100644 index 996b98820..000000000 --- a/warehouse/dbt/models/marts/events/monthly/events_monthly_from_artifact.sql +++ /dev/null @@ -1,16 +0,0 @@ -{# - All events monthly from an artifact -#} -{{ - config(meta = { - 'sync_to_db': True - }) -}} - -select - e.artifact_id, - e.event_type, - TIMESTAMP_TRUNC(e.bucket_day, month) as bucket_month, - SUM(e.amount) as amount -from {{ ref('events_daily_from_artifact') }} as e -group by 1, 2, 3 diff --git a/warehouse/dbt/models/marts/events/monthly/events_monthly_from_collection.sql b/warehouse/dbt/models/marts/events/monthly/events_monthly_from_collection.sql deleted file mode 100644 index 760114db6..000000000 --- a/warehouse/dbt/models/marts/events/monthly/events_monthly_from_collection.sql +++ /dev/null @@ -1,17 +0,0 @@ -{# - All events monthly from a collection -#} -{{ - config(meta = { - 'sync_to_db': True - }) -}} - -select - e.collection_id, - e.event_type, - TIMESTAMP_TRUNC(e.bucket_day, month) as bucket_month, - SUM(e.amount) as amount -from {{ ref('events_daily_from_collection') }} as e -where e.collection_id is not null -group by 1, 2, 3 diff --git a/warehouse/dbt/models/marts/events/monthly/events_monthly_from_project.sql b/warehouse/dbt/models/marts/events/monthly/events_monthly_from_project.sql deleted file mode 100644 index 02555a3a2..000000000 --- a/warehouse/dbt/models/marts/events/monthly/events_monthly_from_project.sql +++ /dev/null @@ -1,17 +0,0 @@ -{# - All events monthly from a project -#} -{{ - config(meta = { - 'sync_to_db': True - }) -}} - -select - e.project_id, - e.event_type, - TIMESTAMP_TRUNC(e.bucket_day, month) as bucket_month, - SUM(e.amount) as amount -from {{ ref('events_daily_from_project') }} as e -where e.project_id is not null -group by 1, 2, 3 diff --git a/warehouse/dbt/models/marts/events/monthly/events_monthly_to_artifact.sql b/warehouse/dbt/models/marts/events/monthly/events_monthly_to_artifact.sql index 7cd724b13..2edc6f97a 100644 --- a/warehouse/dbt/models/marts/events/monthly/events_monthly_to_artifact.sql +++ b/warehouse/dbt/models/marts/events/monthly/events_monthly_to_artifact.sql @@ -8,9 +8,12 @@ }} select - e.artifact_id, - e.event_type, - TIMESTAMP_TRUNC(e.bucket_day, month) as bucket_month, - SUM(e.amount) as amount -from {{ ref('events_daily_to_artifact') }} as e -group by 1, 2, 3 + to_artifact_id, + event_type, + TIMESTAMP_TRUNC(bucket_day, month) as bucket_month, + SUM(amount) as amount +from {{ ref('events_daily_to_artifact') }} +group by + to_artifact_id, + event_type, + TIMESTAMP_TRUNC(bucket_day, month) diff --git a/warehouse/dbt/models/marts/events/monthly/events_monthly_to_artifact_by_source.sql b/warehouse/dbt/models/marts/events/monthly/events_monthly_to_artifact_by_source.sql deleted file mode 100644 index cd5d32881..000000000 --- a/warehouse/dbt/models/marts/events/monthly/events_monthly_to_artifact_by_source.sql +++ /dev/null @@ -1,17 +0,0 @@ -{# - All events monthly to an artifact by source -#} -{{ - config(meta = { - 'sync_to_db': False - }) -}} - -select - e.artifact_id, - e.from_namespace, - e.event_type, - TIMESTAMP_TRUNC(e.bucket_day, month) as bucket_month, - SUM(e.amount) as amount -from {{ ref('events_daily_to_artifact_by_source') }} as e -group by 1, 2, 3, 4 diff --git a/warehouse/dbt/models/marts/events/monthly/events_monthly_to_collection.sql b/warehouse/dbt/models/marts/events/monthly/events_monthly_to_collection.sql index 0796c444f..f1bf3d7bd 100644 --- a/warehouse/dbt/models/marts/events/monthly/events_monthly_to_collection.sql +++ b/warehouse/dbt/models/marts/events/monthly/events_monthly_to_collection.sql @@ -8,9 +8,12 @@ }} select - e.collection_id, - e.event_type, - TIMESTAMP_TRUNC(e.bucket_day, month) as bucket_month, - SUM(e.amount) as amount -from {{ ref('events_daily_to_collection') }} as e -group by 1, 2, 3 + collection_id, + event_type, + TIMESTAMP_TRUNC(bucket_day, month) as bucket_month, + SUM(amount) as amount +from {{ ref('events_daily_to_collection') }} +group by + collection_id, + event_type, + TIMESTAMP_TRUNC(bucket_day, month) diff --git a/warehouse/dbt/models/marts/events/monthly/events_monthly_to_collection_by_source.sql b/warehouse/dbt/models/marts/events/monthly/events_monthly_to_collection_by_source.sql index 70b3fdcd4..747fcb6f4 100644 --- a/warehouse/dbt/models/marts/events/monthly/events_monthly_to_collection_by_source.sql +++ b/warehouse/dbt/models/marts/events/monthly/events_monthly_to_collection_by_source.sql @@ -8,10 +8,14 @@ }} select - e.collection_id, - e.from_namespace, - e.event_type, - TIMESTAMP_TRUNC(e.bucket_day, month) as bucket_month, - SUM(e.amount) as amount -from {{ ref('events_daily_to_collection_by_source') }} as e -group by 1, 2, 3, 4 + collection_id, + event_source, + event_type, + TIMESTAMP_TRUNC(bucket_day, month) as bucket_month, + SUM(amount) as amount +from {{ ref('events_daily_to_collection_by_source') }} +group by + collection_id, + event_source, + event_type, + TIMESTAMP_TRUNC(bucket_day, month) diff --git a/warehouse/dbt/models/marts/events/monthly/events_monthly_to_project.sql b/warehouse/dbt/models/marts/events/monthly/events_monthly_to_project.sql index f7dc6ea1e..80e845c5a 100644 --- a/warehouse/dbt/models/marts/events/monthly/events_monthly_to_project.sql +++ b/warehouse/dbt/models/marts/events/monthly/events_monthly_to_project.sql @@ -8,9 +8,12 @@ }} select - e.project_id, - e.event_type, - TIMESTAMP_TRUNC(e.bucket_day, month) as bucket_month, - SUM(e.amount) as amount -from {{ ref('events_daily_to_project') }} as e -group by 1, 2, 3 + project_id, + event_type, + TIMESTAMP_TRUNC(bucket_day, month) as bucket_month, + SUM(amount) as amount +from {{ ref('events_daily_to_project') }} +group by + project_id, + event_type, + TIMESTAMP_TRUNC(bucket_day, month) diff --git a/warehouse/dbt/models/marts/events/monthly/events_monthly_to_project_by_source.sql b/warehouse/dbt/models/marts/events/monthly/events_monthly_to_project_by_source.sql index e73472c20..2b42e6a65 100644 --- a/warehouse/dbt/models/marts/events/monthly/events_monthly_to_project_by_source.sql +++ b/warehouse/dbt/models/marts/events/monthly/events_monthly_to_project_by_source.sql @@ -8,10 +8,14 @@ }} select - e.project_id, - e.from_namespace, - e.event_type, - TIMESTAMP_TRUNC(e.bucket_day, month) as bucket_month, - SUM(e.amount) as amount -from {{ ref('events_daily_to_project_by_source') }} as e -group by 1, 2, 3, 4 + project_id, + event_source, + event_type, + TIMESTAMP_TRUNC(bucket_day, month) as bucket_month, + SUM(amount) as amount +from {{ ref('events_daily_to_project_by_source') }} +group by + project_id, + event_source, + event_type, + TIMESTAMP_TRUNC(bucket_day, month) diff --git a/warehouse/dbt/models/marts/events/weekly/events_weekly_from_artifact.sql b/warehouse/dbt/models/marts/events/weekly/events_weekly_from_artifact.sql deleted file mode 100644 index ffbe7f1f2..000000000 --- a/warehouse/dbt/models/marts/events/weekly/events_weekly_from_artifact.sql +++ /dev/null @@ -1,16 +0,0 @@ -{# - All events weekly from an artifact -#} -{{ - config(meta = { - 'sync_to_db': True - }) -}} - -select - e.artifact_id, - e.event_type, - TIMESTAMP_TRUNC(e.bucket_day, week) as bucket_week, - SUM(e.amount) as amount -from {{ ref('events_daily_from_artifact') }} as e -group by 1, 2, 3 diff --git a/warehouse/dbt/models/marts/events/weekly/events_weekly_from_collection.sql b/warehouse/dbt/models/marts/events/weekly/events_weekly_from_collection.sql deleted file mode 100644 index a059e5a92..000000000 --- a/warehouse/dbt/models/marts/events/weekly/events_weekly_from_collection.sql +++ /dev/null @@ -1,17 +0,0 @@ -{# - All events monthly from a collection -#} -{{ - config(meta = { - 'sync_to_db': True - }) -}} - -select - e.collection_id, - e.event_type, - TIMESTAMP_TRUNC(e.bucket_day, week) as bucket_week, - SUM(e.amount) as amount -from {{ ref('events_daily_from_collection') }} as e -where e.collection_id is not null -group by 1, 2, 3 diff --git a/warehouse/dbt/models/marts/events/weekly/events_weekly_from_project.sql b/warehouse/dbt/models/marts/events/weekly/events_weekly_from_project.sql deleted file mode 100644 index fc4e91054..000000000 --- a/warehouse/dbt/models/marts/events/weekly/events_weekly_from_project.sql +++ /dev/null @@ -1,17 +0,0 @@ -{# - All events weekly from a project -#} -{{ - config(meta = { - 'sync_to_db': True - }) -}} - -select - e.project_id, - e.event_type, - TIMESTAMP_TRUNC(e.bucket_day, week) as bucket_week, - SUM(e.amount) as amount -from {{ ref('events_daily_from_project') }} as e -where e.project_id is not null -group by 1, 2, 3 diff --git a/warehouse/dbt/models/marts/events/weekly/events_weekly_to_artifact.sql b/warehouse/dbt/models/marts/events/weekly/events_weekly_to_artifact.sql index b41c9fd46..ff31f2c50 100644 --- a/warehouse/dbt/models/marts/events/weekly/events_weekly_to_artifact.sql +++ b/warehouse/dbt/models/marts/events/weekly/events_weekly_to_artifact.sql @@ -8,9 +8,12 @@ }} select - e.artifact_id, - e.event_type, - TIMESTAMP_TRUNC(e.bucket_day, week) as bucket_week, - SUM(e.amount) as amount -from {{ ref('events_daily_to_artifact') }} as e -group by 1, 2, 3 + to_artifact_id, + event_type, + TIMESTAMP_TRUNC(bucket_day, week) as bucket_week, + SUM(amount) as amount +from {{ ref('events_daily_to_artifact') }} +group by + to_artifact_id, + event_type, + TIMESTAMP_TRUNC(bucket_day, week) diff --git a/warehouse/dbt/models/marts/events/weekly/events_weekly_to_artifact_by_source.sql b/warehouse/dbt/models/marts/events/weekly/events_weekly_to_artifact_by_source.sql deleted file mode 100644 index 423167c4c..000000000 --- a/warehouse/dbt/models/marts/events/weekly/events_weekly_to_artifact_by_source.sql +++ /dev/null @@ -1,17 +0,0 @@ -{# - All events weekly to an artifact by source -#} -{{ - config(meta = { - 'sync_to_db': False - }) -}} - -select - e.artifact_id, - e.from_namespace, - e.event_type, - TIMESTAMP_TRUNC(e.bucket_day, week) as bucket_week, - SUM(e.amount) as amount -from {{ ref('events_daily_to_artifact_by_source') }} as e -group by 1, 2, 3, 4 diff --git a/warehouse/dbt/models/marts/events/weekly/events_weekly_to_collection.sql b/warehouse/dbt/models/marts/events/weekly/events_weekly_to_collection.sql index 6bc0cf1ce..be89decea 100644 --- a/warehouse/dbt/models/marts/events/weekly/events_weekly_to_collection.sql +++ b/warehouse/dbt/models/marts/events/weekly/events_weekly_to_collection.sql @@ -8,9 +8,12 @@ }} select - e.collection_id, - e.event_type, - TIMESTAMP_TRUNC(e.bucket_day, week) as bucket_week, - SUM(e.amount) as amount -from {{ ref('events_daily_to_collection') }} as e -group by 1, 2, 3 + collection_id, + event_type, + TIMESTAMP_TRUNC(bucket_day, week) as bucket_week, + SUM(amount) as amount +from {{ ref('events_daily_to_collection') }} +group by + collection_id, + event_type, + TIMESTAMP_TRUNC(bucket_day, week) diff --git a/warehouse/dbt/models/marts/events/weekly/events_weekly_to_collection_by_source.sql b/warehouse/dbt/models/marts/events/weekly/events_weekly_to_collection_by_source.sql index d1aa7074a..c64438a56 100644 --- a/warehouse/dbt/models/marts/events/weekly/events_weekly_to_collection_by_source.sql +++ b/warehouse/dbt/models/marts/events/weekly/events_weekly_to_collection_by_source.sql @@ -8,10 +8,14 @@ }} select - e.collection_id, - e.from_namespace, - e.event_type, - TIMESTAMP_TRUNC(e.bucket_day, week) as bucket_week, - SUM(e.amount) as amount -from {{ ref('events_daily_to_collection_by_source') }} as e -group by 1, 2, 3, 4 + collection_id, + event_source, + event_type, + TIMESTAMP_TRUNC(bucket_day, week) as bucket_week, + SUM(amount) as amount +from {{ ref('events_daily_to_collection_by_source') }} +group by + collection_id, + event_source, + event_type, + TIMESTAMP_TRUNC(bucket_day, week) diff --git a/warehouse/dbt/models/marts/events/weekly/events_weekly_to_project.sql b/warehouse/dbt/models/marts/events/weekly/events_weekly_to_project.sql index aecfcb245..d23e1dbe5 100644 --- a/warehouse/dbt/models/marts/events/weekly/events_weekly_to_project.sql +++ b/warehouse/dbt/models/marts/events/weekly/events_weekly_to_project.sql @@ -8,9 +8,12 @@ }} select - e.project_id, - e.event_type, - TIMESTAMP_TRUNC(e.bucket_day, week) as bucket_week, - SUM(e.amount) as amount -from {{ ref('events_daily_to_project') }} as e -group by 1, 2, 3 + project_id, + event_type, + TIMESTAMP_TRUNC(bucket_day, week) as bucket_week, + SUM(amount) as amount +from {{ ref('events_daily_to_project') }} +group by + project_id, + event_type, + TIMESTAMP_TRUNC(bucket_day, week) diff --git a/warehouse/dbt/models/marts/events/weekly/events_weekly_to_project_by_source.sql b/warehouse/dbt/models/marts/events/weekly/events_weekly_to_project_by_source.sql index 4521b0db5..fb001cb86 100644 --- a/warehouse/dbt/models/marts/events/weekly/events_weekly_to_project_by_source.sql +++ b/warehouse/dbt/models/marts/events/weekly/events_weekly_to_project_by_source.sql @@ -3,10 +3,14 @@ #} select - e.project_id, - e.from_namespace, - e.event_type, - TIMESTAMP_TRUNC(e.bucket_day, week) as bucket_week, - SUM(e.amount) as amount -from {{ ref('events_daily_to_project_by_source') }} as e -group by 1, 2, 3, 4 + project_id, + event_source, + event_type, + TIMESTAMP_TRUNC(bucket_day, week) as bucket_week, + SUM(amount) as amount +from {{ ref('events_daily_to_project_by_source') }} +group by + project_id, + event_source, + event_type, + TIMESTAMP_TRUNC(bucket_day, week) diff --git a/warehouse/dbt/models/marts/metrics/code_metrics_by_collection_v1.sql b/warehouse/dbt/models/marts/metrics/code_metrics_by_collection_v1.sql deleted file mode 100644 index 3a33e2b39..000000000 --- a/warehouse/dbt/models/marts/metrics/code_metrics_by_collection_v1.sql +++ /dev/null @@ -1,47 +0,0 @@ -{# - TODO: this should go into a yml file for doc generation - Summary GitHub metrics for a collection: - - first_commit_date: The date of the first commit to the collection - - last_commit_date: The date of the last commit to the collection - - repos: The number of repositories in the collection - - stars: The number of stars the collection has - - forks: The number of forks the collection has - - contributors: The number of contributors to the collection - - contributors_6_months: The number of contributors to the collection in the last 6 months - - new_contributors_6_months: The number of new contributors to the collection in the last 6 months - - avg_fulltime_devs_6_months: The number of full-time developers in the last 6 months - - avg_active_devs_6_months: The average number of active developers in the last 6 months - - commits_6_months: The number of commits to the collection in the last 6 months - - issues_opened_6_months: The number of issues opened in the collection in the last 6 months - - issues_closed_6_months: The number of issues closed in the collection in the last 6 months - - pull_requests_opened_6_months: The number of pull requests opened in the collection in the last 6 months - - pull_requests_merged_6_months: The number of pull requests merged in the collection in the last 6 months -#} -{{ - config(meta = { - 'sync_to_db': True - }) -}} - -select - collection_id, - collection_source, - collection_namespace, - collection_name, - artifact_source, - first_commit_date, - last_commit_date, - repositories as `repository_count_all`, - stars as `star_count_all`, - forks as `fork_count_all`, - contributors as `contributor_count_all`, - contributors_6_months as `contributor_count_6_months`, - new_contributors_6_months as `new_contributors_count_6_months`, - avg_fulltime_devs_6_months as `fulltime_developer_count_6_months`, - avg_active_devs_6_months as `active_developer_count_6_months`, - commits_6_months as `commit_count_6_months`, - issues_opened_6_months as `opened_issue_count_6_months`, - issues_closed_6_months as `closed_issue_count_6_months`, - pull_requests_opened_6_months as `opened_pull_request_count_6_months`, - pull_requests_merged_6_months as `merged_pull_request_count_6_months` -from {{ ref('int_code_metrics_by_collection') }} diff --git a/warehouse/dbt/models/marts/metrics/code_metrics_by_project_v1.sql b/warehouse/dbt/models/marts/metrics/code_metrics_by_project_v1.sql index d145fa998..a8fed6bdc 100644 --- a/warehouse/dbt/models/marts/metrics/code_metrics_by_project_v1.sql +++ b/warehouse/dbt/models/marts/metrics/code_metrics_by_project_v1.sql @@ -28,7 +28,6 @@ select project_source, project_namespace, project_name, - artifact_source, first_commit_date, last_commit_date, repositories as `repository_count_all`, diff --git a/warehouse/dbt/models/marts/metrics/onchain_metrics_by_project_v1.sql b/warehouse/dbt/models/marts/metrics/onchain_metrics_by_project_v1.sql index 4c7fb8cba..8e407d58b 100644 --- a/warehouse/dbt/models/marts/metrics/onchain_metrics_by_project_v1.sql +++ b/warehouse/dbt/models/marts/metrics/onchain_metrics_by_project_v1.sql @@ -29,7 +29,8 @@ select project_source, project_namespace, project_name, - artifact_source, + display_name, + event_source, date_first_txn as `first_transaction_date`, total_txns as `transaction_count_all`, txns_6_months as `transaction_count_6_months`, @@ -41,6 +42,6 @@ select high_activity_addresses as `high_activity_address_count_3_months`, med_activity_addresses as `medium_activity_address_count_3_months`, low_activity_addresses as `low_activity_address_count_3_months`, - multi_project_addresses as `multi_project_address_count_3_months`, + --multi_project_addresses as `multi_project_address_count_3_months`, (new_addresses + returning_addresses) as `address_count_3_months` from {{ ref('int_onchain_metrics_by_project') }} diff --git a/warehouse/dbt/models/staging/oss-directory/stg_ossd__schema.yml b/warehouse/dbt/models/staging/oss-directory/stg_ossd__schema.yml index 344c1c30b..2368537a2 100644 --- a/warehouse/dbt/models/staging/oss-directory/stg_ossd__schema.yml +++ b/warehouse/dbt/models/staging/oss-directory/stg_ossd__schema.yml @@ -1,7 +1,7 @@ version: 2 models: - - name: stg_ossd__artifacts_by_project + - name: int_ossd__artifacts_by_project meta: #... contributors: oso-team, tomfutago @@ -81,7 +81,7 @@ models: description: "JSON array containing blockchain address(es)" - *sync_time - - name: stg_ossd__projects_by_collection + - name: int_ossd__projects_by_collection meta: #... contributors: oso-team, tomfutago @@ -138,7 +138,7 @@ models: name: is_fork description: "is this repo a fork?" - - name: stg_ossd__repositories_by_project + - name: int_ossd__repositories_by_project meta: #... contributors: oso-team, tomfutago