forked from opensource-observer/oso
-
Notifications
You must be signed in to change notification settings - Fork 1
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
reconnect int models to new schema and naming conventions (opensource…
…-observer#1363) * refactor: int models in directory * fix artifact_name references * fix: naming for repos by project table * big fixes to artifact source and namespace fields * fix: int events union * fix linting error * (broken) fixes to namespaces and event source in int models * fix invocation of macro * complete int model refactor * remove log file --------- Co-authored-by: Reuven V. Gonzales <[email protected]>
- Loading branch information
Showing
85 changed files
with
999 additions
and
1,694 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -34,6 +34,7 @@ data/ | |
*.env | ||
.env*.local | ||
*.log | ||
logs/ | ||
coverage.json | ||
|
||
# typescript | ||
|
67 changes: 30 additions & 37 deletions
67
warehouse/dbt/models/intermediate/directory/int_artifacts.sql
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,47 +1,40 @@ | ||
with ossd_artifacts as ( | ||
select distinct | ||
artifact_source_id, | ||
artifact_namespace, | ||
artifact_type, | ||
artifact_url, | ||
LOWER(artifact_name) as artifact_name | ||
from {{ ref('stg_ossd__artifacts_by_project') }} | ||
), | ||
|
||
from_artifacts as ( | ||
{# `from` actor artifacts derived from all events #} | ||
select | ||
from_source_id as artifact_source_id, | ||
from_namespace as artifact_namespace, | ||
from_type as artifact_type, | ||
"" as artifact_url, {# for now this is blank #} | ||
LOWER(from_name) as artifact_name, | ||
MAX(e.time) as last_used | ||
from {{ ref('int_events') }} as e | ||
group by 1, 2, 3, 4, 5 | ||
), | ||
|
||
all_artifacts as ( | ||
with all_artifacts as ( | ||
{# | ||
The `last_used` value is later used in this query to determine what the most | ||
_current_ name is. However, oss-directory names are considered canonical so | ||
we will use those by setting `last_used` to be the current timestamp. | ||
`last_used` is only relevant for `git_user` artifacts. | ||
#} | ||
select | ||
oa.*, | ||
CURRENT_TIMESTAMP() as last_used | ||
from ossd_artifacts as oa | ||
artifact_source_id, | ||
artifact_source, | ||
artifact_type, | ||
artifact_namespace, | ||
artifact_url, | ||
artifact_name | ||
from {{ ref('int_ossd__artifacts_by_project') }} | ||
union all | ||
select * from from_artifacts | ||
select | ||
artifact_source_id, | ||
artifact_source, | ||
artifact_type, | ||
artifact_namespace, | ||
artifact_url, | ||
MAX_BY(artifact_name, last_used) as artifact_name | ||
from {{ ref('int_artifacts_history') }} | ||
group by | ||
artifact_source_id, | ||
artifact_source, | ||
artifact_type, | ||
artifact_namespace, | ||
artifact_url | ||
) | ||
|
||
select | ||
select distinct | ||
{{ oso_artifact_id("artifact") }} as artifact_id, | ||
artifact_source_id as artifact_source_id, | ||
artifact_namespace as artifact_namespace, | ||
artifact_type as artifact_type, | ||
artifact_url as artifact_url, | ||
TO_JSON(ARRAY_AGG(distinct artifact_name)) as artifact_names, | ||
MAX_BY(artifact_name, last_used) as artifact_latest_name | ||
artifact_source_id, | ||
artifact_source, | ||
artifact_type, | ||
artifact_namespace, | ||
artifact_name, | ||
artifact_url | ||
from all_artifacts | ||
group by 1, 2, 3, 4, 5 |
35 changes: 35 additions & 0 deletions
35
warehouse/dbt/models/intermediate/directory/int_artifacts_history.sql
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,35 @@ | ||
{# | ||
  Name-usage history for artifacts that appear as the `from` actor of events. | ||
  Emits one row per distinct (source id, source, type, namespace, url, name) | ||
  combination, with the earliest and latest event time seen for it. | ||
  Currently this only captures the history of git_users. | ||
  It does not capture git_repo naming histories. | ||
  NOTE(review): no artifact-type filter is applied in this model, so the | ||
  git_user restriction depends on what int_events supplies -- confirm. | ||
#} | ||
|
||
with actor_events as ( | ||
  {# every event, with its `from` actor columns renamed to artifact columns #} | ||
  select | ||
    events.event_source as artifact_source, | ||
    events.from_artifact_source_id as artifact_source_id, | ||
    events.from_artifact_type as artifact_type, | ||
    events.from_artifact_namespace as artifact_namespace, | ||
    events.from_artifact_name as artifact_name, | ||
    "" as artifact_url, | ||
    events.time as event_time | ||
  from {{ ref('int_events') }} as events | ||
) | ||
|
||
select | ||
  actor_events.artifact_source_id, | ||
  actor_events.artifact_source, | ||
  actor_events.artifact_type, | ||
  actor_events.artifact_namespace, | ||
  actor_events.artifact_url, | ||
  actor_events.artifact_name, | ||
  MAX(actor_events.event_time) as last_used, | ||
  MIN(actor_events.event_time) as first_used | ||
from actor_events | ||
group by | ||
  actor_events.artifact_source_id, | ||
  actor_events.artifact_source, | ||
  actor_events.artifact_type, | ||
  actor_events.artifact_namespace, | ||
  actor_events.artifact_url, | ||
  actor_events.artifact_name |
178 changes: 178 additions & 0 deletions
178
warehouse/dbt/models/intermediate/directory/int_ossd__artifacts_by_project.sql
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,178 @@ | ||
{# | ||
This model is responsible for generating a list of all artifacts associated with a project. | ||
This includes repositories, npm packages, blockchain addresses, and contracts. | ||
|
||
Currently, the source and namespace for blockchain artifacts are the same. This may change | ||
in the future. | ||
#} | ||
|
||
with all_repos as ( | ||
  {# | ||
    Rows of int_ossd__repositories_by_project relabelled as GITHUB / REPOSITORY | ||
    artifacts; the repository id is cast to STRING to serve as the source id. | ||
  #} | ||
  select | ||
    "GITHUB" as artifact_source, | ||
    "REPOSITORY" as artifact_type, | ||
    repo.project_id as project_id, | ||
    repo.owner as artifact_namespace, | ||
    repo.name_with_owner as artifact_name, | ||
    repo.url as artifact_url, | ||
    CAST(repo.id as STRING) as artifact_source_id | ||
  from {{ ref('int_ossd__repositories_by_project') }} as repo | ||
), | ||
|
||
all_npm_raw as ( | ||
  {# | ||
    One row per npm URL listed on a project. The package name is whatever | ||
    follows the npmjs.com "/package/" prefix (with or without "www."). | ||
    URLs matching neither prefix get a NULL artifact_name. | ||
    The SUBSTR start position is derived from the prefix length so the first | ||
    character of the package name is never cut off (SUBSTR is 1-indexed, so | ||
    the name begins at LENGTH(prefix) + 1). | ||
  #} | ||
  select | ||
    "NPM" as artifact_source, | ||
    "PACKAGE" as artifact_type, | ||
    projects.project_id, | ||
    JSON_VALUE(npm.url) as artifact_source_id, | ||
    case | ||
      when | ||
        JSON_VALUE(npm.url) like "https://npmjs.com/package/%" | ||
        then | ||
          SUBSTR( | ||
            JSON_VALUE(npm.url), | ||
            LENGTH("https://npmjs.com/package/") + 1 | ||
          ) | ||
      when | ||
        JSON_VALUE(npm.url) like "https://www.npmjs.com/package/%" | ||
        then | ||
          SUBSTR( | ||
            JSON_VALUE(npm.url), | ||
            LENGTH("https://www.npmjs.com/package/") + 1 | ||
          ) | ||
    end as artifact_name, | ||
    JSON_VALUE(npm.url) as artifact_url | ||
  from | ||
    {{ ref('stg_ossd__current_projects') }} as projects | ||
  cross join | ||
    UNNEST(JSON_QUERY_ARRAY(projects.npm)) as npm | ||
), | ||
|
||
all_npm as ( | ||
  {# | ||
    npm rows plus a namespace: every "@" is removed from the package name and | ||
    the first "/"-separated segment is kept (SAFE_OFFSET yields NULL rather | ||
    than an error when there is no segment). | ||
  #} | ||
  select | ||
    npm_raw.project_id, | ||
    npm_raw.artifact_source_id, | ||
    npm_raw.artifact_source, | ||
    npm_raw.artifact_type, | ||
    npm_raw.artifact_name, | ||
    npm_raw.artifact_url, | ||
    SPLIT(REPLACE(npm_raw.artifact_name, "@", ""), "/")[SAFE_OFFSET(0)] | ||
      as artifact_namespace | ||
  from all_npm_raw as npm_raw | ||
), | ||
|
||
ossd_blockchain as ( | ||
  {# | ||
    Expands each project's `blockchain` JSON array into one row per | ||
    (address, network, tag) combination. The network is used for both the | ||
    namespace and the source; the address fills the id, name and url columns. | ||
  #} | ||
  select | ||
    projects.project_id, | ||
    tag_value as artifact_type, | ||
    network_value as artifact_namespace, | ||
    network_value as artifact_source, | ||
    JSON_VALUE(chain_entry.address) as artifact_source_id, | ||
    JSON_VALUE(chain_entry.address) as artifact_name, | ||
    JSON_VALUE(chain_entry.address) as artifact_url | ||
  from | ||
    {{ ref('stg_ossd__current_projects') }} as projects | ||
  cross join | ||
    UNNEST(JSON_QUERY_ARRAY(projects.blockchain)) as chain_entry | ||
  cross join | ||
    UNNEST(JSON_VALUE_ARRAY(chain_entry.networks)) as network_value | ||
  cross join | ||
    UNNEST(JSON_VALUE_ARRAY(chain_entry.tags)) as tag_value | ||
), | ||
|
||
all_deployers as ( | ||
  {# | ||
    Stacks the per-network deployer staging models, tagging every row with a | ||
    hard-coded network label used as both namespace and source. | ||
    NOTE(review): `select *` combined with `union all` aligns columns by | ||
    position, so this presumably relies on the three staging models sharing | ||
    one column layout -- confirm. | ||
  #} | ||
  select | ||
    *, | ||
    "OPTIMISM" as artifact_namespace, | ||
    "OPTIMISM" as artifact_source | ||
  from {{ ref("stg_optimism__deployers") }} | ||
  union all | ||
  select | ||
    *, | ||
    "MAINNET" as artifact_namespace, | ||
    "MAINNET" as artifact_source | ||
  from {{ ref("stg_ethereum__deployers") }} | ||
  union all | ||
  select | ||
    *, | ||
    "ARBITRUM_ONE" as artifact_namespace, | ||
    "ARBITRUM_ONE" as artifact_source | ||
  from {{ ref("stg_arbitrum__deployers") }} | ||
), | ||
|
||
discovered_contracts as ( | ||
  {# | ||
    Contracts attributed to a project because one of the project's listed | ||
    addresses (tagged EOA, DEPLOYER or FACTORY) deployed them on the same | ||
    network. | ||
    The join compares case-insensitively: oss-directory networks, tags and | ||
    addresses are only case-normalized later (in all_unique_artifacts), while | ||
    all_deployers labels its rows with uppercase network literals. Comparing | ||
    the raw values directly would silently drop rows that differ only by case. | ||
  #} | ||
  select | ||
    "CONTRACT" as artifact_type, | ||
    ob.project_id, | ||
    ad.contract_address as artifact_source_id, | ||
    ob.artifact_namespace, | ||
    ob.artifact_namespace as artifact_source, | ||
    ad.contract_address as artifact_name, | ||
    ad.contract_address as artifact_url | ||
  from ossd_blockchain as ob | ||
  inner join all_deployers as ad | ||
    on | ||
      LOWER(ob.artifact_source_id) = LOWER(ad.deployer_address) | ||
      and UPPER(ob.artifact_namespace) = ad.artifact_namespace | ||
      and UPPER(ob.artifact_type) in ("EOA", "DEPLOYER", "FACTORY") | ||
), | ||
|
||
all_artifacts as ( | ||
  {# | ||
    Stacks every artifact-producing CTE into a single relation. Each branch | ||
    projects the same seven columns by name, so the column order inside the | ||
    individual CTEs does not matter. The loop renders one `select` per CTE, | ||
    joined by `union all`. | ||
  #} | ||
  {% for artifact_cte in [ | ||
    "all_repos", | ||
    "ossd_blockchain", | ||
    "discovered_contracts", | ||
    "all_npm" | ||
  ] %} | ||
    select | ||
      project_id, | ||
      artifact_source_id, | ||
      artifact_source, | ||
      artifact_type, | ||
      artifact_namespace, | ||
      artifact_name, | ||
      artifact_url | ||
    from | ||
      {{ artifact_cte }} | ||
    {% if not loop.last %} | ||
      union all | ||
    {% endif %} | ||
  {% endfor %} | ||
), | ||
|
||
all_unique_artifacts as ( | ||
  {# | ||
    Case-normalizes the artifact columns (ids, names and urls to lower case; | ||
    source, type and namespace to upper case) and then removes duplicate rows. | ||
  #} | ||
  select distinct | ||
    unioned.project_id, | ||
    LOWER(unioned.artifact_source_id) as artifact_source_id, | ||
    UPPER(unioned.artifact_source) as artifact_source, | ||
    UPPER(unioned.artifact_type) as artifact_type, | ||
    UPPER(unioned.artifact_namespace) as artifact_namespace, | ||
    LOWER(unioned.artifact_name) as artifact_name, | ||
    LOWER(unioned.artifact_url) as artifact_url | ||
  from all_artifacts as unioned | ||
) | ||
|
||
{# Final projection: the de-duplicated artifacts plus the id produced by the oso_artifact_id macro. #} | ||
select | ||
  a.project_id, | ||
  a.artifact_source_id, | ||
  a.artifact_source, | ||
  a.artifact_type, | ||
  a.artifact_namespace, | ||
  a.artifact_name, | ||
  a.artifact_url, | ||
  {{ oso_artifact_id("artifact", "a") }} as `artifact_id` | ||
from all_unique_artifacts as a |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.