Skip to content

Commit

Permalink
changed macro into model and built first fct_ model to check for dag …
Browse files Browse the repository at this point in the history
…issues
  • Loading branch information
graciegoheen committed Jan 11, 2022
1 parent f3aca96 commit 2c5672a
Show file tree
Hide file tree
Showing 5 changed files with 143 additions and 93 deletions.
93 changes: 0 additions & 93 deletions macros/get_dependencies.sql

This file was deleted.

9 changes: 9 additions & 0 deletions models/audit/dag/dag.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
version: 2

# Properties for the DAG audit models.
models:
  # Staging model: flattened node-to-descendant relationships.
  - name: stg_dag_relationships
    description: "This table shows one record for every node and each of its downstream children (including itself)."

  # Audit model: any row returned here is a finding, so the model is tested for emptiness.
  - name: fct_source_fanout
    description: "This table shows each parent/child relationship where a source is the direct parent of multiple nodes in the DAG."
    tests:
      - is_empty
28 changes: 28 additions & 0 deletions models/audit/dag/fct_source_fanout.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
-- this model finds cases where a source is used in multiple direct downstream models
with direct_source_relationships as (
    -- keep only the edges whose parent is a source and whose child is
    -- exactly one step downstream of it
    select
        *
    from {{ ref('stg_dag_relationships') }}
    where distance = 1
        and parent_type = 'source'
),

-- sources that are the direct parent of more than one node
source_fanout as (
    select
        parent,
        -- aliased so the CTE exposes a usable column name on every warehouse
        count(*) as direct_child_count
    from direct_source_relationships
    group by parent
    having count(*) > 1
),

-- one row per offending source/child edge
final as (
    select
        direct_source_relationships.*
    from direct_source_relationships
    inner join source_fanout
        on direct_source_relationships.parent = source_fanout.parent
)

-- ordering is applied on the outermost select: an order by inside a CTE
-- is not guaranteed to survive into the final result
select * from final
order by parent
95 changes: 95 additions & 0 deletions models/audit/dag/stg_dag_relationships.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,95 @@
-- TO DO: only include ENABLED nodes
-- TO DO: exclude models that are part of the audit package
-- can use package_name attribute in final version
-- TO DO: fix whitespace

-- one record for each node in the DAG (models and sources) and its direct parent
with direct_relationships as (

    {#- Tracks whether at least one select has already been rendered, so that
        "union all" is emitted only between selects. The raw length of
        graph.nodes is not used for this because the loop below is filtered to
        models, so a non-empty graph.nodes does not imply a select was rendered. -#}
    {%- set union_state = namespace(needs_union=false) %}

    {%- for model in graph.nodes.values() | selectattr("resource_type", "equalto", "model") %}
        {#- a model without parents still gets one row, with a NULL parent id -#}
        {%- set parent_ids = model.depends_on.nodes if model.depends_on.nodes | length > 0 else [none] %}
        {%- for model_parent in parent_ids %}

    {% if union_state.needs_union %}union all{% endif %}
    select
        '{{ model.name }}' as node,
        '{{ model.unique_id }}' as node_id,
        'model' as node_type,
        {% if model_parent is none %}NULL{% else %}'{{ model_parent }}'{% endif %} as direct_parent_id
            {%- set union_state.needs_union = true %}
        {%- endfor %}
    {%- endfor %}

    {#- sources are rendered as nodes without a parent -#}
    {%- for source in graph.sources.values() %}

    {% if union_state.needs_union %}union all{% endif %}
    select
        '{{ source.source_name }}.{{ source.name }}' as node,
        '{{ source.unique_id }}' as node_id,
        'source' as node_type,
        NULL as direct_parent_id
        {%- set union_state.needs_union = true %}
    {%- endfor %}

),

-- recursive CTE
-- one record for every node and each of its downstream children (including itself)
all_relationships as (
    -- anchor: every node paired with itself at distance 0
    -- (distinct because direct_relationships repeats a node once per parent)
    select distinct
        node as parent,
        node_id as parent_id,
        node_type as parent_type,
        node as child,
        node_id as child_id,
        0 as distance,
        array_construct(node) as path {# snowflake-specific, but helpful for troubleshooting right now #}
    from direct_relationships
    -- where direct_parent_id is null {# optional lever to change filtering of anchor clause to only include root nodes #}

    union all

    -- recursive clause: extend each known path by one edge, from the current
    -- child to every node that lists it as a direct parent
    select
        all_relationships.parent as parent,
        all_relationships.parent_id as parent_id,
        all_relationships.parent_type as parent_type,
        direct_relationships.node as child,
        direct_relationships.node_id as child_id,
        all_relationships.distance + 1 as distance,
        array_append(all_relationships.path, direct_relationships.node) as path
    from direct_relationships
    inner join all_relationships
        on all_relationships.child_id = direct_relationships.direct_parent_id
),

-- expose names only; the unique ids are used for joining above and dropped here
final as (
    select
        parent,
        parent_type,
        child,
        distance,
        path
    from all_relationships
)

select * from final
order by parent, distance
11 changes: 11 additions & 0 deletions tests/generic/test_is_empty.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
{% test is_empty(model) %}

    {#- Generic test configured at warn severity. The query returns a single
        row holding the row count of the tested relation, and fail_calc points
        at that column. -#}
    {{
        config(
            severity = 'warn',
            fail_calc = "n_records"
        )
    }}

    select
        count(*) as n_records
    from {{ model }}

{% endtest %}

0 comments on commit 2c5672a

Please sign in to comment.