Skip to content

Commit

Permalink
Updated get_dependencies macro to only show paths from root nodes
Browse files (browse the repository at this point in the history)
  • Loading branch information
graciegoheen committed Jan 10, 2022
1 parent 41cf9e8 commit abb228e
Showing 1 changed file with 51 additions and 24 deletions.
75 changes: 51 additions & 24 deletions macros/get_dependencies.sql
Original file line number | Diff line number | Diff line change
@@ -1,63 +1,90 @@
{% macro get_dependencies() %}

{%- set sql -%}
-- one record for each direct parent & child pair of models in DAG
with direct_parents as (
-- one record for each node in the DAG (models and sources) and its direct parent
with direct_relationships as (

{%- for model in graph.nodes.values() | selectattr("resource_type", "equalto", "model") -%} {# what about sources with no children?? #}
{%- for model in graph.nodes.values() | selectattr("resource_type", "equalto", "model") -%}
{%- set outer_loop = loop -%}

{%- for model_parent in model.refs -%}
{%- if model.depends_on.nodes|length == 0 -%}

select
'{{model.name}}' as model,
'{{model_parent.0}}' as parent,
'model' as parent_type
{% if not outer_loop.last %}union all{% endif %}
NULL as direct_parent,
NULL as direct_parent_type {# is this field still useful? if not, could simplify by looping through depends_on.nodes instead of refs & sources separately #}

{% endfor -%}
{%- else -%}

{%- for source_parent in model.sources -%}
{%- for model_parent in model.refs -%}

select
'{{model.name}}' as model,
'{{model_parent.0}}' as direct_parent,
'model' as direct_parent_type
{% if not loop.last %}union all{% endif %}

{% endfor -%}

{%- for source_parent in model.sources -%}

{% if loop.first and model.refs|length > 0 %}union all{% endif %}
select
'{{model.name}}' as model,
'{{source_parent.0}}.{{source_parent.1}}' as direct_parent,
'source' as direct_parent_type
{% if not loop.last %}union all{% endif %}

{% endfor -%}

{%- endif -%}

select
'{{model.name}}' as model,
'{{source_parent.0}}.{{source_parent.1}}' as parent,
'source' as parent_type
{% if not outer_loop.last %}union all{% endif %}

{% endfor -%}
{% endfor -%}

{%- for source in graph.sources.values() -%}

{%- endfor -%}
{% if loop.first and graph.nodes|length > 0 %}union all{% endif %}
select
'{{source.source_name}}.{{source.name}}' as model,
NULL as direct_parent,
NULL as direct_parent_type
{% if not loop.last %}union all{% endif %}

{% endfor -%}

),

-- recursive CTE
-- one record for every root node and each of its downstream children (including itself)
all_relationships as (

-- anchor
select
parent as parent,
model as parent,
model as child,
1 as distance,
array_construct(parent, child) as path -- snowflake-specific, but helpful for troubleshooting right now
from direct_parents
--where parent_type = 'source'
0 as distance,
array_construct(child) as path {# snowflake-specific, but helpful for troubleshooting right now #}
from direct_relationships
where direct_parent is null

union all

-- recursive clause
select
all_relationships.parent as parent,
direct_parents.model as child,
direct_relationships.model as child,
all_relationships.distance+1 as distance,
array_append(all_relationships.path, direct_parents.model) as path
from direct_parents
array_append(all_relationships.path, direct_relationships.model) as path
from direct_relationships
inner join all_relationships
on all_relationships.child = direct_parents.parent
on all_relationships.child = direct_relationships.direct_parent

)

select * from all_relationships
order by parent, distance

{%- endset -%}

Expand Down

0 comments on commit abb228e

Please sign in to comment.