Skip to content

Commit

Permalink
Add compare_queries macro
Browse files Browse the repository at this point in the history
  • Loading branch information
clrcrl committed Jul 3, 2019
1 parent 2559376 commit 3ae5c91
Show file tree
Hide file tree
Showing 3 changed files with 127 additions and 81 deletions.
34 changes: 34 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -59,6 +59,40 @@ Arguments:
* `primary_key` (optional): The primary key of the model. Used to sort unmatched
results for row-by-row validation.

## compare_queries ([source](macros/compare_queries.sql))
Very similar to `compare_relations`, except that it takes two select statements instead of two relations. This macro is useful when:
* You need to filter out records from one of the relations
* You need to rename or recast some columns to get them to match up
* You only want to compare a small number of columns, so it's easier to write the columns you want to compare, rather than the columns you want to exclude.

```sql
{# in dbt Develop #}

{% set old_fct_orders_query %}
select
id as order_id,
amount,
customer_id
from old_etl_schema.fct_orders
{% endset %}

{% set new_fct_orders_query %}
select
order_id,
amount,
customer_id
from {{ ref('fct_orders') }}
{% endset %}

{{ audit_helper.compare_queries(
a_query=old_fct_orders_query,
b_query=new_fct_orders_query,
primary_key="order_id"
) }}


```

# To-do:
* Macro to check if two models have the same structure
* Macro to check if two schemas contain the same relations
Expand Down
82 changes: 82 additions & 0 deletions macros/compare_queries.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,82 @@
{% macro compare_queries(a_query, b_query, primary_key=None, summarize=True) %}
{#
    Renders a SQL query that compares the result sets of two select statements.

    Arguments:
      a_query:     SQL text of the first select statement (labelled "a").
      b_query:     SQL text of the second select statement (labelled "b").
                   Both queries are combined with set operators, so they must
                   return the same columns, in the same order.
      primary_key: (optional) column name used to sort the row-level output.
                   Only used when summarize is false.
      summarize:   (optional, default true) when true, return one row per
                   (in_a, in_b) combination with a record count; when false,
                   return the individual records that appear in only one of
                   the two queries.

    The set operators come from dbt_utils.intersect() / dbt_utils.except(),
    which presumably render the adapter-appropriate keyword -- confirm against
    the installed dbt_utils version.
#}

with a as (

    {{ a_query }}

),

b as (

    {{ b_query }}

),

-- records returned by both queries
a_intersect_b as (

    select * from a
    {{ dbt_utils.intersect() }}
    select * from b

),

-- records returned by a only
a_except_b as (

    select * from a
    {{ dbt_utils.except() }}
    select * from b

),

-- records returned by b only
b_except_a as (

    select * from b
    {{ dbt_utils.except() }}
    select * from a

),

-- every record, flagged with the side(s) it was found on
all_records as (

    select
        *,
        true as in_a,
        true as in_b
    from a_intersect_b

    union all

    select
        *,
        true as in_a,
        false as in_b
    from a_except_b

    union all

    select
        *,
        false as in_a,
        true as in_b
    from b_except_a

){% if summarize %},

summary_stats as (

    select
        in_a,
        in_b,
        count(*) as count
    from all_records
    group by in_a, in_b

)

select * from summary_stats
order by in_a desc, in_b desc
{% else %}

-- row-by-row validation: only the records that do not match on both sides
select * from all_records
where not (in_a and in_b)
order by {% if primary_key is not none %}{{ primary_key }}, {% endif %}in_a desc, in_b desc
{% endif %}

{% endmacro %}
92 changes: 11 additions & 81 deletions macros/compare_relations.sql
Original file line number Diff line number Diff line change
Expand Up @@ -21,90 +21,20 @@

{% set check_cols_csv = check_columns | map(attribute='quoted') | join(', ') %}

{% set a_query %}
select
{{ check_cols_csv }}

with a as (
from {{ a_relation }}
{% endset %}

select
{{ check_cols_csv }}
{% set b_query %}
select
{{ check_cols_csv }}

from {{ a_relation }}
from {{ b_relation }}
{% endset %}

),

b as (

select
{{ check_cols_csv }}

from {{ b_relation }}

),

a_intersect_b as (

select * from a
{{ dbt_utils.intersect() }}
select * from b

),

a_except_b as (

select * from a
{{ dbt_utils.except() }}
select * from b

),

b_except_a as (

select * from b
{{ dbt_utils.except() }}
select * from a

),

all_records as (

select
*,
true as in_a,
true as in_b
from a_intersect_b

union all

select
*,
true as in_a,
false as in_b
from a_except_b

union all

select
*,
false as in_a,
true as in_b
from b_except_a

),

summary_stats as (
select
in_a,
in_b,
count(*) as count
from all_records

group by 1, 2
)
-- select * from all_records
-- where not (in_a and in_b)
-- order by {{ primary_key ~ ", " if primary_key is not none }} in_a desc, in_b desc

select * from summary_stats

order by in_a desc, in_b desc
{{ audit_helper.compare_queries(a_query, b_query) }}

{% endmacro %}

0 comments on commit 3ae5c91

Please sign in to comment.