From 3ae5c91646b9eab410d1a0b24c5e983b50342549 Mon Sep 17 00:00:00 2001
From: Claire Carroll
Date: Mon, 1 Jul 2019 15:16:33 -0400
Subject: [PATCH] Add compare_queries macro

Add a `compare_queries` macro that compares the rows returned by two
select statements, and move the comparison SQL out of `compare_relations`.
`compare_relations` now builds one select statement per relation and
delegates to `compare_queries`, forwarding `primary_key`.

`compare_queries` takes an optional `summarize` argument (default true).
When it is false the macro returns the unmatched rows, sorted by
`primary_key` when one is given, instead of the summary counts.
---
 README.md                    |  37 ++++++++++++
 macros/compare_queries.sql   | 110 +++++++++++++++++++++++++++++++++
 macros/compare_relations.sql |  92 +++--------------------------
 3 files changed, 158 insertions(+), 81 deletions(-)
 create mode 100644 macros/compare_queries.sql

diff --git a/README.md b/README.md
index 6cd0b189..c4ff96e9 100644
--- a/README.md
+++ b/README.md
@@ -59,6 +59,43 @@ Arguments:
 * `primary_key` (optional): The primary key of the model. Used to sort unmatched
   results for row-by-row validation.
 
+## compare_queries ([source](macros/compare_queries.sql))
+Super similar to `compare_relations`, except it takes two select statements. This macro is useful when:
+* You need to filter out records from one of the relations
+* You need to rename or recast some columns to get them to match up
+* You only want to compare a small number of columns, so it's easier to write the columns you want to compare, rather than the columns you want to exclude
+
+```sql
+{# in dbt Develop #}
+
+{% set old_fct_orders_query %}
+  select
+    id as order_id,
+    amount,
+    customer_id
+  from old_etl_schema.fct_orders
+{% endset %}
+
+{% set new_fct_orders_query %}
+  select
+    order_id,
+    amount,
+    customer_id
+  from {{ ref('fct_orders') }}
+{% endset %}
+
+{{ audit_helper.compare_queries(
+    a_query=old_fct_orders_query,
+    b_query=new_fct_orders_query,
+    primary_key="order_id"
+) }}
+```
+
+Arguments:
+* `a_query` and `b_query`: The select statements you want to compare.
+* `primary_key` (optional): The primary key of the queries. Used to sort unmatched results when `summarize` is `false`.
+* `summarize` (optional, default `true`): Return a count of rows per `in_a` / `in_b` combination. Set to `false` to return the unmatched rows themselves.
+
 # To-do:
 * Macro to check if two models have the same structure
 * Macro to check if two schemas contain the same relations
diff --git a/macros/compare_queries.sql b/macros/compare_queries.sql
new file mode 100644
index 00000000..64db9c37
--- /dev/null
+++ b/macros/compare_queries.sql
@@ -0,0 +1,110 @@
+{#
+    Generates SQL that compares the rows returned by two select statements.
+
+    Arguments:
+        a_query, b_query: The select statements to compare. They are combined
+            with set operators over `select *`, so both should return the
+            same columns in the same order.
+        primary_key (optional): Column(s) used to sort the unmatched rows when
+            `summarize` is false.
+        summarize (optional, default true): When true, return the number of
+            rows for each (in_a, in_b) combination. When false, return the
+            rows that are not present in both queries.
+#}
+{% macro compare_queries(a_query, b_query, primary_key=None, summarize=true) %}
+
+with a as (
+
+    {{ a_query }}
+
+),
+
+b as (
+
+    {{ b_query }}
+
+),
+
+-- rows returned by both queries
+a_intersect_b as (
+
+    select * from a
+    {{ dbt_utils.intersect() }}
+    select * from b
+
+),
+
+-- rows returned by a_query only
+a_except_b as (
+
+    select * from a
+    {{ dbt_utils.except() }}
+    select * from b
+
+),
+
+-- rows returned by b_query only
+b_except_a as (
+
+    select * from b
+    {{ dbt_utils.except() }}
+    select * from a
+
+),
+
+-- all rows, flagged with the query (or queries) that returned them
+all_records as (
+
+    select
+        *,
+        true as in_a,
+        true as in_b
+    from a_intersect_b
+
+    union all
+
+    select
+        *,
+        true as in_a,
+        false as in_b
+    from a_except_b
+
+    union all
+
+    select
+        *,
+        false as in_a,
+        true as in_b
+    from b_except_a
+
+),
+
+-- row count per (in_a, in_b) combination
+summary_stats as (
+
+    select
+        in_a,
+        in_b,
+        count(*) as count
+    from all_records
+
+    group by in_a, in_b
+
+)
+
+{% if summarize %}
+
+select * from summary_stats
+
+order by in_a desc, in_b desc
+
+{% else %}
+
+select * from all_records
+where not (in_a and in_b)
+
+order by {{ primary_key ~ ", " if primary_key is not none else "" }} in_a desc, in_b desc
+
+{% endif %}
+
+{% endmacro %}
diff --git a/macros/compare_relations.sql b/macros/compare_relations.sql
index e40af1ae..400797d0 100644
--- a/macros/compare_relations.sql
+++ b/macros/compare_relations.sql
@@ -21,90 +21,20 @@
 
 {% set check_cols_csv = check_columns | map(attribute='quoted') | join(', ') %}
 
+{% set a_query %}
+select
+    {{ check_cols_csv }}
 
-with a as (
+from {{ a_relation }}
+{% endset %}
 
-    select
-        {{ check_cols_csv }}
+{% set b_query %}
+select
+    {{ check_cols_csv }}
 
-    from {{ a_relation }}
+from {{ b_relation }}
+{% endset %}
 
-),
-
-b as (
-
-    select
-        {{ check_cols_csv }}
-
-    from {{ b_relation }}
-
-),
-
-a_intersect_b as (
-
-    select * from a
-    {{ dbt_utils.intersect() }}
-    select * from b
-
-),
-
-a_except_b as (
-
-    select * from a
-    {{ dbt_utils.except() }}
-    select * from b
-
-),
-
-b_except_a as (
-
-    select * from b
-    {{ dbt_utils.except() }}
-    select * from a
-
-),
-
-all_records as (
-
-    select
-        *,
-        true as in_a,
-        true as in_b
-    from a_intersect_b
-
-    union all
-
-    select
-        *,
-        true as in_a,
-        false as in_b
-    from a_except_b
-
-    union all
-
-    select
-        *,
-        false as in_a,
-        true as in_b
-    from b_except_a
-
-),
-
-summary_stats as (
-    select
-        in_a,
-        in_b,
-        count(*) as count
-    from all_records
-
-    group by 1, 2
-)
--- select * from all_records
--- where not (in_a and in_b)
--- order by {{ primary_key ~ ", " if primary_key is not none }} in_a desc, in_b desc
-
-select * from summary_stats
-
-order by in_a desc, in_b desc
+{{ audit_helper.compare_queries(a_query, b_query, primary_key) }}
 
 {% endmacro %}