From a5a80e8db34b733e914c391e7d441e6862ced24b Mon Sep 17 00:00:00 2001 From: Claire Carroll Date: Wed, 2 Oct 2019 19:50:44 -0400 Subject: [PATCH 1/3] Add percent of total to compare_queries --- README.md | 10 +++++----- ...xpected_results__compare_relations_with_exclude.csv | 4 ++-- ...cted_results__compare_relations_without_exclude.csv | 8 ++++---- macros/compare_queries.sql | 6 +++++- 4 files changed, 16 insertions(+), 12 deletions(-) diff --git a/README.md b/README.md index 8a15d401..d8fa3fab 100644 --- a/README.md +++ b/README.md @@ -15,11 +15,11 @@ relations. It is largely based on the [equality](https://github.com/fishtown-ana test in dbt-utils. By default, the generated query returns a summary of audit results, like so: -| in_a | in_b | count | -|-------|-------|-------| -| True | True | 6870 | -| True | False | 9 | -| False | True | 9 | +| in_a | in_b | count | percent_of_total | +|-------|-------|------:|-----------------:| +| True | True | 6870 | 99.74 | +| True | False | 9 | 0.13 | +| False | True | 9 | 0.13 | The generated SQL also contains commented-out SQL that you can use to check the rows that do not match perfectly: diff --git a/integration_tests/data/expected_results__compare_relations_with_exclude.csv b/integration_tests/data/expected_results__compare_relations_with_exclude.csv index 86217685..882f4cbd 100644 --- a/integration_tests/data/expected_results__compare_relations_with_exclude.csv +++ b/integration_tests/data/expected_results__compare_relations_with_exclude.csv @@ -1,2 +1,2 @@ -in_a,in_b,count -True,True,2 +in_a,in_b,count,percent_of_total +True,True,2,100.0 diff --git a/integration_tests/data/expected_results__compare_relations_without_exclude.csv b/integration_tests/data/expected_results__compare_relations_without_exclude.csv index ffe667a7..94538309 100644 --- a/integration_tests/data/expected_results__compare_relations_without_exclude.csv +++ b/integration_tests/data/expected_results__compare_relations_without_exclude.csv @@ -1,4 +1,4 @@ -in_a,in_b,count -True,True,1 -True,False,1 -False,True,1 +in_a,in_b,count,percent_of_total +True,True,1,33.33 +True,False,1,33.33 +False,True,1,33.33 diff --git a/macros/compare_queries.sql b/macros/compare_queries.sql index a1bdf4d7..0f5309cb 100644 --- a/macros/compare_queries.sql +++ b/macros/compare_queries.sql @@ -75,7 +75,11 @@ summary_stats as ( -- where not (in_a and in_b) -- order by {{ primary_key ~ ", " if primary_key is not none }} in_a desc, in_b desc -select * from summary_stats +select + *, + round(100.0 * count / sum(count) over (), 2) as percent_of_total + +from summary_stats order by in_a desc, in_b desc {% endmacro %} From e9721e9b6a8c2592167bb72dd7be1a08a6f60227 Mon Sep 17 00:00:00 2001 From: Claire Carroll Date: Wed, 2 Oct 2019 19:50:51 -0400 Subject: [PATCH 2/3] Minor formatting fix --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index d8fa3fab..cf5246ec 100644 --- a/README.md +++ b/README.md @@ -105,7 +105,7 @@ two queries, and summarizes how many records match perfectly (note: a primary key is required to match values across the two queries). | match_status | count | percent_of_total | -|-----------------------------|--------|------------------| +|-----------------------------|-------:|-----------------:| | ✅: perfect match | 37,721 | 79.03 | | ✅: both are null | 5,789 | 12.13 | | 🤷: missing from b | 25 | 0.05 | From b5237771ff8107ad0d38b21a3464f1bf24a2ce5e Mon Sep 17 00:00:00 2001 From: Claire Carroll Date: Wed, 2 Oct 2019 19:56:55 -0400 Subject: [PATCH 3/3] Fix BQ test? --- .../data/expected_results__compare_relations_with_exclude.csv | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/integration_tests/data/expected_results__compare_relations_with_exclude.csv b/integration_tests/data/expected_results__compare_relations_with_exclude.csv index 882f4cbd..0e90c32d 100644 --- a/integration_tests/data/expected_results__compare_relations_with_exclude.csv +++ b/integration_tests/data/expected_results__compare_relations_with_exclude.csv @@ -1,2 +1,2 @@ in_a,in_b,count,percent_of_total -True,True,2,100.0 +True,True,2,100