Skip to content

Commit

Permalink
[Bigquery] unit test tests + support for complex types (#1031)
Browse files Browse the repository at this point in the history
* first pass: unit test typing

* expand test coverage, update safe_cast

* TestBigQueryUnitTestCaseInsensitivity

* changelog entry

* structs

* structs of structs, arrays of structs, clean up safe_cast

* typos

* restore dev-requirements

* changelog entry
  • Loading branch information
MichelleArk authored Feb 9, 2024
1 parent 1b04762 commit ea3abee
Show file tree
Hide file tree
Showing 3 changed files with 95 additions and 0 deletions.
7 changes: 7 additions & 0 deletions .changes/unreleased/Features-20240205-174614.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
kind: Features
body: Support all types for unit testing in dbt-bigquery, expand coverage of safe_cast macro
time: 2024-02-05T17:46:14.505597-05:00
custom:
  Author: michelleark
  Issue: "1090"
24 changes: 24 additions & 0 deletions dbt/include/bigquery/macros/utils/safe_cast.sql
Original file line number Diff line number Diff line change
@@ -1,3 +1,27 @@
{% macro bigquery__safe_cast(field, type) %}
{#--
  Render a BigQuery `safe_cast(... as ...)` SQL expression for `field` and `type`.

  Branches, evaluated in order:
    1. `type` starts with 'array' (case-insensitive) and `field` is a non-empty
       iterable that is neither a string nor a mapping: cast element by element
       and re-aggregate with array_agg.
    2. `type` is 'json' (case-insensitive) and `field` is a mapping: serialize the
       mapping with tojson and cast the resulting JSON literal.
    3. cast_from_string_unsupported_for(type) is true and `field` is a string:
       strip surrounding quotes so the value is emitted as a raw SQL expression.
    4. Anything else (including empty arrays, which fail the length check in
       branch 1): plain safe_cast of `field` as `type`.
--#}
{%- if type.lower().startswith('array') and field is iterable and (field is not string and field is not mapping) and field | length > 0 -%}
{#-- Extract nested type from 'array<nested_type>': drop the first 6 characters and the last one of the lowercased type --#}
{#-- NOTE(review): the fixed slice presumably assumes no whitespace between 'array' and '<' and no trailing whitespace; confirm against callers --#}
{% set nested_type = type.lower()[6:-1] %}
{#-- BigQuery does not support direct casts to arrays. Instead, each element must be cast individually and reaggregated into an array --#}
{%- if cast_from_string_unsupported_for(nested_type) %}
{#-- Emit each element with leading/trailing double quotes, then single quotes, stripped, comma-separated inside the array literal --#}
(select array_agg(safe_cast(i as {{ nested_type }})) from unnest([
{%- for nested_field in field %}
{{ nested_field.strip('"').strip("'") }}{{ ',' if not loop.last }}
{%- endfor %}
]) i)
{%- else -%}
{#-- Other nested types: render the iterable as-is and cast every unnested element --#}
(select array_agg(safe_cast(i as {{nested_type}})) from unnest({{field}}) i)
{%- endif -%}

{#-- Mapping destined for a JSON column: serialize with tojson and wrap it via dbt.string_literal after the `json` keyword --#}
{%- elif type.lower() == 'json' and field is mapping -%}
safe_cast(json {{ dbt.string_literal(tojson(field)) }} as json)
{#-- String value for a type flagged by cast_from_string_unsupported_for: strip surrounding double, then single, quotes before casting --#}
{%- elif cast_from_string_unsupported_for(type) and field is string -%}
safe_cast({{field.strip('"').strip("'")}} as {{type}})
{%- else -%}
{#-- Default: cast the value exactly as provided --#}
safe_cast({{field}} as {{type}})
{%- endif -%}
{% endmacro %}

{% macro cast_from_string_unsupported_for(type) %}
    {#-- Return true when the lowercased type starts with 'struct' or equals 'geography'.
         bigquery__safe_cast uses this flag to decide whether to strip the surrounding
         quotes from a value before casting it. --#}
    {%- set normalized_type = type.lower() -%}
    {{ return(normalized_type.startswith('struct') or normalized_type == 'geography') }}
{% endmacro %}
64 changes: 64 additions & 0 deletions tests/functional/adapter/unit_testing/test_unit_testing.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,64 @@
import pytest
from dbt.tests.adapter.unit_testing.test_types import BaseUnitTestingTypes
from dbt.tests.adapter.unit_testing.test_case_insensitivity import BaseUnitTestCaseInsensivity
from dbt.tests.adapter.unit_testing.test_invalid_input import BaseUnitTestInvalidInput


class TestBigQueryUnitTestingTypes(BaseUnitTestingTypes):
    """BigQuery values for the shared ``BaseUnitTestingTypes`` checks."""

    @pytest.fixture
    def data_types(self):
        """Return the ``[sql_value, yaml_value]`` pairs exercised on BigQuery.

        The pairs are grouped by category (scalars, arrays, json, structs) and
        concatenated in that order. How each pair is consumed is defined by
        ``BaseUnitTestingTypes``, which is not part of this module.
        """
        json_sql = """json '{"name": "Cooper", "forname": "Alice"}'"""

        scalar_cases = [
            ["1", "1"],
            ["'1'", "1"],
            ["cast('true' as boolean)", "true"],
            ["1.0", "1.0"],
            ["'string value'", "string value"],
            ["cast(1.0 as numeric)", "1.0"],
            ["cast(1 as bigint)", 1],
            ["cast('2019-01-01' as date)", "2019-01-01"],
            ["cast('2013-11-03 00:00:00-07' as timestamp)", "2013-11-03 00:00:00-07"],
            ["st_geogpoint(75, 45)", "'st_geogpoint(75, 45)'"],
        ]

        array_cases = [
            ["cast(['a','b','c'] as array<string>)", "['a','b','c']"],
            ["cast([1,2,3] as array<int>)", "[1,2,3]"],
            ["cast([true,true,false] as array<bool>)", "[true,true,false]"],
        ]
        # Arrays of date, then arrays of timestamp: each is paired with a
        # populated, an empty, and a null yaml value, in that order.
        array_cases += [
            [array_sql, yaml_value]
            for array_sql in ("[date '2019-01-01']", "[timestamp '2019-01-01']")
            for yaml_value in ("['2020-01-01']", "[]", "null")
        ]

        json_cases = [
            [json_sql, """{"name": "Cooper", "forname": "Alice"}"""],
            [json_sql, "{}"],
        ]

        struct_cases = [
            # plain structs
            ["struct('Isha' as name, 22 as age)", """'struct("Isha" as name, 22 as age)'"""],
            [
                "struct('Kipketer' AS name, [23.2, 26.1, 27.3, 29.4] AS laps)",
                """'struct("Kipketer" AS name, [23.2, 26.1, 27.3, 29.4] AS laps)'""",
            ],
            # struct of struct
            [
                "struct(struct(1 as id, 'blue' as color) as my_struct)",
                """'struct(struct(1 as id, "blue" as color) as my_struct)'""",
            ],
            # array of struct
            [
                "[struct(st_geogpoint(75, 45) as my_point), struct(st_geogpoint(75, 35) as my_point)]",
                "['struct(st_geogpoint(75, 45) as my_point)', 'struct(st_geogpoint(75, 35) as my_point)']",
            ],
        ]

        return scalar_cases + array_cases + json_cases + struct_cases


class TestBigQueryUnitTestCaseInsensitivity(BaseUnitTestCaseInsensivity):
    """Run the inherited ``BaseUnitTestCaseInsensivity`` tests unchanged for BigQuery."""


class TestBigQueryUnitTestInvalidInput(BaseUnitTestInvalidInput):
    """Run the inherited ``BaseUnitTestInvalidInput`` tests unchanged for BigQuery."""

0 comments on commit ea3abee

Please sign in to comment.