diff --git a/README.md b/README.md index c438d69..71d7425 100644 --- a/README.md +++ b/README.md @@ -30,6 +30,7 @@ This dbt package contains macros for SQL functions to run the dbt project on mul - [optional](#optional-source) - [optional_table](#optional_table-source) - [star](#star-source) + - [union](#union-source) - [Data type cast functions](#Data-type-cast-functions) - [as_varchar](#as_varchar-source) - [to_boolean](#to_boolean-source) @@ -152,6 +153,13 @@ select from {{ ref('Table_A') }} ``` +#### union ([source](macros/SQL_generators/union.sql)) +This macro performs a union of two or more relations. The relations can be models or source tables in the dbt project. The macro automatically aligns the columns based on the column names. If a column is missing in one of the relations, the macro adds the column with null values to that relation. The columns of the first relation provided as an argument come first, in their original order; columns that only occur in later relations are appended in the order in which they are first encountered. + +Usage: +`{{ pm_utils.union([ref('Model_A'), source('source_name', 'Table_B'), ref('Model_C')]) }}` + + ### Data type cast functions #### as_varchar ([source](macros/data_type_cast_functions/as_varchar.sql)) diff --git a/dbt_project.yml b/dbt_project.yml index bdc5a42..7b89340 100644 --- a/dbt_project.yml +++ b/dbt_project.yml @@ -1,5 +1,5 @@ name: 'pm_utils' -version: '2.3.2' +version: '2.4.0' config-version: 2 require-dbt-version: [">=1.0.0", "<2.0.0"] diff --git a/integration_tests/models/schema.yml b/integration_tests/models/schema.yml index a2f8b8a..156dc0f 100644 --- a/integration_tests/models/schema.yml +++ b/integration_tests/models/schema.yml @@ -180,3 +180,24 @@ models: - equal_value: actual: '"Two_level_field"' expected: '"Two_level_field_expected"' + + - name: test_union_relations + data_tests: + - equal_value: + actual: '"Column_A"' + expected: '"Column_A_Expected"' + - equal_value: + actual: '"Column_B"' + expected: '"Column_B_Expected"' + - equal_value: + actual: '"Column_C"' + expected: 
'"Column_C_Expected"' + - equal_value: + actual: '"Column_D"' + expected: '"Column_D_Expected"' + - equal_value: + actual: '"Column_E"' + expected: '"Column_E_Expected"' + - equal_value: + actual: '"Column_F"' + expected: '"Column_F_Expected"' diff --git a/integration_tests/models/test_union/input_table_1.sql b/integration_tests/models/test_union/input_table_1.sql new file mode 100644 index 0000000..d1b4a8d --- /dev/null +++ b/integration_tests/models/test_union/input_table_1.sql @@ -0,0 +1,6 @@ +{# This creates a table that can be used in test_union. #} +select + 1 as "RN", + 'A' as "Column_A", + {{ pm_utils.to_boolean('true') }} as "Column_B", + null as "Column_C" diff --git a/integration_tests/models/test_union/input_table_2.sql b/integration_tests/models/test_union/input_table_2.sql new file mode 100644 index 0000000..86ea572 --- /dev/null +++ b/integration_tests/models/test_union/input_table_2.sql @@ -0,0 +1,7 @@ +{# This creates a table that can be used in test_union. #} +select + 5 as "Column_C", + 3.5 as "Column_D", + null as "Column_E", + 2 as "RN", + 'B' as "Column_A" diff --git a/integration_tests/models/test_union/input_table_3.sql b/integration_tests/models/test_union/input_table_3.sql new file mode 100644 index 0000000..769b4cb --- /dev/null +++ b/integration_tests/models/test_union/input_table_3.sql @@ -0,0 +1,7 @@ +{# This creates a table that can be used in test_union. 
#}
+select
+    3 as "RN",
+    {{ pm_utils.to_boolean('false') }} as "Column_B",
+    7 as "Column_C",
+    2.3 as "Column_E",
+    null as "Column_F"
diff --git a/integration_tests/models/test_union/test_union.sql b/integration_tests/models/test_union/test_union.sql
new file mode 100644
index 0000000..62b29d1
--- /dev/null
+++ b/integration_tests/models/test_union/test_union.sql
@@ -0,0 +1,29 @@
+with Union_Three as (
+    {{ pm_utils.union([ref('input_table_1'), ref('input_table_2'), ref('input_table_3')]) }}
+),
+
+Union_Three_Expected as (
+    select
+        1 as "RN", 'A' as "Column_A_Expected", {{pm_utils.to_boolean('true')}} as "Column_B_Expected", null as "Column_C_Expected", null as "Column_D_Expected", null as "Column_E_Expected", null as "Column_F_Expected"
+    union all
+    select 2, 'B', null, 5, 3.5, null, null
+    union all
+    select 3, null, {{pm_utils.to_boolean('false')}}, 7, null, 2.3, null
+)
+
+select
+    Union_Three."Column_A",
+    Union_Three."Column_B",
+    Union_Three."Column_C",
+    Union_Three."Column_D",
+    Union_Three."Column_E",
+    Union_Three."Column_F",
+    Union_Three_Expected."Column_A_Expected",
+    Union_Three_Expected."Column_B_Expected",
+    Union_Three_Expected."Column_C_Expected",
+    Union_Three_Expected."Column_D_Expected",
+    Union_Three_Expected."Column_E_Expected",
+    Union_Three_Expected."Column_F_Expected"
+from Union_Three
+left join Union_Three_Expected
+    on Union_Three."RN" = Union_Three_Expected."RN"
diff --git a/macros/SQL_generators/union.sql b/macros/SQL_generators/union.sql
new file mode 100644
index 0000000..37bf450
--- /dev/null
+++ b/macros/SQL_generators/union.sql
@@ -0,0 +1,52 @@
+{%- macro union(relations) -%}
+    {#
+        Generates a `union all` query over the given relations, aligning columns by name.
+
+        Arguments:
+            relations: a list of relations (models / source tables) that exist in the database.
+
+        Output:
+            One `select ... from <relation>` statement per relation, combined with `union all`.
+            Every select lists the same columns: all column names found across the relations,
+            in order of first appearance, so the columns of the first relation come first.
+            A column that a relation does not have is selected as NULL for that relation.
+
+        Notes:
+            - Column names are compared case-sensitively, as reported by the adapter.
+            - An empty list of relations raises a compiler error at execution time.
+    #}
+    {%- if execute and relations | length == 0 -%}
+        {{ exceptions.raise_compiler_error("pm_utils.union requires at least one relation.") }}
+    {%- endif -%}
+
+    {# Look up the columns of each relation once; the names are reused for every select below. #}
+    {%- set columns_per_relation = [] -%}
+    {%- set joint_columns = [] -%}
+    {%- for relation in relations -%}
+        {%- set col_names = adapter.get_columns_in_relation(relation) | map(attribute='name') | list -%}
+        {%- do columns_per_relation.append(col_names) -%}
+        {%- for col_name in col_names -%}
+            {%- if col_name not in joint_columns -%}
+                {%- do joint_columns.append(col_name) -%}
+            {%- endif -%}
+        {%- endfor -%}
+    {%- endfor -%}
+
+    {# Build one select per relation over the shared column list; the adapter quotes the identifiers. #}
+    {%- set selects = [] -%}
+    {%- for relation in relations -%}
+        {%- set col_names = columns_per_relation[loop.index0] -%}
+        {%- set select_parts = [] -%}
+        {%- for col_name in joint_columns -%}
+            {%- set quoted = adapter.quote(col_name) -%}
+            {%- if col_name in col_names -%}
+                {%- do select_parts.append(quoted) -%}
+            {%- else -%}
+                {%- do select_parts.append('NULL as ' ~ quoted) -%}
+            {%- endif -%}
+        {%- endfor -%}
+        {%- do selects.append('select ' ~ select_parts | join(', ') ~ ' from ' ~ relation) -%}
+    {%- endfor -%}
+
+    {{ selects | join('\nunion all\n') }}
+{%- endmacro -%}