From ba9a333d2ca879a7ba714d58b839f99fbe6f2fee Mon Sep 17 00:00:00 2001 From: Mozhgan Date: Tue, 11 Nov 2025 16:54:59 +0100 Subject: [PATCH 01/13] feat: union relation for tables in database --- macros/union_relations.sql | 35 +++++++++++++++++++++++++++++++++++ 1 file changed, 35 insertions(+) create mode 100644 macros/union_relations.sql diff --git a/macros/union_relations.sql b/macros/union_relations.sql new file mode 100644 index 0000000..25fc044 --- /dev/null +++ b/macros/union_relations.sql @@ -0,0 +1,35 @@ +{%- macro union_relations(relations) -%} + {# + relations: a list of relation objects or strings (table/view names) that exist in the database. + This macro unions any number of tables, aligning columns by name and filling missing columns with NULLs. + #} + {%- set joint_columns = [] -%} + {%- for relation in relations -%} + {%- set rel = ref(relation) if relation is string else relation -%} + {%- set cols = adapter.get_columns_in_relation(rel) -%} + {%- for col in cols -%} + {%- if col.name not in joint_columns -%} + {%- do joint_columns.append(col.name) -%} + {%- endif -%} + {%- endfor -%} + {%- endfor -%} + + {%- set selects = [] -%} + {%- for relation in relations -%} + {%- set rel = ref(relation) if relation is string else relation -%} + {%- set cols = adapter.get_columns_in_relation(rel) -%} + {%- set col_names = cols | map(attribute='name') | list -%} + {%- set select_parts = [] -%} + {%- for col in joint_columns -%} + {%- if col in col_names -%} + {%- do select_parts.append('"' ~ col ~ '"') -%} + {%- else -%} + {%- do select_parts.append('NULL as "' ~ col ~ '"') -%} + {%- endif -%} + {%- endfor -%} + {%- set select_sql = 'select ' ~ select_parts | join(', ') ~ ' from ' ~ relation -%} + {%- do selects.append(select_sql) -%} + {%- endfor -%} + + {{ selects | join('\nunion all\n') }} +{%- endmacro -%} From f8660aaa362809fd9810bfadfcf0e5f1e9db54b0 Mon Sep 17 00:00:00 2001 From: Mozhgan Date: Wed, 12 Nov 2025 14:03:04 +0100 Subject: [PATCH 02/13] feat: 
union relation macro and tests --- integration_tests/models/schema.yml | 24 ++++++++ .../models/test_union_relations.sql | 58 +++++++++++++++++++ macros/union_relations.sql | 25 ++++++-- 3 files changed, 101 insertions(+), 6 deletions(-) create mode 100644 integration_tests/models/test_union_relations.sql diff --git a/integration_tests/models/schema.yml b/integration_tests/models/schema.yml index a2f8b8a..b777052 100644 --- a/integration_tests/models/schema.yml +++ b/integration_tests/models/schema.yml @@ -180,3 +180,27 @@ models: - equal_value: actual: '"Two_level_field"' expected: '"Two_level_field_expected"' + + - name: test_union_relations + data_tests: + - equal_value: + actual: '"Column_A"' + expected: '"Column_A_Expected"' + - equal_value: + actual: '"Column_B"' + expected: '"Column_B_Expected"' + - equal_value: + actual: '"Column_C"' + expected: '"Column_C_Expected"' + - equal_value: + actual: '"Column_D"' + expected: '"Column_D_Expected"' + - equal_value: + actual: '"Column_E"' + expected: '"Column_E_Expected"' + - equal_value: + actual: '"Column_F"' + expected: '"Column_F_Expected"' + - equal_value: + actual: '"Column_G"' + expected: '"Column_G_Expected"' diff --git a/integration_tests/models/test_union_relations.sql b/integration_tests/models/test_union_relations.sql new file mode 100644 index 0000000..7e72069 --- /dev/null +++ b/integration_tests/models/test_union_relations.sql @@ -0,0 +1,58 @@ +with Input_data_1 as ( + select + {{ pm_utils.to_varchar('A') }} as "Column_A", + {{ pm_utils.to_boolean('true') }} as "Column_B", + {{ pm_utils.to_integer('10') }} as "Column_C", + {{ pm_utils.to_timestamp('2023-02-01 10:00:00') }} as "Column_D" +), + +Input_data_2 as ( + select + {{ pm_utils.to_integer('20') }} as "Column_C", + {{ pm_utils.to_double('30.5') }} as "Column_E", + {{ pm_utils.to_timestamp('2023-02-02 10:00:00') }} as "Column_D", + null as "Column_F" +), + +Input_data_3 as ( + select + {{ pm_utils.to_integer('15') }} as "Column_C", + {{ 
pm_utils.to_varchar('D') }} as "Column_A", + {{ pm_utils.to_boolean('false') }} as "Column_B", + {{ pm_utils.to_integer('1') }} as "Column_F", + null as "Column_G" +), + +Union_Three as ( + {{ pm_utils.union_relations(['Input_data_1', 'Input_data_2', 'Input_data_3']) }} +), + +Union_Three_Expected as ( + select * from values + ( {{ pm_utils.to_varchar('A') }}, {{ pm_utils.to_boolean('true') }}, {{ pm_utils.to_integer('10') }}, {{ pm_utils.to_timestamp('2023-02-01 10:00:00') }}, null, null, null ), + ( null, null, {{ pm_utils.to_integer('20') }}, {{ pm_utils.to_timestamp('2023-02-02 10:00:00') }}, {{ pm_utils.to_double('30.5') }}, null, null ), + ( {{ pm_utils.to_varchar('D') }}, {{ pm_utils.to_boolean('false') }}, {{ pm_utils.to_integer('15') }}, null, null, {{ pm_utils.to_integer('1') }}, null ) + as ("Column_A_Expected", "Column_B_Expected", "Column_C_Expected", "Column_D_Expected", "Column_E_Expected", "Column_F_Expected", "Column_G_Expected") +) + +select + Union_Three."Column_A", + Union_Three."Column_B", + Union_Three."Column_C", + Union_Three."Column_D", + Union_Three."Column_E", + Union_Three."Column_F", + Union_Three."Column_G" +from Union_Three +union all +select + Union_Three_Expected."Column_A_Expected", + Union_Three_Expected."Column_B_Expected", + Union_Three_Expected."Column_C_Expected", + Union_Three_Expected."Column_D_Expected", + Union_Three_Expected."Column_E_Expected", + Union_Three_Expected."Column_F_Expected", + Union_Three_Expected."Column_G_Expected" +from Union_Three_Expected + + diff --git a/macros/union_relations.sql b/macros/union_relations.sql index 25fc044..69cb7e9 100644 --- a/macros/union_relations.sql +++ b/macros/union_relations.sql @@ -2,29 +2,42 @@ {# relations: a list of relation objects or strings (table/view names) that exist in the database. This macro unions any number of tables, aligning columns by name and filling missing columns with NULLs. 
+ It ensures that columns with the same name are cast to a common type across all relations. #} {%- set joint_columns = [] -%} + {%- set column_types = {} -%} {%- for relation in relations -%} - {%- set rel = ref(relation) if relation is string else relation -%} - {%- set cols = adapter.get_columns_in_relation(rel) -%} + {%- set cols = adapter.get_columns_in_relation(relation) -%} {%- for col in cols -%} {%- if col.name not in joint_columns -%} {%- do joint_columns.append(col.name) -%} + {%- set _ = column_types.update({col.name: col.dtype}) -%} + {%- else -%} + {#- If a column appears in multiple tables, prefer the first type found -#} {%- endif -%} {%- endfor -%} {%- endfor -%} {%- set selects = [] -%} {%- for relation in relations -%} - {%- set rel = ref(relation) if relation is string else relation -%} - {%- set cols = adapter.get_columns_in_relation(rel) -%} + {%- set cols = adapter.get_columns_in_relation(relation) -%} {%- set col_names = cols | map(attribute='name') | list -%} + {%- set col_types = {} -%} + {%- for col in cols -%} + {%- set _ = col_types.update({col.name: col.dtype}) -%} + {%- endfor -%} {%- set select_parts = [] -%} {%- for col in joint_columns -%} + {%- set target_type = column_types[col] -%} {%- if col in col_names -%} - {%- do select_parts.append('"' ~ col ~ '"') -%} + {%- set source_type = col_types[col] -%} + {%- if source_type != target_type -%} + {%- do select_parts.append('CAST("' ~ col ~ '" AS ' ~ target_type ~ ') as "' ~ col ~ '"') -%} + {%- else -%} + {%- do select_parts.append('"' ~ col ~ '"') -%} + {%- endif -%} {%- else -%} - {%- do select_parts.append('NULL as "' ~ col ~ '"') -%} + {%- do select_parts.append('CAST(NULL AS ' ~ target_type ~ ') as "' ~ col ~ '"') -%} {%- endif -%} {%- endfor -%} {%- set select_sql = 'select ' ~ select_parts | join(', ') ~ ' from ' ~ relation -%} From d190acdc2c2c095fc88258fb4fea28e0ef8ea716 Mon Sep 17 00:00:00 2001 From: Mozhgan Date: Wed, 12 Nov 2025 14:51:55 +0100 Subject: [PATCH 03/13] feat: 
sample input data added and tests updated --- integration_tests/models/schema.yml | 3 - integration_tests/models/sources.yml | 39 +++++++++++++ .../models/test_union_relations.sql | 56 ++++--------------- .../sample_data/Input_data_1.csv | 2 + .../sample_data/Input_data_2.csv | 2 + .../sample_data/Input_data_3.csv | 2 + integration_tests/sample_data/union_data.csv | 4 ++ 7 files changed, 59 insertions(+), 49 deletions(-) create mode 100644 integration_tests/models/sources.yml create mode 100644 integration_tests/sample_data/Input_data_1.csv create mode 100644 integration_tests/sample_data/Input_data_2.csv create mode 100644 integration_tests/sample_data/Input_data_3.csv create mode 100644 integration_tests/sample_data/union_data.csv diff --git a/integration_tests/models/schema.yml b/integration_tests/models/schema.yml index b777052..ffe1c0c 100644 --- a/integration_tests/models/schema.yml +++ b/integration_tests/models/schema.yml @@ -192,9 +192,6 @@ models: - equal_value: actual: '"Column_C"' expected: '"Column_C_Expected"' - - equal_value: - actual: '"Column_D"' - expected: '"Column_D_Expected"' - equal_value: actual: '"Column_E"' expected: '"Column_E_Expected"' diff --git a/integration_tests/models/sources.yml b/integration_tests/models/sources.yml new file mode 100644 index 0000000..1d9df9c --- /dev/null +++ b/integration_tests/models/sources.yml @@ -0,0 +1,39 @@ +version: 2 + +sources: + - name: sources + schema: "{{ var('schema_sources', target.schema) }}" + tables: + - name: input_data_1 + columns: + - name: RN + - name: Column_A + - name: Column_B + - name: Column_C + - name: input_data_2 + columns: + - name: RN + - name: Column_C + - name: Column_E + - name: Column_F + - name: Column_A + - name: input_data_3 + columns: + - name: RN + - name: Column_B + - name: Column_C + - name: Column_F + - name: Column_G + - name: union_data + columns: + - name: RN + - name: Column_A + - name: Column_B + - name: Column_C + - name: Column_E + - name: Column_F + - name: 
Column_G + quoting: + database: true + schema: true + identifier: true diff --git a/integration_tests/models/test_union_relations.sql b/integration_tests/models/test_union_relations.sql index 7e72069..00400ae 100644 --- a/integration_tests/models/test_union_relations.sql +++ b/integration_tests/models/test_union_relations.sql @@ -1,58 +1,22 @@ -with Input_data_1 as ( - select - {{ pm_utils.to_varchar('A') }} as "Column_A", - {{ pm_utils.to_boolean('true') }} as "Column_B", - {{ pm_utils.to_integer('10') }} as "Column_C", - {{ pm_utils.to_timestamp('2023-02-01 10:00:00') }} as "Column_D" -), - -Input_data_2 as ( - select - {{ pm_utils.to_integer('20') }} as "Column_C", - {{ pm_utils.to_double('30.5') }} as "Column_E", - {{ pm_utils.to_timestamp('2023-02-02 10:00:00') }} as "Column_D", - null as "Column_F" -), - -Input_data_3 as ( - select - {{ pm_utils.to_integer('15') }} as "Column_C", - {{ pm_utils.to_varchar('D') }} as "Column_A", - {{ pm_utils.to_boolean('false') }} as "Column_B", - {{ pm_utils.to_integer('1') }} as "Column_F", - null as "Column_G" -), - -Union_Three as ( - {{ pm_utils.union_relations(['Input_data_1', 'Input_data_2', 'Input_data_3']) }} -), - -Union_Three_Expected as ( - select * from values - ( {{ pm_utils.to_varchar('A') }}, {{ pm_utils.to_boolean('true') }}, {{ pm_utils.to_integer('10') }}, {{ pm_utils.to_timestamp('2023-02-01 10:00:00') }}, null, null, null ), - ( null, null, {{ pm_utils.to_integer('20') }}, {{ pm_utils.to_timestamp('2023-02-02 10:00:00') }}, {{ pm_utils.to_double('30.5') }}, null, null ), - ( {{ pm_utils.to_varchar('D') }}, {{ pm_utils.to_boolean('false') }}, {{ pm_utils.to_integer('15') }}, null, null, {{ pm_utils.to_integer('1') }}, null ) - as ("Column_A_Expected", "Column_B_Expected", "Column_C_Expected", "Column_D_Expected", "Column_E_Expected", "Column_F_Expected", "Column_G_Expected") +with Union_Three as ( + {{ pm_utils.union_relations([source('sources', 'input_data_1'), source('sources', 'input_data_2'), 
source('sources', 'input_data_3')]) }} ) select Union_Three."Column_A", Union_Three."Column_B", Union_Three."Column_C", - Union_Three."Column_D", Union_Three."Column_E", Union_Three."Column_F", Union_Three."Column_G" + Union_Three_Expected."Column_A" as "Column_A_Expected", + Union_Three_Expected."Column_B" as "Column_B_Expected", + Union_Three_Expected."Column_C" as "Column_C_Expected", + Union_Three_Expected."Column_E" as "Column_E_Expected", + Union_Three_Expected."Column_F" as "Column_F_Expected", + Union_Three_Expected."Column_G" as "Column_G_Expected" from Union_Three -union all -select - Union_Three_Expected."Column_A_Expected", - Union_Three_Expected."Column_B_Expected", - Union_Three_Expected."Column_C_Expected", - Union_Three_Expected."Column_D_Expected", - Union_Three_Expected."Column_E_Expected", - Union_Three_Expected."Column_F_Expected", - Union_Three_Expected."Column_G_Expected" -from Union_Three_Expected +join {{ source('sources', 'union_data') }} as Union_Three_Expected + on Union_Three."RN" = Union_Three_Expected."RN" diff --git a/integration_tests/sample_data/Input_data_1.csv b/integration_tests/sample_data/Input_data_1.csv new file mode 100644 index 0000000..3c9766d --- /dev/null +++ b/integration_tests/sample_data/Input_data_1.csv @@ -0,0 +1,2 @@ +RN Column_A Column_B Column_C +0 A True 10 diff --git a/integration_tests/sample_data/Input_data_2.csv b/integration_tests/sample_data/Input_data_2.csv new file mode 100644 index 0000000..49fd0e1 --- /dev/null +++ b/integration_tests/sample_data/Input_data_2.csv @@ -0,0 +1,2 @@ +RN Column_C Column_E Column_F Column_A +1 5 3.5 B diff --git a/integration_tests/sample_data/Input_data_3.csv b/integration_tests/sample_data/Input_data_3.csv new file mode 100644 index 0000000..970df3e --- /dev/null +++ b/integration_tests/sample_data/Input_data_3.csv @@ -0,0 +1,2 @@ +RN Column_B Column_C Column_F Column_G +2 False 7 2.1 diff --git a/integration_tests/sample_data/union_data.csv 
b/integration_tests/sample_data/union_data.csv new file mode 100644 index 0000000..538bae4 --- /dev/null +++ b/integration_tests/sample_data/union_data.csv @@ -0,0 +1,4 @@ +RN Column_A Column_B Column_C Column_E Column_F Column_G +0 A True 10 +1 B 5 3.5 +2 False 7 2.1 From 7ce597ca7447f7f9909dfd8c279c051d79ef7f77 Mon Sep 17 00:00:00 2001 From: Mozhgan Date: Wed, 12 Nov 2025 16:59:26 +0100 Subject: [PATCH 04/13] feat: update integration test --- integration_tests/models/test_union_relations.sql | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/integration_tests/models/test_union_relations.sql b/integration_tests/models/test_union_relations.sql index 00400ae..d71db8a 100644 --- a/integration_tests/models/test_union_relations.sql +++ b/integration_tests/models/test_union_relations.sql @@ -8,7 +8,7 @@ select Union_Three."Column_C", Union_Three."Column_E", Union_Three."Column_F", - Union_Three."Column_G" + Union_Three."Column_G", Union_Three_Expected."Column_A" as "Column_A_Expected", Union_Three_Expected."Column_B" as "Column_B_Expected", Union_Three_Expected."Column_C" as "Column_C_Expected", From e3e35c5e02914cb0ed4267eda7c2e6902f9b617a Mon Sep 17 00:00:00 2001 From: Mozhgan Date: Thu, 13 Nov 2025 13:53:17 +0100 Subject: [PATCH 05/13] feat: update sources --- integration_tests/models/sources.yml | 1 - integration_tests/models/test_union_relations.sql | 2 -- 2 files changed, 3 deletions(-) diff --git a/integration_tests/models/sources.yml b/integration_tests/models/sources.yml index 1d9df9c..4dea511 100644 --- a/integration_tests/models/sources.yml +++ b/integration_tests/models/sources.yml @@ -2,7 +2,6 @@ version: 2 sources: - name: sources - schema: "{{ var('schema_sources', target.schema) }}" tables: - name: input_data_1 columns: diff --git a/integration_tests/models/test_union_relations.sql b/integration_tests/models/test_union_relations.sql index d71db8a..d5a886c 100644 --- a/integration_tests/models/test_union_relations.sql +++ 
b/integration_tests/models/test_union_relations.sql @@ -18,5 +18,3 @@ select from Union_Three join {{ source('sources', 'union_data') }} as Union_Three_Expected on Union_Three."RN" = Union_Three_Expected."RN" - - From c42352ff84f7b5b1f91d4d4b23442521faa87825 Mon Sep 17 00:00:00 2001 From: Mozhgan Date: Thu, 13 Nov 2025 14:09:32 +0100 Subject: [PATCH 06/13] feat: update integration test --- integration_tests/models/test_union_relations.sql | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/integration_tests/models/test_union_relations.sql b/integration_tests/models/test_union_relations.sql index d5a886c..7b7ae99 100644 --- a/integration_tests/models/test_union_relations.sql +++ b/integration_tests/models/test_union_relations.sql @@ -1,5 +1,9 @@ with Union_Three as ( {{ pm_utils.union_relations([source('sources', 'input_data_1'), source('sources', 'input_data_2'), source('sources', 'input_data_3')]) }} +), + +Union_Three_Expected as ( + select * from {{ source('sources', 'union_data') }} ) select @@ -16,5 +20,5 @@ select Union_Three_Expected."Column_F" as "Column_F_Expected", Union_Three_Expected."Column_G" as "Column_G_Expected" from Union_Three -join {{ source('sources', 'union_data') }} as Union_Three_Expected +left join Union_Three_Expected on Union_Three."RN" = Union_Three_Expected."RN" From e4eb51f0a3fa87c16b84a9cbb38d78182f259c9a Mon Sep 17 00:00:00 2001 From: Mozhgan Date: Thu, 13 Nov 2025 14:43:06 +0100 Subject: [PATCH 07/13] feat: apply snowflake test first --- .pipelines/azure-pipelines-integration-tests.yml | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/.pipelines/azure-pipelines-integration-tests.yml b/.pipelines/azure-pipelines-integration-tests.yml index a177b69..93c0e26 100644 --- a/.pipelines/azure-pipelines-integration-tests.yml +++ b/.pipelines/azure-pipelines-integration-tests.yml @@ -125,16 +125,16 @@ jobs: dbt deps displayName: Install dbt dependencies - - bash: | - source 
dbt-env-sqlserver/bin/activate - cd $(dbtProjectPath) - dbt build --profiles-dir $(Agent.BuildDirectory)/self/.pipelines --profile default -t sqlserver-ci \ - --vars '{"DBT_SQL_SERVER_SERVER": "$(DBT_SQL_SERVER_SERVER)", "DBT_SQL_SERVER_USER": "$(DBT_SQL_SERVER_USER)", "DBT_SQL_SERVER_PASSWORD": "$(DBT_SQL_SERVER_PASSWORD)", "DBT_SQL_SERVER_DATABASE": "$(DBT_SQL_SERVER_DATABASE)", "DBT_SCHEMA": "$(DBT_SCHEMA)"}' - displayName: Test (SQL Server) - - bash: | source dbt-env/bin/activate cd $(dbtProjectPath) dbt build --profiles-dir $(Agent.BuildDirectory)/self/.pipelines --profile default -t snowflake-ci \ --vars '{"DBT_SNOWFLAKE_ACCOUNT": "$(DBT_SNOWFLAKE_ACCOUNT)", "DBT_SNOWFLAKE_USER": "$(DBT_SNOWFLAKE_USER)", "DBT_SNOWFLAKE_PASSWORD": "$(DBT_SNOWFLAKE_PASSWORD)", "DBT_SNOWFLAKE_ROLE": "$(DBT_SNOWFLAKE_ROLE)", "DBT_SNOWFLAKE_DATABASE": "$(DBT_SNOWFLAKE_DATABASE)", "DBT_SNOWFLAKE_WAREHOUSE": "$(DBT_SNOWFLAKE_WAREHOUSE)", "DBT_SCHEMA": "$(DBT_SCHEMA)"}' displayName: Test (Snowflake) + + - bash: | + source dbt-env-sqlserver/bin/activate + cd $(dbtProjectPath) + dbt build --profiles-dir $(Agent.BuildDirectory)/self/.pipelines --profile default -t sqlserver-ci \ + --vars '{"DBT_SQL_SERVER_SERVER": "$(DBT_SQL_SERVER_SERVER)", "DBT_SQL_SERVER_USER": "$(DBT_SQL_SERVER_USER)", "DBT_SQL_SERVER_PASSWORD": "$(DBT_SQL_SERVER_PASSWORD)", "DBT_SQL_SERVER_DATABASE": "$(DBT_SQL_SERVER_DATABASE)", "DBT_SCHEMA": "$(DBT_SCHEMA)"}' + displayName: Test (SQL Server) From 64da6c451d7f3cb23d7e3e6477da57797c1b5f38 Mon Sep 17 00:00:00 2001 From: Mozhgan Date: Thu, 13 Nov 2025 15:29:48 +0100 Subject: [PATCH 08/13] feat: updating integration tests and input_tables --- integration_tests/models/input_table_1.sql | 6 +++ integration_tests/models/input_table_2.sql | 7 ++++ integration_tests/models/input_table_3.sql | 7 ++++ integration_tests/models/schema.yml | 6 +-- integration_tests/models/sources.yml | 38 ------------------- .../models/test_union_relations.sql | 23 ++++++----- 
.../sample_data/Input_data_1.csv | 2 - .../sample_data/Input_data_2.csv | 2 - .../sample_data/Input_data_3.csv | 2 - integration_tests/sample_data/union_data.csv | 4 -- 10 files changed, 37 insertions(+), 60 deletions(-) create mode 100644 integration_tests/models/input_table_1.sql create mode 100644 integration_tests/models/input_table_2.sql create mode 100644 integration_tests/models/input_table_3.sql delete mode 100644 integration_tests/models/sources.yml delete mode 100644 integration_tests/sample_data/Input_data_1.csv delete mode 100644 integration_tests/sample_data/Input_data_2.csv delete mode 100644 integration_tests/sample_data/Input_data_3.csv delete mode 100644 integration_tests/sample_data/union_data.csv diff --git a/integration_tests/models/input_table_1.sql b/integration_tests/models/input_table_1.sql new file mode 100644 index 0000000..0afff06 --- /dev/null +++ b/integration_tests/models/input_table_1.sql @@ -0,0 +1,6 @@ +{# This creates a table that can be used in tests that require a relation as argument. #} +select + 1 as "RN", + 'A' as "Column_A", + {{ pm_utils.to_boolean('true') }} as "Column_B", + null as "Column_C" diff --git a/integration_tests/models/input_table_2.sql b/integration_tests/models/input_table_2.sql new file mode 100644 index 0000000..d885279 --- /dev/null +++ b/integration_tests/models/input_table_2.sql @@ -0,0 +1,7 @@ +{# This creates a table that can be used in tests that require a relation as argument. #} +select + 5 as "Column_C", + 3.5 as "Column_D", + null as "Column_E", + 2 as "RN", + 'B' as "Column_A" diff --git a/integration_tests/models/input_table_3.sql b/integration_tests/models/input_table_3.sql new file mode 100644 index 0000000..b221bb3 --- /dev/null +++ b/integration_tests/models/input_table_3.sql @@ -0,0 +1,7 @@ +{# This creates a table that can be used in tests that require a relation as argument. 
#} +select + 3 as "RN", + {{ pm_utils.to_boolean('false') }} as "Column_B", + 7 as "Column_C", + 2.1 as "Column_E", + null as "Column_F" diff --git a/integration_tests/models/schema.yml b/integration_tests/models/schema.yml index ffe1c0c..156dc0f 100644 --- a/integration_tests/models/schema.yml +++ b/integration_tests/models/schema.yml @@ -192,12 +192,12 @@ models: - equal_value: actual: '"Column_C"' expected: '"Column_C_Expected"' + - equal_value: + actual: '"Column_D"' + expected: '"Column_D_Expected"' - equal_value: actual: '"Column_E"' expected: '"Column_E_Expected"' - equal_value: actual: '"Column_F"' expected: '"Column_F_Expected"' - - equal_value: - actual: '"Column_G"' - expected: '"Column_G_Expected"' diff --git a/integration_tests/models/sources.yml b/integration_tests/models/sources.yml deleted file mode 100644 index 4dea511..0000000 --- a/integration_tests/models/sources.yml +++ /dev/null @@ -1,38 +0,0 @@ -version: 2 - -sources: - - name: sources - tables: - - name: input_data_1 - columns: - - name: RN - - name: Column_A - - name: Column_B - - name: Column_C - - name: input_data_2 - columns: - - name: RN - - name: Column_C - - name: Column_E - - name: Column_F - - name: Column_A - - name: input_data_3 - columns: - - name: RN - - name: Column_B - - name: Column_C - - name: Column_F - - name: Column_G - - name: union_data - columns: - - name: RN - - name: Column_A - - name: Column_B - - name: Column_C - - name: Column_E - - name: Column_F - - name: Column_G - quoting: - database: true - schema: true - identifier: true diff --git a/integration_tests/models/test_union_relations.sql b/integration_tests/models/test_union_relations.sql index 7b7ae99..6614fe7 100644 --- a/integration_tests/models/test_union_relations.sql +++ b/integration_tests/models/test_union_relations.sql @@ -1,24 +1,29 @@ with Union_Three as ( - {{ pm_utils.union_relations([source('sources', 'input_data_1'), source('sources', 'input_data_2'), source('sources', 'input_data_3')]) }} + {{ 
pm_utils.union_relations([ref('input_table_1'), ref('input_table_2'), ref('input_table_3')]) }} ), Union_Three_Expected as ( - select * from {{ source('sources', 'union_data') }} + select + 1 as "RN", 'A' as "Column_A_Expected", {{pm_utils.to_boolean('true')}} as "Column_B_Expected", null as "Column_C_Expected", null as "Column_D_Expected", null as "Column_E_Expected", null as "Column_F_Expected" + union all + select 2, 'B', null, 5, 3.5, null, null + union all + select 3, null, {{pm_utils.to_boolean('false')}}, 7, null, 2.1, null ) select Union_Three."Column_A", Union_Three."Column_B", Union_Three."Column_C", + Union_Three."Column_D", Union_Three."Column_E", Union_Three."Column_F", - Union_Three."Column_G", - Union_Three_Expected."Column_A" as "Column_A_Expected", - Union_Three_Expected."Column_B" as "Column_B_Expected", - Union_Three_Expected."Column_C" as "Column_C_Expected", - Union_Three_Expected."Column_E" as "Column_E_Expected", - Union_Three_Expected."Column_F" as "Column_F_Expected", - Union_Three_Expected."Column_G" as "Column_G_Expected" + Union_Three_Expected."Column_A_Expected", + Union_Three_Expected."Column_B_Expected", + Union_Three_Expected."Column_C_Expected", + Union_Three_Expected."Column_D_Expected", + Union_Three_Expected."Column_E_Expected", + Union_Three_Expected."Column_F_Expected" from Union_Three left join Union_Three_Expected on Union_Three."RN" = Union_Three_Expected."RN" diff --git a/integration_tests/sample_data/Input_data_1.csv b/integration_tests/sample_data/Input_data_1.csv deleted file mode 100644 index 3c9766d..0000000 --- a/integration_tests/sample_data/Input_data_1.csv +++ /dev/null @@ -1,2 +0,0 @@ -RN Column_A Column_B Column_C -0 A True 10 diff --git a/integration_tests/sample_data/Input_data_2.csv b/integration_tests/sample_data/Input_data_2.csv deleted file mode 100644 index 49fd0e1..0000000 --- a/integration_tests/sample_data/Input_data_2.csv +++ /dev/null @@ -1,2 +0,0 @@ -RN Column_C Column_E Column_F Column_A -1 5 3.5 B 
diff --git a/integration_tests/sample_data/Input_data_3.csv b/integration_tests/sample_data/Input_data_3.csv deleted file mode 100644 index 970df3e..0000000 --- a/integration_tests/sample_data/Input_data_3.csv +++ /dev/null @@ -1,2 +0,0 @@ -RN Column_B Column_C Column_F Column_G -2 False 7 2.1 diff --git a/integration_tests/sample_data/union_data.csv b/integration_tests/sample_data/union_data.csv deleted file mode 100644 index 538bae4..0000000 --- a/integration_tests/sample_data/union_data.csv +++ /dev/null @@ -1,4 +0,0 @@ -RN Column_A Column_B Column_C Column_E Column_F Column_G -0 A True 10 -1 B 5 3.5 -2 False 7 2.1 From f5fc3ee3188c11cd2eef7c994fd090913c69a5b4 Mon Sep 17 00:00:00 2001 From: Mozhgan Date: Thu, 13 Nov 2025 16:21:04 +0100 Subject: [PATCH 09/13] feat: update dbt version --- dbt_project.yml | 2 +- integration_tests/models/input_table_1.sql | 2 +- integration_tests/models/input_table_2.sql | 6 +++--- integration_tests/models/input_table_3.sql | 6 +++--- integration_tests/models/test_union_relations.sql | 6 +++--- macros/{ => aggregate_functions}/union_relations.sql | 0 6 files changed, 11 insertions(+), 11 deletions(-) rename macros/{ => aggregate_functions}/union_relations.sql (100%) diff --git a/dbt_project.yml b/dbt_project.yml index bdc5a42..7ca573f 100644 --- a/dbt_project.yml +++ b/dbt_project.yml @@ -1,5 +1,5 @@ name: 'pm_utils' -version: '2.3.2' +version: '2.4.2' config-version: 2 require-dbt-version: [">=1.0.0", "<2.0.0"] diff --git a/integration_tests/models/input_table_1.sql b/integration_tests/models/input_table_1.sql index 0afff06..d6f185d 100644 --- a/integration_tests/models/input_table_1.sql +++ b/integration_tests/models/input_table_1.sql @@ -1,6 +1,6 @@ {# This creates a table that can be used in tests that require a relation as argument. 
#} select - 1 as "RN", + {{ pm_utils.to_integer('1') }} as "RN", 'A' as "Column_A", {{ pm_utils.to_boolean('true') }} as "Column_B", null as "Column_C" diff --git a/integration_tests/models/input_table_2.sql b/integration_tests/models/input_table_2.sql index d885279..a397fb1 100644 --- a/integration_tests/models/input_table_2.sql +++ b/integration_tests/models/input_table_2.sql @@ -1,7 +1,7 @@ {# This creates a table that can be used in tests that require a relation as argument. #} select - 5 as "Column_C", - 3.5 as "Column_D", + {{ pm_utils.to_integer('5') }} as "Column_C", + {{ pm_utils.to_double('3.5') }} as "Column_D", null as "Column_E", - 2 as "RN", + {{ pm_utils.to_integer('2') }} as "RN", 'B' as "Column_A" diff --git a/integration_tests/models/input_table_3.sql b/integration_tests/models/input_table_3.sql index b221bb3..a7b310f 100644 --- a/integration_tests/models/input_table_3.sql +++ b/integration_tests/models/input_table_3.sql @@ -1,7 +1,7 @@ {# This creates a table that can be used in tests that require a relation as argument. 
#} select - 3 as "RN", + {{ pm_utils.to_integer('3') }} as "RN", {{ pm_utils.to_boolean('false') }} as "Column_B", - 7 as "Column_C", - 2.1 as "Column_E", + {{ pm_utils.to_integer('7') }} as "Column_C", + {{ pm_utils.to_double('2.1') }} as "Column_E", null as "Column_F" diff --git a/integration_tests/models/test_union_relations.sql b/integration_tests/models/test_union_relations.sql index 6614fe7..c3b1265 100644 --- a/integration_tests/models/test_union_relations.sql +++ b/integration_tests/models/test_union_relations.sql @@ -4,11 +4,11 @@ with Union_Three as ( Union_Three_Expected as ( select - 1 as "RN", 'A' as "Column_A_Expected", {{pm_utils.to_boolean('true')}} as "Column_B_Expected", null as "Column_C_Expected", null as "Column_D_Expected", null as "Column_E_Expected", null as "Column_F_Expected" + {{ pm_utils.to_integer('1') }} as "RN", 'A' as "Column_A_Expected", {{pm_utils.to_boolean('true')}} as "Column_B_Expected", null as "Column_C_Expected", null as "Column_D_Expected", null as "Column_E_Expected", null as "Column_F_Expected" union all - select 2, 'B', null, 5, 3.5, null, null + select {{ pm_utils.to_integer('2') }}, 'B', null, {{ pm_utils.to_integer('5') }}, {{ pm_utils.to_double('3.5') }}, null, null union all - select 3, null, {{pm_utils.to_boolean('false')}}, 7, null, 2.1, null + select {{ pm_utils.to_integer('3') }}, null, {{pm_utils.to_boolean('false')}}, {{ pm_utils.to_integer('7') }}, null, {{ pm_utils.to_double('2.1') }}, null ) select diff --git a/macros/union_relations.sql b/macros/aggregate_functions/union_relations.sql similarity index 100% rename from macros/union_relations.sql rename to macros/aggregate_functions/union_relations.sql From 69fdc9fcd93fa8b2e2daed1fae00a2e7ab423d39 Mon Sep 17 00:00:00 2001 From: Mozhgan Date: Thu, 13 Nov 2025 16:38:58 +0100 Subject: [PATCH 10/13] feat: update test scenario --- integration_tests/models/input_table_3.sql | 2 +- integration_tests/models/test_union_relations.sql | 2 +- 2 files changed, 2 
insertions(+), 2 deletions(-) diff --git a/integration_tests/models/input_table_3.sql b/integration_tests/models/input_table_3.sql index a7b310f..941d4f4 100644 --- a/integration_tests/models/input_table_3.sql +++ b/integration_tests/models/input_table_3.sql @@ -3,5 +3,5 @@ select {{ pm_utils.to_integer('3') }} as "RN", {{ pm_utils.to_boolean('false') }} as "Column_B", {{ pm_utils.to_integer('7') }} as "Column_C", - {{ pm_utils.to_double('2.1') }} as "Column_E", + {{ pm_utils.to_boolean('false') }} as "Column_E", null as "Column_F" diff --git a/integration_tests/models/test_union_relations.sql b/integration_tests/models/test_union_relations.sql index c3b1265..a503018 100644 --- a/integration_tests/models/test_union_relations.sql +++ b/integration_tests/models/test_union_relations.sql @@ -8,7 +8,7 @@ Union_Three_Expected as ( union all select {{ pm_utils.to_integer('2') }}, 'B', null, {{ pm_utils.to_integer('5') }}, {{ pm_utils.to_double('3.5') }}, null, null union all - select {{ pm_utils.to_integer('3') }}, null, {{pm_utils.to_boolean('false')}}, {{ pm_utils.to_integer('7') }}, null, {{ pm_utils.to_double('2.1') }}, null + select {{ pm_utils.to_integer('3') }}, null, {{pm_utils.to_boolean('false')}}, {{ pm_utils.to_integer('7') }}, null, {{ pm_utils.to_boolean('false') }}, null ) select From fdf78a925a249d08a332b1fe0383e9f7c1fa9f4b Mon Sep 17 00:00:00 2001 From: Mozhgan Date: Fri, 14 Nov 2025 14:13:25 +0100 Subject: [PATCH 11/13] feat: update document and union macro --- .../azure-pipelines-integration-tests.yml | 14 +++--- README.md | 8 ++++ dbt_project.yml | 2 +- integration_tests/models/input_table_1.sql | 6 --- integration_tests/models/input_table_2.sql | 7 --- integration_tests/models/input_table_3.sql | 7 --- .../models/test_union/input_table_1.sql | 6 +++ .../models/test_union/input_table_2.sql | 7 +++ .../models/test_union/input_table_3.sql | 7 +++ .../models/test_union/test_union.sql | 29 +++++++++++ .../models/test_union_relations.sql | 29 ----------- 
.../aggregate_functions/union_relations.sql | 48 ------------------- macros/union.sql | 33 +++++++++++++ 13 files changed, 98 insertions(+), 105 deletions(-) delete mode 100644 integration_tests/models/input_table_1.sql delete mode 100644 integration_tests/models/input_table_2.sql delete mode 100644 integration_tests/models/input_table_3.sql create mode 100644 integration_tests/models/test_union/input_table_1.sql create mode 100644 integration_tests/models/test_union/input_table_2.sql create mode 100644 integration_tests/models/test_union/input_table_3.sql create mode 100644 integration_tests/models/test_union/test_union.sql delete mode 100644 integration_tests/models/test_union_relations.sql delete mode 100644 macros/aggregate_functions/union_relations.sql create mode 100644 macros/union.sql diff --git a/.pipelines/azure-pipelines-integration-tests.yml b/.pipelines/azure-pipelines-integration-tests.yml index 93c0e26..a177b69 100644 --- a/.pipelines/azure-pipelines-integration-tests.yml +++ b/.pipelines/azure-pipelines-integration-tests.yml @@ -125,16 +125,16 @@ jobs: dbt deps displayName: Install dbt dependencies - - bash: | - source dbt-env/bin/activate - cd $(dbtProjectPath) - dbt build --profiles-dir $(Agent.BuildDirectory)/self/.pipelines --profile default -t snowflake-ci \ - --vars '{"DBT_SNOWFLAKE_ACCOUNT": "$(DBT_SNOWFLAKE_ACCOUNT)", "DBT_SNOWFLAKE_USER": "$(DBT_SNOWFLAKE_USER)", "DBT_SNOWFLAKE_PASSWORD": "$(DBT_SNOWFLAKE_PASSWORD)", "DBT_SNOWFLAKE_ROLE": "$(DBT_SNOWFLAKE_ROLE)", "DBT_SNOWFLAKE_DATABASE": "$(DBT_SNOWFLAKE_DATABASE)", "DBT_SNOWFLAKE_WAREHOUSE": "$(DBT_SNOWFLAKE_WAREHOUSE)", "DBT_SCHEMA": "$(DBT_SCHEMA)"}' - displayName: Test (Snowflake) - - bash: | source dbt-env-sqlserver/bin/activate cd $(dbtProjectPath) dbt build --profiles-dir $(Agent.BuildDirectory)/self/.pipelines --profile default -t sqlserver-ci \ --vars '{"DBT_SQL_SERVER_SERVER": "$(DBT_SQL_SERVER_SERVER)", "DBT_SQL_SERVER_USER": "$(DBT_SQL_SERVER_USER)", "DBT_SQL_SERVER_PASSWORD": 
"$(DBT_SQL_SERVER_PASSWORD)", "DBT_SQL_SERVER_DATABASE": "$(DBT_SQL_SERVER_DATABASE)", "DBT_SCHEMA": "$(DBT_SCHEMA)"}' displayName: Test (SQL Server) + + - bash: | + source dbt-env/bin/activate + cd $(dbtProjectPath) + dbt build --profiles-dir $(Agent.BuildDirectory)/self/.pipelines --profile default -t snowflake-ci \ + --vars '{"DBT_SNOWFLAKE_ACCOUNT": "$(DBT_SNOWFLAKE_ACCOUNT)", "DBT_SNOWFLAKE_USER": "$(DBT_SNOWFLAKE_USER)", "DBT_SNOWFLAKE_PASSWORD": "$(DBT_SNOWFLAKE_PASSWORD)", "DBT_SNOWFLAKE_ROLE": "$(DBT_SNOWFLAKE_ROLE)", "DBT_SNOWFLAKE_DATABASE": "$(DBT_SNOWFLAKE_DATABASE)", "DBT_SNOWFLAKE_WAREHOUSE": "$(DBT_SNOWFLAKE_WAREHOUSE)", "DBT_SCHEMA": "$(DBT_SCHEMA)"}' + displayName: Test (Snowflake) diff --git a/README.md b/README.md index c438d69..2534bde 100644 --- a/README.md +++ b/README.md @@ -63,6 +63,7 @@ This dbt package contains macros for SQL functions to run the dbt project on mul - [Post hooks](#Post-hooks) - [create_index](#create_index-source) - [record_count](#record_count-source) +- [union](#union-source) ### SQL generators @@ -407,6 +408,13 @@ Usage: ) }} ``` +#### union ([source](macros/aggregate_functions/union.sql)) +This macro performs a union of two or more relations. The relations can be models or source tables in the dbt project. The macro automatically aligns the columns based on the column names. If a column is missing in one of the relations, the macro adds the column with null values to that relation. The order of the columns in the result is based on the order of the columns in the first relation provided as argument. 
+ +Usage: +`{{ pm_utils.union([ref('Model_A'), source('source_name', 'Table_B'), ref('Model_C')]) }}` + + Variables: - max_records_error - max_records_warning diff --git a/dbt_project.yml b/dbt_project.yml index 7ca573f..7b89340 100644 --- a/dbt_project.yml +++ b/dbt_project.yml @@ -1,5 +1,5 @@ name: 'pm_utils' -version: '2.4.2' +version: '2.4.0' config-version: 2 require-dbt-version: [">=1.0.0", "<2.0.0"] diff --git a/integration_tests/models/input_table_1.sql b/integration_tests/models/input_table_1.sql deleted file mode 100644 index d6f185d..0000000 --- a/integration_tests/models/input_table_1.sql +++ /dev/null @@ -1,6 +0,0 @@ -{# This creates a table that can be used in tests that require a relation as argument. #} -select - {{ pm_utils.to_integer('1') }} as "RN", - 'A' as "Column_A", - {{ pm_utils.to_boolean('true') }} as "Column_B", - null as "Column_C" diff --git a/integration_tests/models/input_table_2.sql b/integration_tests/models/input_table_2.sql deleted file mode 100644 index a397fb1..0000000 --- a/integration_tests/models/input_table_2.sql +++ /dev/null @@ -1,7 +0,0 @@ -{# This creates a table that can be used in tests that require a relation as argument. #} -select - {{ pm_utils.to_integer('5') }} as "Column_C", - {{ pm_utils.to_double('3.5') }} as "Column_D", - null as "Column_E", - {{ pm_utils.to_integer('2') }} as "RN", - 'B' as "Column_A" diff --git a/integration_tests/models/input_table_3.sql b/integration_tests/models/input_table_3.sql deleted file mode 100644 index 941d4f4..0000000 --- a/integration_tests/models/input_table_3.sql +++ /dev/null @@ -1,7 +0,0 @@ -{# This creates a table that can be used in tests that require a relation as argument. 
#} -select - {{ pm_utils.to_integer('3') }} as "RN", - {{ pm_utils.to_boolean('false') }} as "Column_B", - {{ pm_utils.to_integer('7') }} as "Column_C", - {{ pm_utils.to_boolean('false') }} as "Column_E", - null as "Column_F" diff --git a/integration_tests/models/test_union/input_table_1.sql b/integration_tests/models/test_union/input_table_1.sql new file mode 100644 index 0000000..d1b4a8d --- /dev/null +++ b/integration_tests/models/test_union/input_table_1.sql @@ -0,0 +1,6 @@ +{# This creates a table that can be used in test_union. #} +select + 1 as "RN", + 'A' as "Column_A", + {{ pm_utils.to_boolean('true') }} as "Column_B", + null as "Column_C" diff --git a/integration_tests/models/test_union/input_table_2.sql b/integration_tests/models/test_union/input_table_2.sql new file mode 100644 index 0000000..86ea572 --- /dev/null +++ b/integration_tests/models/test_union/input_table_2.sql @@ -0,0 +1,7 @@ +{# This creates a table that can be used in test_union. #} +select + 5 as "Column_C", + 3.5 as "Column_D", + null as "Column_E", + 2 as "RN", + 'B' as "Column_A" diff --git a/integration_tests/models/test_union/input_table_3.sql b/integration_tests/models/test_union/input_table_3.sql new file mode 100644 index 0000000..eb735df --- /dev/null +++ b/integration_tests/models/test_union/input_table_3.sql @@ -0,0 +1,7 @@ +{# This creates a table that can be used in test_union. 
#} +select + 3 as "RN", + {{ pm_utils.to_boolean('false') }} as "Column_B", + 7 as "Column_C", + {{ pm_utils.to_boolean('false') }} as "Column_E", + null as "Column_F" diff --git a/integration_tests/models/test_union/test_union.sql b/integration_tests/models/test_union/test_union.sql new file mode 100644 index 0000000..62b29d1 --- /dev/null +++ b/integration_tests/models/test_union/test_union.sql @@ -0,0 +1,29 @@ +with Union_Three as ( + {{ pm_utils.union([ref('input_table_1'), ref('input_table_2'), ref('input_table_3')]) }} +), + +Union_Three_Expected as ( + select + 1 as "RN", 'A' as "Column_A_Expected", {{pm_utils.to_boolean('true')}} as "Column_B_Expected", null as "Column_C_Expected", null as "Column_D_Expected", null as "Column_E_Expected", null as "Column_F_Expected" + union all + select 2, 'B', null, 5, 3.5, null, null + union all + select 3, null, {{pm_utils.to_boolean('false')}}, 7, null, {{ pm_utils.to_boolean('false') }}, null +) + +select + Union_Three."Column_A", + Union_Three."Column_B", + Union_Three."Column_C", + Union_Three."Column_D", + Union_Three."Column_E", + Union_Three."Column_F", + Union_Three_Expected."Column_A_Expected", + Union_Three_Expected."Column_B_Expected", + Union_Three_Expected."Column_C_Expected", + Union_Three_Expected."Column_D_Expected", + Union_Three_Expected."Column_E_Expected", + Union_Three_Expected."Column_F_Expected" +from Union_Three +left join Union_Three_Expected + on Union_Three."RN" = Union_Three_Expected."RN" diff --git a/integration_tests/models/test_union_relations.sql b/integration_tests/models/test_union_relations.sql deleted file mode 100644 index a503018..0000000 --- a/integration_tests/models/test_union_relations.sql +++ /dev/null @@ -1,29 +0,0 @@ -with Union_Three as ( - {{ pm_utils.union_relations([ref('input_table_1'), ref('input_table_2'), ref('input_table_3')]) }} -), - -Union_Three_Expected as ( - select - {{ pm_utils.to_integer('1') }} as "RN", 'A' as "Column_A_Expected", 
{{pm_utils.to_boolean('true')}} as "Column_B_Expected", null as "Column_C_Expected", null as "Column_D_Expected", null as "Column_E_Expected", null as "Column_F_Expected" - union all - select {{ pm_utils.to_integer('2') }}, 'B', null, {{ pm_utils.to_integer('5') }}, {{ pm_utils.to_double('3.5') }}, null, null - union all - select {{ pm_utils.to_integer('3') }}, null, {{pm_utils.to_boolean('false')}}, {{ pm_utils.to_integer('7') }}, null, {{ pm_utils.to_boolean('false') }}, null -) - -select - Union_Three."Column_A", - Union_Three."Column_B", - Union_Three."Column_C", - Union_Three."Column_D", - Union_Three."Column_E", - Union_Three."Column_F", - Union_Three_Expected."Column_A_Expected", - Union_Three_Expected."Column_B_Expected", - Union_Three_Expected."Column_C_Expected", - Union_Three_Expected."Column_D_Expected", - Union_Three_Expected."Column_E_Expected", - Union_Three_Expected."Column_F_Expected" -from Union_Three -left join Union_Three_Expected - on Union_Three."RN" = Union_Three_Expected."RN" diff --git a/macros/aggregate_functions/union_relations.sql b/macros/aggregate_functions/union_relations.sql deleted file mode 100644 index 69cb7e9..0000000 --- a/macros/aggregate_functions/union_relations.sql +++ /dev/null @@ -1,48 +0,0 @@ -{%- macro union_relations(relations) -%} - {# - relations: a list of relation objects or strings (table/view names) that exist in the database. - This macro unions any number of tables, aligning columns by name and filling missing columns with NULLs. - It ensures that columns with the same name are cast to a common type across all relations. 
- #} - {%- set joint_columns = [] -%} - {%- set column_types = {} -%} - {%- for relation in relations -%} - {%- set cols = adapter.get_columns_in_relation(relation) -%} - {%- for col in cols -%} - {%- if col.name not in joint_columns -%} - {%- do joint_columns.append(col.name) -%} - {%- set _ = column_types.update({col.name: col.dtype}) -%} - {%- else -%} - {#- If a column appears in multiple tables, prefer the first type found -#} - {%- endif -%} - {%- endfor -%} - {%- endfor -%} - - {%- set selects = [] -%} - {%- for relation in relations -%} - {%- set cols = adapter.get_columns_in_relation(relation) -%} - {%- set col_names = cols | map(attribute='name') | list -%} - {%- set col_types = {} -%} - {%- for col in cols -%} - {%- set _ = col_types.update({col.name: col.dtype}) -%} - {%- endfor -%} - {%- set select_parts = [] -%} - {%- for col in joint_columns -%} - {%- set target_type = column_types[col] -%} - {%- if col in col_names -%} - {%- set source_type = col_types[col] -%} - {%- if source_type != target_type -%} - {%- do select_parts.append('CAST("' ~ col ~ '" AS ' ~ target_type ~ ') as "' ~ col ~ '"') -%} - {%- else -%} - {%- do select_parts.append('"' ~ col ~ '"') -%} - {%- endif -%} - {%- else -%} - {%- do select_parts.append('CAST(NULL AS ' ~ target_type ~ ') as "' ~ col ~ '"') -%} - {%- endif -%} - {%- endfor -%} - {%- set select_sql = 'select ' ~ select_parts | join(', ') ~ ' from ' ~ relation -%} - {%- do selects.append(select_sql) -%} - {%- endfor -%} - - {{ selects | join('\nunion all\n') }} -{%- endmacro -%} diff --git a/macros/union.sql b/macros/union.sql new file mode 100644 index 0000000..37bf450 --- /dev/null +++ b/macros/union.sql @@ -0,0 +1,33 @@ +{%- macro union(relations) -%} + {# + relations: a list of relations (model/ source tables) that exist in the database. + This macro unions any number of tables, aligning columns by name and filling missing columns with NULLs. 
+ #} + {%- set joint_columns = [] -%} + {%- for relation in relations -%} + {%- set cols = adapter.get_columns_in_relation(relation) -%} + {%- for col in cols -%} + {%- if col.name not in joint_columns -%} + {%- do joint_columns.append(col.name) -%} + {%- endif -%} + {%- endfor -%} + {%- endfor -%} + + {%- set selects = [] -%} + {%- for relation in relations -%} + {%- set cols = adapter.get_columns_in_relation(relation) -%} + {%- set col_names = cols | map(attribute='name') | list -%} + {%- set select_parts = [] -%} + {%- for col in joint_columns -%} + {%- if col in col_names -%} + {%- do select_parts.append('"' ~ col ~ '"') -%} + {%- else -%} + {%- do select_parts.append('NULL as "' ~ col ~ '"') -%} + {%- endif -%} + {%- endfor -%} + {%- set select_sql = 'select ' ~ select_parts | join(', ') ~ ' from ' ~ relation -%} + {%- do selects.append(select_sql) -%} + {%- endfor -%} + + {{ selects | join('\nunion all\n') }} +{%- endmacro -%} From c156943935cd77611e41b91a5c21d93167845d4e Mon Sep 17 00:00:00 2001 From: Mozhgan Date: Fri, 14 Nov 2025 14:25:08 +0100 Subject: [PATCH 12/13] feat: update input table --- integration_tests/models/test_union/input_table_3.sql | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/integration_tests/models/test_union/input_table_3.sql b/integration_tests/models/test_union/input_table_3.sql index eb735df..769b4cb 100644 --- a/integration_tests/models/test_union/input_table_3.sql +++ b/integration_tests/models/test_union/input_table_3.sql @@ -3,5 +3,5 @@ select 3 as "RN", {{ pm_utils.to_boolean('false') }} as "Column_B", 7 as "Column_C", - {{ pm_utils.to_boolean('false') }} as "Column_E", + 2.3 as "Column_E", null as "Column_F" From 334e8006f3a04607b8e6acf6f96d54834110471c Mon Sep 17 00:00:00 2001 From: Mozhgan Date: Fri, 14 Nov 2025 15:03:43 +0100 Subject: [PATCH 13/13] feat: update documents --- README.md | 16 ++++++++-------- macros/{ => SQL_generators}/union.sql | 0 2 files changed, 8 insertions(+), 8 deletions(-) rename 
macros/{ => SQL_generators}/union.sql (100%) diff --git a/README.md b/README.md index 2534bde..71d7425 100644 --- a/README.md +++ b/README.md @@ -30,6 +30,7 @@ This dbt package contains macros for SQL functions to run the dbt project on mul - [optional](#optional-source) - [optional_table](#optional_table-source) - [star](#star-source) + - [union](#union-source) - [Data type cast functions](#Data-type-cast-functions) - [as_varchar](#as_varchar-source) - [to_boolean](#to_boolean-source) @@ -63,7 +64,6 @@ This dbt package contains macros for SQL functions to run the dbt project on mul - [Post hooks](#Post-hooks) - [create_index](#create_index-source) - [record_count](#record_count-source) -- [union](#union-source) ### SQL generators @@ -153,6 +153,13 @@ select from {{ ref('Table_A') }} ``` +#### union ([source](macros/SQL_generators/union.sql)) +This macro performs a union of two or more relations. The relations can be models or source tables in the dbt project. The macro automatically aligns the columns based on the column names. If a column is missing in one of the relations, the macro adds the column with null values to that relation. The result lists the columns of the first relation in their original order, followed by any columns that occur only in later relations, in the order in which they are first encountered. + +Usage: +`{{ pm_utils.union([ref('Model_A'), source('source_name', 'Table_B'), ref('Model_C')]) }}` + + ### Data type cast functions #### as_varchar ([source](macros/data_type_cast_functions/as_varchar.sql)) @@ -408,13 +415,6 @@ Usage: ) }} ``` -#### union ([source](macros/aggregate_functions/union.sql)) -This macro performs a union of two or more relations. The relations can be models or source tables in the dbt project. The macro automatically aligns the columns based on the column names. If a column is missing in one of the relations, the macro adds the column with null values to that relation.
The order of the columns in the result is based on the order of the columns in the first relation provided as argument. - -Usage: -`{{ pm_utils.union([ref('Model_A'), source('source_name', 'Table_B'), ref('Model_C')]) }}` - - Variables: - max_records_error - max_records_warning diff --git a/macros/union.sql b/macros/SQL_generators/union.sql similarity index 100% rename from macros/union.sql rename to macros/SQL_generators/union.sql