Skip to content

Commit 5747778

Browse files
author
Dat Nguyen
committed
feat: schema diff more visibility
1 parent 8b7caf5 commit 5747778

File tree

6 files changed

+31
-17
lines changed

6 files changed

+31
-17
lines changed

integration_tests/dbt_project.yml

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -16,23 +16,23 @@ on-run-start:
1616
{% endif %}
1717
1818
vars:
19-
data_diff__database: data_diff
20-
data_diff__schema: common
19+
# data_diff__database: data_diff
20+
data_diff__schema: datadiff
2121
# For normal (non-async) mode
2222
data_diff__configured_tables:
2323
- # src_db: data_diff
24-
src_schema: blue_dat
24+
src_schema: dbt_blue
2525
src_table: my_first_dbt_model
2626
# trg_db: data_diff
27-
trg_schema: green_dat
27+
trg_schema: dbt_green
2828
trg_table: my_first_dbt_model
2929
pk: id # id1,id2
3030
include_columns: []
3131
exclude_columns: ["loaded_at"]
3232
# pipe_name: awesome_thread
33-
- src_schema: blue_dat
33+
- src_schema: dbt_blue
3434
src_table: my_second_dbt_model
35-
trg_schema: green_dat
35+
trg_schema: dbt_green
3636
trg_table: my_second_dbt_model
3737
pk: id
3838
include_columns: []

integration_tests/models/example/my_first_dbt_model.sql

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -11,16 +11,16 @@
1111

1212
with source_data as (
1313

14-
select 1 as id, 'id 1' as id_description
14+
select 1 as id, 'id 1' as id_description, {% if target.name.lower() == 'blue' %} '100' {%else %} 100 {% endif %} as amount
1515
union all
16-
select null as id, 'null' as id_description
16+
select null as id, 'null' as id_description, {% if target.name.lower() == 'blue' %} '100' {%else %} 100 {% endif %} as amount
1717
union all
18-
select 2 as id, 'id 2' as id_description
18+
select 2 as id, 'id 2' as id_description, {% if target.name.lower() == 'blue' %} '100' {%else %} 100 {% endif %} as amount
1919
union all
20-
select 3 as id, {% if target.name.lower() == 'blue' %}'id 3 blue'{% else %}'id 3 green'{% endif %}as id_description
20+
select 3 as id, {% if target.name.lower() == 'blue' %}'id 3 blue'{% else %}'id 3 green'{% endif %}as id_description, {% if target.name.lower() == 'blue' %} '100' {%else %} 100 {% endif %} as amount
2121
{% if target.name.lower() == 'blue' %}
2222
union all
23-
select 4 as id, 'id 4' as id_description
23+
select 4 as id, 'id 4' as id_description, {% if target.name.lower() == 'blue' %} '100' {%else %} 100 {% endif %} as amount
2424
{% endif %}
2525

2626
)

integration_tests/models/example/my_second_dbt_model.sql

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11

22
-- Use the `ref` function to select from other models
33

4-
select *
4+
select id, id_description
55
from {{ ref('my_first_dbt_model') }}
66
where true
77
{# and id is not null #}

macros/sis/diff_helper.py

Lines changed: 11 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -69,9 +69,17 @@
6969
)
7070
7171
select case when r.src_db is null then '🟢' else '🔴' end as result
72-
,concat(r.number_of_exclusive_source, ' (',upper(r.exclusive_source_list),')') as source_not_found
73-
,concat(r.number_of_exclusive_source, ' (',upper(r.exclusive_target_list),')') as target_not_found
74-
,coalesce(1 - r.mutual_columns * 1.0 / r.number_of_columns, 0) as failed_rate
72+
,case
73+
when r.number_of_exclusive_source > 0 then concat(r.number_of_exclusive_source, ' (',upper(r.exclusive_source_list),')')
74+
end as source_not_found
75+
,case
76+
-- Guard on and print the target-side counter so the count matches the columns listed from exclusive_target_list.
when r.number_of_exclusive_target > 0 then concat(r.number_of_exclusive_target, ' (',upper(r.exclusive_target_list),')')
77+
end as target_not_found
78+
,coalesce(1 - r.mutual_columns * 1.0 / r.number_of_columns, 0) as not_found_rate
79+
,case
80+
when r.number_of_false_datatype_check > 0 then concat(r.number_of_false_datatype_check, ' (',upper(r.false_datatype_check_list),')')
81+
end as data_type_mismatched
82+
,coalesce(r.number_of_false_datatype_check * 1.0 / r.number_of_columns, 0) as mismatched_rate
7583
,concat(
7684
c.src_db,'.',c.src_schema,'.',c.src_table,
7785
' ▶️ ',

models/02_schema_diff/schema_check_summary.sql

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,11 @@ select
1616
, diff_run_id
1717
, count(*) as number_of_columns
1818
, sum(case when common_col then 1 else 0 end) as mutual_columns
19+
1920
, sum(case when datatype_check then 0 else 1 end) as number_of_false_datatype_check
21+
, listagg(
22+
case when not datatype_check then column_name end, ', '
23+
) within group (order by column_name) as false_datatype_check_list
2024

2125
, sum(case when is_exclusive_src then 1 else 0 end) as number_of_exclusive_target
2226
, listagg(

models/02_schema_diff/schema_check_summary.yml

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -23,10 +23,12 @@ models:
2323
description: Total number of columns
2424
- name: mutual_columns
2525
description: Total number of columns existing in both target and source
26-
- name: number_of_exclusive_target
27-
description: Number of columns exist in target only
2826
- name: number_of_false_datatype_check
2927
description: Number of columns whose data types do not match between source and target
28+
- name: false_datatype_check_list
29+
description: List of columns whose data types do not match between source and target
30+
- name: number_of_exclusive_target
31+
description: Number of columns that exist in the target only
3032
- name: exclusive_target_list
3133
description: List of columns that exist in the target only
3234
- name: number_of_exclusive_source

0 commit comments

Comments
 (0)