From 2810f43a5bf242d9724e49eeb882d57cdb4bb308 Mon Sep 17 00:00:00 2001 From: colinthebomb1 Date: Tue, 21 Oct 2025 20:35:21 -0700 Subject: [PATCH] fix issue with subtree generalization not applying to distinct on --- core/rule_generator.py | 4 ++-- tests/test_rule_generator.py | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/core/rule_generator.py b/core/rule_generator.py index 0035b5f..0d3f001 100644 --- a/core/rule_generator.py +++ b/core/rule_generator.py @@ -1472,13 +1472,13 @@ def replaceSubtreesOfASTJson(astJson: Any, path: list, subtree: dict, var: str) # if the subtree is the same as the given subtree # if RuleGenerator.sameSubtree(astJson, subtree): - # special case for 'select' list + # special case for 'select' or 'distinct_on' list # e.g., for {'select': [{'value': 'V001.V002'}, {'value': 'V001.age'}, ...]} # astJson = {'value': 'V001.V002'} # subtree = {'value': 'V001.V002'} # var = 'V005' # we should return {'value': 'V005'} - if len(path) > 0 and path[-1] == 'select' and 'value'in astJson.keys(): + if len(path) > 0 and path[-1] in ['select', 'distinct_on'] and 'value'in astJson.keys(): return {'value': var} # otherwise return var diff --git a/tests/test_rule_generator.py b/tests/test_rule_generator.py index 27c9e33..87f926f 100644 --- a/tests/test_rule_generator.py +++ b/tests/test_rule_generator.py @@ -2556,8 +2556,8 @@ def test_generate_spreadsheet_id_18(): assert type(rule) is dict q0_rule, q1_rule = unify_variable_names(rule['pattern'], rule['rewrite']) - assert StringUtil.strim(RuleGenerator._fingerPrint(q0_rule)) == StringUtil.strim(RuleGenerator._fingerPrint("SELECT DISTINCT ON () , , , COALESCE(., ), <> FROM LEFT JOIN ON <> LEFT JOIN ON <> WHERE <> AND . IN (, , , , , , ) AND <> ORDER BY . DESC")) - assert StringUtil.strim(RuleGenerator._fingerPrint(q1_rule)) == StringUtil.strim(RuleGenerator._fingerPrint("SELECT , , , COALESCE((SELECT . FROM WHERE <> AND <> LIMIT ), ), (SELECT <> FROM WHERE <> AND . IN (, , , , , , ) LIMIT ) FROM WHERE <>")) + assert q0_rule == "SELECT DISTINCT ON () , , , COALESCE(., ), <> FROM LEFT JOIN ON <> LEFT JOIN ON <> WHERE <> AND . IN (, , , , , , ) AND <> ORDER BY . DESC" + assert q1_rule == "SELECT , , , COALESCE((SELECT . FROM WHERE <> AND <> LIMIT ), ), (SELECT <> FROM WHERE <> AND . IN (, , , , , , ) LIMIT ) FROM WHERE <>" def test_generate_spreadsheet_id_20(): q0 = """SELECT * FROM (SELECT * FROM (SELECT NULL FROM EMP) WHERE N IS NULL) WHERE N IS NULL"""