From 7758756a6e01ce1422126d7607a68a561e353fd8 Mon Sep 17 00:00:00 2001 From: yangjiang Date: Mon, 6 Mar 2023 15:27:18 +0800 Subject: [PATCH 1/3] [KYLIN-5473] Use Calcite to parse DDL to create Model --- pom.xml | 16 + .../apache/kylin/common/util/StringUtil.java | 12 + .../percentile/PercentileMeasureType.java | 3 + .../kylin/metadata/model/JoinTableDesc.java | 2 + src/modeling-service/pom.xml | 4 + src/query/pom.xml | 4 + .../apache/kylin/query/util/DDLParser.java | 424 ++++++++++++++++++ .../kylin/query/util/DDLParserTest.java | 247 ++++++++++ 8 files changed, 712 insertions(+) create mode 100644 src/query/src/main/java/org/apache/kylin/query/util/DDLParser.java create mode 100644 src/query/src/test/java/org/apache/kylin/query/util/DDLParserTest.java diff --git a/pom.xml b/pom.xml index 516eae722b9..f6c82dc53a2 100644 --- a/pom.xml +++ b/pom.xml @@ -1840,6 +1840,22 @@ + + + org.apache.calcite + calcite-server + ${calcite.version} + + + com.google.protobuf + protobuf-java + + + com.fasterxml.jackson.core + * + + + org.apache.calcite.avatica avatica-core diff --git a/src/core-common/src/main/java/org/apache/kylin/common/util/StringUtil.java b/src/core-common/src/main/java/org/apache/kylin/common/util/StringUtil.java index 5776a5895b9..e946ea5ebc3 100644 --- a/src/core-common/src/main/java/org/apache/kylin/common/util/StringUtil.java +++ b/src/core-common/src/main/java/org/apache/kylin/common/util/StringUtil.java @@ -213,4 +213,16 @@ public static String[] split(String str, String splitBy) { return str.split(splitBy); } + /** + * Returns the first part of origin that equals sub ignoring case, in origin's own casing, or null if absent; matched in place because lower-casing both strings first can change their length and shift indexes. + */ + public static String extractSubStringIgnoreSensitive(String origin, String sub) { + for (int i = 0; i + sub.length() <= origin.length(); i++) { + if (origin.regionMatches(true, i, sub, 0, sub.length())) { + return origin.substring(i, i + sub.length()); + } + } + return null; + } + + } diff --git a/src/core-metadata/src/main/java/org/apache/kylin/measure/percentile/PercentileMeasureType.java 
b/src/core-metadata/src/main/java/org/apache/kylin/measure/percentile/PercentileMeasureType.java index 69ae984293a..3c3d5653414 100644 --- a/src/core-metadata/src/main/java/org/apache/kylin/measure/percentile/PercentileMeasureType.java +++ b/src/core-metadata/src/main/java/org/apache/kylin/measure/percentile/PercentileMeasureType.java @@ -38,6 +38,9 @@ public class PercentileMeasureType extends MeasureType { public static final String FUNC_PERCENTILE = "PERCENTILE"; public static final String FUNC_PERCENTILE_APPROX = "PERCENTILE_APPROX"; public static final String DATATYPE_PERCENTILE = "percentile"; + public static final String FUNC_PERCENTILE_100 = "PERCENTILE_100"; + public static final String FUNC_PERCENTILE_1000 = "PERCENTILE_1000"; + public static final String FUNC_PERCENTILE_10000 = "PERCENTILE_10000"; public PercentileMeasureType(String funcName, DataType dataType) { this.dataType = dataType; diff --git a/src/core-metadata/src/main/java/org/apache/kylin/metadata/model/JoinTableDesc.java b/src/core-metadata/src/main/java/org/apache/kylin/metadata/model/JoinTableDesc.java index 169a79cac58..15675de3e9b 100644 --- a/src/core-metadata/src/main/java/org/apache/kylin/metadata/model/JoinTableDesc.java +++ b/src/core-metadata/src/main/java/org/apache/kylin/metadata/model/JoinTableDesc.java @@ -28,9 +28,11 @@ import lombok.Getter; import lombok.Setter; +import lombok.ToString; @Getter @Setter +@ToString @JsonAutoDetect(fieldVisibility = Visibility.NONE, getterVisibility = Visibility.NONE, isGetterVisibility = Visibility.NONE, setterVisibility = Visibility.NONE) public class JoinTableDesc implements Serializable { private static final long serialVersionUID = 1L; diff --git a/src/modeling-service/pom.xml b/src/modeling-service/pom.xml index c560e34aaae..a91d23060a0 100644 --- a/src/modeling-service/pom.xml +++ b/src/modeling-service/pom.xml @@ -43,6 +43,10 @@ org.apache.kylin kylin-datasource-service + + org.apache.kylin + kylin-query + org.apache.spark spark-sql_2.12 
diff --git a/src/query/pom.xml b/src/query/pom.xml index ca0c7cc2f26..f87ade3e517 100644 --- a/src/query/pom.xml +++ b/src/query/pom.xml @@ -68,6 +68,10 @@ org.postgresql postgresql + + org.apache.calcite + calcite-server + commons-collections diff --git a/src/query/src/main/java/org/apache/kylin/query/util/DDLParser.java b/src/query/src/main/java/org/apache/kylin/query/util/DDLParser.java new file mode 100644 index 00000000000..bfa808ab7bd --- /dev/null +++ b/src/query/src/main/java/org/apache/kylin/query/util/DDLParser.java @@ -0,0 +1,424 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.kylin.query.util; + +import static org.apache.kylin.measure.percentile.PercentileMeasureType.FUNC_PERCENTILE; +import static org.apache.kylin.measure.percentile.PercentileMeasureType.FUNC_PERCENTILE_100; +import static org.apache.kylin.measure.percentile.PercentileMeasureType.FUNC_PERCENTILE_1000; +import static org.apache.kylin.measure.percentile.PercentileMeasureType.FUNC_PERCENTILE_10000; +import static org.apache.kylin.measure.percentile.PercentileMeasureType.FUNC_PERCENTILE_APPROX; +import static org.apache.kylin.metadata.model.FunctionDesc.FUNC_COUNT; +import static org.apache.kylin.metadata.model.FunctionDesc.FUNC_MAX; +import static org.apache.kylin.metadata.model.FunctionDesc.FUNC_MIN; +import static org.apache.kylin.metadata.model.FunctionDesc.FUNC_SUM; + +import java.util.Arrays; +import java.util.List; +import java.util.stream.Collectors; + +import org.apache.calcite.sql.JoinType; +import org.apache.calcite.sql.SqlBasicCall; +import org.apache.calcite.sql.SqlCall; +import org.apache.calcite.sql.SqlIdentifier; +import org.apache.calcite.sql.SqlJoin; +import org.apache.calcite.sql.SqlNode; +import org.apache.calcite.sql.SqlNodeList; +import org.apache.calcite.sql.SqlSelect; +import org.apache.calcite.sql.ddl.SqlCreateMaterializedView; +import org.apache.calcite.sql.parser.SqlParser; +import org.apache.calcite.sql.parser.ddl.ParseException; +import org.apache.calcite.sql.parser.ddl.SqlDdlParserImpl; +import org.apache.kylin.common.util.Pair; +import org.apache.kylin.common.util.StringUtil; +import org.apache.kylin.metadata.model.JoinDesc; +import org.apache.kylin.metadata.model.JoinTableDesc; +import org.apache.kylin.metadata.model.NDataModel; +import org.apache.kylin.query.engine.KECalciteConfig; + +import com.google.common.collect.ImmutableList; +import com.google.common.collect.Lists; + +import lombok.Getter; +import lombok.Setter; +import lombok.ToString; + +public class DDLParser { + private final SqlParser.Config config; + 
public static final String FUNC_HLL_COUNT = "HLL_COUNT"; + public static final String FUNC_HLL_COUNT_10 = "HLL_COUNT_10"; + public static final String FUNC_HLL_COUNT_12 = "HLL_COUNT_12"; + public static final String FUNC_HLL_COUNT_14 = "HLL_COUNT_14"; + public static final String FUNC_HLL_COUNT_15 = "HLL_COUNT_15"; + public static final String FUNC_HLL_COUNT_16 = "HLL_COUNT_16"; + public static final String FUNC_BITMAP_COUNT = "BITMAP_COUNT"; + + private static final List<String> SUPPORT_MEASURE_PREFIX = Lists.newArrayList(FUNC_BITMAP_COUNT, FUNC_HLL_COUNT, + FUNC_PERCENTILE, FUNC_SUM, FUNC_MAX, FUNC_MIN, FUNC_COUNT, FUNC_HLL_COUNT_10, FUNC_HLL_COUNT_12, + FUNC_HLL_COUNT_14, FUNC_HLL_COUNT_15, FUNC_HLL_COUNT_16, FUNC_PERCENTILE_APPROX, FUNC_PERCENTILE_100, + FUNC_PERCENTILE_1000, FUNC_PERCENTILE_10000); + + public DDLParser(SqlParser.Config config) { + this.config = config; + } + + public static DDLParser CreateParser(KECalciteConfig connectionConfig) { + SqlParser.Config parserConfig = SqlParser.configBuilder().setQuotedCasing(connectionConfig.quotedCasing()) + .setUnquotedCasing(connectionConfig.unquotedCasing()).setQuoting(connectionConfig.quoting()) + .setIdentifierMaxLength(1024).setConformance(connectionConfig.conformance()) + .setCaseSensitive(connectionConfig.caseSensitive()).setParserFactory(SqlDdlParserImpl.FACTORY).build(); + return new DDLParser(parserConfig); + } + + public DDLParserResult parseSQL(String sql) throws Exception { + SqlCreateMaterializedView sNode = (SqlCreateMaterializedView) SqlParser.create(sql, this.config).parseQuery(); + DDLParserResult result = new DDLParserResult(); + SqlIdentifier identifier = (SqlIdentifier) sNode.getOperandList().get(0); + SqlSelect sqlSelect = (SqlSelect) sNode.getOperandList().get(2); + + parseFromIdentifier(identifier, result, sql); + + parseDimensionsAndMeasures(sqlSelect, result); + if (sqlSelect.getFrom() instanceof SqlJoin) { + SqlJoin from = (SqlJoin) sqlSelect.getFrom(); + parseFromWithJoin(from, result); + } 
else { + SqlIdentifier from = (SqlIdentifier) sqlSelect.getFrom(); + parseFromWithOutJoin(from, result); + } + + return result; + } + + private void parseFromWithOutJoin(SqlIdentifier fact, DDLParserResult result) throws ParseException { + result.setFactTable(getTableFullName(fact.names)); + result.setJoinTables(Lists.newArrayList()); + } + + private final static String emptyJoinConditionErr = "DDL not support without join condition!"; + private final static String joinTypeErr = "DDL only support InnerJoin or LeftJoin!"; + private final static String joinConditionErr = "DDL only support equal join!"; + + private void checkJoin(SqlJoin sqlJoin) throws ParseException { + if (sqlJoin.getCondition() == null) { + throw new ParseException(emptyJoinConditionErr); + } else { + SqlCall cond = (SqlCall) sqlJoin.getCondition(); + if (cond.toString().contains(">") || cond.toString().contains("<")) { /* any '<' or '>' (<, <=, >, >=, <>) means a non-equi join */ + throw new ParseException(joinConditionErr); + } + } + JoinType joinType = sqlJoin.getJoinType(); + if (joinType != JoinType.INNER && joinType != JoinType.LEFT) { + throw new ParseException(joinTypeErr); + } + } + + private void parseFromWithJoin(SqlJoin sqlJoin, DDLParserResult result) throws ParseException { + checkJoin(sqlJoin); + SqlIdentifier fact; + List<SqlIdentifier> lookUp = Lists.newArrayList(); + List<JoinDesc> joinsDesc = Lists.newArrayList(); + SqlNode left = sqlJoin.getLeft(); + SqlNode right = sqlJoin.getRight(); + if (right != null) { + lookUp.add((SqlIdentifier) right); + JoinDesc joinDesc = getJoinDesc(sqlJoin); + joinsDesc.add(joinDesc); + } + while (left instanceof SqlJoin) { + SqlJoin leftJoin = (SqlJoin) left; + checkJoin(leftJoin); + lookUp.add((SqlIdentifier) leftJoin.getRight()); + JoinDesc joinDesc = getJoinDesc((SqlJoin) left); + joinsDesc.add(joinDesc); + left = leftJoin.getLeft(); + } + fact = (SqlIdentifier) left; + // 1. 
set factTable + result.setFactTable(getTableFullName(fact.names)); + + if (lookUp.size() != joinsDesc.size()) { + String msg = "Parse join info size fail" + sqlJoin; + throw new ParseException(msg); + } + + // 2. set lookupTable and joinDesc + List<JoinTableDesc> joinTableDesc = Lists.newArrayList(); + for (int i = 0; i < lookUp.size(); i++) { + JoinTableDesc jd = new JoinTableDesc(); + SqlIdentifier l = lookUp.get(i); + if (l.names.size() < 2) { + throw new ParseException("In joinCondition table name must be db_name.table_name"); + } + jd.setTable(getTableFullName(l.names)); + // `names` is [db_name, table_name]; the table name doubles as the alias + jd.setAlias(l.names.get(1)); + jd.setJoin(joinsDesc.get(i)); + joinTableDesc.add(jd); + } + result.setJoinTables(joinTableDesc); + } + + private void parseDimensionsAndMeasures(SqlSelect sqlSelect, DDLParserResult result) throws ParseException { + SqlNodeList selectList = sqlSelect.getSelectList(); + List<SqlIdentifier> dims = Lists.newArrayList(); + List<SqlBasicCall> meas = Lists.newArrayList(); + + for (SqlNode node : selectList) { + if (node instanceof SqlIdentifier) { + dims.add((SqlIdentifier) node); + } else if (node instanceof SqlBasicCall) { + meas.add((SqlBasicCall) node); + } else { + throw new ParseException("Unexpected select: ".concat(node.toString())); + } + } + + if (dims.isEmpty()) { + throw new ParseException("In DDL dimensions should not be empty."); + } + parseDimsInner(dims, result); + parseMeasInner(meas, result); + + } + + private void parseMeasInner(List<SqlBasicCall> meas, DDLParserResult result) { + List<InnerMeasure> measures = meas.stream().map(m -> { + // 1. set measure type (name upper-cased once so the case-sensitive switch lookups below match) + InnerMeasure measure = new InnerMeasure(); + String measureName = m.getOperator().getName().toUpperCase(java.util.Locale.ROOT); + try { + checkMeasure(measureName); + } catch (ParseException e) { + throw new RuntimeException(e); + } + measure.setExpression(getMeasureExprInner(measureName)); + + // 2. 
set related column + List<Pair<String, String>> parameterValues = Arrays.stream(m.getOperands()).map(operand -> { + Pair<String, String> pair = new Pair<>(); + pair.setFirst("column"); + try { + pair.setSecond(getColNameWithTable(((SqlIdentifier) operand).names)); + } catch (ParseException e) { + throw new RuntimeException(e); + } + return pair; + }).collect(Collectors.toList()); + measure.setParameterValues(parameterValues); + + // 3. set measure return type + measure.setReturnType(getMeasureTypeInner(measureName)); + return measure; + }).collect(Collectors.toList()); + result.setSimplifiedMeasures(measures); + } + + private void checkMeasure(String measureName) throws ParseException { + String upperCaseName = measureName.toUpperCase(java.util.Locale.ROOT); + boolean res = SUPPORT_MEASURE_PREFIX.stream().anyMatch(str -> str.equals(upperCaseName)); + if (!res) { + throw new ParseException("Measure type not support: " + measureName); + } + } + + private void parseDimsInner(List<SqlIdentifier> dims, DDLParserResult result) { + List<NDataModel.NamedColumn> cols = dims.stream().map(d -> { + NDataModel.NamedColumn col = new NDataModel.NamedColumn(); + try { + col.setAliasDotColumn(getColNameWithTable(d.names)); + col.setName(getColNameWithTable(d.names).replace('.', '_')); + } catch (ParseException e) { + throw new RuntimeException(e); + } + return col; + }).collect(Collectors.toList()); + result.setSimplifiedDimensions(cols); + } + + private void parseFromIdentifier(SqlIdentifier identifier, DDLParserResult result, String sql) throws Exception { + ImmutableList<String> names = identifier.names; + if (names.size() == 2 || names.size() == 4) { + // use `extractSubStringIgnoreSensitive` because project and model names are case-sensitive. + result.setProjectName(StringUtil.extractSubStringIgnoreSensitive(sql, names.get(0))); + result.setModelName(StringUtil.extractSubStringIgnoreSensitive(sql, names.get(1))); + if (names.size() == 4) { + result.setPartitionColName(names.get(2) + '.' 
+ names.get(3)); + } + } else { + throw new ParseException( + "Identifier should contains project_name, model_name, partition_col_name(optional):" + names); + } + } + + private JoinDesc getJoinDesc(SqlJoin join) throws ParseException { + JoinDesc res = new JoinDesc(); + res.setType(join.getJoinType().toString().toUpperCase(java.util.Locale.ROOT)); + List<String> pKeys = Lists.newArrayList(); + List<String> fKeys = Lists.newArrayList(); + //Just get the outer condition + SqlBasicCall condition = (SqlBasicCall) join.getCondition(); + List<SqlNode> operands = Arrays.stream(condition.getOperands()).collect(Collectors.toList()); + for (int i = 0; i < operands.size(); i++) { + SqlNode operand = operands.get(i); + if (operand instanceof SqlBasicCall) { + SqlBasicCall call = (SqlBasicCall) operand; + operands.addAll(call.getOperandList()); + } else if (operand instanceof SqlIdentifier) { + SqlIdentifier id = (SqlIdentifier) operand; + String colNameWithTable = getColNameWithTable(id.names); + // columns are taken alternately: foreign key first, then its primary key + if (pKeys.size() == fKeys.size()) { + fKeys.add(colNameWithTable); + } else { + pKeys.add(colNameWithTable); + } + } + } + res.setPrimaryKey(pKeys.toArray(new String[0])); + res.setForeignKey(fKeys.toArray(new String[0])); + return res; + } + + private String getColNameWithTable(ImmutableList<String> names) throws ParseException { + if (names.size() == 2) { + return names.get(0) + '.' + names.get(1); + } else { + throw new ParseException("colName must be table_name.col_name, got:" + names); + } + } + + private String getTableFullName(ImmutableList<String> names) throws ParseException { + if (names.size() == 2) { + return names.get(0) + '.' + names.get(1); + } else if (names.size() == 1) { + return "DEFAULT" + '.' 
+ names.get(0); + } else { + throw new ParseException("tableName must be db_name.table_name, got:" + names); + } + } + + // default set to 14 + public static final String HLL_COUNT_TYPE = "hllc(14)"; + public static final String HLL_COUNT_TYPE_10 = "hllc(10)"; + public static final String HLL_COUNT_TYPE_12 = "hllc(12)"; + public static final String HLL_COUNT_TYPE_14 = "hllc(14)"; + public static final String HLL_COUNT_TYPE_15 = "hllc(15)"; + public static final String HLL_COUNT_TYPE_16 = "hllc(16)"; + public static final String BITMAP_COUNT_TYPE = "bitmap"; + // default set to 100 + public static final String PERCENTILE_TYPE = "percentile(100)"; + public static final String PERCENTILE_TYPE_100 = "percentile(100)"; + public static final String PERCENTILE_TYPE_1000 = "percentile(1000)"; + public static final String PERCENTILE_TYPE_10000 = "percentile(10000)"; + private static final String COUNT_DISTINCT_EXPR = "COUNT_DISTINCT"; + private static final String PERCENTILE_EXPR = "PERCENTILE_APPROX"; + + // min, max, sum need set to `UNDEFINED`, then check return type in kylin + public static final String UNDEFINED_TYPE = "UNDEFINED"; + + private String getMeasureTypeInner(String measureName) { + switch (measureName) { + case "COUNT": + return "bigint"; + + case FUNC_PERCENTILE: + case FUNC_PERCENTILE_APPROX: + return PERCENTILE_TYPE; + + case FUNC_HLL_COUNT: + return HLL_COUNT_TYPE; + + case FUNC_BITMAP_COUNT: + return BITMAP_COUNT_TYPE; + // Support diff precise hll + case FUNC_HLL_COUNT_10: + return HLL_COUNT_TYPE_10; + case FUNC_HLL_COUNT_12: + return HLL_COUNT_TYPE_12; + case FUNC_HLL_COUNT_14: + return HLL_COUNT_TYPE_14; + case FUNC_HLL_COUNT_15: + return HLL_COUNT_TYPE_15; + case FUNC_HLL_COUNT_16: + return HLL_COUNT_TYPE_16; + // Support diff precise percentile + case FUNC_PERCENTILE_100: + return PERCENTILE_TYPE_100; + case FUNC_PERCENTILE_1000: + return PERCENTILE_TYPE_1000; + case FUNC_PERCENTILE_10000: + return PERCENTILE_TYPE_10000; + default: + return 
UNDEFINED_TYPE; + } + } + + private String getMeasureExprInner(String measureName) { + switch (measureName) { + case FUNC_PERCENTILE: + case FUNC_PERCENTILE_APPROX: + case FUNC_PERCENTILE_100: + case FUNC_PERCENTILE_1000: + case FUNC_PERCENTILE_10000: + return PERCENTILE_EXPR; + + case FUNC_HLL_COUNT: + case FUNC_BITMAP_COUNT: + case FUNC_HLL_COUNT_10: + case FUNC_HLL_COUNT_12: + case FUNC_HLL_COUNT_14: + case FUNC_HLL_COUNT_15: + case FUNC_HLL_COUNT_16: + return COUNT_DISTINCT_EXPR; + + default: + return measureName.toUpperCase(java.util.Locale.ROOT); + } + } + + @Getter + @Setter + @ToString + public static class DDLParserResult { + String modelName; + String ProjectName; + String partitionColName; + // dimension columns as table_name.col_name + List<NDataModel.NamedColumn> simplifiedDimensions; + // see InnerMeasure + List<InnerMeasure> simplifiedMeasures; + String factTable; + + // also means lookup tables + List<JoinTableDesc> joinTables; + } + + @Getter + @Setter + @ToString + public static class InnerMeasure { + // MIN, MAX, HLL(10) etc + String expression; + // Measure return type like bigInt, Integer, bitmap, hll(10) + String returnType; + // ("column", "table_name.col_name") + List<Pair<String, String>> parameterValues; + } +} diff --git a/src/query/src/test/java/org/apache/kylin/query/util/DDLParserTest.java b/src/query/src/test/java/org/apache/kylin/query/util/DDLParserTest.java new file mode 100644 index 00000000000..7bd6bdc64d9 --- /dev/null +++ b/src/query/src/test/java/org/apache/kylin/query/util/DDLParserTest.java @@ -0,0 +1,247 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.kylin.query.util; + +import static org.mockito.Mockito.mock; + +import java.util.List; + +import org.apache.calcite.sql.parser.ddl.ParseException; +import org.apache.kylin.common.KylinConfig; +import org.apache.kylin.metadata.model.JoinTableDesc; +import org.apache.kylin.metadata.model.NDataModel; +import org.apache.kylin.query.engine.KECalciteConfig; +import org.junit.Assert; +import org.junit.Test; + +public class DDLParserTest { + @Test + public void test_multi_join() throws Exception { + + KylinConfig kylinConfig = mock(KylinConfig.class); + KECalciteConfig config = KECalciteConfig.fromKapConfig(kylinConfig); + + String sql1 = "CREATE MATERIALIZED VIEW project.test_model.table1.c1_1 AS\n" + "SELECT table1.c1_1,\n" + + " table2.c2_2,\n" + " MAX(table1.c3),\n" + " hll_count(table1.c4),\n" + + " bitmap_count(table1.c5)\n" + "FROM \n" + " db.table1 JOIN db.table2\n" + + " ON table1.c1_1 = table2.c2_1 and table1.c1_3 = table2.c2_3\n" + " JOIN db.table3\n" + + " ON table1.c1_2 = table2.c2_2\n" + "GROUP BY CUBE(table1.c1_1, table2.c2_2)"; + + DDLParser ddlParser = DDLParser.CreateParser(config); + DDLParser.DDLParserResult result = ddlParser.parseSQL(sql1); + + Assert.assertEquals(result.getProjectName(), "project"); + Assert.assertEquals(result.getModelName(), "test_model"); + Assert.assertEquals(result.getPartitionColName(), "table1.c1_1".toUpperCase()); + + List simplifiedDimensions = result.getSimplifiedDimensions(); + Assert.assertEquals(simplifiedDimensions.size(), 2); + //Not use id info + 
Assert.assertEquals(simplifiedDimensions.toString(), + "[NDataModel.NamedColumn(id=0, name=TABLE1_C1_1, aliasDotColumn=TABLE1.C1_1, status=EXIST), NDataModel.NamedColumn(id=0, name=TABLE2_C2_2, aliasDotColumn=TABLE2.C2_2, status=EXIST)]"); + + List simplifiedMeasures = result.getSimplifiedMeasures(); + Assert.assertEquals(simplifiedMeasures.size(), 3); + Assert.assertEquals(simplifiedMeasures.get(0).toString(), + "DDLParser.InnerMeasure(expression=MAX, returnType=UNDEFINED, parameterValues=[{column,TABLE1.C3}])"); + Assert.assertEquals(simplifiedMeasures.get(1).toString(), + "DDLParser.InnerMeasure(expression=COUNT_DISTINCT, returnType=hllc(14), parameterValues=[{column,TABLE1.C4}])"); + Assert.assertEquals(simplifiedMeasures.get(2).toString(), + "DDLParser.InnerMeasure(expression=COUNT_DISTINCT, returnType=bitmap, parameterValues=[{column,TABLE1.C5}])"); + + Assert.assertEquals(result.getFactTable(), "db.table1".toUpperCase()); + + List joinTables = result.getJoinTables(); + Assert.assertEquals(joinTables.size(), 2); + Assert.assertEquals(joinTables.get(0).toString(), + "JoinTableDesc(table=DB.TABLE3, kind=LOOKUP, alias=TABLE3, join=JoinDesc [type=INNER, primary_key=[TABLE2.C2_2], foreign_key=[TABLE1.C1_2]], flattenable=null, joinRelationTypeEnum=MANY_TO_ONE, tableRef=null)"); + Assert.assertEquals(joinTables.get(1).toString(), + "JoinTableDesc(table=DB.TABLE2, kind=LOOKUP, alias=TABLE2, join=JoinDesc [type=INNER, primary_key=[TABLE2.C2_1, TABLE2.C2_3], foreign_key=[TABLE1.C1_1, TABLE1.C1_3]], flattenable=null, joinRelationTypeEnum=MANY_TO_ONE, tableRef=null)"); + + } + + @Test + public void test_without_join() throws Exception { + KylinConfig kylinConfig = mock(KylinConfig.class); + KECalciteConfig config = KECalciteConfig.fromKapConfig(kylinConfig); + + String sql1 = "CREATE MATERIALIZED VIEW project.test_model AS\n" + "SELECT table1.c1,\n" + " table1.c2,\n" + + " percentile(table1.c3)\n" + "FROM \n" + " db.table1 \n" + + "GROUP BY CUBE(table1.c1, table1.c2)"; + 
+ DDLParser ddlParser = DDLParser.CreateParser(config); + DDLParser.DDLParserResult result = ddlParser.parseSQL(sql1); + + Assert.assertEquals(result.getProjectName(), "project"); + Assert.assertEquals(result.getModelName(), "test_model"); + Assert.assertNull(result.getPartitionColName()); + + List simplifiedDimensions = result.getSimplifiedDimensions(); + Assert.assertEquals(simplifiedDimensions.size(), 2); + Assert.assertEquals(simplifiedDimensions.toString(), + "[NDataModel.NamedColumn(id=0, name=TABLE1_C1, aliasDotColumn=TABLE1.C1, status=EXIST), NDataModel.NamedColumn(id=0, name=TABLE1_C2, aliasDotColumn=TABLE1.C2, status=EXIST)]"); + + List simplifiedMeasures = result.getSimplifiedMeasures(); + Assert.assertEquals(simplifiedMeasures.size(), 1); + Assert.assertEquals(simplifiedMeasures.get(0).toString(), + "DDLParser.InnerMeasure(expression=PERCENTILE_APPROX, returnType=percentile(100), parameterValues=[{column,TABLE1.C3}])"); + Assert.assertEquals(result.getFactTable(), "db.table1".toUpperCase()); + + List joinTables = result.getJoinTables(); + Assert.assertEquals(joinTables.size(), 0); + + } + + @Test + public void test_one_join() throws Exception { + + KylinConfig kylinConfig = mock(KylinConfig.class); + KECalciteConfig config = KECalciteConfig.fromKapConfig(kylinConfig); + + String sql1 = "CREATE MATERIALIZED VIEW project.test_model.table1.c1_1 AS\n" + "SELECT table1.c1_1,\n" + + " table2.c2_2,\n" + " MAX(table1.c3)\n" + "FROM \n" + " db.table1 JOIN db.table2\n" + + " ON table1.c1_1 = table2.c2_1 and table1.c1_3 = table2.c2_3\n" + + "GROUP BY CUBE(table1.c1_1, table2.c2_2)"; + + DDLParser ddlParser = DDLParser.CreateParser(config); + DDLParser.DDLParserResult result = ddlParser.parseSQL(sql1); + + Assert.assertEquals(result.getProjectName(), "project"); + Assert.assertEquals(result.getModelName(), "test_model"); + Assert.assertEquals(result.getPartitionColName(), "table1.c1_1".toUpperCase()); + + List simplifiedDimensions = 
result.getSimplifiedDimensions(); + Assert.assertEquals(simplifiedDimensions.size(), 2); + //Not use id info + Assert.assertEquals(simplifiedDimensions.toString(), + "[NDataModel.NamedColumn(id=0, name=TABLE1_C1_1, aliasDotColumn=TABLE1.C1_1, status=EXIST), NDataModel.NamedColumn(id=0, name=TABLE2_C2_2, aliasDotColumn=TABLE2.C2_2, status=EXIST)]"); + + List simplifiedMeasures = result.getSimplifiedMeasures(); + Assert.assertEquals(simplifiedMeasures.size(), 1); + Assert.assertEquals(simplifiedMeasures.get(0).toString(), + "DDLParser.InnerMeasure(expression=MAX, returnType=UNDEFINED, parameterValues=[{column,TABLE1.C3}])"); + + Assert.assertEquals(result.getFactTable(), "db.table1".toUpperCase()); + + List joinTables = result.getJoinTables(); + Assert.assertEquals(joinTables.size(), 1); + Assert.assertEquals(joinTables.get(0).toString(), + "JoinTableDesc(table=DB.TABLE2, kind=LOOKUP, alias=TABLE2, join=JoinDesc [type=INNER, primary_key=[TABLE2.C2_1, TABLE2.C2_3], foreign_key=[TABLE1.C1_1, TABLE1.C1_3]], flattenable=null, joinRelationTypeEnum=MANY_TO_ONE, tableRef=null)"); + } + + @Test + public void model_project_name_case() throws Exception { + + KylinConfig kylinConfig = mock(KylinConfig.class); + KECalciteConfig config = KECalciteConfig.fromKapConfig(kylinConfig); + + String sql1 = "CREATE MATERIALIZED VIEW proJect_Name.tesT_model.table1.c1_1 AS\n" + "SELECT table1.c1_1,\n" + + " table2.c2_2,\n" + " MAX(table1.c3)\n" + "FROM \n" + " db.table1 JOIN db.table2\n" + + " ON table1.c1_1 = table2.c2_1 and table1.c1_3 = table2.c2_3\n" + + "GROUP BY CUBE(table1.c1_1, table2.c2_2)"; + + DDLParser ddlParser = DDLParser.CreateParser(config); + DDLParser.DDLParserResult result = ddlParser.parseSQL(sql1); + + Assert.assertEquals(result.getProjectName(), "proJect_Name"); + Assert.assertEquals(result.getModelName(), "tesT_model"); + Assert.assertEquals(result.getPartitionColName(), "table1.c1_1".toUpperCase()); + } + + @Test + public void test_forbidden_join_condition() throws 
Exception { + KylinConfig kylinConfig = mock(KylinConfig.class); + KECalciteConfig config = KECalciteConfig.fromKapConfig(kylinConfig); + + // 1. no equal join + String sql1 = "CREATE MATERIALIZED VIEW proJect_Name.tesT_model.table1.c1_1 AS\n" + "SELECT table1.c1_1,\n" + + " table2.c2_2,\n" + " MAX(table1.c3)\n" + "FROM \n" + " db.table1 JOIN db.table2\n" + + " ON table1.c1_1 = table2.c2_1 and table1.c1_3 > table2.c2_3\n" + + "GROUP BY CUBE(table1.c1_1, table2.c2_2)"; + + DDLParser ddlParser = DDLParser.CreateParser(config); + Assert.assertThrows(ParseException.class, () -> ddlParser.parseSQL(sql1)); + + // 2. no equal join + String sql2 = "CREATE MATERIALIZED VIEW proJect_Name.tesT_model.table1.c1_1 AS\n" + "SELECT table1.c1_1,\n" + + " table2.c2_2,\n" + " MAX(table1.c3)\n" + "FROM \n" + " db.table1 JOIN db.table2\n" + + " ON table1.c1_1 > table2.c2_1 \n" + "GROUP BY CUBE(table1.c1_1, table2.c2_2)"; + + Assert.assertThrows(ParseException.class, () -> ddlParser.parseSQL(sql2)); + + // 3. no join condition + String sql3 = "CREATE MATERIALIZED VIEW proJect_Name.tesT_model.table1.c1_1 AS\n" + "SELECT table1.c1_1,\n" + + " table2.c2_2,\n" + " MAX(table1.c3)\n" + "FROM \n" + " db.table1 JOIN db.table2\n" + + "GROUP BY CUBE(table1.c1_1, table2.c2_2)"; + Assert.assertThrows(ParseException.class, () -> ddlParser.parseSQL(sql3)); + + // 3. 
right join + String sql4 = "CREATE MATERIALIZED VIEW proJect_Name.tesT_model.table1.c1_1 AS\n" + "SELECT table1.c1_1,\n" + + " table2.c2_2,\n" + " MAX(table1.c3)\n" + "FROM \n" + " db.table1 RIGHT JOIN db.table2\n" + + " ON table1.c1_1 = table2.c2_1 and table1.c1_3 = table2.c2_3\n" + + "GROUP BY CUBE(table1.c1_1, table2.c2_2)"; + Assert.assertThrows(ParseException.class, () -> ddlParser.parseSQL(sql4)); + } + + @Test + public void test_measure_diff_accuracy_type() throws Exception { + KylinConfig kylinConfig = mock(KylinConfig.class); + KECalciteConfig config = KECalciteConfig.fromKapConfig(kylinConfig); + + String sql1 = "CREATE MATERIALIZED VIEW project.test_model AS\n" + "SELECT table1.c1,\n" + " table1.c2,\n" + + " percentile_100(table1.c3),\n" + " percentile_1000(table1.c3),\n" + + " percentile_10000(table1.c3),\n" + " hll_count_10(table1.c4),\n" + + " hll_count_12(table1.c4),\n" + " hll_count_14(table1.c4),\n" + + " hll_count_15(table1.c4),\n" + " hll_count_16(table1.c4)\n" + "FROM \n" + + " db.table1 \n" + "GROUP BY CUBE(table1.c1, table1.c2)"; + + DDLParser ddlParser = DDLParser.CreateParser(config); + DDLParser.DDLParserResult result = ddlParser.parseSQL(sql1); + + List simplifiedDimensions = result.getSimplifiedDimensions(); + Assert.assertEquals(simplifiedDimensions.size(), 2); + Assert.assertEquals(simplifiedDimensions.toString(), + "[NDataModel.NamedColumn(id=0, name=TABLE1_C1, aliasDotColumn=TABLE1.C1, status=EXIST), NDataModel.NamedColumn(id=0, name=TABLE1_C2, aliasDotColumn=TABLE1.C2, status=EXIST)]"); + + List simplifiedMeasures = result.getSimplifiedMeasures(); + Assert.assertEquals(simplifiedMeasures.size(), 8); + Assert.assertEquals(simplifiedMeasures.get(0).toString(), + "DDLParser.InnerMeasure(expression=PERCENTILE_APPROX, returnType=percentile(100), parameterValues=[{column,TABLE1.C3}])"); + Assert.assertEquals(result.getFactTable(), "db.table1".toUpperCase()); + Assert.assertEquals(simplifiedMeasures.get(1).toString(), + 
"DDLParser.InnerMeasure(expression=PERCENTILE_APPROX, returnType=percentile(1000), parameterValues=[{column,TABLE1.C3}])"); + Assert.assertEquals(result.getFactTable(), "db.table1".toUpperCase()); + Assert.assertEquals(simplifiedMeasures.get(2).toString(), + "DDLParser.InnerMeasure(expression=PERCENTILE_APPROX, returnType=percentile(10000), parameterValues=[{column,TABLE1.C3}])"); + Assert.assertEquals(result.getFactTable(), "db.table1".toUpperCase()); + + Assert.assertEquals(simplifiedMeasures.get(3).toString(), + "DDLParser.InnerMeasure(expression=COUNT_DISTINCT, returnType=hllc(10), parameterValues=[{column,TABLE1.C4}])"); + Assert.assertEquals(simplifiedMeasures.get(4).toString(), + "DDLParser.InnerMeasure(expression=COUNT_DISTINCT, returnType=hllc(12), parameterValues=[{column,TABLE1.C4}])"); + Assert.assertEquals(simplifiedMeasures.get(5).toString(), + "DDLParser.InnerMeasure(expression=COUNT_DISTINCT, returnType=hllc(14), parameterValues=[{column,TABLE1.C4}])"); + Assert.assertEquals(simplifiedMeasures.get(6).toString(), + "DDLParser.InnerMeasure(expression=COUNT_DISTINCT, returnType=hllc(15), parameterValues=[{column,TABLE1.C4}])"); + Assert.assertEquals(simplifiedMeasures.get(7).toString(), + "DDLParser.InnerMeasure(expression=COUNT_DISTINCT, returnType=hllc(16), parameterValues=[{column,TABLE1.C4}])"); + + } + +} From 5b949619d9278e84f01e3c444fcca57bd29be067 Mon Sep 17 00:00:00 2001 From: yangjiang Date: Mon, 6 Mar 2023 17:29:40 +0800 Subject: [PATCH 2/3] rewrite DDLParserResult to ModelRequest --- .../kylin/rest/service/ModelService.java | 94 +++++++++++++++++++ .../apache/kylin/query/util/DDLParser.java | 2 +- 2 files changed, 95 insertions(+), 1 deletion(-) diff --git a/src/modeling-service/src/main/java/org/apache/kylin/rest/service/ModelService.java b/src/modeling-service/src/main/java/org/apache/kylin/rest/service/ModelService.java index d83c15c7511..a96e7cd8a9b 100644 --- 
a/src/modeling-service/src/main/java/org/apache/kylin/rest/service/ModelService.java +++ b/src/modeling-service/src/main/java/org/apache/kylin/rest/service/ModelService.java @@ -70,6 +70,7 @@ import static org.apache.kylin.job.execution.JobTypeEnum.INDEX_MERGE; import static org.apache.kylin.job.execution.JobTypeEnum.INDEX_REFRESH; import static org.apache.kylin.metadata.model.FunctionDesc.PARAMETER_TYPE_COLUMN; +import static org.apache.kylin.query.util.DDLParser.UNDEFINED_TYPE; import java.io.IOException; import java.math.BigDecimal; @@ -198,6 +199,8 @@ import org.apache.kylin.metadata.project.ProjectInstance; import org.apache.kylin.metadata.realization.RealizationStatusEnum; import org.apache.kylin.metadata.streaming.KafkaConfig; +import org.apache.kylin.query.engine.KECalciteConfig; +import org.apache.kylin.query.util.DDLParser; import org.apache.kylin.query.util.PushDownUtil; import org.apache.kylin.query.util.QueryParams; import org.apache.kylin.query.util.QueryUtil; @@ -232,6 +235,7 @@ import org.apache.kylin.rest.response.NDataModelResponse; import org.apache.kylin.rest.response.NDataSegmentResponse; import org.apache.kylin.rest.response.NModelDescResponse; +import org.apache.kylin.rest.response.ParameterResponse; import org.apache.kylin.rest.response.PurgeModelAffectedResponse; import org.apache.kylin.rest.response.RefreshAffectedSegmentsResponse; import org.apache.kylin.rest.response.RelatedModelResponse; @@ -331,6 +335,9 @@ public class ModelService extends AbstractModelService implements TableModelSupp @Autowired private IndexPlanService indexPlanService; + @Autowired + private TableService tableService; + @Autowired(required = false) @Qualifier("modelBuildService") private ModelBuildSupporter modelBuildService; @@ -2003,6 +2010,93 @@ public NDataModel createModel(String project, ModelRequest modelRequest) { }, project); } + public NDataModel createModelByDDl(String sql) throws Exception { + KylinConfig config = KylinConfig.getInstanceFromEnv(); + 
String convertedSql = QueryUtil.normalMassageSql(config, sql, 0, 0); + DDLParser ddlParser = DDLParser.CreateParser(KECalciteConfig.fromKapConfig(config)); + DDLParser.DDLParserResult ddlResult = ddlParser.parseSQL(convertedSql); + String project = ddlResult.getProjectName(); + ModelRequest modelRequest = convertToRequest(ddlResult); + aclEvaluate.checkProjectOperationPermission(modelRequest.getProject()); + + return createModel(project, modelRequest); + } + + private ModelRequest convertToRequest(DDLParser.DDLParserResult ddlResult) { + val request = new ModelRequest(); + request.setProject(ddlResult.getProjectName()); + request.setAlias(ddlResult.getModelName()); + + //join relations + request.setJoinTables(ddlResult.getJoinTables()); + request.setRootFactTableName(ddlResult.getFactTable()); + + // set partitionCol + PartitionDesc desc = new PartitionDesc(); + if (ddlResult.getPartitionColName() != null) { + desc.setPartitionDateColumn(ddlResult.getPartitionColName()); + desc.setPartitionDateFormat(setPartitionColType(ddlResult)); + } + request.setPartitionDesc(desc); + + // set dimensions and measures + request.setSimplifiedDimensions(ddlResult.getSimplifiedDimensions()); + request.setSimplifiedMeasures(convertToSimplifiedMeasure(ddlResult.getProjectName(), + ddlResult.getSimplifiedMeasures(), ddlResult.getFactTable())); + + // Default add base index + request.setWithBaseIndex(true); + return request; + } + + private String setPartitionColType(DDLParser.DDLParserResult ddlResult) { + NTableMetadataManager tableManager = tableService.getManager(NTableMetadataManager.class, + ddlResult.getProjectName()); + ColumnDesc col = tableManager + .getTableDesc(ddlResult.getFactTable().split("\\.")[0] + "." 
+ + ddlResult.getPartitionColName().split("\\.")[0]) + .findColumnByName(ddlResult.getPartitionColName()); + if (col == null) { + throw new KylinException(INVALID_PARAMETER, "Can not find partition col " + ddlResult.getPartitionColName()); + } + if (col.getDatatype().toLowerCase(Locale.ROOT).contains("int")) { + return "yyyyMMdd"; + } else { + return "yyyy-MM-dd"; + } + } + + private List convertToSimplifiedMeasure(String project, + List innerMeasures, String factTable) { + int id = 100000; + List result = Lists.newArrayList(); + NTableMetadataManager tableManager = tableService.getManager(NTableMetadataManager.class, project); + for (DDLParser.InnerMeasure innerMeasure : innerMeasures) { + SimplifiedMeasure simplifiedMeasure = new SimplifiedMeasure(); + simplifiedMeasure.setExpression(innerMeasure.getExpression()); + simplifiedMeasure.setId(id++); + simplifiedMeasure.setParameterValue(innerMeasure.getParameterValues().stream().map(pair -> + // First is type, second is colName + new ParameterResponse(pair.getFirst(), pair.getSecond())).collect(Collectors.toList())); + // Must have at least one argument + String colNameWithTable = innerMeasure.getParameterValues().get(0).getSecond(); + simplifiedMeasure.setName(colNameWithTable.toUpperCase(Locale.ROOT) + '_' + + innerMeasure.getExpression().toUpperCase(Locale.ROOT)); + if (!UNDEFINED_TYPE.equals(innerMeasure.getReturnType())) { + simplifiedMeasure.setReturnType(innerMeasure.getReturnType()); + } else { + // Simple measures like min, max, sum take their return type from the column's datatype + // the tableManager lookup key is built as db name + "." + table name + String datatype = tableManager + .getTableDesc(factTable.split("\\.")[0] + "."
+ colNameWithTable.split("\\.")[0]) + .findColumnByName(colNameWithTable).getDatatype(); + simplifiedMeasure.setReturnType(datatype); + } + result.add(simplifiedMeasure); + } + return result; + } + private NDataModel doCheckBeforeModelSave(String project, ModelRequest modelRequest) { checkAliasExist(modelRequest.getUuid(), modelRequest.getAlias(), project); modelRequest.setOwner(AclPermissionUtil.getCurrentUsername()); diff --git a/src/query/src/main/java/org/apache/kylin/query/util/DDLParser.java b/src/query/src/main/java/org/apache/kylin/query/util/DDLParser.java index bfa808ab7bd..6563aa2af97 100644 --- a/src/query/src/main/java/org/apache/kylin/query/util/DDLParser.java +++ b/src/query/src/main/java/org/apache/kylin/query/util/DDLParser.java @@ -119,7 +119,7 @@ private void checkJoin(SqlJoin sqlJoin) throws ParseException { throw new ParseException(emptyJoinConditionErr); } else { SqlCall cond = (SqlCall) sqlJoin.getCondition(); - if (cond.toString().contains(">") || cond.toString().contains(">")) { + if (cond.toString().contains(">") || cond.toString().contains("<")) { throw new ParseException(joinConditionErr); } } From 26078b505bee08c1fe4078abacd0903cb073fef1 Mon Sep 17 00:00:00 2001 From: yangjiang Date: Tue, 7 Mar 2023 10:37:05 +0800 Subject: [PATCH 3/3] fix code smell --- .../apache/kylin/query/util/DDLParser.java | 107 ++++++------ .../kylin/query/util/DDLParserTest.java | 156 ++++++++++-------- 2 files changed, 142 insertions(+), 121 deletions(-) diff --git a/src/query/src/main/java/org/apache/kylin/query/util/DDLParser.java b/src/query/src/main/java/org/apache/kylin/query/util/DDLParser.java index 6563aa2af97..d4bc5bc551e 100644 --- a/src/query/src/main/java/org/apache/kylin/query/util/DDLParser.java +++ b/src/query/src/main/java/org/apache/kylin/query/util/DDLParser.java @@ -57,6 +57,7 @@ import lombok.Getter; import lombok.Setter; import lombok.ToString; +import org.apache.kylin.query.exception.UnsupportedQueryException; public class DDLParser { 
private final SqlParser.Config config; @@ -206,7 +207,7 @@ private void parseMeasInner(List meas, DDLParserResult result) { try { checkMeasure(measureName); } catch (ParseException e) { - throw new RuntimeException(e); + throw new UnsupportedQueryException(e.toString()); } measure.setExpression(getMeasureExprInner(measureName)); @@ -217,7 +218,7 @@ private void parseMeasInner(List meas, DDLParserResult result) { try { pair.setSecond(getColNameWithTable(((SqlIdentifier) operand).names)); } catch (ParseException e) { - throw new RuntimeException(e); + throw new UnsupportedQueryException(e.toString()); } return pair; }).collect(Collectors.toList()); @@ -245,7 +246,7 @@ private void parseDimsInner(List dims, DDLParserResult result) { col.setAliasDotColumn(getColNameWithTable(d.names)); col.setName(getColNameWithTable(d.names).replace('.', '_')); } catch (ParseException e) { - throw new RuntimeException(e); + throw new UnsupportedQueryException(e.toString()); } return col; }).collect(Collectors.toList()); @@ -335,61 +336,61 @@ private String getTableFullName(ImmutableList names) throws ParseExcepti private String getMeasureTypeInner(String measureName) { switch (measureName) { - case "COUNT": - return "bigint"; - - case FUNC_PERCENTILE: - case FUNC_PERCENTILE_APPROX: - return PERCENTILE_TYPE; - - case FUNC_HLL_COUNT: - return HLL_COUNT_TYPE; - - case FUNC_BITMAP_COUNT: - return BITMAP_COUNT_TYPE; - // Support diff precise hll - case FUNC_HLL_COUNT_10: - return HLL_COUNT_TYPE_10; - case FUNC_HLL_COUNT_12: - return HLL_COUNT_TYPE_12; - case FUNC_HLL_COUNT_14: - return HLL_COUNT_TYPE_14; - case FUNC_HLL_COUNT_15: - return HLL_COUNT_TYPE_15; - case FUNC_HLL_COUNT_16: - return HLL_COUNT_TYPE_16; - // Support diff precise percentile - case FUNC_PERCENTILE_100: - return PERCENTILE_TYPE_100; - case FUNC_PERCENTILE_1000: - return PERCENTILE_TYPE_1000; - case FUNC_PERCENTILE_10000: - return PERCENTILE_TYPE_10000; - default: - return UNDEFINED_TYPE; + case "COUNT": + return 
"bigint"; + + case FUNC_PERCENTILE: + case FUNC_PERCENTILE_APPROX: + return PERCENTILE_TYPE; + + case FUNC_HLL_COUNT: + return HLL_COUNT_TYPE; + + case FUNC_BITMAP_COUNT: + return BITMAP_COUNT_TYPE; + // Support diff precise hll + case FUNC_HLL_COUNT_10: + return HLL_COUNT_TYPE_10; + case FUNC_HLL_COUNT_12: + return HLL_COUNT_TYPE_12; + case FUNC_HLL_COUNT_14: + return HLL_COUNT_TYPE_14; + case FUNC_HLL_COUNT_15: + return HLL_COUNT_TYPE_15; + case FUNC_HLL_COUNT_16: + return HLL_COUNT_TYPE_16; + // Support diff precise percentile + case FUNC_PERCENTILE_100: + return PERCENTILE_TYPE_100; + case FUNC_PERCENTILE_1000: + return PERCENTILE_TYPE_1000; + case FUNC_PERCENTILE_10000: + return PERCENTILE_TYPE_10000; + default: + return UNDEFINED_TYPE; } } private String getMeasureExprInner(String measureName) { switch (measureName) { - case FUNC_PERCENTILE: - case FUNC_PERCENTILE_APPROX: - case FUNC_PERCENTILE_100: - case FUNC_PERCENTILE_1000: - case FUNC_PERCENTILE_10000: - return PERCENTILE_EXPR; - - case FUNC_HLL_COUNT: - case FUNC_BITMAP_COUNT: - case FUNC_HLL_COUNT_10: - case FUNC_HLL_COUNT_12: - case FUNC_HLL_COUNT_14: - case FUNC_HLL_COUNT_15: - case FUNC_HLL_COUNT_16: - return COUNT_DISTINCT_EXPR; - - default: - return measureName.toUpperCase(); + case FUNC_PERCENTILE: + case FUNC_PERCENTILE_APPROX: + case FUNC_PERCENTILE_100: + case FUNC_PERCENTILE_1000: + case FUNC_PERCENTILE_10000: + return PERCENTILE_EXPR; + + case FUNC_HLL_COUNT: + case FUNC_BITMAP_COUNT: + case FUNC_HLL_COUNT_10: + case FUNC_HLL_COUNT_12: + case FUNC_HLL_COUNT_14: + case FUNC_HLL_COUNT_15: + case FUNC_HLL_COUNT_16: + return COUNT_DISTINCT_EXPR; + + default: + return measureName.toUpperCase(); } } diff --git a/src/query/src/test/java/org/apache/kylin/query/util/DDLParserTest.java b/src/query/src/test/java/org/apache/kylin/query/util/DDLParserTest.java index 7bd6bdc64d9..22d7060914b 100644 --- a/src/query/src/test/java/org/apache/kylin/query/util/DDLParserTest.java +++ 
b/src/query/src/test/java/org/apache/kylin/query/util/DDLParserTest.java @@ -46,33 +46,39 @@ public void test_multi_join() throws Exception { DDLParser ddlParser = DDLParser.CreateParser(config); DDLParser.DDLParserResult result = ddlParser.parseSQL(sql1); - Assert.assertEquals(result.getProjectName(), "project"); - Assert.assertEquals(result.getModelName(), "test_model"); - Assert.assertEquals(result.getPartitionColName(), "table1.c1_1".toUpperCase()); + Assert.assertEquals("project", result.getProjectName()); + Assert.assertEquals("test_model", result.getModelName()); + Assert.assertEquals("table1.c1_1".toUpperCase(), result.getPartitionColName()); List simplifiedDimensions = result.getSimplifiedDimensions(); - Assert.assertEquals(simplifiedDimensions.size(), 2); + Assert.assertEquals(2, simplifiedDimensions.size()); //Not use id info - Assert.assertEquals(simplifiedDimensions.toString(), - "[NDataModel.NamedColumn(id=0, name=TABLE1_C1_1, aliasDotColumn=TABLE1.C1_1, status=EXIST), NDataModel.NamedColumn(id=0, name=TABLE2_C2_2, aliasDotColumn=TABLE2.C2_2, status=EXIST)]"); + Assert.assertEquals( + "[NDataModel.NamedColumn(id=0, name=TABLE1_C1_1, aliasDotColumn=TABLE1.C1_1, status=EXIST), NDataModel.NamedColumn(id=0, name=TABLE2_C2_2, aliasDotColumn=TABLE2.C2_2, status=EXIST)]", + simplifiedDimensions.toString()); List simplifiedMeasures = result.getSimplifiedMeasures(); - Assert.assertEquals(simplifiedMeasures.size(), 3); - Assert.assertEquals(simplifiedMeasures.get(0).toString(), - "DDLParser.InnerMeasure(expression=MAX, returnType=UNDEFINED, parameterValues=[{column,TABLE1.C3}])"); - Assert.assertEquals(simplifiedMeasures.get(1).toString(), - "DDLParser.InnerMeasure(expression=COUNT_DISTINCT, returnType=hllc(14), parameterValues=[{column,TABLE1.C4}])"); - Assert.assertEquals(simplifiedMeasures.get(2).toString(), - "DDLParser.InnerMeasure(expression=COUNT_DISTINCT, returnType=bitmap, parameterValues=[{column,TABLE1.C5}])"); - - 
Assert.assertEquals(result.getFactTable(), "db.table1".toUpperCase()); + Assert.assertEquals(3, simplifiedMeasures.size()); + Assert.assertEquals( + "DDLParser.InnerMeasure(expression=MAX, returnType=UNDEFINED, parameterValues=[{column,TABLE1.C3}])", + simplifiedMeasures.get(0).toString()); + Assert.assertEquals( + "DDLParser.InnerMeasure(expression=COUNT_DISTINCT, returnType=hllc(14), parameterValues=[{column,TABLE1.C4}])", + simplifiedMeasures.get(1).toString()); + Assert.assertEquals( + "DDLParser.InnerMeasure(expression=COUNT_DISTINCT, returnType=bitmap, parameterValues=[{column,TABLE1.C5}])", + simplifiedMeasures.get(2).toString()); + + Assert.assertEquals("db.table1".toUpperCase(), result.getFactTable()); List joinTables = result.getJoinTables(); - Assert.assertEquals(joinTables.size(), 2); - Assert.assertEquals(joinTables.get(0).toString(), - "JoinTableDesc(table=DB.TABLE3, kind=LOOKUP, alias=TABLE3, join=JoinDesc [type=INNER, primary_key=[TABLE2.C2_2], foreign_key=[TABLE1.C1_2]], flattenable=null, joinRelationTypeEnum=MANY_TO_ONE, tableRef=null)"); - Assert.assertEquals(joinTables.get(1).toString(), - "JoinTableDesc(table=DB.TABLE2, kind=LOOKUP, alias=TABLE2, join=JoinDesc [type=INNER, primary_key=[TABLE2.C2_1, TABLE2.C2_3], foreign_key=[TABLE1.C1_1, TABLE1.C1_3]], flattenable=null, joinRelationTypeEnum=MANY_TO_ONE, tableRef=null)"); + Assert.assertEquals(2, joinTables.size()); + Assert.assertEquals( + "JoinTableDesc(table=DB.TABLE3, kind=LOOKUP, alias=TABLE3, join=JoinDesc [type=INNER, primary_key=[TABLE2.C2_2], foreign_key=[TABLE1.C1_2]], flattenable=null, joinRelationTypeEnum=MANY_TO_ONE, tableRef=null)", + joinTables.get(0).toString()); + Assert.assertEquals( + "JoinTableDesc(table=DB.TABLE2, kind=LOOKUP, alias=TABLE2, join=JoinDesc [type=INNER, primary_key=[TABLE2.C2_1, TABLE2.C2_3], foreign_key=[TABLE1.C1_1, TABLE1.C1_3]], flattenable=null, joinRelationTypeEnum=MANY_TO_ONE, tableRef=null)", + joinTables.get(1).toString()); } @@ -88,23 +94,25 @@ public 
void test_without_join() throws Exception { DDLParser ddlParser = DDLParser.CreateParser(config); DDLParser.DDLParserResult result = ddlParser.parseSQL(sql1); - Assert.assertEquals(result.getProjectName(), "project"); - Assert.assertEquals(result.getModelName(), "test_model"); + Assert.assertEquals("project", result.getProjectName()); + Assert.assertEquals("test_model", result.getModelName()); Assert.assertNull(result.getPartitionColName()); List simplifiedDimensions = result.getSimplifiedDimensions(); - Assert.assertEquals(simplifiedDimensions.size(), 2); - Assert.assertEquals(simplifiedDimensions.toString(), - "[NDataModel.NamedColumn(id=0, name=TABLE1_C1, aliasDotColumn=TABLE1.C1, status=EXIST), NDataModel.NamedColumn(id=0, name=TABLE1_C2, aliasDotColumn=TABLE1.C2, status=EXIST)]"); + Assert.assertEquals(2, simplifiedDimensions.size()); + Assert.assertEquals( + "[NDataModel.NamedColumn(id=0, name=TABLE1_C1, aliasDotColumn=TABLE1.C1, status=EXIST), NDataModel.NamedColumn(id=0, name=TABLE1_C2, aliasDotColumn=TABLE1.C2, status=EXIST)]", + simplifiedDimensions.toString()); List simplifiedMeasures = result.getSimplifiedMeasures(); - Assert.assertEquals(simplifiedMeasures.size(), 1); - Assert.assertEquals(simplifiedMeasures.get(0).toString(), - "DDLParser.InnerMeasure(expression=PERCENTILE_APPROX, returnType=percentile(100), parameterValues=[{column,TABLE1.C3}])"); + Assert.assertEquals(1, simplifiedMeasures.size()); + Assert.assertEquals( + "DDLParser.InnerMeasure(expression=PERCENTILE_APPROX, returnType=percentile(100), parameterValues=[{column,TABLE1.C3}])", + simplifiedMeasures.get(0).toString()); Assert.assertEquals(result.getFactTable(), "db.table1".toUpperCase()); List joinTables = result.getJoinTables(); - Assert.assertEquals(joinTables.size(), 0); + Assert.assertEquals(0, joinTables.size()); } @@ -122,27 +130,30 @@ public void test_one_join() throws Exception { DDLParser ddlParser = DDLParser.CreateParser(config); DDLParser.DDLParserResult result = 
ddlParser.parseSQL(sql1); - Assert.assertEquals(result.getProjectName(), "project"); - Assert.assertEquals(result.getModelName(), "test_model"); - Assert.assertEquals(result.getPartitionColName(), "table1.c1_1".toUpperCase()); + Assert.assertEquals("project", result.getProjectName()); + Assert.assertEquals("test_model", result.getModelName()); + Assert.assertEquals("table1.c1_1".toUpperCase(), result.getPartitionColName()); List simplifiedDimensions = result.getSimplifiedDimensions(); Assert.assertEquals(simplifiedDimensions.size(), 2); //Not use id info - Assert.assertEquals(simplifiedDimensions.toString(), - "[NDataModel.NamedColumn(id=0, name=TABLE1_C1_1, aliasDotColumn=TABLE1.C1_1, status=EXIST), NDataModel.NamedColumn(id=0, name=TABLE2_C2_2, aliasDotColumn=TABLE2.C2_2, status=EXIST)]"); + Assert.assertEquals( + "[NDataModel.NamedColumn(id=0, name=TABLE1_C1_1, aliasDotColumn=TABLE1.C1_1, status=EXIST), NDataModel.NamedColumn(id=0, name=TABLE2_C2_2, aliasDotColumn=TABLE2.C2_2, status=EXIST)]", + simplifiedDimensions.toString()); List simplifiedMeasures = result.getSimplifiedMeasures(); - Assert.assertEquals(simplifiedMeasures.size(), 1); - Assert.assertEquals(simplifiedMeasures.get(0).toString(), - "DDLParser.InnerMeasure(expression=MAX, returnType=UNDEFINED, parameterValues=[{column,TABLE1.C3}])"); + Assert.assertEquals(1, simplifiedMeasures.size()); + Assert.assertEquals( + "DDLParser.InnerMeasure(expression=MAX, returnType=UNDEFINED, parameterValues=[{column,TABLE1.C3}])", + simplifiedMeasures.get(0).toString()); - Assert.assertEquals(result.getFactTable(), "db.table1".toUpperCase()); + Assert.assertEquals("db.table1".toUpperCase(), result.getFactTable()); List joinTables = result.getJoinTables(); - Assert.assertEquals(joinTables.size(), 1); - Assert.assertEquals(joinTables.get(0).toString(), - "JoinTableDesc(table=DB.TABLE2, kind=LOOKUP, alias=TABLE2, join=JoinDesc [type=INNER, primary_key=[TABLE2.C2_1, TABLE2.C2_3], foreign_key=[TABLE1.C1_1, TABLE1.C1_3]], 
flattenable=null, joinRelationTypeEnum=MANY_TO_ONE, tableRef=null)"); + Assert.assertEquals(1, joinTables.size()); + Assert.assertEquals( + "JoinTableDesc(table=DB.TABLE2, kind=LOOKUP, alias=TABLE2, join=JoinDesc [type=INNER, primary_key=[TABLE2.C2_1, TABLE2.C2_3], foreign_key=[TABLE1.C1_1, TABLE1.C1_3]], flattenable=null, joinRelationTypeEnum=MANY_TO_ONE, tableRef=null)", + joinTables.get(0).toString()); } @Test @@ -159,9 +170,9 @@ public void model_project_name_case() throws Exception { DDLParser ddlParser = DDLParser.CreateParser(config); DDLParser.DDLParserResult result = ddlParser.parseSQL(sql1); - Assert.assertEquals(result.getProjectName(), "proJect_Name"); - Assert.assertEquals(result.getModelName(), "tesT_model"); - Assert.assertEquals(result.getPartitionColName(), "table1.c1_1".toUpperCase()); + Assert.assertEquals("proJect_Name", result.getProjectName()); + Assert.assertEquals("tesT_model", result.getModelName()); + Assert.assertEquals("table1.c1_1".toUpperCase(), result.getPartitionColName()); } @Test @@ -215,32 +226,41 @@ public void test_measure_diff_accuracy_type() throws Exception { DDLParser.DDLParserResult result = ddlParser.parseSQL(sql1); List simplifiedDimensions = result.getSimplifiedDimensions(); - Assert.assertEquals(simplifiedDimensions.size(), 2); - Assert.assertEquals(simplifiedDimensions.toString(), - "[NDataModel.NamedColumn(id=0, name=TABLE1_C1, aliasDotColumn=TABLE1.C1, status=EXIST), NDataModel.NamedColumn(id=0, name=TABLE1_C2, aliasDotColumn=TABLE1.C2, status=EXIST)]"); + Assert.assertEquals(2, simplifiedDimensions.size()); + Assert.assertEquals( + "[NDataModel.NamedColumn(id=0, name=TABLE1_C1, aliasDotColumn=TABLE1.C1, status=EXIST), NDataModel.NamedColumn(id=0, name=TABLE1_C2, aliasDotColumn=TABLE1.C2, status=EXIST)]", + simplifiedDimensions.toString()); List simplifiedMeasures = result.getSimplifiedMeasures(); - Assert.assertEquals(simplifiedMeasures.size(), 8); - Assert.assertEquals(simplifiedMeasures.get(0).toString(), - 
"DDLParser.InnerMeasure(expression=PERCENTILE_APPROX, returnType=percentile(100), parameterValues=[{column,TABLE1.C3}])"); - Assert.assertEquals(result.getFactTable(), "db.table1".toUpperCase()); - Assert.assertEquals(simplifiedMeasures.get(1).toString(), - "DDLParser.InnerMeasure(expression=PERCENTILE_APPROX, returnType=percentile(1000), parameterValues=[{column,TABLE1.C3}])"); - Assert.assertEquals(result.getFactTable(), "db.table1".toUpperCase()); - Assert.assertEquals(simplifiedMeasures.get(2).toString(), - "DDLParser.InnerMeasure(expression=PERCENTILE_APPROX, returnType=percentile(10000), parameterValues=[{column,TABLE1.C3}])"); - Assert.assertEquals(result.getFactTable(), "db.table1".toUpperCase()); - - Assert.assertEquals(simplifiedMeasures.get(3).toString(), - "DDLParser.InnerMeasure(expression=COUNT_DISTINCT, returnType=hllc(10), parameterValues=[{column,TABLE1.C4}])"); - Assert.assertEquals(simplifiedMeasures.get(4).toString(), - "DDLParser.InnerMeasure(expression=COUNT_DISTINCT, returnType=hllc(12), parameterValues=[{column,TABLE1.C4}])"); - Assert.assertEquals(simplifiedMeasures.get(5).toString(), - "DDLParser.InnerMeasure(expression=COUNT_DISTINCT, returnType=hllc(14), parameterValues=[{column,TABLE1.C4}])"); - Assert.assertEquals(simplifiedMeasures.get(6).toString(), - "DDLParser.InnerMeasure(expression=COUNT_DISTINCT, returnType=hllc(15), parameterValues=[{column,TABLE1.C4}])"); - Assert.assertEquals(simplifiedMeasures.get(7).toString(), - "DDLParser.InnerMeasure(expression=COUNT_DISTINCT, returnType=hllc(16), parameterValues=[{column,TABLE1.C4}])"); + Assert.assertEquals(8, simplifiedMeasures.size()); + Assert.assertEquals( + "DDLParser.InnerMeasure(expression=PERCENTILE_APPROX, returnType=percentile(100), parameterValues=[{column,TABLE1.C3}])", + simplifiedMeasures.get(0).toString()); + Assert.assertEquals("db.table1".toUpperCase(), result.getFactTable()); + Assert.assertEquals( + "DDLParser.InnerMeasure(expression=PERCENTILE_APPROX, 
returnType=percentile(1000), parameterValues=[{column,TABLE1.C3}])", + simplifiedMeasures.get(1).toString()); + Assert.assertEquals("db.table1".toUpperCase(), result.getFactTable()); + Assert.assertEquals( + "DDLParser.InnerMeasure(expression=PERCENTILE_APPROX, returnType=percentile(10000), parameterValues=[{column,TABLE1.C3}])", + simplifiedMeasures.get(2).toString()); + Assert.assertEquals("db.table1".toUpperCase(), result.getFactTable()); + + Assert.assertEquals( + "DDLParser.InnerMeasure(expression=COUNT_DISTINCT, returnType=hllc(10), parameterValues=[{column,TABLE1.C4}])", + simplifiedMeasures.get(3).toString()); + Assert.assertEquals( + "DDLParser.InnerMeasure(expression=COUNT_DISTINCT, returnType=hllc(12), parameterValues=[{column,TABLE1.C4}])", + simplifiedMeasures.get(4).toString()); + Assert.assertEquals( + "DDLParser.InnerMeasure(expression=COUNT_DISTINCT, returnType=hllc(14), parameterValues=[{column,TABLE1.C4}])", + simplifiedMeasures.get(5).toString()); + Assert.assertEquals( + "DDLParser.InnerMeasure(expression=COUNT_DISTINCT, returnType=hllc(15), parameterValues=[{column,TABLE1.C4}])", + simplifiedMeasures.get(6).toString()); + Assert.assertEquals( + "DDLParser.InnerMeasure(expression=COUNT_DISTINCT, returnType=hllc(16), parameterValues=[{column,TABLE1.C4}])", + simplifiedMeasures.get(7).toString()); }