NOTE(review): line breaks and indentation in this patch were reconstructed from the hunk headers' line counts.
NOTE(review): the ClickHouseSqlParser.jj hunks appear to be missing angle-bracket token references
(see "( | anyIdentifier())" and "LOOKAHEAD(anyIdentifier() )"); confirm against the original patch before applying.

diff --git a/jdbc-v2/src/main/antlr4/com/clickhouse/jdbc/internal/parser/antlr4/ClickHouseLexer.g4 b/jdbc-v2/src/main/antlr4/com/clickhouse/jdbc/internal/parser/antlr4/ClickHouseLexer.g4
index 4a1064209..64b99c264 100644
--- a/jdbc-v2/src/main/antlr4/com/clickhouse/jdbc/internal/parser/antlr4/ClickHouseLexer.g4
+++ b/jdbc-v2/src/main/antlr4/com/clickhouse/jdbc/internal/parser/antlr4/ClickHouseLexer.g4
@@ -386,11 +386,18 @@ JSON_TRUE : 'true';
 
 // Tokens
 
+// Order matters: quoted identifiers must come before unquoted IDENTIFIER
+BACKTICK_ID:
+    BACKQUOTE ( ~([\\`]) | (BACKSLASH .) | (BACKQUOTE BACKQUOTE))* BACKQUOTE
+;
+
+QUOTED_IDENTIFIER:
+    QUOTE_DOUBLE (~([\\"]) | (BACKSLASH .) | (QUOTE_DOUBLE QUOTE_DOUBLE))* QUOTE_DOUBLE
+;
+
 IDENTIFIER:
     (LETTER | UNDERSCORE) (LETTER | UNDERSCORE | DEC_DIGIT)*
     | DEC_DIGIT+ (LETTER | UNDERSCORE) (LETTER | UNDERSCORE | DEC_DIGIT)*
-    | BACKQUOTE ( ~([\\`]) | (BACKSLASH .) | (BACKQUOTE BACKQUOTE))* BACKQUOTE
-    | QUOTE_DOUBLE (~([\\"]) | (BACKSLASH .) | (QUOTE_DOUBLE QUOTE_DOUBLE))* QUOTE_DOUBLE
 ;
 FLOATING_LITERAL:
     HEXADECIMAL_LITERAL DOT HEX_DIGIT* (P | E) (PLUS | DASH)? DEC_DIGIT+
diff --git a/jdbc-v2/src/main/antlr4/com/clickhouse/jdbc/internal/parser/antlr4/ClickHouseParser.g4 b/jdbc-v2/src/main/antlr4/com/clickhouse/jdbc/internal/parser/antlr4/ClickHouseParser.g4
index 82c2b6cab..a45ac2fe8 100644
--- a/jdbc-v2/src/main/antlr4/com/clickhouse/jdbc/internal/parser/antlr4/ClickHouseParser.g4
+++ b/jdbc-v2/src/main/antlr4/com/clickhouse/jdbc/internal/parser/antlr4/ClickHouseParser.g4
@@ -196,7 +196,7 @@ nameCollectionKey
     ;
 
 userIdentifier
-    : (IDENTIFIER | STRING_LITERAL)
+    : (BACKTICK_ID | QUOTED_IDENTIFIER | IDENTIFIER | STRING_LITERAL)
     ;
 
 userIdentifiedClause
@@ -1152,7 +1152,8 @@ tableFunctionExpr
     ;
 
 tableIdentifier
-    : (databaseIdentifier DOT)? identifier
+    : databaseIdentifier DOT identifier
+    | identifier
     ;
 
 viewIdentifier
@@ -1172,7 +1173,7 @@ tableArgExpr
 // Databases
 
 databaseIdentifier
-    : identifier
+    : identifier (DOT identifier)*
     ;
 
 // Basics
@@ -1450,7 +1451,9 @@ alias
     ; // |interval| can't be an alias, otherwise 'INTERVAL 1 SOMETHING' becomes ambiguous.
 
 identifier
-    : IDENTIFIER
+    : BACKTICK_ID
+    | QUOTED_IDENTIFIER
+    | IDENTIFIER
     | interval
     | keyword
     ;
diff --git a/jdbc-v2/src/main/java/com/clickhouse/jdbc/internal/ParsedPreparedStatement.java b/jdbc-v2/src/main/java/com/clickhouse/jdbc/internal/ParsedPreparedStatement.java
index b17db6703..4f02dd5ab 100644
--- a/jdbc-v2/src/main/java/com/clickhouse/jdbc/internal/ParsedPreparedStatement.java
+++ b/jdbc-v2/src/main/java/com/clickhouse/jdbc/internal/ParsedPreparedStatement.java
@@ -9,6 +9,8 @@ public final class ParsedPreparedStatement {
 
 
     private String table;
+
+    private String database;
 
     private String useDatabase;
 
@@ -80,6 +82,14 @@ public void setTable(String table) {
         this.table = table;
     }
 
+    public String getDatabase() {
+        return database;
+    }
+
+    public void setDatabase(String database) {
+        this.database = database;
+    }
+
     public int[] getParamPositions() {
         return paramPositions;
     }
diff --git a/jdbc-v2/src/main/java/com/clickhouse/jdbc/internal/SqlParserFacade.java b/jdbc-v2/src/main/java/com/clickhouse/jdbc/internal/SqlParserFacade.java
index 284228595..afb5dbf39 100644
--- a/jdbc-v2/src/main/java/com/clickhouse/jdbc/internal/SqlParserFacade.java
+++ b/jdbc-v2/src/main/java/com/clickhouse/jdbc/internal/SqlParserFacade.java
@@ -7,6 +7,7 @@
 import com.clickhouse.jdbc.internal.parser.antlr4.ClickHouseParserBaseListener;
 import com.clickhouse.jdbc.internal.parser.javacc.ClickHouseSqlParser;
 import com.clickhouse.jdbc.internal.parser.javacc.ClickHouseSqlStatement;
+import com.clickhouse.jdbc.internal.parser.javacc.ClickHouseSqlUtils;
 import com.clickhouse.jdbc.internal.parser.javacc.JdbcParseHandler;
 import com.clickhouse.jdbc.internal.parser.javacc.StatementType;
 import org.antlr.v4.runtime.BaseErrorListener;
@@ -24,6 +25,7 @@
 import java.util.ArrayList;
 import java.util.Collections;
 import java.util.List;
+import java.util.stream.Collectors;
 
 public abstract class SqlParserFacade {
 
@@ -136,6 +138,14 @@ public ParsedStatement parsedStatement(String sql) {
         public ParsedPreparedStatement parsePreparedStatement(String sql) {
             ParsedPreparedStatement stmt = new ParsedPreparedStatement();
             parseSQL(sql, new ParsedPreparedStatementListener(stmt));
+
+            // Combine database and table like JavaCC does
+            String tableName = stmt.getTable();
+            if (stmt.getDatabase() != null && stmt.getTable() != null) {
+                tableName = String.format("%s.%s", stmt.getDatabase(), stmt.getTable());
+            }
+            stmt.setTable(tableName);
+
             parseParameters(sql, stmt);
             return stmt;
         }
@@ -264,11 +274,10 @@ public void enterAssignmentValuesList(ClickHouseParser.AssignmentValuesListConte
             parsedStatement.setAssignValuesListStopPosition(ctx.getStop().getStopIndex());
         }
 
-
         @Override
         public void enterTableExprIdentifier(ClickHouseParser.TableExprIdentifierContext ctx) {
             if (ctx.tableIdentifier() != null) {
-                parsedStatement.setTable(SQLUtils.unquoteIdentifier(ctx.tableIdentifier().getText()));
+                extractAndSetDatabaseAndTable(ctx.tableIdentifier());
             }
         }
 
@@ -276,7 +285,7 @@ public void enterTableExprIdentifier(ClickHouseParser.TableExprIdentifierContext
         public void enterInsertStmt(ClickHouseParser.InsertStmtContext ctx) {
             ClickHouseParser.TableIdentifierContext tableId = ctx.tableIdentifier();
             if (tableId != null) {
-                parsedStatement.setTable(SQLUtils.unquoteIdentifier(tableId.getText()));
+                extractAndSetDatabaseAndTable(tableId);
             }
 
             ClickHouseParser.ColumnsClauseContext columns = ctx.columnsClause();
@@ -292,6 +301,35 @@ public void enterInsertStmt(ClickHouseParser.InsertStmtContext ctx) {
             parsedStatement.setInsert(true);
         }
 
+        /**
+         * Records the table and database named by a {@code tableIdentifier} parse-tree node.
+         * Grammar: tableIdentifier = databaseIdentifier DOT identifier | identifier,
+         * with databaseIdentifier = identifier (DOT identifier)*.
+         *
+         * Examples:
+         *   table    -> databaseIdentifier=null, identifier="table"
+         *   db.table -> databaseIdentifier="db", identifier="table"
+         *   a.b.c    -> databaseIdentifier="a.b", identifier="c"
+         */
+        private void extractAndSetDatabaseAndTable(ClickHouseParser.TableIdentifierContext tableId) {
+            if (tableId == null) {
+                return;
+            }
+
+            // Table is always the standalone identifier (last part)
+            if (tableId.identifier() != null) {
+                parsedStatement.setTable(ClickHouseSqlUtils.unescape(tableId.identifier().getText()));
+            }
+
+            // Database is the databaseIdentifier part. It is reset to null when absent: this method is called for
+            // the insert target and for table expressions, so an earlier database must not stick to a later table.
+            ClickHouseParser.DatabaseIdentifierContext dbId = tableId.databaseIdentifier();
+            String database = dbId == null ? null : dbId.identifier().stream()
+                    .map(id -> ClickHouseSqlUtils.unescape(id.getText()))
+                    .collect(Collectors.joining("."));
+            parsedStatement.setDatabase(database);
+        }
+
         @Override
         public void enterDataClauseSelect(ClickHouseParser.DataClauseSelectContext ctx) {
             parsedStatement.setInsertWithSelect(true);
@@ -315,6 +353,14 @@ private static class ANTLR4AndParamsParser extends ANTLR4Parser {
         public ParsedPreparedStatement parsePreparedStatement(String sql) {
             ParsedPreparedStatement stmt = new ParsedPreparedStatement();
             parseSQL(sql, new ParseStatementAndParamsListener(stmt));
+
+            // Combine database and table like JavaCC does
+            String tableName = stmt.getTable();
+            if (stmt.getDatabase() != null && stmt.getTable() != null) {
+                tableName = String.format("%s.%s", stmt.getDatabase(), stmt.getTable());
+            }
+            stmt.setTable(tableName);
+
             return stmt;
         }
 
diff --git a/jdbc-v2/src/main/javacc/ClickHouseSqlParser.jj b/jdbc-v2/src/main/javacc/ClickHouseSqlParser.jj
index cb9fac0ca..e6922223a 100644
--- a/jdbc-v2/src/main/javacc/ClickHouseSqlParser.jj
+++ b/jdbc-v2/src/main/javacc/ClickHouseSqlParser.jj
@@ -903,17 +903,39 @@ void nestedIdentifier(): {} {
     ( | anyIdentifier()) (LOOKAHEAD(2) ( | anyIdentifier()))*
 }
 
-void tableIdentifier(boolean record): { Token t; } {
+void tableIdentifier(boolean record): { } {
     (
-        (LOOKAHEAD(2) databaseIdentifier(record) )? t = anyIdentifier()
-        (LOOKAHEAD(2)
-            { token_source.addCustomKeywordPosition(ClickHouseSqlStatement.KEYWORD_TABLE_COLUMNS_START, token); }
-            anyExprList()
-            { token_source.addCustomKeywordPosition(ClickHouseSqlStatement.KEYWORD_TABLE_COLUMNS_END, token); }
-        )?
-    )
+        // Match all identifiers followed by DOT as database parts
+        LOOKAHEAD(anyIdentifier() ) databasePart(record)
+    )*
+    // Last identifier is the table
+    tablePart(record)
+    // Optional column list
+    (LOOKAHEAD(2)
+        { token_source.addCustomKeywordPosition(ClickHouseSqlStatement.KEYWORD_TABLE_COLUMNS_START, token); }
+        anyExprList()
+        { token_source.addCustomKeywordPosition(ClickHouseSqlStatement.KEYWORD_TABLE_COLUMNS_END, token); }
+    )?
+}
+
+void databasePart(boolean record): { Token t; } {
+    t = anyIdentifier()
+    {
+        if (record && token_source.table == null) {
+            String part = ClickHouseSqlUtils.unescape(t.image);
+            if (token_source.database == null) {
+                token_source.database = part;
+            } else {
+                token_source.database = token_source.database + "." + part;
+            }
+        }
+    }
+}
+
+void tablePart(boolean record): { Token t; } {
+    t = anyIdentifier()
     {
-        if (record && t != null && token_source.table == null) {
+        if (record && token_source.table == null) {
             token_source.table = ClickHouseSqlUtils.unescape(t.image);
         }
     }
diff --git a/jdbc-v2/src/test/java/com/clickhouse/jdbc/internal/BaseSqlParserFacadeTest.java b/jdbc-v2/src/test/java/com/clickhouse/jdbc/internal/BaseSqlParserFacadeTest.java
index f4f0d6556..b97c2f3e3 100644
--- a/jdbc-v2/src/test/java/com/clickhouse/jdbc/internal/BaseSqlParserFacadeTest.java
+++ b/jdbc-v2/src/test/java/com/clickhouse/jdbc/internal/BaseSqlParserFacadeTest.java
@@ -160,6 +160,90 @@ public void testStmtWithUUID() {
         Assert.assertFalse(stmt.isHasErrors());
     }
 
+    @Test
+    public void testMultiDotNotation() {
+        // Test with three parts: a.b.c where a.b is database and c is table
+        String sql1 = "SELECT * FROM a.b.c WHERE id = ?";
+        ParsedPreparedStatement stmt1 = parser.parsePreparedStatement(sql1);
+        Assert.assertEquals(stmt1.getArgCount(), 1);
+        Assert.assertFalse(stmt1.isHasErrors());
+        Assert.assertEquals(stmt1.getTable(), "a.b.c");
+
+        // Test with quoted identifiers
+        String sql2 = "SELECT * FROM `db.part1`.`table` WHERE id = ?";
+        ParsedPreparedStatement stmt2 = parser.parsePreparedStatement(sql2);
+        Assert.assertEquals(stmt2.getArgCount(), 1);
+        Assert.assertFalse(stmt2.isHasErrors());
+        Assert.assertEquals(stmt2.getTable(), "db.part1.table");
+
+        // Test INSERT with multi-dot notation
+        String sql3 = "INSERT INTO a.b.c (col1, col2) VALUES (?, ?)";
+        ParsedPreparedStatement stmt3 = parser.parsePreparedStatement(sql3);
+        Assert.assertEquals(stmt3.getArgCount(), 2);
+        Assert.assertFalse(stmt3.isHasErrors());
+        Assert.assertTrue(stmt3.isInsert());
+        Assert.assertEquals(stmt3.getTable(), "a.b.c");
+    }
+
+    @Test
+    public void testQuotedIdentifiersWithDots() {
+        /*
+         * Comprehensive test for quoted identifiers containing dots.
+         * These cases are all valid in ClickHouse with MySQL-style backtick quoting.
+         */
+
+        // Case 1: Unquoted database + unquoted table
+        testCase("SELECT * FROM db.table WHERE id = ?", "db.table");
+
+        // Case 2: Quoted database + quoted table
+        testCase("SELECT * FROM `db`.`table` WHERE id = ?", "db.table");
+
+        // Case 3: Dots inside quoted table name
+        testCase("SELECT * FROM db.`table.name` WHERE id = ?", "db.table.name");
+
+        // Case 4: Dots inside quoted database name
+        testCase("SELECT * FROM `db.part1`.`table` WHERE id = ?", "db.part1.table");
+
+        // Case 5: Mixed quoted/unquoted identifiers
+        testCase("SELECT * FROM db.`table.name` WHERE id = ?", "db.table.name");
+
+        // Case 6: Mixed quoted/unquoted (reverse)
+        testCase("SELECT * FROM `db.part1`.table WHERE id = ?", "db.part1.table");
+
+        // Case 7: Escaped backticks inside quoted identifier
+        testCase("SELECT * FROM db.`tab``le` WHERE id = ?", "db.tab`le");
+
+        // Case 8: Weird characters inside quoted identifiers (spaces, symbols)
+        testCase("SELECT * FROM `my db`.`table name!@#` WHERE id = ?", "my db.table name!@#");
+
+        // Case 9: Alias on table identifier
+        testCase("SELECT * FROM `db.part1`.`table.name` AS t WHERE id = ?", "db.part1.table.name");
+
+        // Case 10: Quoted table name containing multiple dots
+        testCase("SELECT * FROM db.`a.b.c.d` WHERE id = ?", "db.a.b.c.d");
+
+        // Case 11: Quoted database name containing multiple dots
+        testCase("SELECT * FROM `db.part1.part2`.`table` WHERE id = ?", "db.part1.part2.table");
+
+        // Case 12: Multi-part unquoted chain (3-part identifier)
+        testCase("SELECT * FROM db.part1.table2 WHERE id = ?", "db.part1.table2");
+
+        // Case 13: Multi-part quoted chain
+        testCase("SELECT * FROM `db.part1`.`part2`.`table` WHERE id = ?", "db.part1.part2.table");
+
+        // Case 14: Mixed multi-part unquoted + quoted
+        testCase("SELECT * FROM db.part1.`table.name` WHERE id = ?", "db.part1.table.name");
+
+        // Case 15: Mixed multi-part quoted + unquoted
+        testCase("SELECT * FROM `db.part1`.part2.table3 WHERE id = ?", "db.part1.part2.table3");
+    }
+
+    private void testCase(String sql, String expectedTableName) {
+        ParsedPreparedStatement stmt = parser.parsePreparedStatement(sql);
+        Assert.assertFalse(stmt.isHasErrors(), "Query should parse without errors: " + sql);
+        Assert.assertEquals(stmt.getTable(), expectedTableName, "Table name mismatch for: " + sql);
+    }
+
     @Test(dataProvider = "testCreateStmtDP")
     public void testCreateStatement(String sql) {
         ParsedPreparedStatement stmt = parser.parsePreparedStatement(sql);