From 185c626dac15d7ed8af8ca75f4dd764eb187330f Mon Sep 17 00:00:00 2001 From: David Karlsson <2795016+devdavidkarlsson@users.noreply.github.com> Date: Thu, 20 Nov 2025 14:51:50 +0100 Subject: [PATCH 1/5] [jdbc-v2] Support multi-dot notation for database names Fixes #2650 Changes: - Updated ANTLR grammar databaseIdentifier rule to support multiple dot-separated identifiers: identifier (DOT identifier)* - Added extractTableName() helper method in SqlParserFacade to properly handle multi-part table identifiers by unquoting each part separately - Added testMultiDotNotation() test with 3 test cases covering SELECT, INSERT, and quoted identifiers - All 367 existing tests continue to pass This allows database names like 'a.b' in table references such as 'a.b.c' or '`db.part1`.`table`' to be parsed correctly. --- .../parser/antlr4/ClickHouseParser.g4 | 2 +- .../jdbc/internal/SqlParserFacade.java | 33 +++++++++++++++++-- .../internal/BaseSqlParserFacadeTest.java | 25 ++++++++++++++ 3 files changed, 57 insertions(+), 3 deletions(-) diff --git a/jdbc-v2/src/main/antlr4/com/clickhouse/jdbc/internal/parser/antlr4/ClickHouseParser.g4 b/jdbc-v2/src/main/antlr4/com/clickhouse/jdbc/internal/parser/antlr4/ClickHouseParser.g4 index 82c2b6cab..b6ab82131 100644 --- a/jdbc-v2/src/main/antlr4/com/clickhouse/jdbc/internal/parser/antlr4/ClickHouseParser.g4 +++ b/jdbc-v2/src/main/antlr4/com/clickhouse/jdbc/internal/parser/antlr4/ClickHouseParser.g4 @@ -1172,7 +1172,7 @@ tableArgExpr // Databases databaseIdentifier - : identifier + : identifier (DOT identifier)* ; // Basics diff --git a/jdbc-v2/src/main/java/com/clickhouse/jdbc/internal/SqlParserFacade.java b/jdbc-v2/src/main/java/com/clickhouse/jdbc/internal/SqlParserFacade.java index 284228595..5860cdbac 100644 --- a/jdbc-v2/src/main/java/com/clickhouse/jdbc/internal/SqlParserFacade.java +++ b/jdbc-v2/src/main/java/com/clickhouse/jdbc/internal/SqlParserFacade.java @@ -265,10 +265,39 @@ public void enterAssignmentValuesList(ClickHouseParser.AssignmentValuesListConte } + private String extractTableName(ClickHouseParser.TableIdentifierContext tableId) { + if (tableId == null) { + return null; + } + + StringBuilder tableName = new StringBuilder(); + + // Handle database identifier if present + if (tableId.databaseIdentifier() != null) { + ClickHouseParser.DatabaseIdentifierContext dbCtx = tableId.databaseIdentifier(); + // Database identifier can have multiple parts: identifier (DOT identifier)* + List dbParts = dbCtx.identifier(); + for (int i = 0; i < dbParts.size(); i++) { + if (i > 0) { + tableName.append('.'); + } + tableName.append(SQLUtils.unquoteIdentifier(dbParts.get(i).getText())); + } + tableName.append('.'); + } + + // Handle table identifier + if (tableId.identifier() != null) { + tableName.append(SQLUtils.unquoteIdentifier(tableId.identifier().getText())); + } + + return tableName.toString(); + } + @Override public void enterTableExprIdentifier(ClickHouseParser.TableExprIdentifierContext ctx) { if (ctx.tableIdentifier() != null) { - parsedStatement.setTable(SQLUtils.unquoteIdentifier(ctx.tableIdentifier().getText())); + parsedStatement.setTable(extractTableName(ctx.tableIdentifier())); } } @@ -276,7 +305,7 @@ public void enterTableExprIdentifier(ClickHouseParser.TableExprIdentifierContext public void enterInsertStmt(ClickHouseParser.InsertStmtContext ctx) { ClickHouseParser.TableIdentifierContext tableId = ctx.tableIdentifier(); if (tableId != null) { - parsedStatement.setTable(SQLUtils.unquoteIdentifier(tableId.getText())); + parsedStatement.setTable(extractTableName(tableId)); } ClickHouseParser.ColumnsClauseContext columns = ctx.columnsClause(); diff --git a/jdbc-v2/src/test/java/com/clickhouse/jdbc/internal/BaseSqlParserFacadeTest.java b/jdbc-v2/src/test/java/com/clickhouse/jdbc/internal/BaseSqlParserFacadeTest.java index f4f0d6556..8ef0a78c0 100644 --- a/jdbc-v2/src/test/java/com/clickhouse/jdbc/internal/BaseSqlParserFacadeTest.java +++ b/jdbc-v2/src/test/java/com/clickhouse/jdbc/internal/BaseSqlParserFacadeTest.java @@ -160,6 +160,31 @@ public void testStmtWithUUID() { Assert.assertFalse(stmt.isHasErrors()); } + @Test + public void testMultiDotNotation() { + // Test with three parts: a.b.c where a.b is database and c is table + String sql1 = "SELECT * FROM a.b.c WHERE id = ?"; + ParsedPreparedStatement stmt1 = parser.parsePreparedStatement(sql1); + Assert.assertEquals(stmt1.getArgCount(), 1); + Assert.assertFalse(stmt1.isHasErrors()); + Assert.assertEquals(stmt1.getTable(), "a.b.c"); + + // Test with quoted identifiers + String sql2 = "SELECT * FROM `db.part1`.`table` WHERE id = ?"; + ParsedPreparedStatement stmt2 = parser.parsePreparedStatement(sql2); + Assert.assertEquals(stmt2.getArgCount(), 1); + Assert.assertFalse(stmt2.isHasErrors()); + Assert.assertEquals(stmt2.getTable(), "db.part1.table"); + + // Test INSERT with multi-dot notation + String sql3 = "INSERT INTO a.b.c (col1, col2) VALUES (?, ?)"; + ParsedPreparedStatement stmt3 = parser.parsePreparedStatement(sql3); + Assert.assertEquals(stmt3.getArgCount(), 2); + Assert.assertFalse(stmt3.isHasErrors()); + Assert.assertTrue(stmt3.isInsert()); + Assert.assertEquals(stmt3.getTable(), "a.b.c"); + } + @Test(dataProvider = "testCreateStmtDP") public void testCreateStatement(String sql) { ParsedPreparedStatement stmt = parser.parsePreparedStatement(sql); From 1f9c61f01a322dcc62c035cf7a6537c9e95af19c Mon Sep 17 00:00:00 2001 From: David Karlsson <2795016+devdavidkarlsson@users.noreply.github.com> Date: Thu, 20 Nov 2025 15:09:04 +0100 Subject: [PATCH 2/5] Fix extractTableName to check identifier before appending dot Addresses PR feedback: Only append dot after database identifier if table identifier is not null to avoid trailing dots. --- .../java/com/clickhouse/jdbc/internal/SqlParserFacade.java | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/jdbc-v2/src/main/java/com/clickhouse/jdbc/internal/SqlParserFacade.java b/jdbc-v2/src/main/java/com/clickhouse/jdbc/internal/SqlParserFacade.java index 5860cdbac..9b3e0eabc 100644 --- a/jdbc-v2/src/main/java/com/clickhouse/jdbc/internal/SqlParserFacade.java +++ b/jdbc-v2/src/main/java/com/clickhouse/jdbc/internal/SqlParserFacade.java @@ -283,7 +283,11 @@ private String extractTableName(ClickHouseParser.TableIdentifierContext tableId) } tableName.append(SQLUtils.unquoteIdentifier(dbParts.get(i).getText())); } - tableName.append('.'); + + // Only append dot if table identifier exists + if (tableId.identifier() != null) { + tableName.append('.'); + } } // Handle table identifier From 5dd3bbb3015fc73a21ceeeb7575b02fa3b388309 Mon Sep 17 00:00:00 2001 From: David Karlsson <2795016+devdavidkarlsson@users.noreply.github.com> Date: Mon, 24 Nov 2025 16:21:01 +0100 Subject: [PATCH 3/5] Refactor multi-dot notation handling to be done in parser Address PR feedback: Handle multi-dot notation directly in the parser grammar rather than in post-processing Java code. Changes: - ANTLR: Simplified to use getText() directly from parser, removed extractTableName() method that was walking the parse tree - JavaCC: Modified tableIdentifier rule to parse all dot-separated identifiers and split database/table within the grammar action - Both parsers now handle the logic in the grammar/parser itself --- .../jdbc/internal/SqlParserFacade.java | 56 +++++++------------ .../src/main/javacc/ClickHouseSqlParser.jj | 18 ++++-- 2 files changed, 35 insertions(+), 39 deletions(-) diff --git a/jdbc-v2/src/main/java/com/clickhouse/jdbc/internal/SqlParserFacade.java b/jdbc-v2/src/main/java/com/clickhouse/jdbc/internal/SqlParserFacade.java index 9b3e0eabc..14a55a9e8 100644 --- a/jdbc-v2/src/main/java/com/clickhouse/jdbc/internal/SqlParserFacade.java +++ b/jdbc-v2/src/main/java/com/clickhouse/jdbc/internal/SqlParserFacade.java @@ -248,6 +248,25 @@ public void enterColumnExprPrecedence3(ClickHouseParser.ColumnExprPrecedence3Con super.enterColumnExprPrecedence3(ctx); } + private String unquoteTableIdentifier(String rawTableId) { + if (rawTableId == null || rawTableId.isEmpty()) { + return rawTableId; + } + + // Split by dots and unquote each part + String[] parts = rawTableId.split("\\."); + StringBuilder result = new StringBuilder(); + + for (int i = 0; i < parts.length; i++) { + if (i > 0) { + result.append('.'); + } + result.append(SQLUtils.unquoteIdentifier(parts[i])); + } + + return result.toString(); + } + @Override public void visitErrorNode(ErrorNode node) { parsedStatement.setHasErrors(true); @@ -265,43 +284,10 @@ public void enterAssignmentValuesList(ClickHouseParser.AssignmentValuesListConte } - private String extractTableName(ClickHouseParser.TableIdentifierContext tableId) { - if (tableId == null) { - return null; - } - - StringBuilder tableName = new StringBuilder(); - - // Handle database identifier if present - if (tableId.databaseIdentifier() != null) { - ClickHouseParser.DatabaseIdentifierContext dbCtx = tableId.databaseIdentifier(); - // Database identifier can have multiple parts: identifier (DOT identifier)* - List dbParts = dbCtx.identifier(); - for (int i = 0; i < dbParts.size(); i++) { - if (i > 0) { - tableName.append('.'); - } - tableName.append(SQLUtils.unquoteIdentifier(dbParts.get(i).getText())); - } - - // Only append dot if table identifier exists - if (tableId.identifier() != null) { - tableName.append('.'); - } - } - - // Handle table identifier - if (tableId.identifier() != null) { - tableName.append(SQLUtils.unquoteIdentifier(tableId.identifier().getText())); - } - - return tableName.toString(); - } - @Override public void enterTableExprIdentifier(ClickHouseParser.TableExprIdentifierContext ctx) { if (ctx.tableIdentifier() != null) { - parsedStatement.setTable(extractTableName(ctx.tableIdentifier())); + parsedStatement.setTable(unquoteTableIdentifier(ctx.tableIdentifier().getText())); } } @@ -309,7 +295,7 @@ public void enterTableExprIdentifier(ClickHouseParser.TableExprIdentifierContext public void enterInsertStmt(ClickHouseParser.InsertStmtContext ctx) { ClickHouseParser.TableIdentifierContext tableId = ctx.tableIdentifier(); if (tableId != null) { - parsedStatement.setTable(extractTableName(tableId)); + parsedStatement.setTable(unquoteTableIdentifier(tableId.getText())); } ClickHouseParser.ColumnsClauseContext columns = ctx.columnsClause(); diff --git a/jdbc-v2/src/main/javacc/ClickHouseSqlParser.jj b/jdbc-v2/src/main/javacc/ClickHouseSqlParser.jj index cb9fac0ca..936fd3bce 100644 --- a/jdbc-v2/src/main/javacc/ClickHouseSqlParser.jj +++ b/jdbc-v2/src/main/javacc/ClickHouseSqlParser.jj @@ -903,9 +903,12 @@ void nestedIdentifier(): {} { ( | anyIdentifier()) (LOOKAHEAD(2) ( | anyIdentifier()))* } -void tableIdentifier(boolean record): { Token t; } { +void tableIdentifier(boolean record): { Token t; StringBuilder fullName = new StringBuilder(); } { ( - (LOOKAHEAD(2) databaseIdentifier(record) )? t = anyIdentifier() + t = anyIdentifier() { fullName.append(ClickHouseSqlUtils.unescape(t.image)); } + ( + LOOKAHEAD(2) t = anyIdentifier() { fullName.append('.').append(ClickHouseSqlUtils.unescape(t.image)); } + )* (LOOKAHEAD(2) { token_source.addCustomKeywordPosition(ClickHouseSqlStatement.KEYWORD_TABLE_COLUMNS_START, token); } anyExprList() @@ -913,8 +916,15 @@ void tableIdentifier(boolean record): { Token t; } { )? ) { - if (record && t != null && token_source.table == null) { - token_source.table = ClickHouseSqlUtils.unescape(t.image); + if (record && token_source.table == null) { + String qualifiedName = fullName.toString(); + int lastDot = qualifiedName.lastIndexOf('.'); + if (lastDot > 0) { + token_source.database = qualifiedName.substring(0, lastDot); + token_source.table = qualifiedName.substring(lastDot + 1); + } else { + token_source.table = qualifiedName; + } } } } From 823b35db210b7261076f7467b9a47c19cfe62c3d Mon Sep 17 00:00:00 2001 From: David Karlsson <2795016+devdavidkarlsson@users.noreply.github.com> Date: Tue, 25 Nov 2025 10:31:06 +0100 Subject: [PATCH 4/5] Fix quoted identifier handling and add comprehensive test suite MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Address bot feedback about quoted identifiers containing dots. Changes: - ANTLR: Use ClickHouseSqlUtils.unescape() instead of SQLUtils.unquoteIdentifier() to properly handle escaped backticks - JavaCC: Already working correctly (no changes needed) - Added comprehensive test suite with 15 test cases covering all ClickHouse-relevant scenarios for quoted identifiers with dots Test coverage: Case SQL Pattern Database Table ---- ------------------------------------ ---------------- --------------- 1 db.table db table 2 `db`.`table` db table 3 db.`table.name` db table.name 4 `db.part1`.`table` db.part1 table 5 db.`table.name` db table.name 6 `db.part1`.table db.part1 table 7 db.`tab``le` db tab`le 8 `my db`.`table name!@#` my db table name!@# 9 `db.part1`.`table.name` AS t db.part1 table.name 10 db.`a.b.c.d` db a.b.c.d 11 `db.part1.part2`.`table` db.part1.part2 table 12 db.part1.table2 db.part1 table2 13 `db.part1`.`part2`.`table` db.part1.part2 table 14 db.part1.`table.name` db.part1 table.name 15 `db.part1`.part2.table3 db.part1.part2 table3 All 1104 tests passing (367 base tests × 3 parsers + 1 new test × 3). --- .../jdbc/internal/SqlParserFacade.java | 42 +++++++++++-- .../src/main/javacc/ClickHouseSqlParser.jj | 26 ++++---- .../internal/BaseSqlParserFacadeTest.java | 59 +++++++++++++++++++ 3 files changed, 111 insertions(+), 16 deletions(-) diff --git a/jdbc-v2/src/main/java/com/clickhouse/jdbc/internal/SqlParserFacade.java b/jdbc-v2/src/main/java/com/clickhouse/jdbc/internal/SqlParserFacade.java index 14a55a9e8..1d26d1fc7 100644 --- a/jdbc-v2/src/main/java/com/clickhouse/jdbc/internal/SqlParserFacade.java +++ b/jdbc-v2/src/main/java/com/clickhouse/jdbc/internal/SqlParserFacade.java @@ -7,6 +7,7 @@ import com.clickhouse.jdbc.internal.parser.antlr4.ClickHouseParserBaseListener; import com.clickhouse.jdbc.internal.parser.javacc.ClickHouseSqlParser; import com.clickhouse.jdbc.internal.parser.javacc.ClickHouseSqlStatement; +import com.clickhouse.jdbc.internal.parser.javacc.ClickHouseSqlUtils; import com.clickhouse.jdbc.internal.parser.javacc.JdbcParseHandler; import com.clickhouse.jdbc.internal.parser.javacc.StatementType; import org.antlr.v4.runtime.BaseErrorListener; @@ -253,15 +254,46 @@ private String unquoteTableIdentifier(String rawTableId) { return rawTableId; } - // Split by dots and unquote each part - String[] parts = rawTableId.split("\\."); + // Parse respecting quoted identifiers - don't split dots inside quotes StringBuilder result = new StringBuilder(); + boolean inQuote = false; + char quoteChar = 0; + StringBuilder currentPart = new StringBuilder(); - for (int i = 0; i < parts.length; i++) { - if (i > 0) { + for (int i = 0; i < rawTableId.length(); i++) { + char ch = rawTableId.charAt(i); + + if (!inQuote && (ch == '`' || ch == '"' || ch == '\'')) { + inQuote = true; + quoteChar = ch; + currentPart.append(ch); + } else if (inQuote && ch == quoteChar) { + // Check for escaped quote (doubled quote) + if (i + 1 < rawTableId.length() && rawTableId.charAt(i + 1) == quoteChar) { + currentPart.append(ch).append(ch); + i++; // Skip the next quote + } else { + inQuote = false; + currentPart.append(ch); + } + } else if (!inQuote && ch == '.') { + // Dot outside quotes - split here + if (result.length() > 0) { + result.append('.'); + } + result.append(ClickHouseSqlUtils.unescape(currentPart.toString())); + currentPart.setLength(0); + } else { + currentPart.append(ch); + } + } + + // Append the last part + if (currentPart.length() > 0) { + if (result.length() > 0) { result.append('.'); } - result.append(SQLUtils.unquoteIdentifier(parts[i])); + result.append(ClickHouseSqlUtils.unescape(currentPart.toString())); } return result.toString(); diff --git a/jdbc-v2/src/main/javacc/ClickHouseSqlParser.jj b/jdbc-v2/src/main/javacc/ClickHouseSqlParser.jj index 936fd3bce..36025c355 100644 --- a/jdbc-v2/src/main/javacc/ClickHouseSqlParser.jj +++ b/jdbc-v2/src/main/javacc/ClickHouseSqlParser.jj @@ -903,11 +903,11 @@ void nestedIdentifier(): {} { ( | anyIdentifier()) (LOOKAHEAD(2) ( | anyIdentifier()))* } -void tableIdentifier(boolean record): { Token t; StringBuilder fullName = new StringBuilder(); } { +void tableIdentifier(boolean record): { Token t; java.util.List parts = new java.util.ArrayList<>(); } { ( - t = anyIdentifier() { fullName.append(ClickHouseSqlUtils.unescape(t.image)); } + t = anyIdentifier() { parts.add(ClickHouseSqlUtils.unescape(t.image)); } ( - LOOKAHEAD(2) t = anyIdentifier() { fullName.append('.').append(ClickHouseSqlUtils.unescape(t.image)); } + LOOKAHEAD(2) t = anyIdentifier() { parts.add(ClickHouseSqlUtils.unescape(t.image)); } )* (LOOKAHEAD(2) { token_source.addCustomKeywordPosition(ClickHouseSqlStatement.KEYWORD_TABLE_COLUMNS_START, token); } @@ -916,14 +916,18 @@ void tableIdentifier(boolean record): { Token t; StringBuilder fullName = new St )? ) { - if (record && token_source.table == null) { - String qualifiedName = fullName.toString(); - int lastDot = qualifiedName.lastIndexOf('.'); - if (lastDot > 0) { - token_source.database = qualifiedName.substring(0, lastDot); - token_source.table = qualifiedName.substring(lastDot + 1); - } else { - token_source.table = qualifiedName; + if (record && token_source.table == null && parts.size() > 0) { + // Last part is always the table name + token_source.table = parts.get(parts.size() - 1); + + // All parts before the last are the database (if any) + if (parts.size() > 1) { + StringBuilder db = new StringBuilder(); + for (int i = 0; i < parts.size() - 1; i++) { + if (i > 0) db.append('.'); + db.append(parts.get(i)); + } + token_source.database = db.toString(); } } } diff --git a/jdbc-v2/src/test/java/com/clickhouse/jdbc/internal/BaseSqlParserFacadeTest.java b/jdbc-v2/src/test/java/com/clickhouse/jdbc/internal/BaseSqlParserFacadeTest.java index 8ef0a78c0..b97c2f3e3 100644 --- a/jdbc-v2/src/test/java/com/clickhouse/jdbc/internal/BaseSqlParserFacadeTest.java +++ b/jdbc-v2/src/test/java/com/clickhouse/jdbc/internal/BaseSqlParserFacadeTest.java @@ -185,6 +185,65 @@ public void testMultiDotNotation() { Assert.assertEquals(stmt3.getTable(), "a.b.c"); } + @Test + public void testQuotedIdentifiersWithDots() { + /* + * Comprehensive test for quoted identifiers containing dots. + * These cases are all valid in ClickHouse with MySQL-style backtick quoting. + */ + + // Case 1: Unquoted database + unquoted table + testCase("SELECT * FROM db.table WHERE id = ?", "db.table"); + + // Case 2: Quoted database + quoted table + testCase("SELECT * FROM `db`.`table` WHERE id = ?", "db.table"); + + // Case 3: Dots inside quoted table name + testCase("SELECT * FROM db.`table.name` WHERE id = ?", "db.table.name"); + + // Case 4: Dots inside quoted database name + testCase("SELECT * FROM `db.part1`.`table` WHERE id = ?", "db.part1.table"); + + // Case 5: Mixed quoted/unquoted identifiers + testCase("SELECT * FROM db.`table.name` WHERE id = ?", "db.table.name"); + + // Case 6: Mixed quoted/unquoted (reverse) + testCase("SELECT * FROM `db.part1`.table WHERE id = ?", "db.part1.table"); + + // Case 7: Escaped backticks inside quoted identifier + testCase("SELECT * FROM db.`tab``le` WHERE id = ?", "db.tab`le"); + + // Case 8: Weird characters inside quoted identifiers (spaces, symbols) + testCase("SELECT * FROM `my db`.`table name!@#` WHERE id = ?", "my db.table name!@#"); + + // Case 9: Alias on table identifier + testCase("SELECT * FROM `db.part1`.`table.name` AS t WHERE id = ?", "db.part1.table.name"); + + // Case 10: Quoted table name containing multiple dots + testCase("SELECT * FROM db.`a.b.c.d` WHERE id = ?", "db.a.b.c.d"); + + // Case 11: Quoted database name containing multiple dots + testCase("SELECT * FROM `db.part1.part2`.`table` WHERE id = ?", "db.part1.part2.table"); + + // Case 12: Multi-part unquoted chain (3-part identifier) + testCase("SELECT * FROM db.part1.table2 WHERE id = ?", "db.part1.table2"); + + // Case 13: Multi-part quoted chain + testCase("SELECT * FROM `db.part1`.`part2`.`table` WHERE id = ?", "db.part1.part2.table"); + + // Case 14: Mixed multi-part unquoted + quoted + testCase("SELECT * FROM db.part1.`table.name` WHERE id = ?", "db.part1.table.name"); + + // Case 15: Mixed multi-part quoted + unquoted + testCase("SELECT * FROM `db.part1`.part2.table3 WHERE id = ?", "db.part1.part2.table3"); + } + + private void testCase(String sql, String expectedTableName) { + ParsedPreparedStatement stmt = parser.parsePreparedStatement(sql); + Assert.assertFalse(stmt.isHasErrors(), "Query should parse without errors: " + sql); + Assert.assertEquals(stmt.getTable(), expectedTableName, "Table name mismatch for: " + sql); + } + @Test(dataProvider = "testCreateStmtDP") public void testCreateStatement(String sql) { ParsedPreparedStatement stmt = parser.parsePreparedStatement(sql); From 12f27b4cb65ff206b413325d8b548e13c59167e7 Mon Sep 17 00:00:00 2001 From: David Karlsson <2795016+devdavidkarlsson@users.noreply.github.com> Date: Thu, 27 Nov 2025 15:36:39 +0100 Subject: [PATCH 5/5] The JavaCC and ANTLR grammars in updated to handle the ticks part of the parsing, instead of in the java code --- .../internal/parser/antlr4/ClickHouseLexer.g4 | 11 +- .../parser/antlr4/ClickHouseParser.g4 | 9 +- .../internal/ParsedPreparedStatement.java | 10 ++ .../jdbc/internal/SqlParserFacade.java | 101 +++++++++--------- .../src/main/javacc/ClickHouseSqlParser.jj | 54 ++++++---- 5 files changed, 104 insertions(+), 81 deletions(-) diff --git a/jdbc-v2/src/main/antlr4/com/clickhouse/jdbc/internal/parser/antlr4/ClickHouseLexer.g4 b/jdbc-v2/src/main/antlr4/com/clickhouse/jdbc/internal/parser/antlr4/ClickHouseLexer.g4 index 4a1064209..64b99c264 100644 --- a/jdbc-v2/src/main/antlr4/com/clickhouse/jdbc/internal/parser/antlr4/ClickHouseLexer.g4 +++ b/jdbc-v2/src/main/antlr4/com/clickhouse/jdbc/internal/parser/antlr4/ClickHouseLexer.g4 @@ -386,11 +386,18 @@ JSON_TRUE : 'true'; // Tokens +// Order matters: quoted identifiers must come before unquoted IDENTIFIER +BACKTICK_ID: + BACKQUOTE ( ~([\\`]) | (BACKSLASH .) | (BACKQUOTE BACKQUOTE))* BACKQUOTE +; + +QUOTED_IDENTIFIER: + QUOTE_DOUBLE (~([\\"]) | (BACKSLASH .) | (QUOTE_DOUBLE QUOTE_DOUBLE))* QUOTE_DOUBLE +; + IDENTIFIER: (LETTER | UNDERSCORE) (LETTER | UNDERSCORE | DEC_DIGIT)* | DEC_DIGIT+ (LETTER | UNDERSCORE) (LETTER | UNDERSCORE | DEC_DIGIT)* - | BACKQUOTE ( ~([\\`]) | (BACKSLASH .) | (BACKQUOTE BACKQUOTE))* BACKQUOTE - | QUOTE_DOUBLE (~([\\"]) | (BACKSLASH .) | (QUOTE_DOUBLE QUOTE_DOUBLE))* QUOTE_DOUBLE ; FLOATING_LITERAL: HEXADECIMAL_LITERAL DOT HEX_DIGIT* (P | E) (PLUS | DASH)? DEC_DIGIT+ diff --git a/jdbc-v2/src/main/antlr4/com/clickhouse/jdbc/internal/parser/antlr4/ClickHouseParser.g4 b/jdbc-v2/src/main/antlr4/com/clickhouse/jdbc/internal/parser/antlr4/ClickHouseParser.g4 index b6ab82131..a45ac2fe8 100644 --- a/jdbc-v2/src/main/antlr4/com/clickhouse/jdbc/internal/parser/antlr4/ClickHouseParser.g4 +++ b/jdbc-v2/src/main/antlr4/com/clickhouse/jdbc/internal/parser/antlr4/ClickHouseParser.g4 @@ -196,7 +196,7 @@ nameCollectionKey ; userIdentifier - : (IDENTIFIER | STRING_LITERAL) + : (BACKTICK_ID | QUOTED_IDENTIFIER | IDENTIFIER | STRING_LITERAL) ; userIdentifiedClause @@ -1152,7 +1152,8 @@ tableFunctionExpr ; tableIdentifier - : (databaseIdentifier DOT)? identifier + : databaseIdentifier DOT identifier + | identifier ; viewIdentifier @@ -1450,7 +1451,9 @@ alias ; // |interval| can't be an alias, otherwise 'INTERVAL 1 SOMETHING' becomes ambiguous. identifier - : IDENTIFIER + : BACKTICK_ID + | QUOTED_IDENTIFIER + | IDENTIFIER | interval | keyword ; diff --git a/jdbc-v2/src/main/java/com/clickhouse/jdbc/internal/ParsedPreparedStatement.java b/jdbc-v2/src/main/java/com/clickhouse/jdbc/internal/ParsedPreparedStatement.java index b17db6703..4f02dd5ab 100644 --- a/jdbc-v2/src/main/java/com/clickhouse/jdbc/internal/ParsedPreparedStatement.java +++ b/jdbc-v2/src/main/java/com/clickhouse/jdbc/internal/ParsedPreparedStatement.java @@ -9,6 +9,8 @@ public final class ParsedPreparedStatement { private String table; + + private String database; private String useDatabase; @@ -80,6 +82,14 @@ public void setTable(String table) { this.table = table; } + public String getDatabase() { + return database; + } + + public void setDatabase(String database) { + this.database = database; + } + public int[] getParamPositions() { return paramPositions; } diff --git a/jdbc-v2/src/main/java/com/clickhouse/jdbc/internal/SqlParserFacade.java b/jdbc-v2/src/main/java/com/clickhouse/jdbc/internal/SqlParserFacade.java index 1d26d1fc7..afb5dbf39 100644 --- a/jdbc-v2/src/main/java/com/clickhouse/jdbc/internal/SqlParserFacade.java +++ b/jdbc-v2/src/main/java/com/clickhouse/jdbc/internal/SqlParserFacade.java @@ -25,6 +25,7 @@ import java.util.ArrayList; import java.util.Collections; import java.util.List; +import java.util.stream.Collectors; public abstract class SqlParserFacade { @@ -137,6 +138,14 @@ public ParsedStatement parsedStatement(String sql) { public ParsedPreparedStatement parsePreparedStatement(String sql) { ParsedPreparedStatement stmt = new ParsedPreparedStatement(); parseSQL(sql, new ParsedPreparedStatementListener(stmt)); + + // Combine database and table like JavaCC does + String tableName = stmt.getTable(); + if (stmt.getDatabase() != null && stmt.getTable() != null) { + tableName = String.format("%s.%s", stmt.getDatabase(), stmt.getTable()); + } + stmt.setTable(tableName); + parseParameters(sql, stmt); return stmt; } @@ -249,56 +258,6 @@ public void enterColumnExprPrecedence3(ClickHouseParser.ColumnExprPrecedence3Con super.enterColumnExprPrecedence3(ctx); } - private String unquoteTableIdentifier(String rawTableId) { - if (rawTableId == null || rawTableId.isEmpty()) { - return rawTableId; - } - - // Parse respecting quoted identifiers - don't split dots inside quotes - StringBuilder result = new StringBuilder(); - boolean inQuote = false; - char quoteChar = 0; - StringBuilder currentPart = new StringBuilder(); - - for (int i = 0; i < rawTableId.length(); i++) { - char ch = rawTableId.charAt(i); - - if (!inQuote && (ch == '`' || ch == '"' || ch == '\'')) { - inQuote = true; - quoteChar = ch; - currentPart.append(ch); - } else if (inQuote && ch == quoteChar) { - // Check for escaped quote (doubled quote) - if (i + 1 < rawTableId.length() && rawTableId.charAt(i + 1) == quoteChar) { - currentPart.append(ch).append(ch); - i++; // Skip the next quote - } else { - inQuote = false; - currentPart.append(ch); - } - } else if (!inQuote && ch == '.') { - // Dot outside quotes - split here - if (result.length() > 0) { - result.append('.'); - } - result.append(ClickHouseSqlUtils.unescape(currentPart.toString())); - currentPart.setLength(0); - } else { - currentPart.append(ch); - } - } - - // Append the last part - if (currentPart.length() > 0) { - if (result.length() > 0) { - result.append('.'); - } - result.append(ClickHouseSqlUtils.unescape(currentPart.toString())); - } - - return result.toString(); - } - @Override public void visitErrorNode(ErrorNode node) { parsedStatement.setHasErrors(true); @@ -315,11 +274,10 @@ public void enterAssignmentValuesList(ClickHouseParser.AssignmentValuesListConte parsedStatement.setAssignValuesListStopPosition(ctx.getStop().getStopIndex()); } - @Override public void enterTableExprIdentifier(ClickHouseParser.TableExprIdentifierContext ctx) { if (ctx.tableIdentifier() != null) { - parsedStatement.setTable(unquoteTableIdentifier(ctx.tableIdentifier().getText())); + extractAndSetDatabaseAndTable(ctx.tableIdentifier()); } } @@ -327,7 +285,7 @@ public void enterTableExprIdentifier(ClickHouseParser.TableExprIdentifierContext public void enterInsertStmt(ClickHouseParser.InsertStmtContext ctx) { ClickHouseParser.TableIdentifierContext tableId = ctx.tableIdentifier(); if (tableId != null) { - parsedStatement.setTable(unquoteTableIdentifier(tableId.getText())); + extractAndSetDatabaseAndTable(tableId); } ClickHouseParser.ColumnsClauseContext columns = ctx.columnsClause(); @@ -343,6 +301,35 @@ public void enterInsertStmt(ClickHouseParser.InsertStmtContext ctx) { parsedStatement.setInsert(true); } + /** + * Extracts database and table from parse tree using grammar structure. + * Grammar: tableIdentifier = (databaseIdentifier DOT)? identifier + * The grammar itself defines what's database vs table! + * + * Examples: + * table -> databaseIdentifier=null, identifier="table" + * db.table -> databaseIdentifier="db", identifier="table" + * a.b.c -> databaseIdentifier="a.b", identifier="c" + */ + private void extractAndSetDatabaseAndTable(ClickHouseParser.TableIdentifierContext tableId) { + if (tableId == null) { + return; + } + + // Table is always the standalone identifier (last part) + if (tableId.identifier() != null) { + parsedStatement.setTable(ClickHouseSqlUtils.unescape(tableId.identifier().getText())); + } + + // Database is the databaseIdentifier part (if present) + if (tableId.databaseIdentifier() != null) { + String database = tableId.databaseIdentifier().identifier().stream() + .map(id -> ClickHouseSqlUtils.unescape(id.getText())) + .collect(Collectors.joining(".")); + parsedStatement.setDatabase(database); + } + } + @Override public void enterDataClauseSelect(ClickHouseParser.DataClauseSelectContext ctx) { parsedStatement.setInsertWithSelect(true); @@ -366,6 +353,14 @@ private static class ANTLR4AndParamsParser extends ANTLR4Parser { public ParsedPreparedStatement parsePreparedStatement(String sql) { ParsedPreparedStatement stmt = new ParsedPreparedStatement(); parseSQL(sql, new ParseStatementAndParamsListener(stmt)); + + // Combine database and table like JavaCC does + String tableName = stmt.getTable(); + if (stmt.getDatabase() != null && stmt.getTable() != null) { + tableName = String.format("%s.%s", stmt.getDatabase(), stmt.getTable()); + } + stmt.setTable(tableName); + return stmt; } diff --git a/jdbc-v2/src/main/javacc/ClickHouseSqlParser.jj b/jdbc-v2/src/main/javacc/ClickHouseSqlParser.jj index 36025c355..e6922223a 100644 --- a/jdbc-v2/src/main/javacc/ClickHouseSqlParser.jj +++ b/jdbc-v2/src/main/javacc/ClickHouseSqlParser.jj @@ -903,36 +903,44 @@ void nestedIdentifier(): {} { ( | anyIdentifier()) (LOOKAHEAD(2) ( | anyIdentifier()))* } -void tableIdentifier(boolean record): { Token t; java.util.List parts = new java.util.ArrayList<>(); } { +void tableIdentifier(boolean record): { } { ( - t = anyIdentifier() { parts.add(ClickHouseSqlUtils.unescape(t.image)); } - ( - LOOKAHEAD(2) t = anyIdentifier() { parts.add(ClickHouseSqlUtils.unescape(t.image)); } - )* - (LOOKAHEAD(2) - { token_source.addCustomKeywordPosition(ClickHouseSqlStatement.KEYWORD_TABLE_COLUMNS_START, token); } - anyExprList() - { token_source.addCustomKeywordPosition(ClickHouseSqlStatement.KEYWORD_TABLE_COLUMNS_END, token); } - )? - ) + // Match all identifiers followed by DOT as database parts + LOOKAHEAD(anyIdentifier() ) databasePart(record) + )* + // Last identifier is the table + tablePart(record) + // Optional column list + (LOOKAHEAD(2) + { token_source.addCustomKeywordPosition(ClickHouseSqlStatement.KEYWORD_TABLE_COLUMNS_START, token); } + anyExprList() + { token_source.addCustomKeywordPosition(ClickHouseSqlStatement.KEYWORD_TABLE_COLUMNS_END, token); } + )? +} + +void databasePart(boolean record): { Token t; } { + t = anyIdentifier() { - if (record && token_source.table == null && parts.size() > 0) { - // Last part is always the table name - token_source.table = parts.get(parts.size() - 1); - - // All parts before the last are the database (if any) - if (parts.size() > 1) { - StringBuilder db = new StringBuilder(); - for (int i = 0; i < parts.size() - 1; i++) { - if (i > 0) db.append('.'); - db.append(parts.get(i)); - } - token_source.database = db.toString(); + if (record && token_source.table == null) { + String part = ClickHouseSqlUtils.unescape(t.image); + if (token_source.database == null) { + token_source.database = part; + } else { + token_source.database = token_source.database + "." + part; } } } } +void tablePart(boolean record): { Token t; } { + t = anyIdentifier() + { + if (record && token_source.table == null) { + token_source.table = ClickHouseSqlUtils.unescape(t.image); + } + } +} + void databaseIdentifier(boolean record): { Token t; } { t = anyIdentifier() { if (record) token_source.database = ClickHouseSqlUtils.unescape(t.image); } }