Skip to content

Commit ba2f40a

Browse files
Fix quoted identifier handling and add comprehensive test suite
Address bot feedback about quoted identifiers containing dots. Changes: - ANTLR: Use ClickHouseSqlUtils.unescape() instead of SQLUtils.unquoteIdentifier() to properly handle escaped backticks - JavaCC: Already working correctly (no changes needed) - Added comprehensive test suite with 15 test cases covering all ClickHouse-relevant scenarios for quoted identifiers with dots Test coverage: Case SQL Pattern Database Table ---- ------------------------------------ ---------------- --------------- 1 db.table db table 2 `db`.`table` db table 3 db.`table.name` db table.name 4 `db.part1`.`table` db.part1 table 5 db.`table.name` db table.name 6 `db.part1`.table db.part1 table 7 db.`tab``le` db tab`le 8 `my db`.`table name!@#` my db table name!@# 9 `db.part1`.`table.name` AS t db.part1 table.name 10 db.`a.b.c.d` db a.b.c.d 11 `db.part1.part2`.`table` db.part1.part2 table 12 db.part1.table2 db.part1 table2 13 `db.part1`.`part2`.`table` db.part1.part2 table 14 db.part1.`table.name` db.part1 table.name 15 `db.part1`.part2.table3 db.part1.part2 table3 All 1104 tests passing (367 base tests × 3 parsers + 1 new test × 3).
1 parent 5287098 commit ba2f40a

File tree

3 files changed

+146
-16
lines changed

3 files changed

+146
-16
lines changed

jdbc-v2/src/main/java/com/clickhouse/jdbc/internal/SqlParserFacade.java

Lines changed: 37 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@
77
import com.clickhouse.jdbc.internal.parser.antlr4.ClickHouseParserBaseListener;
88
import com.clickhouse.jdbc.internal.parser.javacc.ClickHouseSqlParser;
99
import com.clickhouse.jdbc.internal.parser.javacc.ClickHouseSqlStatement;
10+
import com.clickhouse.jdbc.internal.parser.javacc.ClickHouseSqlUtils;
1011
import com.clickhouse.jdbc.internal.parser.javacc.JdbcParseHandler;
1112
import com.clickhouse.jdbc.internal.parser.javacc.StatementType;
1213
import org.antlr.v4.runtime.BaseErrorListener;
@@ -253,15 +254,46 @@ private String unquoteTableIdentifier(String rawTableId) {
253254
return rawTableId;
254255
}
255256

256-
// Split by dots and unquote each part
257-
String[] parts = rawTableId.split("\\.");
257+
// Parse respecting quoted identifiers - don't split dots inside quotes
258258
StringBuilder result = new StringBuilder();
259+
boolean inQuote = false;
260+
char quoteChar = 0;
261+
StringBuilder currentPart = new StringBuilder();
259262

260-
for (int i = 0; i < parts.length; i++) {
261-
if (i > 0) {
263+
for (int i = 0; i < rawTableId.length(); i++) {
264+
char ch = rawTableId.charAt(i);
265+
266+
if (!inQuote && (ch == '`' || ch == '"' || ch == '\'')) {
267+
inQuote = true;
268+
quoteChar = ch;
269+
currentPart.append(ch);
270+
} else if (inQuote && ch == quoteChar) {
271+
// Check for escaped quote (doubled quote)
272+
if (i + 1 < rawTableId.length() && rawTableId.charAt(i + 1) == quoteChar) {
273+
currentPart.append(ch).append(ch);
274+
i++; // Skip the next quote
275+
} else {
276+
inQuote = false;
277+
currentPart.append(ch);
278+
}
279+
} else if (!inQuote && ch == '.') {
280+
// Dot outside quotes - split here
281+
if (result.length() > 0) {
282+
result.append('.');
283+
}
284+
result.append(ClickHouseSqlUtils.unescape(currentPart.toString()));
285+
currentPart.setLength(0);
286+
} else {
287+
currentPart.append(ch);
288+
}
289+
}
290+
291+
// Append the last part
292+
if (currentPart.length() > 0) {
293+
if (result.length() > 0) {
262294
result.append('.');
263295
}
264-
result.append(SQLUtils.unquoteIdentifier(parts[i]));
296+
result.append(ClickHouseSqlUtils.unescape(currentPart.toString()));
265297
}
266298

267299
return result.toString();

jdbc-v2/src/main/javacc/ClickHouseSqlParser.jj

Lines changed: 15 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -903,11 +903,11 @@ void nestedIdentifier(): {} {
903903
(<ASTERISK> | anyIdentifier()) (LOOKAHEAD(2) <DOT> (<ASTERISK> | anyIdentifier()))*
904904
}
905905

906-
void tableIdentifier(boolean record): { Token t; StringBuilder fullName = new StringBuilder(); } {
906+
void tableIdentifier(boolean record): { Token t; java.util.List<String> parts = new java.util.ArrayList<>(); } {
907907
(
908-
t = anyIdentifier() { fullName.append(ClickHouseSqlUtils.unescape(t.image)); }
908+
t = anyIdentifier() { parts.add(ClickHouseSqlUtils.unescape(t.image)); }
909909
(
910-
LOOKAHEAD(2) <DOT> t = anyIdentifier() { fullName.append('.').append(ClickHouseSqlUtils.unescape(t.image)); }
910+
LOOKAHEAD(2) <DOT> t = anyIdentifier() { parts.add(ClickHouseSqlUtils.unescape(t.image)); }
911911
)*
912912
(LOOKAHEAD(2)
913913
<LPAREN> { token_source.addCustomKeywordPosition(ClickHouseSqlStatement.KEYWORD_TABLE_COLUMNS_START, token); }
@@ -916,14 +916,18 @@ void tableIdentifier(boolean record): { Token t; StringBuilder fullName = new St
916916
)?
917917
)
918918
{
919-
if (record && token_source.table == null) {
920-
String qualifiedName = fullName.toString();
921-
int lastDot = qualifiedName.lastIndexOf('.');
922-
if (lastDot > 0) {
923-
token_source.database = qualifiedName.substring(0, lastDot);
924-
token_source.table = qualifiedName.substring(lastDot + 1);
925-
} else {
926-
token_source.table = qualifiedName;
919+
if (record && token_source.table == null && parts.size() > 0) {
920+
// Last part is always the table name
921+
token_source.table = parts.get(parts.size() - 1);
922+
923+
// All parts before the last are the database (if any)
924+
if (parts.size() > 1) {
925+
StringBuilder db = new StringBuilder();
926+
for (int i = 0; i < parts.size() - 1; i++) {
927+
if (i > 0) db.append('.');
928+
db.append(parts.get(i));
929+
}
930+
token_source.database = db.toString();
927931
}
928932
}
929933
}

jdbc-v2/src/test/java/com/clickhouse/jdbc/internal/BaseSqlParserFacadeTest.java

Lines changed: 94 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -185,6 +185,100 @@ public void testMultiDotNotation() {
185185
Assert.assertEquals(stmt3.getTable(), "a.b.c");
186186
}
187187

188+
@Test
189+
public void testQuotedIdentifiersWithDots() {
190+
/*
191+
* Comprehensive test for quoted identifiers containing dots.
192+
* These cases are all valid in ClickHouse with MySQL-style backtick quoting.
193+
*
194+
* Expected Results Table:
195+
* ┌──────┬──────────────────────────────────┬─────────────────┬───────────────┐
196+
* │ Case │ SQL │ Database │ Table │
197+
* ├──────┼──────────────────────────────────┼─────────────────┼───────────────┤
198+
* │ 1 │ db.table │ db │ table │
199+
* ├──────┼──────────────────────────────────┼─────────────────┼───────────────┤
200+
* │ 2 │ `db`.`table` │ db │ table │
201+
* ├──────┼──────────────────────────────────┼─────────────────┼───────────────┤
202+
* │ 3 │ db.`table.name` │ db │ table.name │
203+
* ├──────┼──────────────────────────────────┼─────────────────┼───────────────┤
204+
* │ 4 │ `db.part1`.`table` │ db.part1 │ table │
205+
* ├──────┼──────────────────────────────────┼─────────────────┼───────────────┤
206+
* │ 5 │ db.`table.name` │ db │ table.name │
207+
* ├──────┼──────────────────────────────────┼─────────────────┼───────────────┤
208+
* │ 6 │ `db.part1`.table │ db.part1 │ table │
209+
* ├──────┼──────────────────────────────────┼─────────────────┼───────────────┤
210+
* │ 7 │ db.`tab``le` │ db │ tab`le │
211+
* ├──────┼──────────────────────────────────┼─────────────────┼───────────────┤
212+
* │ 8 │ `my db`.`table name!@#` │ my db │ table name!@# │
213+
* ├──────┼──────────────────────────────────┼─────────────────┼───────────────┤
214+
* │ 9 │ `db.part1`.`table.name` AS t │ db.part1 │ table.name │
215+
* ├──────┼──────────────────────────────────┼─────────────────┼───────────────┤
216+
* │ 10 │ db.`a.b.c.d` │ db │ a.b.c.d │
217+
* ├──────┼──────────────────────────────────┼─────────────────┼───────────────┤
218+
* │ 11 │ `db.part1.part2`.`table` │ db.part1.part2 │ table │
219+
* ├──────┼──────────────────────────────────┼─────────────────┼───────────────┤
220+
* │ 12 │ db.part1.table2 │ db.part1 │ table2 │
221+
* ├──────┼──────────────────────────────────┼─────────────────┼───────────────┤
222+
* │ 13 │ `db.part1`.`part2`.`table` │ db.part1.part2 │ table │
223+
* ├──────┼──────────────────────────────────┼─────────────────┼───────────────┤
224+
* │ 14 │ db.part1.`table.name` │ db.part1 │ table.name │
225+
* ├──────┼──────────────────────────────────┼─────────────────┼───────────────┤
226+
* │ 15 │ `db.part1`.part2.table3 │ db.part1.part2 │ table3 │
227+
* └──────┴──────────────────────────────────┴─────────────────┴───────────────┘
228+
*/
229+
230+
// Case 1: Unquoted database + unquoted table
231+
testCase("SELECT * FROM db.table WHERE id = ?", "db.table");
232+
233+
// Case 2: Quoted database + quoted table
234+
testCase("SELECT * FROM `db`.`table` WHERE id = ?", "db.table");
235+
236+
// Case 3: Dots inside quoted table name
237+
testCase("SELECT * FROM db.`table.name` WHERE id = ?", "db.table.name");
238+
239+
// Case 4: Dots inside quoted database name
240+
testCase("SELECT * FROM `db.part1`.`table` WHERE id = ?", "db.part1.table");
241+
242+
// Case 5: Mixed quoted/unquoted identifiers
243+
testCase("SELECT * FROM db.`table.name` WHERE id = ?", "db.table.name");
244+
245+
// Case 6: Mixed quoted/unquoted (reverse)
246+
testCase("SELECT * FROM `db.part1`.table WHERE id = ?", "db.part1.table");
247+
248+
// Case 7: Escaped backticks inside quoted identifier
249+
testCase("SELECT * FROM db.`tab``le` WHERE id = ?", "db.tab`le");
250+
251+
// Case 8: Weird characters inside quoted identifiers (spaces, symbols)
252+
testCase("SELECT * FROM `my db`.`table name!@#` WHERE id = ?", "my db.table name!@#");
253+
254+
// Case 9: Alias on table identifier
255+
testCase("SELECT * FROM `db.part1`.`table.name` AS t WHERE id = ?", "db.part1.table.name");
256+
257+
// Case 10: Quoted table name containing multiple dots
258+
testCase("SELECT * FROM db.`a.b.c.d` WHERE id = ?", "db.a.b.c.d");
259+
260+
// Case 11: Quoted database name containing multiple dots
261+
testCase("SELECT * FROM `db.part1.part2`.`table` WHERE id = ?", "db.part1.part2.table");
262+
263+
// Case 12: Multi-part unquoted chain (3-part identifier)
264+
testCase("SELECT * FROM db.part1.table2 WHERE id = ?", "db.part1.table2");
265+
266+
// Case 13: Multi-part quoted chain
267+
testCase("SELECT * FROM `db.part1`.`part2`.`table` WHERE id = ?", "db.part1.part2.table");
268+
269+
// Case 14: Mixed multi-part unquoted + quoted
270+
testCase("SELECT * FROM db.part1.`table.name` WHERE id = ?", "db.part1.table.name");
271+
272+
// Case 15: Mixed multi-part quoted + unquoted
273+
testCase("SELECT * FROM `db.part1`.part2.table3 WHERE id = ?", "db.part1.part2.table3");
274+
}
275+
276+
private void testCase(String sql, String expectedTableName) {
277+
ParsedPreparedStatement stmt = parser.parsePreparedStatement(sql);
278+
Assert.assertFalse(stmt.isHasErrors(), "Query should parse without errors: " + sql);
279+
Assert.assertEquals(stmt.getTable(), expectedTableName, "Table name mismatch for: " + sql);
280+
}
281+
188282
@Test(dataProvider = "testCreateStmtDP")
189283
public void testCreateStatement(String sql) {
190284
ParsedPreparedStatement stmt = parser.parsePreparedStatement(sql);

0 commit comments

Comments
 (0)