Skip to content
Open
Show file tree
Hide file tree
Changes from 5 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -1172,7 +1172,7 @@ tableArgExpr
// Databases

databaseIdentifier
: identifier
: identifier (DOT identifier)*
;

// Basics
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
import com.clickhouse.jdbc.internal.parser.antlr4.ClickHouseParserBaseListener;
import com.clickhouse.jdbc.internal.parser.javacc.ClickHouseSqlParser;
import com.clickhouse.jdbc.internal.parser.javacc.ClickHouseSqlStatement;
import com.clickhouse.jdbc.internal.parser.javacc.ClickHouseSqlUtils;
import com.clickhouse.jdbc.internal.parser.javacc.JdbcParseHandler;
import com.clickhouse.jdbc.internal.parser.javacc.StatementType;
import org.antlr.v4.runtime.BaseErrorListener;
Expand Down Expand Up @@ -248,6 +249,56 @@ public void enterColumnExprPrecedence3(ClickHouseParser.ColumnExprPrecedence3Con
super.enterColumnExprPrecedence3(ctx);
}

private String unquoteTableIdentifier(String rawTableId) {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

strange - I thought I've left the comment that it should be actually handle by parser.
Because one of the points to have parser is to avoid writing this kind of methods.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Thanks. The logic you commented on has been moved into the parsers themselves. In particular, the unquoteTableIdentifier helper was removed, and both the JavaCC and ANTLR grammars were updated to handle quoted identifiers directly (commit 95cce2ae).

In the current code, the listener just consumes the parser structure and applies unescape() to individual identifier tokens, following the same pattern used in enterUseStmt (i.e. getText() + utility), which I hope aligns with the existing style.

if (rawTableId == null || rawTableId.isEmpty()) {
return rawTableId;
}

// Parse respecting quoted identifiers - don't split dots inside quotes
StringBuilder result = new StringBuilder();
boolean inQuote = false;
char quoteChar = 0;
StringBuilder currentPart = new StringBuilder();

for (int i = 0; i < rawTableId.length(); i++) {
char ch = rawTableId.charAt(i);

if (!inQuote && (ch == '`' || ch == '"' || ch == '\'')) {
inQuote = true;
quoteChar = ch;
currentPart.append(ch);
} else if (inQuote && ch == quoteChar) {
// Check for escaped quote (doubled quote)
if (i + 1 < rawTableId.length() && rawTableId.charAt(i + 1) == quoteChar) {
currentPart.append(ch).append(ch);
i++; // Skip the next quote
} else {
inQuote = false;
currentPart.append(ch);
}
} else if (!inQuote && ch == '.') {
// Dot outside quotes - split here
if (result.length() > 0) {
result.append('.');
}
result.append(ClickHouseSqlUtils.unescape(currentPart.toString()));
currentPart.setLength(0);
} else {
currentPart.append(ch);
}
}

// Append the last part
if (currentPart.length() > 0) {
if (result.length() > 0) {
result.append('.');
}
result.append(ClickHouseSqlUtils.unescape(currentPart.toString()));
}

return result.toString();
}

@Override
public void visitErrorNode(ErrorNode node) {
parsedStatement.setHasErrors(true);
Expand All @@ -268,15 +319,15 @@ public void enterAssignmentValuesList(ClickHouseParser.AssignmentValuesListConte
@Override
public void enterTableExprIdentifier(ClickHouseParser.TableExprIdentifierContext ctx) {
if (ctx.tableIdentifier() != null) {
parsedStatement.setTable(SQLUtils.unquoteIdentifier(ctx.tableIdentifier().getText()));
parsedStatement.setTable(unquoteTableIdentifier(ctx.tableIdentifier().getText()));
}
}

@Override
public void enterInsertStmt(ClickHouseParser.InsertStmtContext ctx) {
ClickHouseParser.TableIdentifierContext tableId = ctx.tableIdentifier();
if (tableId != null) {
parsedStatement.setTable(SQLUtils.unquoteIdentifier(tableId.getText()));
parsedStatement.setTable(unquoteTableIdentifier(tableId.getText()));
}

ClickHouseParser.ColumnsClauseContext columns = ctx.columnsClause();
Expand Down
22 changes: 18 additions & 4 deletions jdbc-v2/src/main/javacc/ClickHouseSqlParser.jj
Original file line number Diff line number Diff line change
Expand Up @@ -903,18 +903,32 @@ void nestedIdentifier(): {} {
(<ASTERISK> | anyIdentifier()) (LOOKAHEAD(2) <DOT> (<ASTERISK> | anyIdentifier()))*
}

void tableIdentifier(boolean record): { Token t; } {
void tableIdentifier(boolean record): { Token t; java.util.List<String> parts = new java.util.ArrayList<>(); } {
(
(LOOKAHEAD(2) databaseIdentifier(record) <DOT>)? t = anyIdentifier()
t = anyIdentifier() { parts.add(ClickHouseSqlUtils.unescape(t.image)); }
(
LOOKAHEAD(2) <DOT> t = anyIdentifier() { parts.add(ClickHouseSqlUtils.unescape(t.image)); }
)*
(LOOKAHEAD(2)
<LPAREN> { token_source.addCustomKeywordPosition(ClickHouseSqlStatement.KEYWORD_TABLE_COLUMNS_START, token); }
anyExprList()
<RPAREN> { token_source.addCustomKeywordPosition(ClickHouseSqlStatement.KEYWORD_TABLE_COLUMNS_END, token); }
)?
)
{
if (record && t != null && token_source.table == null) {
token_source.table = ClickHouseSqlUtils.unescape(t.image);
if (record && token_source.table == null && parts.size() > 0) {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

why parser cannot do it for us?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Refactored JavaCC to use grammar structure. That is: instead of collecting parts into a List with post-processing, added databasePart/tablePart helper rules (I think consistent with existing *Part rules like formatPart, compressionPart). The grammar now determines what's database vs table via LOOKAHEAD.

// Last part is always the table name
token_source.table = parts.get(parts.size() - 1);

// All parts before the last are the database (if any)
if (parts.size() > 1) {
StringBuilder db = new StringBuilder();
for (int i = 0; i < parts.size() - 1; i++) {
if (i > 0) db.append('.');
db.append(parts.get(i));
}
token_source.database = db.toString();
}
}
}
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -160,6 +160,90 @@ public void testStmtWithUUID() {
Assert.assertFalse(stmt.isHasErrors());
}

@Test
public void testMultiDotNotation() {
// Test with three parts: a.b.c where a.b is database and c is table
String sql1 = "SELECT * FROM a.b.c WHERE id = ?";
ParsedPreparedStatement stmt1 = parser.parsePreparedStatement(sql1);
Assert.assertEquals(stmt1.getArgCount(), 1);
Assert.assertFalse(stmt1.isHasErrors());
Assert.assertEquals(stmt1.getTable(), "a.b.c");

// Test with quoted identifiers
String sql2 = "SELECT * FROM `db.part1`.`table` WHERE id = ?";
ParsedPreparedStatement stmt2 = parser.parsePreparedStatement(sql2);
Assert.assertEquals(stmt2.getArgCount(), 1);
Assert.assertFalse(stmt2.isHasErrors());
Assert.assertEquals(stmt2.getTable(), "db.part1.table");

// Test INSERT with multi-dot notation
String sql3 = "INSERT INTO a.b.c (col1, col2) VALUES (?, ?)";
ParsedPreparedStatement stmt3 = parser.parsePreparedStatement(sql3);
Assert.assertEquals(stmt3.getArgCount(), 2);
Assert.assertFalse(stmt3.isHasErrors());
Assert.assertTrue(stmt3.isInsert());
Assert.assertEquals(stmt3.getTable(), "a.b.c");
}

@Test
public void testQuotedIdentifiersWithDots() {
/*
* Comprehensive test for quoted identifiers containing dots.
* These cases are all valid in ClickHouse with MySQL-style backtick quoting.
*/

// Case 1: Unquoted database + unquoted table
testCase("SELECT * FROM db.table WHERE id = ?", "db.table");

// Case 2: Quoted database + quoted table
testCase("SELECT * FROM `db`.`table` WHERE id = ?", "db.table");

// Case 3: Dots inside quoted table name
testCase("SELECT * FROM db.`table.name` WHERE id = ?", "db.table.name");

// Case 4: Dots inside quoted database name
testCase("SELECT * FROM `db.part1`.`table` WHERE id = ?", "db.part1.table");

// Case 5: Mixed quoted/unquoted identifiers
testCase("SELECT * FROM db.`table.name` WHERE id = ?", "db.table.name");

// Case 6: Mixed quoted/unquoted (reverse)
testCase("SELECT * FROM `db.part1`.table WHERE id = ?", "db.part1.table");

// Case 7: Escaped backticks inside quoted identifier
testCase("SELECT * FROM db.`tab``le` WHERE id = ?", "db.tab`le");

// Case 8: Weird characters inside quoted identifiers (spaces, symbols)
testCase("SELECT * FROM `my db`.`table name!@#` WHERE id = ?", "my db.table name!@#");

// Case 9: Alias on table identifier
testCase("SELECT * FROM `db.part1`.`table.name` AS t WHERE id = ?", "db.part1.table.name");

// Case 10: Quoted table name containing multiple dots
testCase("SELECT * FROM db.`a.b.c.d` WHERE id = ?", "db.a.b.c.d");

// Case 11: Quoted database name containing multiple dots
testCase("SELECT * FROM `db.part1.part2`.`table` WHERE id = ?", "db.part1.part2.table");

// Case 12: Multi-part unquoted chain (3-part identifier)
testCase("SELECT * FROM db.part1.table2 WHERE id = ?", "db.part1.table2");

// Case 13: Multi-part quoted chain
testCase("SELECT * FROM `db.part1`.`part2`.`table` WHERE id = ?", "db.part1.part2.table");

// Case 14: Mixed multi-part unquoted + quoted
testCase("SELECT * FROM db.part1.`table.name` WHERE id = ?", "db.part1.table.name");

// Case 15: Mixed multi-part quoted + unquoted
testCase("SELECT * FROM `db.part1`.part2.table3 WHERE id = ?", "db.part1.part2.table3");
}

private void testCase(String sql, String expectedTableName) {
ParsedPreparedStatement stmt = parser.parsePreparedStatement(sql);
Assert.assertFalse(stmt.isHasErrors(), "Query should parse without errors: " + sql);
Assert.assertEquals(stmt.getTable(), expectedTableName, "Table name mismatch for: " + sql);
}

@Test(dataProvider = "testCreateStmtDP")
public void testCreateStatement(String sql) {
ParsedPreparedStatement stmt = parser.parsePreparedStatement(sql);
Expand Down
Loading