Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view

Large diffs are not rendered by default.

Original file line number Diff line number Diff line change
@@ -1,6 +1,80 @@
package com.clickhouse.jdbc.internal.parser.javacc;

import java.util.Collections;
import java.util.LinkedHashSet;
import java.util.Set;

public final class ClickHouseSqlUtils {
/** Group name that {@link #getKeywordGroup(String)} maps to the allowed-alias keyword set. */
public static final String KEYWORD_GROUP_ALLOWED_ALIASES = "allowed_keyword_aliases";

/** Unmodifiable keyword set for {@link #KEYWORD_GROUP_ALLOWED_ALIASES}; built once when the class is initialized. */
private static final Set<String> ALLOWED_KEYWORD_ALIASES = initAllowedKeywordAliases();

/**
 * Builds the keyword set stored in {@code ALLOWED_KEYWORD_ALIASES}.
 *
 * <p>Every entry is written in upper case. The list is passed to
 * {@code buildKeywordSet}, so the result is unmodifiable and keeps the order
 * in which the keywords are listed here.
 *
 * @return the unmodifiable set of keywords in the allowed-alias group
 */
private static Set<String> initAllowedKeywordAliases() {
return buildKeywordSet(
"ACCESS", "ACTION", "ADD", "ADMIN", "AFTER", "ALGORITHM", "ALIAS", "ALLOWED_LATENESS", "ALTER",
"AND", "APPEND", "APPLY", "ASC", "ASCENDING", "ASSUME", "AST", "ASYNC", "ATTACH",
"AUTHENTICATION", "AUTO_INCREMENT", "AZURE", "BACKUP", "BAGEXPANSION", "BASE_BACKUP",
"BCRYPT_HASH", "BCRYPT_PASSWORD", "BEGIN", "BIDIRECTIONAL", "BOTH", "BY", "CACHE", "CACHES",
"CASCADE", "CASE", "CAST", "CHANGE", "CHANGEABLE_IN_READONLY", "CHANGED", "CHAR", "CHARACTER",
"CHECK", "CLEANUP", "CLEAR", "CLONE", "CLUSTER", "CLUSTERS", "CLUSTER_HOST_IDS", "CN", "CODEC",
"COLLATE", "COLLECTION", "COLUMN", "COLUMNS", "COMMENT", "COMMIT", "COMPRESSION", "CONNECTIONS",
"CONST", "CONSTRAINT", "COPY", "CREATE", "CUBE", "CURRENT", "CURRENT_USER", "CURRENTUSER",
"D", "DATA", "DATABASE", "DATABASES", "DATE", "DAY", "DAYS", "DD", "DDL", "DEALLOCATE",
"DEDUPLICATE", "DEFAULT", "DEFINER", "DELAY", "DELETE", "DELETED", "DEPENDS", "DESC",
"DESCENDING", "DESCRIBE", "DETACH", "DETACHED", "DICTIONARIES", "DICTIONARY", "DISK", "DISTINCT",
"DIV", "DOUBLE_SHA1_HASH", "DOUBLE_SHA1_PASSWORD", "DROP", "EMPTY", "ENABLED", "END", "ENFORCED",
"ENGINE", "ENGINES", "EPHEMERAL", "ESTIMATE", "EVENT", "EVENTS", "EVERY", "EXCHANGE", "EXECUTE",
"EXISTS", "EXPLAIN", "EXPRESSION", "EXTENDED", "EXTERNAL", "FAKE", "FALSE", "FETCH", "FIELDS",
"FILE", "FILES", "FILESYSTEM", "FILL", "FILTER", "FIRST", "FOLLOWING", "FOR", "FORCE", "FOREIGN",
"FORGET", "FREEZE", "FULLTEXT", "FUNCTION", "FUNCTIONS", "GRANT", "GRANTEES", "GRANTS",
"GRANULARITY", "GROUPING", "GROUPS", "H", "HASH", "HDFS", "HH", "HIERARCHICAL", "HOST", "HOUR",
"HOURS", "HTTP", "ID", "IDENTIFIED", "IF", "IGNORE", "IMPLICIT", "IN", "INDEX", "INDEXES",
"INDICES", "INFILE", "INHERIT", "INJECTIVE", "INSERT", "INTERPOLATE", "INTERVAL", "INVISIBLE",
"INVOKER", "IP", "IS", "IS_OBJECT_ID", "JWT", "KERBEROS", "KEY", "KEYED", "KEYS", "KILL", "KIND",
"LARGE", "LAST", "LAYOUT", "LDAP", "LEADING", "LESS", "LEVEL", "LIFETIME", "LIGHTWEIGHT",
"LIMITS", "LINEAR", "LIST", "LIVE", "LOCAL", "M", "MASK", "MASKING", "MASTER", "MATCH",
"MATERIALIZE", "MATERIALIZED", "MAX", "MCS", "MEMORY", "MERGES", "METHODS", "METRICS", "MI",
"MICROSECOND", "MICROSECONDS", "MILLISECOND", "MILLISECONDS", "MIN", "MINUTE", "MINUTES", "MM",
"MOD", "MODIFY", "MONTH", "MONTHS", "MOVE", "MS", "MUTATION", "N", "NAME", "NAMED", "NANOSECOND",
"NANOSECONDS", "NEW", "NEXT", "NO", "NO_AUTHENTICATION", "NONE", "NO_PASSWORD", "NS", "NULL",
"NULLS", "OBJECT", "OPTIMIZE", "OPTION", "OR", "OUTER", "OUTFILE", "OVER", "OVERRIDABLE",
"OVERRIDE", "PART", "PARTIAL", "PARTITION", "PARTITIONS", "PART_MOVE_TO_SHARD", "PARTS",
"PATCHES", "PAUSE", "PERIODIC", "PERMANENTLY", "PERMISSIVE", "PERSISTENT", "PIPELINE", "PLAN",
"PLAINTEXT_PASSWORD", "POLICY", "POPULATE", "PRECEDING", "PRECISION", "PREFIX", "PREPARE",
"PRIMARY", "PRIORITY", "PRIVILEGES", "PROCESSLIST", "PROFILE", "PROFILES", "PROJECTION",
"PROTOBUF", "PULL", "Q", "QQ", "QUARTER", "QUARTERS", "QUERY", "QUOTA", "RANDOMIZE",
"RANDOMIZED", "RANGE", "READ", "READONLY", "REALM", "RECOMPRESS", "RECURSIVE", "REFERENCES",
"REFRESH", "REGEXP", "REMOVE", "RENAME", "REPLACE", "REPLICATED", "RESET", "RESOURCE", "RESPECT",
"RESTORE", "RESTRICT", "RESTRICTIVE", "RESUME", "REVOKE", "REWRITE", "ROLE", "ROLES", "ROLLBACK",
"ROLLUP", "ROW", "ROWS", "S", "S3", "SALT", "SAN", "SCHEME", "SCRAM_SHA256_HASH",
"SCRAM_SHA256_PASSWORD", "SECOND", "SECONDS", "SECURITY", "SELECT", "SEQUENTIAL", "SERVER",
"SET", "SETS", "SETTING", "SHA256_HASH", "SHA256_PASSWORD", "SHARD", "SHOW", "SIGNED", "SIMPLE",
"SKIP", "SNAPSHOT", "SOURCE", "SPATIAL", "SQL", "SQL_TSI_DAY", "SQL_TSI_HOUR",
"SQL_TSI_MICROSECOND", "SQL_TSI_MILLISECOND", "SQL_TSI_MINUTE", "SQL_TSI_MONTH",
"SQL_TSI_NANOSECOND", "SQL_TSI_QUARTER", "SQL_TSI_SECOND", "SQL_TSI_WEEK", "SQL_TSI_YEAR", "SS",
"SSH_KEY", "SSL_CERTIFICATE", "STALENESS", "START", "STATISTIC", "STATISTICS", "STDOUT", "STEP",
"STORAGE", "STRICT", "STRICTLY_ASCENDING", "SUBPARTITION", "SUBPARTITIONS", "SUSPEND", "SYNC",
"SYNTAX", "SYSTEM", "TABLE", "TABLES", "TAG", "TAGS", "TEMPORARY", "TEST", "THAN", "THEN",
"THREAD", "TIES", "TIME", "TIMESTAMP", "TO", "TOP", "TOTALS", "TRACKING", "TRAILING",
"TRANSACTION", "TREE", "TRIGGER", "TRUE", "TRUNCATE", "TTL", "TYPE", "TYPEOF", "UNBOUNDED",
"UNDROP", "UNFREEZE", "UNIQUE", "UNLOCK", "UNSET", "UNSIGNED", "UNTIL", "UPDATE", "URL", "USE",
"USER", "VALID", "VALUES", "VARYING", "VIEW", "VISIBLE", "VOLUME", "WATCH", "WATERMARK", "WEEK",
"WEEKS", "WHEN", "WITH_ITEMINDEX", "WK", "WORKER", "WORKLOAD", "WRITABLE", "WRITE", "WW",
"YEAR", "YEARS", "YY", "YYYY", "ZKPATH");
}

/**
 * Collects the given keywords into an unmodifiable set that iterates in
 * argument order, with duplicates collapsed to their first occurrence.
 *
 * @param values keywords to include; a {@code null} array yields an empty set
 * @return an unmodifiable, insertion-ordered view of the collected keywords
 */
private static Set<String> buildKeywordSet(String... values) {
    if (values == null) {
        // Still hand back an unmodifiable wrapper so callers see one result shape.
        return Collections.unmodifiableSet(new LinkedHashSet<String>());
    }

    Set<String> ordered = new LinkedHashSet<>();
    for (String value : values) {
        ordered.add(value);
    }
    return Collections.unmodifiableSet(ordered);
}

/**
 * Looks up a keyword group by name.
 *
 * @param groupName the group to look up; may be {@code null}
 * @return the allowed-alias keyword set when {@code groupName} equals
 *         {@link #KEYWORD_GROUP_ALLOWED_ALIASES}, otherwise an empty set
 */
public static Set<String> getKeywordGroup(String groupName) {
    // Constant-first equals keeps a null group name from throwing.
    if (!KEYWORD_GROUP_ALLOWED_ALIASES.equals(groupName)) {
        return Collections.emptySet();
    }
    return ALLOWED_KEYWORD_ALIASES;
}

/**
 * Tells whether a character is one of the three quote characters this class
 * recognizes: double quote, single quote, or back-quote.
 *
 * @param ch the character to test
 * @return {@code true} if {@code ch} is {@code "}, {@code '} or {@code `}
 */
public static boolean isQuote(char ch) {
    switch (ch) {
        case '"':
        case '\'':
        case '`':
            return true;
        default:
            return false;
    }
}
Expand Down
96 changes: 48 additions & 48 deletions jdbc-v2/src/main/javacc/ClickHouseSqlParser.jj
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,13 @@ public class ClickHouseSqlParser {

private static final Logger log = LoggerFactory.getLogger(ClickHouseSqlParser.class);

// Keyword set fetched once from ClickHouseSqlUtils; isAllowedAlias() checks token images against it.
private static final Set<String> ALLOWED_ALIAS_KEYWORDS =
ClickHouseSqlUtils.getKeywordGroup(ClickHouseSqlUtils.KEYWORD_GROUP_ALLOWED_ALIASES);

/**
 * Tells whether a token's text is in the allowed-alias keyword set.
 *
 * @param t the token to test; may be {@code null}
 * @return {@code true} if {@code t} is non-null and its image, upper-cased
 *         with {@link Locale#ROOT}, is contained in {@code ALLOWED_ALIAS_KEYWORDS}
 */
private static boolean isAllowedAlias(Token t) {
    if (t == null) {
        return false;
    }
    // Upper-case with a fixed locale so the lookup does not vary with the default locale.
    String upperImage = t.image.toUpperCase(Locale.ROOT);
    return ALLOWED_ALIAS_KEYWORDS.contains(upperImage);
}

private final List<ClickHouseSqlStatement> statements = new ArrayList<>();

private ParseHandler handler;
Expand Down Expand Up @@ -520,7 +527,12 @@ void deleteStmt(): {} {
// https://clickhouse.tech/docs/en/sql-reference/statements/describe-table/
void describeStmt(): {} {
(<DESCRIBE> | <DESC>) { token_source.table = "columns"; }
(LOOKAHEAD({ getToken(1).kind == TABLE }) <TABLE>)? tableIdentifier(true) (anyExprList())?
(LOOKAHEAD({ getToken(1).kind == TABLE }) <TABLE>)?
(
LOOKAHEAD({ getToken(1).kind == LPAREN }) <LPAREN> anyExprList() <RPAREN>
| tableIdentifier(true)
)
(anyExprList())?
}

// https://clickhouse.tech/docs/en/sql-reference/statements/detach/
Expand Down Expand Up @@ -575,8 +587,13 @@ void grantStmt(): {} { // not interested
void insertStmt(): {} {
<INSERT> <INTO>
(
LOOKAHEAD({ getToken(1).kind == FUNCTION }) <FUNCTION> functionExpr()
| (LOOKAHEAD(2) <TABLE>)? tableIdentifier(true)
LOOKAHEAD({ getToken(1).kind == FUNCTION
&& !tokenIn(2, VALUES, FORMAT, SETTINGS, SELECT, WITH, INFILE)
&& getToken(3).kind == LPAREN }) <FUNCTION> functionExpr()
| (
LOOKAHEAD({ getToken(1).kind == TABLE
&& !tokenIn(2, VALUES, FORMAT, SETTINGS, SELECT, WITH, LPAREN) }) <TABLE>
)? tableIdentifier(true)
)
(
LOOKAHEAD(2) infilePart()
Expand Down Expand Up @@ -678,6 +695,7 @@ void showStmt(): {} {
(<DATABASE> databaseIdentifier(true))
| LOOKAHEAD(2) (LOOKAHEAD(1) <CREATE>)? (LOOKAHEAD(1) <SETTINGS>)? <PROFILE> anyIdentifier()
| LOOKAHEAD(2) (<DICTIONARY> tableIdentifier(true))
| LOOKAHEAD(2) (LOOKAHEAD(1) <CHANGED>)? <SETTINGS> { token_source.table = "settings"; }
| LOOKAHEAD(2) ((LOOKAHEAD(2) <TEMPORARY>)? (LOOKAHEAD(2) <TABLE>)? tableIdentifier(true))
)
)
Expand Down Expand Up @@ -725,12 +743,12 @@ void columnExprList(): {} {

void withExpr(): {} {
nestedExpr()
(LOOKAHEAD(2) <LBRACKET> anyExprList() <RBRACKET>)*
(
(
LOOKAHEAD({ getToken(1).kind == FLOATING_LITERAL })
<FLOATING_LITERAL> | <DOT> <DECIMAL_LITERAL>
)+
| (LOOKAHEAD(2) <LBRACKET> anyExprList() <RBRACKET>)+
| LOOKAHEAD(2) <IS> (<NOT>)? <NULL>
| LOOKAHEAD(2) (<NOT>)? betweenExpr()
| LOOKAHEAD(2) (<NOT>)? (<ILIKE> | <LIKE>) nestedExpr()
Expand All @@ -747,12 +765,12 @@ void columnsExpr(): {} {
LOOKAHEAD(2) (<APPLY> | <EXCEPT> | <REPLACE>) <LPAREN> anyExprList() <RPAREN>
)*
| nestedExpr()
(LOOKAHEAD(2) <LBRACKET> anyExprList() <RBRACKET>)*
(
(
LOOKAHEAD({ getToken(1).kind == FLOATING_LITERAL })
<FLOATING_LITERAL> | <DOT> <DECIMAL_LITERAL>
)+
| (LOOKAHEAD(2) <LBRACKET> anyExprList() <RBRACKET>)+
| LOOKAHEAD(2) <IS> (<NOT>)? <NULL>
| LOOKAHEAD(2) (<NOT>)? betweenExpr()
| LOOKAHEAD(2) (<NOT>)? (<ILIKE> | <LIKE>) nestedExpr()
Expand All @@ -773,11 +791,11 @@ void nestedExpr(): {} {
(<WHEN> nestedExpr() <THEN> nestedExpr())+ (<ELSE> nestedExpr())? <END>
| LOOKAHEAD(2) <INTERVAL> (LOOKAHEAD(2) <STRING_LITERAL> | nestedExpr() interval())
| columnExpr()
(LOOKAHEAD(2) <LBRACKET> anyExprList() <RBRACKET>)*
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Inconsistent FLOATING_LITERAL LOOKAHEAD across three expression functions

Low Severity

The LBRACKET handling was moved outside the choice block identically in withExpr(), columnsExpr(), and nestedExpr(). However, the LOOKAHEAD({ getToken(1).kind == FLOATING_LITERAL }) guard was added before <FLOATING_LITERAL> in withExpr() and columnsExpr() but omitted in nestedExpr(). All three functions share the same structural pattern for this code block, so the inconsistent application of the fix is surprising and could confuse future maintainers.

Additional Locations (2)

Fix in Cursor Fix in Web

(
(
<FLOATING_LITERAL> | <DOT> <DECIMAL_LITERAL>
)+
| (LOOKAHEAD(2) <LBRACKET> anyExprList() <RBRACKET>)+
| LOOKAHEAD(2) <IS> (<NOT>)? <NULL>
| LOOKAHEAD(2) (<NOT>)? betweenExpr()
| LOOKAHEAD(2) (<NOT>)? (<ILIKE> | <LIKE>) nestedExpr()
Expand Down Expand Up @@ -843,7 +861,7 @@ void outfilePart(): {} {
}

void settingsPart(): {} {
<SETTINGS> { token_source.addPosition(token); } settingExprList()
(LOOKAHEAD(1) <CHANGED>)? <SETTINGS> { token_source.addPosition(token); } (LOOKAHEAD(2) settingExprList())?
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Optional settingExprList silently consumes bare SETTINGS keyword everywhere

Medium Severity

Making settingExprList() optional in settingsPart() enables SHOW CHANGED SETTINGS but also changes behavior in all other call sites (aliasExpr, anyNestedExpr, infilePart, insertStmt). Previously, a bare SETTINGS without key=value pairs would produce a parse error, acting as a guard against incorrect consumption. Now SETTINGS is silently consumed everywhere, even in contexts like INSERT INTO t SETTINGS VALUES (1) where the old parser would correctly error and fall back.

Fix in Cursor Fix in Web

}

void withTotalPart(): {} {
Expand Down Expand Up @@ -888,13 +906,25 @@ void anyColumnExpr(): {} {
| nestedIdentifier()
}

// Matches a name usable as an alias and returns the matched token.
// Accepted forms: a plain identifier, a back-quoted or double-quoted name,
// a variable, or a keyword that passes isAllowedAlias().
Token aliasIdentifier(): { Token t; } {
(
t = <IDENTIFIER>
| t = <BACK_QUOTED_NAME>
| t = <DOUBLE_QUOTED_NAME>
| t = variable()
// Semantic lookahead: take the keyword branch only when the next token passes isAllowedAlias().
| LOOKAHEAD({ isAllowedAlias(getToken(1)) })
t = anyKeyword()
)
{ return t; }
}

Token aliasExpr(): { Token t = null; } {
(
LOOKAHEAD(2) <AS> t = anyIdentifier()
LOOKAHEAD(2) <AS> t = aliasIdentifier()
| LOOKAHEAD(2) formatPart()
| LOOKAHEAD(2) settingsPart()
| LOOKAHEAD(2) outfilePart()
| t = identifier()
| t = aliasIdentifier()
)
{ return t; }
}
Expand Down Expand Up @@ -950,10 +980,12 @@ void settingExprList(): {} {
}

void settingExpr(): { String key; } {
identifier() { key = token.image; } <EQ_SINGLE> literal() { token_source.addSetting(key, token.image); }
anyIdentifier() { key = token.image; } <EQ_SINGLE> literal() { token_source.addSetting(key, token.image); }
}

// basics
// --- Base definitions


Token anyIdentifier(): { Token t; } {
(
t = <BACK_QUOTED_NAME>
Expand All @@ -965,17 +997,6 @@ Token anyIdentifier(): { Token t; } {
{ return t; }
}

Token identifier(): { Token t; } {
(
t = <BACK_QUOTED_NAME>
| t = <DOUBLE_QUOTED_NAME>
| t = variable()
| t = <IDENTIFIER>
| t = keyword()
)
{ return t; }
}

// Matches exactly one interval unit keyword, from SECOND through YEAR.
void interval(): {} {
<SECOND> | <MINUTE> | <HOUR> | <DAY> | <WEEK> | <MONTH> | <QUARTER> | <YEAR>
}
Expand Down Expand Up @@ -1045,15 +1066,15 @@ Token anyKeyword(): { Token t; } {
| t = <MOVE> | t = <OPTIMIZE> | t = <RENAME> | t = <REVOKE> | t = <SELECT> | t = <SET> | t = <SHOW> | t = <SYSTEM>
| t = <TRUNCATE> | t = <UPDATE> | t = <USE> | t = <WATCH> | t = <UNDROP>
// others
| t = <ALL> | t = <AND> | t = <APPLY> | t = <ARRAY> | t = <AS> | t = <ASOF> | t = <BETWEEN> | t = <CASE>
| t = <CLUSTER> | t = <COMPRESSION> | t = <DATE> | t = <DATABASE> | t = <DATABASES> | t = <DICTIONARY> | t = <DICTIONARIES>
| t = <ALL> | t = <AND> | t = <APPLY> | t = <ARRAY> | t = <AS> | t = <ASOF> | t = <BEGIN> | t = <BETWEEN> | t = <CASE> | t = <CHANGED>
| t = <CLUSTER> | t = <COMMIT> | t = <COMPRESSION> | t = <DATE> | t = <DATABASE> | t = <DATABASES> | t = <DICTIONARY> | t = <DICTIONARIES>
| t = <DISTINCT> | t = <ELSE> | t = <END> | t = <EXCEPT> | t = <FORMAT> | t = <FROM> | t = <FINAL> | t = <FULL>
| t = <FUNCTION> | t = <GLOBAL> | t = <GROUP> | t = <HAVING> | t = <IF> | t = <ILIKE> | t = <IN> | t = <INFILE> | t = <INNER>
| t = <INPUT> | t = <INTERVAL> | t = <INTO> | t = <IS> | t = <LEVEL> | t = <JOIN> | t = <LEFT> | t = <LIKE> | t = <LIMIT> | t = <LIVE>
| t = <MATERIALIZED> | t = <NOT> | t = <OFFSET> | t = <ON> | t = <OR> | t = <ORDER> | t = <OUTFILE> | t = <POLICY>
| t = <PREWHERE> | t = <PROFILE> | t = <QUOTA> | t = <REPLACE> | t = <RIGHT> | t = <ROLE> | t = <ROW> | t = <SAMPLE>
| t = <PREWHERE> | t = <PROFILE> | t = <QUOTA> | t = <REPLACE> | t = <RIGHT> | t = <ROLE> | t = <ROLLBACK> | t = <ROW> | t = <SAMPLE>
| t = <SETTINGS> | t = <STDOUT> | t = <TEMPORARY> | t = <TABLE> | t = <TABLES> | t = <THEN> | t = <TIES> | t = <TIMESTAMP>
| t = <TOP> | t = <TOTALS> | t = <VALUES> | t = <VIEW> | t = <USER> | t = <UNION> | t = <USING>
| t = <TOP> | t = <TOTALS> | t = <TRANSACTION> | t = <VALUES> | t = <VIEW> | t = <USER> | t = <UNION> | t = <USING>
| t = <WHEN> | t = <WHERE> | t = <WITH> | t = <REGEXP>
// interval
| t = <SECOND> | t = <MINUTE> | t = <HOUR> | t = <DAY> | t = <WEEK> | t = <MONTH> | t = <QUARTER> | t = <YEAR>
Expand All @@ -1063,32 +1084,11 @@ Token anyKeyword(): { Token t; } {
{ return t; }
}

Token keyword(): { Token t; } {
(
// leading keywords(except with)
t = <ALTER> | t = <ATTACH> | t = <CHECK> | t = <CREATE> | t = <DELETE> | t = <DESC> | t = <DESCRIBE>
| t = <DETACH> | t = <DROP> | t = <EXCHANGE> | t = <EXISTS> | t = <EXPLAIN> | t = <GRANT> | t = <INSERT> | t = <KILL>
| t = <MOVE> | t = <OPTIMIZE> | t = <RENAME> | t = <REVOKE> | t = <SELECT> | t = <SET> | t = <SHOW> | t = <SYSTEM>
| t = <TRUNCATE> | t = <UPDATE> | t = <USE> | t = <WATCH> | t = <UNDROP>
// others
| t = <CASE> | t = <CLUSTER> | t = <DATE> | t = <DATABASE> | t = <DATABASES> | t = <DICTIONARY>
| t = <DICTIONARIES> | t = <DISTINCT> | t = <ELSE> | t = <END> | t = <EXCEPT>| t = <FUNCTION>
| t = <IF> | t = <INTERVAL> | t = <IS> | t = <INFILE> | t = <LIVE> | t = <MATERIALIZED> | t = <OUTFILE> | t = <POLICY>
| t = <PROFILE> | t = <QUOTA> | t = <REPLACE> | t = <ROLE> | t = <ROW> | t = <TEMPORARY>
| t = <TABLE> | t = <TABLES> | t = <THEN> | t = <TIES> | t = <TIMESTAMP> | t = <TOP> | t = <TOTALS>
| t = <VALUES> | t = <VIEW> | t = <WHEN> | t = <USER> | t = <REGEXP>
// interval
| t = <SECOND> | t = <MINUTE> | t = <HOUR> | t = <DAY> | t = <WEEK> | t = <MONTH> | t = <QUARTER> | t = <YEAR>
// values
| t = <INF> | t = <NAN> | t = <NULL>
)
{ return t; }
}

// keywords
TOKEN: {
<ALTER : <A> <L> <T> <E> <R>>
| <ATTACH : <A> <T> <T> <A> <C> <H> >
| <CHANGED : <C> <H> <A> <N> <G> <E> <D> >
| <CHECK : <C> <H> <E> <C> <K> >
| <CREATE : <C> <R> <E> <A> <T> <E> >
| <DELETE : <D> <E> <L> <E> <T> <E> >
Expand Down
Loading