sqlglot.dialects.clickhouse
from __future__ import annotations

import typing as t

from sqlglot import exp, generator, parser, tokens
from sqlglot.dialects.dialect import (
    Dialect,
    NormalizationStrategy,
    arg_max_or_min_no_count,
    build_date_delta,
    build_formatted_time,
    inline_array_sql,
    json_extract_segments,
    json_path_key_only_name,
    no_pivot_sql,
    build_json_extract_path,
    rename_func,
    sha256_sql,
    var_map_sql,
    timestamptrunc_sql,
    unit_to_var,
)
from sqlglot.generator import Generator
from sqlglot.helper import is_int, seq_get
from sqlglot.tokens import Token, TokenType

DATETIME_DELTA = t.Union[exp.DateAdd, exp.DateDiff, exp.DateSub, exp.TimestampSub, exp.TimestampAdd]


def _build_date_format(args: t.List) -> exp.TimeToStr:
    expr = build_formatted_time(exp.TimeToStr, "clickhouse")(args)

    timezone = seq_get(args, 2)
    if timezone:
        expr.set("timezone", timezone)

    return expr


def _unix_to_time_sql(self: ClickHouse.Generator, expression: exp.UnixToTime) -> str:
    scale = expression.args.get("scale")
    timestamp = expression.this

    if scale in (None, exp.UnixToTime.SECONDS):
        return self.func("fromUnixTimestamp", exp.cast(timestamp, exp.DataType.Type.BIGINT))
    if scale == exp.UnixToTime.MILLIS:
        return self.func("fromUnixTimestamp64Milli", exp.cast(timestamp, exp.DataType.Type.BIGINT))
    if scale == exp.UnixToTime.MICROS:
        return self.func("fromUnixTimestamp64Micro", exp.cast(timestamp, exp.DataType.Type.BIGINT))
    if scale == exp.UnixToTime.NANOS:
        return self.func("fromUnixTimestamp64Nano", exp.cast(timestamp, exp.DataType.Type.BIGINT))

    return self.func(
        "fromUnixTimestamp",
        exp.cast(
            exp.Div(this=timestamp, expression=exp.func("POW", 10, scale)), exp.DataType.Type.BIGINT
        ),
    )
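

# Illustrative sketch (not part of the upstream module): the scale branches in
# `_unix_to_time_sql` above map onto ClickHouse functions roughly as follows,
# for a hypothetical column `x` (BIGINT renders as Int64 under this dialect):
#
#   no scale / SECONDS -> fromUnixTimestamp(CAST(x AS Int64))
#   MILLIS             -> fromUnixTimestamp64Milli(CAST(x AS Int64))
#   MICROS             -> fromUnixTimestamp64Micro(CAST(x AS Int64))
#   NANOS              -> fromUnixTimestamp64Nano(CAST(x AS Int64))
#   any other scale    -> fromUnixTimestamp(CAST(x / POW(10, scale) AS Int64))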


def _lower_func(sql: str) -> str:
    index = sql.index("(")
    return sql[:index].lower() + sql[index:]


def _quantile_sql(self: ClickHouse.Generator, expression: exp.Quantile) -> str:
    quantile = expression.args["quantile"]
    args = f"({self.sql(expression, 'this')})"

    if isinstance(quantile, exp.Array):
        func = self.func("quantiles", *quantile)
    else:
        func = self.func("quantile", quantile)

    return func + args


def _build_count_if(args: t.List) -> exp.CountIf | exp.CombinedAggFunc:
    if len(args) == 1:
        return exp.CountIf(this=seq_get(args, 0))

    return exp.CombinedAggFunc(this="countIf", expressions=args, parts=("count", "If"))


def _build_str_to_date(args: t.List) -> exp.Cast | exp.Anonymous:
    if len(args) == 3:
        return exp.Anonymous(this="STR_TO_DATE", expressions=args)

    strtodate = exp.StrToDate.from_arg_list(args)
    return exp.cast(strtodate, exp.DataType.build(exp.DataType.Type.DATETIME))


def _datetime_delta_sql(name: str) -> t.Callable[[Generator, DATETIME_DELTA], str]:
    def _delta_sql(self: Generator, expression: DATETIME_DELTA) -> str:
        if not expression.unit:
            return rename_func(name)(self, expression)

        return self.func(
            name,
            unit_to_var(expression),
            expression.expression,
            expression.this,
        )

    return _delta_sql


class ClickHouse(Dialect):
    NORMALIZE_FUNCTIONS: bool | str = False
    NULL_ORDERING = "nulls_are_last"
    SUPPORTS_USER_DEFINED_TYPES = False
    SAFE_DIVISION = True
    LOG_BASE_FIRST: t.Optional[bool] = None
    FORCE_EARLY_ALIAS_REF_EXPANSION = True

    # https://github.com/ClickHouse/ClickHouse/issues/33935#issue-1112165779
    NORMALIZATION_STRATEGY = NormalizationStrategy.CASE_SENSITIVE

    UNESCAPED_SEQUENCES = {
        "\\0": "\0",
    }

    class Tokenizer(tokens.Tokenizer):
        COMMENTS = ["--", "#", "#!", ("/*", "*/")]
        IDENTIFIERS = ['"', "`"]
        STRING_ESCAPES = ["'", "\\"]
        BIT_STRINGS = [("0b", "")]
        HEX_STRINGS = [("0x", ""), ("0X", "")]
        HEREDOC_STRINGS = ["$"]

        KEYWORDS = {
            **tokens.Tokenizer.KEYWORDS,
            "ATTACH": TokenType.COMMAND,
            "DATE32": TokenType.DATE32,
            "DATETIME64": TokenType.DATETIME64,
            "DICTIONARY": TokenType.DICTIONARY,
            "ENUM8": TokenType.ENUM8,
            "ENUM16": TokenType.ENUM16,
            "FINAL": TokenType.FINAL,
            "FIXEDSTRING": TokenType.FIXEDSTRING,
            "FLOAT32": TokenType.FLOAT,
            "FLOAT64": TokenType.DOUBLE,
            "GLOBAL": TokenType.GLOBAL,
            "INT256": TokenType.INT256,
            "LOWCARDINALITY": TokenType.LOWCARDINALITY,
            "MAP": TokenType.MAP,
            "NESTED": TokenType.NESTED,
            "SAMPLE": TokenType.TABLE_SAMPLE,
            "TUPLE": TokenType.STRUCT,
            "UINT128": TokenType.UINT128,
            "UINT16": TokenType.USMALLINT,
            "UINT256": TokenType.UINT256,
            "UINT32": TokenType.UINT,
            "UINT64": TokenType.UBIGINT,
            "UINT8": TokenType.UTINYINT,
            "IPV4": TokenType.IPV4,
            "IPV6": TokenType.IPV6,
            "AGGREGATEFUNCTION": TokenType.AGGREGATEFUNCTION,
            "SIMPLEAGGREGATEFUNCTION": TokenType.SIMPLEAGGREGATEFUNCTION,
            "SYSTEM": TokenType.COMMAND,
            "PREWHERE": TokenType.PREWHERE,
        }
        KEYWORDS.pop("/*+")

        SINGLE_TOKENS = {
            **tokens.Tokenizer.SINGLE_TOKENS,
            "$": TokenType.HEREDOC_STRING,
        }

    class Parser(parser.Parser):
        # Tested in ClickHouse's playground, it seems that the following two queries do the same thing
        # * select x from t1 union all select x from t2 limit 1;
        # * select x from t1 union all (select x from t2 limit 1);
        MODIFIERS_ATTACHED_TO_SET_OP = False
        INTERVAL_SPANS = False

        FUNCTIONS = {
            **parser.Parser.FUNCTIONS,
            "ANY": exp.AnyValue.from_arg_list,
            "ARRAYSUM": exp.ArraySum.from_arg_list,
            "COUNTIF": _build_count_if,
            "DATE_ADD": build_date_delta(exp.DateAdd, default_unit=None),
            "DATEADD": build_date_delta(exp.DateAdd, default_unit=None),
            "DATE_DIFF": build_date_delta(exp.DateDiff, default_unit=None),
            "DATEDIFF": build_date_delta(exp.DateDiff, default_unit=None),
            "DATE_FORMAT": _build_date_format,
            "DATE_SUB": build_date_delta(exp.DateSub, default_unit=None),
            "DATESUB": build_date_delta(exp.DateSub, default_unit=None),
            "FORMATDATETIME": _build_date_format,
            "JSONEXTRACTSTRING": build_json_extract_path(
                exp.JSONExtractScalar, zero_based_indexing=False
            ),
            "MAP": parser.build_var_map,
            "MATCH": exp.RegexpLike.from_arg_list,
            "RANDCANONICAL": exp.Rand.from_arg_list,
            "STR_TO_DATE": _build_str_to_date,
            "TUPLE": exp.Struct.from_arg_list,
            "TIMESTAMP_SUB": build_date_delta(exp.TimestampSub, default_unit=None),
            "TIMESTAMPSUB": build_date_delta(exp.TimestampSub, default_unit=None),
            "TIMESTAMP_ADD": build_date_delta(exp.TimestampAdd, default_unit=None),
            "TIMESTAMPADD": build_date_delta(exp.TimestampAdd, default_unit=None),
            "UNIQ": exp.ApproxDistinct.from_arg_list,
            "XOR": lambda args: exp.Xor(expressions=args),
            "MD5": exp.MD5Digest.from_arg_list,
            "SHA256": lambda args: exp.SHA2(this=seq_get(args, 0), length=exp.Literal.number(256)),
            "SHA512": lambda args: exp.SHA2(this=seq_get(args, 0), length=exp.Literal.number(512)),
        }
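
        # Illustrative sketch (not part of the upstream module): with the overrides
        # above, e.g. uniq(x) parses into exp.ApproxDistinct and match(s, p) into
        # exp.RegexpLike (x, s and p are hypothetical arguments), which is what lets
        # these ClickHouse functions transpile to other dialects' equivalents.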

        AGG_FUNCTIONS = {
            "count",
            "min",
            "max",
            "sum",
            "avg",
            "any",
            "stddevPop",
            "stddevSamp",
            "varPop",
            "varSamp",
            "corr",
            "covarPop",
            "covarSamp",
            "entropy",
            "exponentialMovingAverage",
            "intervalLengthSum",
            "kolmogorovSmirnovTest",
            "mannWhitneyUTest",
            "median",
            "rankCorr",
            "sumKahan",
            "studentTTest",
            "welchTTest",
            "anyHeavy",
            "anyLast",
            "boundingRatio",
            "first_value",
            "last_value",
            "argMin",
            "argMax",
            "avgWeighted",
            "topK",
            "topKWeighted",
            "deltaSum",
            "deltaSumTimestamp",
            "groupArray",
            "groupArrayLast",
            "groupUniqArray",
            "groupArrayInsertAt",
            "groupArrayMovingAvg",
            "groupArrayMovingSum",
            "groupArraySample",
            "groupBitAnd",
            "groupBitOr",
            "groupBitXor",
            "groupBitmap",
            "groupBitmapAnd",
            "groupBitmapOr",
            "groupBitmapXor",
            "sumWithOverflow",
            "sumMap",
            "minMap",
            "maxMap",
            "skewSamp",
            "skewPop",
            "kurtSamp",
            "kurtPop",
            "uniq",
            "uniqExact",
            "uniqCombined",
            "uniqCombined64",
            "uniqHLL12",
            "uniqTheta",
            "quantile",
            "quantiles",
            "quantileExact",
            "quantilesExact",
            "quantileExactLow",
            "quantilesExactLow",
            "quantileExactHigh",
            "quantilesExactHigh",
            "quantileExactWeighted",
            "quantilesExactWeighted",
            "quantileTiming",
            "quantilesTiming",
            "quantileTimingWeighted",
            "quantilesTimingWeighted",
            "quantileDeterministic",
            "quantilesDeterministic",
            "quantileTDigest",
            "quantilesTDigest",
            "quantileTDigestWeighted",
            "quantilesTDigestWeighted",
            "quantileBFloat16",
            "quantilesBFloat16",
            "quantileBFloat16Weighted",
            "quantilesBFloat16Weighted",
            "simpleLinearRegression",
            "stochasticLinearRegression",
            "stochasticLogisticRegression",
            "categoricalInformationValue",
            "contingency",
            "cramersV",
            "cramersVBiasCorrected",
            "theilsU",
            "maxIntersections",
            "maxIntersectionsPosition",
            "meanZTest",
            "quantileInterpolatedWeighted",
            "quantilesInterpolatedWeighted",
            "quantileGK",
            "quantilesGK",
            "sparkBar",
            "sumCount",
            "largestTriangleThreeBuckets",
            "histogram",
            "sequenceMatch",
            "sequenceCount",
            "windowFunnel",
            "retention",
            "uniqUpTo",
            "sequenceNextNode",
            "exponentialTimeDecayedAvg",
        }

        AGG_FUNCTIONS_SUFFIXES = [
            "If",
            "Array",
            "ArrayIf",
            "Map",
            "SimpleState",
            "State",
            "Merge",
            "MergeState",
            "ForEach",
            "Distinct",
            "OrDefault",
            "OrNull",
            "Resample",
            "ArgMin",
            "ArgMax",
        ]

        FUNC_TOKENS = {
            *parser.Parser.FUNC_TOKENS,
            TokenType.SET,
        }

        RESERVED_TOKENS = parser.Parser.RESERVED_TOKENS - {TokenType.SELECT}

        ID_VAR_TOKENS = {
            *parser.Parser.ID_VAR_TOKENS,
            TokenType.LIKE,
        }

        AGG_FUNC_MAPPING = (
            lambda functions, suffixes: {
                f"{f}{sfx}": (f, sfx) for sfx in (suffixes + [""]) for f in functions
            }
        )(AGG_FUNCTIONS, AGG_FUNCTIONS_SUFFIXES)
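
        # Illustrative sketch (not part of the upstream module): AGG_FUNC_MAPPING keys
        # every base name combined with every suffix, plus the bare name, e.g.:
        #
        #   AGG_FUNC_MAPPING["sumIf"] == ("sum", "If")
        #   AGG_FUNC_MAPPING["uniqMerge"] == ("uniq", "Merge")
        #   AGG_FUNC_MAPPING["sum"] == ("sum", "")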

        FUNCTIONS_WITH_ALIASED_ARGS = {*parser.Parser.FUNCTIONS_WITH_ALIASED_ARGS, "TUPLE"}

        FUNCTION_PARSERS = {
            **parser.Parser.FUNCTION_PARSERS,
            "ARRAYJOIN": lambda self: self.expression(exp.Explode, this=self._parse_expression()),
            "QUANTILE": lambda self: self._parse_quantile(),
        }

        FUNCTION_PARSERS.pop("MATCH")

        NO_PAREN_FUNCTION_PARSERS = parser.Parser.NO_PAREN_FUNCTION_PARSERS.copy()
        NO_PAREN_FUNCTION_PARSERS.pop("ANY")

        RANGE_PARSERS = {
            **parser.Parser.RANGE_PARSERS,
            TokenType.GLOBAL: lambda self, this: self._match(TokenType.IN)
            and self._parse_in(this, is_global=True),
        }

        # The PLACEHOLDER entry is popped because 1) it doesn't affect Clickhouse (it corresponds to
        # the postgres-specific JSONBContains parser) and 2) it makes parsing the ternary op simpler.
        COLUMN_OPERATORS = parser.Parser.COLUMN_OPERATORS.copy()
        COLUMN_OPERATORS.pop(TokenType.PLACEHOLDER)

        JOIN_KINDS = {
            *parser.Parser.JOIN_KINDS,
            TokenType.ANY,
            TokenType.ASOF,
            TokenType.ARRAY,
        }

        TABLE_ALIAS_TOKENS = parser.Parser.TABLE_ALIAS_TOKENS - {
            TokenType.ANY,
            TokenType.ARRAY,
            TokenType.FINAL,
            TokenType.FORMAT,
            TokenType.SETTINGS,
        }

        ALIAS_TOKENS = parser.Parser.ALIAS_TOKENS - {
            TokenType.FORMAT,
        }

        LOG_DEFAULTS_TO_LN = True

        QUERY_MODIFIER_PARSERS = {
            **parser.Parser.QUERY_MODIFIER_PARSERS,
            TokenType.SETTINGS: lambda self: (
                "settings",
                self._advance() or self._parse_csv(self._parse_assignment),
            ),
            TokenType.FORMAT: lambda self: ("format", self._advance() or self._parse_id_var()),
        }

        CONSTRAINT_PARSERS = {
            **parser.Parser.CONSTRAINT_PARSERS,
            "INDEX": lambda self: self._parse_index_constraint(),
            "CODEC": lambda self: self._parse_compress(),
        }

        ALTER_PARSERS = {
            **parser.Parser.ALTER_PARSERS,
            "REPLACE": lambda self: self._parse_alter_table_replace(),
        }

        SCHEMA_UNNAMED_CONSTRAINTS = {
            *parser.Parser.SCHEMA_UNNAMED_CONSTRAINTS,
            "INDEX",
        }
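
        # Illustrative sketch (not part of the upstream module): the SETTINGS/FORMAT
        # entries in QUERY_MODIFIER_PARSERS above are what let a query such as the
        # following (hypothetical names) parse as trailing query modifiers:
        #
        #   SELECT * FROM t SETTINGS max_threads = 1 FORMAT JSONEachRow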

        def _parse_create(self) -> exp.Create | exp.Command:
            create = super()._parse_create()

            # DATABASE in ClickHouse is the same as SCHEMA in other dialects
            if isinstance(create, exp.Create) and create.kind == "DATABASE":
                create.set("kind", "SCHEMA")

            return create

        def _parse_extract(self) -> exp.Extract | exp.Anonymous:
            index = self._index
            this = self._parse_bitwise()
            if self._match(TokenType.FROM):
                self._retreat(index)
                return super()._parse_extract()

            # We return Anonymous here because extract and regexpExtract have different semantics,
            # so parsing extract(foo, bar) into RegexpExtract can potentially break queries. E.g.,
            # `extract('foobar', 'b')` works, but ClickHouse crashes for `regexpExtract('foobar', 'b')`.
            #
            # TODO: can we somehow convert the former into an equivalent `regexpExtract` call?
            self._match(TokenType.COMMA)
            return self.expression(
                exp.Anonymous, this="extract", expressions=[this, self._parse_bitwise()]
            )

        def _parse_assignment(self) -> t.Optional[exp.Expression]:
            this = super()._parse_assignment()

            if self._match(TokenType.PLACEHOLDER):
                return self.expression(
                    exp.If,
                    this=this,
                    true=self._parse_assignment(),
                    false=self._match(TokenType.COLON) and self._parse_assignment(),
                )

            return this

        def _parse_placeholder(self) -> t.Optional[exp.Expression]:
            """
            Parse a placeholder expression like SELECT {abc: UInt32} or FROM {table: Identifier}
            https://clickhouse.com/docs/en/sql-reference/syntax#defining-and-using-query-parameters
            """
            if not self._match(TokenType.L_BRACE):
                return None

            this = self._parse_id_var()
            self._match(TokenType.COLON)
            kind = self._parse_types(check_func=False, allow_identifiers=False) or (
                self._match_text_seq("IDENTIFIER") and "Identifier"
            )

            if not kind:
                self.raise_error("Expecting a placeholder type or 'Identifier' for tables")
            elif not self._match(TokenType.R_BRACE):
                self.raise_error("Expecting }")

            return self.expression(exp.Placeholder, this=this, kind=kind)

        def _parse_in(self, this: t.Optional[exp.Expression], is_global: bool = False) -> exp.In:
            this = super()._parse_in(this)
            this.set("is_global", is_global)
            return this

        def _parse_table(
            self,
            schema: bool = False,
            joins: bool = False,
            alias_tokens: t.Optional[t.Collection[TokenType]] = None,
            parse_bracket: bool = False,
            is_db_reference: bool = False,
            parse_partition: bool = False,
        ) -> t.Optional[exp.Expression]:
            this = super()._parse_table(
                schema=schema,
                joins=joins,
                alias_tokens=alias_tokens,
                parse_bracket=parse_bracket,
                is_db_reference=is_db_reference,
            )

            if self._match(TokenType.FINAL):
                this = self.expression(exp.Final, this=this)

            return this

        def _parse_position(self, haystack_first: bool = False) -> exp.StrPosition:
            return super()._parse_position(haystack_first=True)

        # https://clickhouse.com/docs/en/sql-reference/statements/select/with/
        def _parse_cte(self) -> exp.CTE:
            # WITH <identifier> AS <subquery expression>
            cte: t.Optional[exp.CTE] = self._try_parse(super()._parse_cte)

            if not cte:
                # WITH <expression> AS <identifier>
                cte = self.expression(
                    exp.CTE,
                    this=self._parse_assignment(),
                    alias=self._parse_table_alias(),
                    scalar=True,
                )

            return cte
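
        # Illustrative sketch (not part of the upstream module): the scalar branch of
        # _parse_cte above accepts ClickHouse's "WITH <expression> AS <identifier>"
        # form, e.g. WITH 10 AS n SELECT n * 2 (hypothetical query), producing an
        # exp.CTE with scalar=True instead of a subquery CTE.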

        def _parse_join_parts(
            self,
        ) -> t.Tuple[t.Optional[Token], t.Optional[Token], t.Optional[Token]]:
            is_global = self._match(TokenType.GLOBAL) and self._prev
            kind_pre = self._match_set(self.JOIN_KINDS, advance=False) and self._prev

            if kind_pre:
                kind = self._match_set(self.JOIN_KINDS) and self._prev
                side = self._match_set(self.JOIN_SIDES) and self._prev
                return is_global, side, kind

            return (
                is_global,
                self._match_set(self.JOIN_SIDES) and self._prev,
                self._match_set(self.JOIN_KINDS) and self._prev,
            )

        def _parse_join(
            self, skip_join_token: bool = False, parse_bracket: bool = False
        ) -> t.Optional[exp.Join]:
            join = super()._parse_join(skip_join_token=skip_join_token, parse_bracket=True)
            if join:
                join.set("global", join.args.pop("method", None))

            return join

        def _parse_function(
            self,
            functions: t.Optional[t.Dict[str, t.Callable]] = None,
            anonymous: bool = False,
            optional_parens: bool = True,
            any_token: bool = False,
        ) -> t.Optional[exp.Expression]:
            expr = super()._parse_function(
                functions=functions,
                anonymous=anonymous,
                optional_parens=optional_parens,
                any_token=any_token,
            )

            func = expr.this if isinstance(expr, exp.Window) else expr

            # Aggregate functions can be split in 2 parts: <func_name><suffix>
            parts = (
                self.AGG_FUNC_MAPPING.get(func.this) if isinstance(func, exp.Anonymous) else None
            )

            if parts:
                params = self._parse_func_params(func)

                kwargs = {
                    "this": func.this,
                    "expressions": func.expressions,
                }
                if parts[1]:
                    kwargs["parts"] = parts
                    exp_class = exp.CombinedParameterizedAgg if params else exp.CombinedAggFunc
                else:
                    exp_class = exp.ParameterizedAgg if params else exp.AnonymousAggFunc

                kwargs["exp_class"] = exp_class
                if params:
                    kwargs["params"] = params

                func = self.expression(**kwargs)

                if isinstance(expr, exp.Window):
                    # The window's func was parsed as Anonymous in base parser, fix its
                    # type to be ClickHouse style CombinedAnonymousAggFunc / AnonymousAggFunc
                    expr.set("this", func)
                elif params:
                    # Params have blocked super()._parse_function() from parsing the following window
                    # (if that exists) as they're standing between the function call and the window spec
                    expr = self._parse_window(func)
                else:
                    expr = func

            return expr

        def _parse_func_params(
            self, this: t.Optional[exp.Func] = None
        ) -> t.Optional[t.List[exp.Expression]]:
            if self._match_pair(TokenType.R_PAREN, TokenType.L_PAREN):
                return self._parse_csv(self._parse_lambda)

            if self._match(TokenType.L_PAREN):
                params = self._parse_csv(self._parse_lambda)
                self._match_r_paren(this)
                return params

            return None

        def _parse_quantile(self) -> exp.Quantile:
            this = self._parse_lambda()
            params = self._parse_func_params()
            if params:
                return self.expression(exp.Quantile, this=params[0], quantile=this)
            return self.expression(exp.Quantile, this=this, quantile=exp.Literal.number(0.5))

        def _parse_wrapped_id_vars(self, optional: bool = False) -> t.List[exp.Expression]:
            return super()._parse_wrapped_id_vars(optional=True)

        def _parse_primary_key(
            self, wrapped_optional: bool = False, in_props: bool = False
        ) -> exp.PrimaryKeyColumnConstraint | exp.PrimaryKey:
            return super()._parse_primary_key(
                wrapped_optional=wrapped_optional or in_props, in_props=in_props
            )

        def _parse_on_property(self) -> t.Optional[exp.Expression]:
            index = self._index
            if self._match_text_seq("CLUSTER"):
                this = self._parse_id_var()
                if this:
                    return self.expression(exp.OnCluster, this=this)
                else:
                    self._retreat(index)
            return None

        def _parse_index_constraint(
            self, kind: t.Optional[str] = None
        ) -> exp.IndexColumnConstraint:
            # INDEX name1 expr TYPE type1(args) GRANULARITY value
            this = self._parse_id_var()
            expression = self._parse_assignment()

            index_type = self._match_text_seq("TYPE") and (
                self._parse_function() or self._parse_var()
            )

            granularity = self._match_text_seq("GRANULARITY") and self._parse_term()

            return self.expression(
                exp.IndexColumnConstraint,
                this=this,
                expression=expression,
                index_type=index_type,
                granularity=granularity,
            )

        def _parse_partition(self) -> t.Optional[exp.Partition]:
            # https://clickhouse.com/docs/en/sql-reference/statements/alter/partition#how-to-set-partition-expression
            if not self._match(TokenType.PARTITION):
                return None

            if self._match_text_seq("ID"):
                # Corresponds to the PARTITION ID <string_value> syntax
                expressions: t.List[exp.Expression] = [
                    self.expression(exp.PartitionId, this=self._parse_string())
                ]
            else:
                expressions = self._parse_expressions()

            return self.expression(exp.Partition, expressions=expressions)

        def _parse_alter_table_replace(self) -> t.Optional[exp.Expression]:
            partition = self._parse_partition()

            if not partition or not self._match(TokenType.FROM):
                return None

            return self.expression(
                exp.ReplacePartition, expression=partition, source=self._parse_table_parts()
            )

        def _parse_projection_def(self) -> t.Optional[exp.ProjectionDef]:
            if not self._match_text_seq("PROJECTION"):
                return None

            return self.expression(
                exp.ProjectionDef,
                this=self._parse_id_var(),
                expression=self._parse_wrapped(self._parse_statement),
            )

        def _parse_constraint(self) -> t.Optional[exp.Expression]:
            return super()._parse_constraint() or self._parse_projection_def()

    class Generator(generator.Generator):
        QUERY_HINTS = False
        STRUCT_DELIMITER = ("(", ")")
        NVL2_SUPPORTED = False
        TABLESAMPLE_REQUIRES_PARENS = False
        TABLESAMPLE_SIZE_IS_ROWS = False
        TABLESAMPLE_KEYWORDS = "SAMPLE"
        LAST_DAY_SUPPORTS_DATE_PART = False
        CAN_IMPLEMENT_ARRAY_ANY = True
        SUPPORTS_TO_NUMBER = False
        JOIN_HINTS = False
        TABLE_HINTS = False
        EXPLICIT_SET_OP = True
        GROUPINGS_SEP = ""
        SET_OP_MODIFIERS = False
        SUPPORTS_TABLE_ALIAS_COLUMNS = False
        VALUES_AS_TABLE = False

        STRING_TYPE_MAPPING = {
            exp.DataType.Type.CHAR: "String",
            exp.DataType.Type.LONGBLOB: "String",
            exp.DataType.Type.LONGTEXT: "String",
            exp.DataType.Type.MEDIUMBLOB: "String",
            exp.DataType.Type.MEDIUMTEXT: "String",
            exp.DataType.Type.TINYBLOB: "String",
            exp.DataType.Type.TINYTEXT: "String",
            exp.DataType.Type.TEXT: "String",
            exp.DataType.Type.VARBINARY: "String",
            exp.DataType.Type.VARCHAR: "String",
        }

        SUPPORTED_JSON_PATH_PARTS = {
            exp.JSONPathKey,
            exp.JSONPathRoot,
            exp.JSONPathSubscript,
        }

        TYPE_MAPPING = {
            **generator.Generator.TYPE_MAPPING,
            **STRING_TYPE_MAPPING,
            exp.DataType.Type.ARRAY: "Array",
            exp.DataType.Type.BIGINT: "Int64",
            exp.DataType.Type.DATE32: "Date32",
            exp.DataType.Type.DATETIME64: "DateTime64",
            exp.DataType.Type.DOUBLE: "Float64",
            exp.DataType.Type.ENUM: "Enum",
            exp.DataType.Type.ENUM8: "Enum8",
            exp.DataType.Type.ENUM16: "Enum16",
            exp.DataType.Type.FIXEDSTRING: "FixedString",
            exp.DataType.Type.FLOAT: "Float32",
            exp.DataType.Type.INT: "Int32",
            exp.DataType.Type.MEDIUMINT: "Int32",
            exp.DataType.Type.INT128: "Int128",
            exp.DataType.Type.INT256: "Int256",
            exp.DataType.Type.LOWCARDINALITY: "LowCardinality",
            exp.DataType.Type.MAP: "Map",
            exp.DataType.Type.NESTED: "Nested",
            exp.DataType.Type.NULLABLE: "Nullable",
            exp.DataType.Type.SMALLINT: "Int16",
            exp.DataType.Type.STRUCT: "Tuple",
            exp.DataType.Type.TINYINT: "Int8",
            exp.DataType.Type.UBIGINT: "UInt64",
            exp.DataType.Type.UINT: "UInt32",
            exp.DataType.Type.UINT128: "UInt128",
            exp.DataType.Type.UINT256: "UInt256",
            exp.DataType.Type.USMALLINT: "UInt16",
            exp.DataType.Type.UTINYINT: "UInt8",
            exp.DataType.Type.IPV4: "IPv4",
            exp.DataType.Type.IPV6: "IPv6",
            exp.DataType.Type.AGGREGATEFUNCTION: "AggregateFunction",
            exp.DataType.Type.SIMPLEAGGREGATEFUNCTION: "SimpleAggregateFunction",
        }

        TRANSFORMS = {
            **generator.Generator.TRANSFORMS,
            exp.AnyValue: rename_func("any"),
            exp.ApproxDistinct: rename_func("uniq"),
            exp.ArrayFilter: lambda self, e: self.func("arrayFilter", e.expression, e.this),
            exp.ArraySize: rename_func("LENGTH"),
            exp.ArraySum: rename_func("arraySum"),
            exp.ArgMax: arg_max_or_min_no_count("argMax"),
            exp.ArgMin: arg_max_or_min_no_count("argMin"),
            exp.Array: inline_array_sql,
            exp.CastToStrType: rename_func("CAST"),
            exp.CountIf: rename_func("countIf"),
            exp.CompressColumnConstraint: lambda self,
            e: f"CODEC({self.expressions(e, key='this', flat=True)})",
            exp.ComputedColumnConstraint: lambda self,
            e: f"{'MATERIALIZED' if e.args.get('persisted') else 'ALIAS'} {self.sql(e, 'this')}",
            exp.CurrentDate: lambda self, e: self.func("CURRENT_DATE"),
            exp.DateAdd: _datetime_delta_sql("DATE_ADD"),
            exp.DateDiff: _datetime_delta_sql("DATE_DIFF"),
            exp.DateStrToDate: rename_func("toDate"),
            exp.DateSub: _datetime_delta_sql("DATE_SUB"),
            exp.Explode: rename_func("arrayJoin"),
            exp.Final: lambda self, e: f"{self.sql(e, 'this')} FINAL",
            exp.IsNan: rename_func("isNaN"),
            exp.JSONExtract: json_extract_segments("JSONExtractString", quoted_index=False),
            exp.JSONExtractScalar: json_extract_segments("JSONExtractString", quoted_index=False),
            exp.JSONPathKey: json_path_key_only_name,
            exp.JSONPathRoot: lambda *_: "",
            exp.Map: lambda self, e: _lower_func(var_map_sql(self, e)),
            exp.Nullif: rename_func("nullIf"),
            exp.PartitionedByProperty: lambda self, e: f"PARTITION BY {self.sql(e, 'this')}",
            exp.Pivot: no_pivot_sql,
            exp.Quantile: _quantile_sql,
            exp.RegexpLike: lambda self, e: self.func("match", e.this, e.expression),
            exp.Rand: rename_func("randCanonical"),
            exp.StartsWith: rename_func("startsWith"),
            exp.StrPosition: lambda self, e: self.func(
                "position", e.this, e.args.get("substr"), e.args.get("position")
            ),
            exp.TimeToStr: lambda self, e: self.func(
                "DATE_FORMAT", e.this, self.format_time(e), e.args.get("timezone")
            ),
            exp.TimestampAdd: _datetime_delta_sql("TIMESTAMP_ADD"),
            exp.TimestampSub: _datetime_delta_sql("TIMESTAMP_SUB"),
            exp.VarMap: lambda self, e: _lower_func(var_map_sql(self, e)),
            exp.Xor: lambda self, e: self.func("xor", e.this, e.expression, *e.expressions),
            exp.MD5Digest: rename_func("MD5"),
            exp.MD5: lambda self, e: self.func("LOWER", self.func("HEX", self.func("MD5", e.this))),
            exp.SHA: rename_func("SHA1"),
            exp.SHA2: sha256_sql,
            exp.UnixToTime: _unix_to_time_sql,
            exp.TimestampTrunc: timestamptrunc_sql(zone=True),
            exp.Variance: rename_func("varSamp"),
            exp.SchemaCommentProperty: lambda self, e: self.naked_property(e),
            exp.Stddev: rename_func("stddevSamp"),
        }

        PROPERTIES_LOCATION = {
            **generator.Generator.PROPERTIES_LOCATION,
            exp.VolatileProperty: exp.Properties.Location.UNSUPPORTED,
            exp.PartitionedByProperty: exp.Properties.Location.POST_SCHEMA,
            exp.OnCluster: exp.Properties.Location.POST_NAME,
        }

        # there's no list in docs, but it can be found in Clickhouse code
        # see `ClickHouse/src/Parsers/ParserCreate*.cpp`
        ON_CLUSTER_TARGETS = {
            "DATABASE",
            "TABLE",
            "VIEW",
            "DICTIONARY",
            "INDEX",
            "FUNCTION",
            "NAMED COLLECTION",
        }

        # https://github.com/ClickHouse/ClickHouse/blob/275de04b8f6bb8c9334bf8070001afe2dab0b17d/src/Functions/FunctionsConversion.cpp#L2939-L2989
        TRY_CAST_TYPES = {
            "DATE",
            "DATE32",
            "DATETIME",
            "DATETIME64",
            "DECIMAL32",
            "DECIMAL64",
            "DECIMAL128",
            "DECIMAL256",
            "FLOAT32",
            "FLOAT64",
            "INT8",
            "INT16",
            "INT32",
            "INT64",
            "INT128",
            "INT256",
            "IPV4",
            "IPV6",
            "UINT8",
            "UINT16",
            "UINT32",
            "UINT64",
            "UINT128",
            "UINT256",
            "UUID",
        }
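
        # Illustrative sketch (not part of the upstream module): since BIGINT renders
        # as Int64 and "INT64" is in TRY_CAST_TYPES, trycast_sql below should turn
        # TRY_CAST(a AS BIGINT) into roughly toInt64OrNull(a) (hypothetical column a).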

        def strtodate_sql(self, expression: exp.StrToDate) -> str:
            strtodate_sql = self.function_fallback_sql(expression)

            if not isinstance(expression.parent, exp.Cast):
                # StrToDate returns DATEs in other dialects (eg. postgres), so
                # this branch aims to improve the transpilation to clickhouse
                return f"CAST({strtodate_sql} AS DATE)"

            return strtodate_sql

        def cast_sql(self, expression: exp.Cast, safe_prefix: t.Optional[str] = None) -> str:
            this = expression.this

            if isinstance(this, exp.StrToDate) and expression.to == exp.DataType.build("datetime"):
                return self.sql(this)

            return super().cast_sql(expression, safe_prefix=safe_prefix)

        def trycast_sql(self, expression: exp.TryCast) -> str:
            target_type = self.sql(expression.to)
            if target_type.upper() in self.TRY_CAST_TYPES:
                return self.func(f"to{target_type}OrNull", expression.this)

            self.unsupported(f"There is no `to<Type>OrNull` for type {target_type}.")
            return super().cast_sql(expression)

        def _jsonpathsubscript_sql(self, expression: exp.JSONPathSubscript) -> str:
            this = self.json_path_part(expression.this)
            return str(int(this) + 1) if is_int(this) else this

        def likeproperty_sql(self, expression: exp.LikeProperty) -> str:
            return f"AS {self.sql(expression, 'this')}"

        def _any_to_has(
            self,
            expression: exp.EQ | exp.NEQ,
            default: t.Callable[[t.Any], str],
            prefix: str = "",
        ) -> str:
            if isinstance(expression.left, exp.Any):
                arr = expression.left
                this = expression.right
            elif isinstance(expression.right, exp.Any):
                arr = expression.right
                this = expression.left
            else:
                return default(expression)

            return prefix + self.func("has", arr.this.unnest(), this)

        def eq_sql(self, expression: exp.EQ) -> str:
            return self._any_to_has(expression, super().eq_sql)

        def neq_sql(self, expression: exp.NEQ) -> str:
            return self._any_to_has(expression, super().neq_sql, "NOT ")

        def regexpilike_sql(self, expression: exp.RegexpILike) -> str:
            # Manually add a flag to make the search case-insensitive
            regex = self.func("CONCAT", "'(?i)'", expression.expression)
            return self.func("match", expression.this, regex)

        def datatype_sql(self, expression: exp.DataType) -> str:
            # String is the standard ClickHouse type, every other variant is just an alias.
            # Additionally, any supplied length parameter will be ignored.
            #
            # https://clickhouse.com/docs/en/sql-reference/data-types/string
            if expression.this in self.STRING_TYPE_MAPPING:
                return "String"

            return super().datatype_sql(expression)

        def cte_sql(self, expression: exp.CTE) -> str:
            if expression.args.get("scalar"):
                this = self.sql(expression, "this")
                alias = self.sql(expression, "alias")
                return f"{this} AS {alias}"

            return super().cte_sql(expression)

        def after_limit_modifiers(self, expression: exp.Expression) -> t.List[str]:
            return super().after_limit_modifiers(expression) + [
                (
                    self.seg("SETTINGS ") + self.expressions(expression, key="settings", flat=True)
                    if expression.args.get("settings")
                    else ""
                ),
                (
                    self.seg("FORMAT ") + self.sql(expression, "format")
                    if expression.args.get("format")
                    else ""
                ),
            ]

        def parameterizedagg_sql(self, expression: exp.ParameterizedAgg) -> str:
            params = self.expressions(expression, key="params", flat=True)
            return self.func(expression.name, *expression.expressions) + f"({params})"

        def anonymousaggfunc_sql(self, expression: exp.AnonymousAggFunc) -> str:
            return self.func(expression.name, *expression.expressions)

        def combinedaggfunc_sql(self, expression: exp.CombinedAggFunc) -> str:
            return self.anonymousaggfunc_sql(expression)

        def combinedparameterizedagg_sql(self, expression: exp.CombinedParameterizedAgg) -> str:
            return self.parameterizedagg_sql(expression)

        def placeholder_sql(self, expression: exp.Placeholder) -> str:
            return f"{{{expression.name}: {self.sql(expression, 'kind')}}}"

        def oncluster_sql(self, expression: exp.OnCluster) -> str:
            return f"ON CLUSTER {self.sql(expression, 'this')}"

        def createable_sql(self, expression: exp.Create, locations: t.DefaultDict) -> str:
            if expression.kind in self.ON_CLUSTER_TARGETS and locations.get(
                exp.Properties.Location.POST_NAME
            ):
                this_name = self.sql(
                    expression.this if isinstance(expression.this, exp.Schema) else expression,
                    "this",
                )
                this_properties = " ".join(
                    [self.sql(prop) for prop in locations[exp.Properties.Location.POST_NAME]]
                )
                this_schema = self.schema_columns_sql(expression.this)
                return f"{this_name}{self.sep()}{this_properties}{self.sep()}{this_schema}"

            return super().createable_sql(expression, locations)

        def create_sql(self, expression: exp.Create) -> str:
            # The comment property comes last in CTAS statements, i.e. after the query
            query = expression.expression
            if isinstance(query, exp.Query):
                comment_prop = expression.find(exp.SchemaCommentProperty)
                if comment_prop:
                    comment_prop.pop()
                    query.replace(exp.paren(query))
            else:
                comment_prop = None

            # ClickHouse only has DATABASEs and objects under them, eg. TABLEs, VIEWs, etc
            if expression.kind == "SCHEMA":
                expression.set("kind", "DATABASE")

            create_sql = super().create_sql(expression)

            comment_sql = self.sql(comment_prop)
            comment_sql = f" {comment_sql}" if comment_sql else ""

            return f"{create_sql}{comment_sql}"

        def prewhere_sql(self, expression: exp.PreWhere) -> str:
            this = self.indent(self.sql(expression, "this"))
            return f"{self.seg('PREWHERE')}{self.sep()}{this}"

        def indexcolumnconstraint_sql(self, expression: exp.IndexColumnConstraint) -> str:
            this = self.sql(expression, "this")
            this = f" {this}" if this else ""
            expr = self.sql(expression, "expression")
            expr = f" {expr}" if expr else ""
            index_type = self.sql(expression, "index_type")
            index_type = f" TYPE {index_type}" if index_type else ""
            granularity = self.sql(expression, "granularity")
            granularity = f" GRANULARITY {granularity}" if granularity else ""

            return f"INDEX{this}{expr}{index_type}{granularity}"

        def partition_sql(self, expression: exp.Partition) -> str:
            return f"PARTITION {self.expressions(expression, flat=True)}"

        def partitionid_sql(self, expression: exp.PartitionId) -> str:
            return f"ID {self.sql(expression.this)}"

        def replacepartition_sql(self, expression: exp.ReplacePartition) -> str:
            return (
                f"REPLACE {self.sql(expression.expression)} FROM {self.sql(expression, 'source')}"
            )

        def projectiondef_sql(self, expression: exp.ProjectionDef) -> str:
            return f"PROJECTION {self.sql(expression.this)} {self.wrap(expression.expression)}"
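

# A minimal usage sketch (not part of the upstream module; assumes a sqlglot
# version where this dialect is registered under the name "clickhouse"):
if __name__ == "__main__":
    import sqlglot

    # Parameterized aggregates and FINAL should survive a ClickHouse round trip.
    print(sqlglot.transpile("SELECT quantile(0.9)(x) FROM t FINAL", read="clickhouse")[0])

    # TRY_CAST should map onto ClickHouse's to<Type>OrNull family of functions.
    print(sqlglot.transpile("SELECT TRY_CAST(a AS BIGINT)", write="clickhouse")[0])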
TABLEs, VIEWs, etc 1034 if expression.kind == "SCHEMA": 1035 expression.set("kind", "DATABASE") 1036 1037 create_sql = super().create_sql(expression) 1038 1039 comment_sql = self.sql(comment_prop) 1040 comment_sql = f" {comment_sql}" if comment_sql else "" 1041 1042 return f"{create_sql}{comment_sql}" 1043 1044 def prewhere_sql(self, expression: exp.PreWhere) -> str: 1045 this = self.indent(self.sql(expression, "this")) 1046 return f"{self.seg('PREWHERE')}{self.sep()}{this}" 1047 1048 def indexcolumnconstraint_sql(self, expression: exp.IndexColumnConstraint) -> str: 1049 this = self.sql(expression, "this") 1050 this = f" {this}" if this else "" 1051 expr = self.sql(expression, "expression") 1052 expr = f" {expr}" if expr else "" 1053 index_type = self.sql(expression, "index_type") 1054 index_type = f" TYPE {index_type}" if index_type else "" 1055 granularity = self.sql(expression, "granularity") 1056 granularity = f" GRANULARITY {granularity}" if granularity else "" 1057 1058 return f"INDEX{this}{expr}{index_type}{granularity}" 1059 1060 def partition_sql(self, expression: exp.Partition) -> str: 1061 return f"PARTITION {self.expressions(expression, flat=True)}" 1062 1063 def partitionid_sql(self, expression: exp.PartitionId) -> str: 1064 return f"ID {self.sql(expression.this)}" 1065 1066 def replacepartition_sql(self, expression: exp.ReplacePartition) -> str: 1067 return ( 1068 f"REPLACE {self.sql(expression.expression)} FROM {self.sql(expression, 'source')}" 1069 ) 1070 1071 def projectiondef_sql(self, expression: exp.ProjectionDef) -> str: 1072 return f"PROJECTION {self.sql(expression.this)} {self.wrap(expression.expression)}"
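A couple of the transforms above are easiest to see end to end. A minimal sketch (the table and column names are made up, and the printed output is indicative of how these mappings are meant to behave rather than a byte-for-byte guarantee):

    import sqlglot

    # exp.ApproxDistinct -> uniq (see TRANSFORMS above)
    print(sqlglot.transpile("SELECT APPROX_COUNT_DISTINCT(a) FROM t", read="duckdb", write="clickhouse")[0])

    # exp.EQ over exp.Any -> has(...) via Generator._any_to_has:
    # PostgreSQL's `x = ANY(arr)` should come out as `has(arr, x)`
    print(sqlglot.transpile("SELECT x = ANY(arr) FROM t", read="postgres", write="clickhouse")[0])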
NORMALIZE_FUNCTIONS

Determines how function names are going to be normalized.

Possible values:

- "upper" or True: Convert names to uppercase.
- "lower": Convert names to lowercase.
- False: Disables function name normalization.

NULL_ORDERING

Default NULL ordering method to use if not explicitly set.

Possible values: "nulls_are_small", "nulls_are_large", "nulls_are_last"

LOG_BASE_FIRST

Whether the base comes first in the LOG function.

Possible values: True, False, None (two arguments are not supported by LOG)
FORCE_EARLY_ALIAS_REF_EXPANSION

Whether alias reference expansion (_expand_alias_refs()) should run before column qualification (_qualify_columns()).

For example:

    WITH data AS (
        SELECT
            1 AS id,
            2 AS my_id
    )
    SELECT
        id AS my_id
    FROM data
    WHERE my_id = 1
    GROUP BY my_id
    HAVING my_id = 1

In most dialects, "my_id" would refer to "data.my_id" across the whole query (this resolution is done in _qualify_columns()), except:

- BigQuery, which will forward the alias to the GROUP BY and HAVING clauses, i.e. it resolves to "WHERE my_id = 1 GROUP BY id HAVING id = 1"
- ClickHouse, which will forward the alias across the whole query, i.e. it resolves to "WHERE id = 1 GROUP BY id HAVING id = 1"
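One rough way to observe this, assuming sqlglot's optimizer qualify pass (the query is the example above and the resolution described in the comment is illustrative, not a verbatim output guarantee):

    import sqlglot
    from sqlglot.optimizer.qualify import qualify

    sql = """
    WITH data AS (SELECT 1 AS id, 2 AS my_id)
    SELECT id AS my_id FROM data
    WHERE my_id = 1 GROUP BY my_id HAVING my_id = 1
    """

    # Under ClickHouse semantics, FORCE_EARLY_ALIAS_REF_EXPANSION expands the alias
    # before qualification, so my_id in WHERE/GROUP BY/HAVING should end up
    # referring to the underlying data.id rather than data.my_id.
    print(qualify(sqlglot.parse_one(sql, read="clickhouse"), dialect="clickhouse").sql("clickhouse"))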
NORMALIZATION_STRATEGY

Specifies the strategy according to which identifiers should be normalized.

Mapping of an escaped sequence (\n) to its unescaped version (the literal newline character).
Inherited Members
- sqlglot.dialects.dialect.Dialect
- Dialect
- INDEX_OFFSET
- WEEK_OFFSET
- UNNEST_COLUMN_ONLY
- ALIAS_POST_TABLESAMPLE
- TABLESAMPLE_SIZE_IS_PERCENT
- IDENTIFIERS_CAN_START_WITH_DIGIT
- DPIPE_IS_STRING_CONCAT
- STRICT_STRING_CONCAT
- SUPPORTS_SEMI_ANTI_JOIN
- COPY_PARAMS_ARE_CSV
- TYPED_DIVISION
- CONCAT_COALESCE
- HEX_LOWERCASE
- DATE_FORMAT
- DATEINT_FORMAT
- TIME_FORMAT
- TIME_MAPPING
- FORMAT_MAPPING
- PSEUDOCOLUMNS
- PREFER_CTE_ALIAS_COLUMN
- EXPAND_ALIAS_REFS_EARLY_ONLY_IN_GROUP_BY
- SUPPORTS_ORDER_BY_ALL
- HAS_DISTINCT_ARRAY_CONSTRUCTORS
- SUPPORTS_FIXED_SIZE_ARRAYS
- DATE_PART_MAPPING
- TYPE_TO_EXPRESSIONS
- ANNOTATORS
- get_or_raise
- format_time
- settings
- normalize_identifier
- case_sensitive
- can_identify
- quote_identifier
- to_json_path
- parse
- parse_into
- generate
- transpile
- tokenize
- tokenizer
- jsonpath_tokenizer
- parser
- generator
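For orientation before the class-level details, a one-line usage sketch of the dialect as a whole (the exact output spelling may vary by sqlglot version):

    import sqlglot

    # MySQL's TEXT is one of several string aliases that collapse to ClickHouse's String type
    print(sqlglot.transpile("SELECT CAST(a AS TEXT) FROM t", read="mysql", write="clickhouse")[0])
    # expected shape: SELECT CAST(a AS String) FROM t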
    class Tokenizer(tokens.Tokenizer):
        COMMENTS = ["--", "#", "#!", ("/*", "*/")]
        IDENTIFIERS = ['"', "`"]
        STRING_ESCAPES = ["'", "\\"]
        BIT_STRINGS = [("0b", "")]
        HEX_STRINGS = [("0x", ""), ("0X", "")]
        HEREDOC_STRINGS = ["$"]

        KEYWORDS = {
            **tokens.Tokenizer.KEYWORDS,
            "ATTACH": TokenType.COMMAND,
            "DATE32": TokenType.DATE32,
            "DATETIME64": TokenType.DATETIME64,
            "DICTIONARY": TokenType.DICTIONARY,
            "ENUM8": TokenType.ENUM8,
            "ENUM16": TokenType.ENUM16,
            "FINAL": TokenType.FINAL,
            "FIXEDSTRING": TokenType.FIXEDSTRING,
            "FLOAT32": TokenType.FLOAT,
            "FLOAT64": TokenType.DOUBLE,
            "GLOBAL": TokenType.GLOBAL,
            "INT256": TokenType.INT256,
            "LOWCARDINALITY": TokenType.LOWCARDINALITY,
            "MAP": TokenType.MAP,
            "NESTED": TokenType.NESTED,
            "SAMPLE": TokenType.TABLE_SAMPLE,
            "TUPLE": TokenType.STRUCT,
            "UINT128": TokenType.UINT128,
            "UINT16": TokenType.USMALLINT,
            "UINT256": TokenType.UINT256,
            "UINT32": TokenType.UINT,
            "UINT64": TokenType.UBIGINT,
            "UINT8": TokenType.UTINYINT,
            "IPV4": TokenType.IPV4,
            "IPV6": TokenType.IPV6,
            "AGGREGATEFUNCTION": TokenType.AGGREGATEFUNCTION,
            "SIMPLEAGGREGATEFUNCTION": TokenType.SIMPLEAGGREGATEFUNCTION,
            "SYSTEM": TokenType.COMMAND,
            "PREWHERE": TokenType.PREWHERE,
        }
        KEYWORDS.pop("/*+")

        SINGLE_TOKENS = {
            **tokens.Tokenizer.SINGLE_TOKENS,
            "$": TokenType.HEREDOC_STRING,
        }
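The tokenizer can be exercised directly through the dialect; a small sketch (the SQL snippet is made up, and the printed token stream is illustrative):

    from sqlglot.dialects.clickhouse import ClickHouse

    # FINAL maps to a dedicated token type, and `#` opens a line comment (see COMMENTS above)
    for token in ClickHouse().tokenize("SELECT x FROM t FINAL  # trailing comment"):
        print(token.token_type, repr(token.text))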
Inherited Members
- sqlglot.tokens.Tokenizer
- Tokenizer
- BYTE_STRINGS
- RAW_STRINGS
- UNICODE_STRINGS
- IDENTIFIER_ESCAPES
- QUOTES
- VAR_SINGLE_TOKENS
- HEREDOC_TAG_IS_IDENTIFIER
- HEREDOC_STRING_ALTERNATIVE
- STRING_ESCAPES_ALLOWED_IN_RAW_STRINGS
- WHITE_SPACE
- COMMANDS
- COMMAND_PREFIX_TOKENS
- NUMERIC_LITERALS
- dialect
- reset
- tokenize
- tokenize_rs
- size
- sql
- tokens
    class Parser(parser.Parser):
        # Tested in ClickHouse's playground, it seems that the following two queries do the same thing
        # * select x from t1 union all select x from t2 limit 1;
        # * select x from t1 union all (select x from t2 limit 1);
        MODIFIERS_ATTACHED_TO_SET_OP = False
        INTERVAL_SPANS = False

        FUNCTIONS = {
            **parser.Parser.FUNCTIONS,
            "ANY": exp.AnyValue.from_arg_list,
            "ARRAYSUM": exp.ArraySum.from_arg_list,
            "COUNTIF": _build_count_if,
            "DATE_ADD": build_date_delta(exp.DateAdd, default_unit=None),
            "DATEADD": build_date_delta(exp.DateAdd, default_unit=None),
            "DATE_DIFF": build_date_delta(exp.DateDiff, default_unit=None),
            "DATEDIFF": build_date_delta(exp.DateDiff, default_unit=None),
            "DATE_FORMAT": _build_date_format,
            "DATE_SUB": build_date_delta(exp.DateSub, default_unit=None),
            "DATESUB": build_date_delta(exp.DateSub, default_unit=None),
            "FORMATDATETIME": _build_date_format,
            "JSONEXTRACTSTRING": build_json_extract_path(
                exp.JSONExtractScalar, zero_based_indexing=False
            ),
            "MAP": parser.build_var_map,
            "MATCH": exp.RegexpLike.from_arg_list,
            "RANDCANONICAL": exp.Rand.from_arg_list,
            "STR_TO_DATE": _build_str_to_date,
            "TUPLE": exp.Struct.from_arg_list,
            "TIMESTAMP_SUB": build_date_delta(exp.TimestampSub, default_unit=None),
            "TIMESTAMPSUB": build_date_delta(exp.TimestampSub, default_unit=None),
            "TIMESTAMP_ADD": build_date_delta(exp.TimestampAdd, default_unit=None),
            "TIMESTAMPADD": build_date_delta(exp.TimestampAdd, default_unit=None),
            "UNIQ": exp.ApproxDistinct.from_arg_list,
            "XOR": lambda args: exp.Xor(expressions=args),
            "MD5": exp.MD5Digest.from_arg_list,
            "SHA256": lambda args: exp.SHA2(this=seq_get(args, 0), length=exp.Literal.number(256)),
            "SHA512": lambda args: exp.SHA2(this=seq_get(args, 0), length=exp.Literal.number(512)),
        }

        AGG_FUNCTIONS = {
            "count",
            "min",
            "max",
            "sum",
            "avg",
            "any",
            "stddevPop",
            "stddevSamp",
            "varPop",
            "varSamp",
            "corr",
            "covarPop",
            "covarSamp",
            "entropy",
            "exponentialMovingAverage",
            "intervalLengthSum",
            "kolmogorovSmirnovTest",
            "mannWhitneyUTest",
            "median",
            "rankCorr",
            "sumKahan",
            "studentTTest",
            "welchTTest",
            "anyHeavy",
            "anyLast",
            "boundingRatio",
            "first_value",
            "last_value",
            "argMin",
            "argMax",
            "avgWeighted",
            "topK",
            "topKWeighted",
            "deltaSum",
            "deltaSumTimestamp",
            "groupArray",
            "groupArrayLast",
            "groupUniqArray",
            "groupArrayInsertAt",
            "groupArrayMovingAvg",
            "groupArrayMovingSum",
            "groupArraySample",
            "groupBitAnd",
            "groupBitOr",
            "groupBitXor",
            "groupBitmap",
            "groupBitmapAnd",
            "groupBitmapOr",
            "groupBitmapXor",
            "sumWithOverflow",
            "sumMap",
            "minMap",
            "maxMap",
            "skewSamp",
            "skewPop",
            "kurtSamp",
            "kurtPop",
            "uniq",
            "uniqExact",
            "uniqCombined",
            "uniqCombined64",
            "uniqHLL12",
            "uniqTheta",
            "quantile",
            "quantiles",
            "quantileExact",
            "quantilesExact",
            "quantileExactLow",
            "quantilesExactLow",
            "quantileExactHigh",
            "quantilesExactHigh",
            "quantileExactWeighted",
            "quantilesExactWeighted",
            "quantileTiming",
            "quantilesTiming",
            "quantileTimingWeighted",
            "quantilesTimingWeighted",
            "quantileDeterministic",
            "quantilesDeterministic",
            "quantileTDigest",
            "quantilesTDigest",
            "quantileTDigestWeighted",
            "quantilesTDigestWeighted",
            "quantileBFloat16",
            "quantilesBFloat16",
            "quantileBFloat16Weighted",
            "quantilesBFloat16Weighted",
            "simpleLinearRegression",
            "stochasticLinearRegression",
            "stochasticLogisticRegression",
            "categoricalInformationValue",
            "contingency",
            "cramersV",
            "cramersVBiasCorrected",
            "theilsU",
            "maxIntersections",
            "maxIntersectionsPosition",
            "meanZTest",
            "quantileInterpolatedWeighted",
            "quantilesInterpolatedWeighted",
            "quantileGK",
            "quantilesGK",
            "sparkBar",
            "sumCount",
            "largestTriangleThreeBuckets",
            "histogram",
            "sequenceMatch",
            "sequenceCount",
            "windowFunnel",
            "retention",
            "uniqUpTo",
            "sequenceNextNode",
            "exponentialTimeDecayedAvg",
        }

        AGG_FUNCTIONS_SUFFIXES = [
            "If",
            "Array",
            "ArrayIf",
            "Map",
            "SimpleState",
            "State",
            "Merge",
            "MergeState",
            "ForEach",
            "Distinct",
            "OrDefault",
            "OrNull",
            "Resample",
            "ArgMin",
            "ArgMax",
        ]

        FUNC_TOKENS = {
            *parser.Parser.FUNC_TOKENS,
            TokenType.SET,
        }

        RESERVED_TOKENS = parser.Parser.RESERVED_TOKENS - {TokenType.SELECT}

        ID_VAR_TOKENS = {
            *parser.Parser.ID_VAR_TOKENS,
            TokenType.LIKE,
        }

        AGG_FUNC_MAPPING = (
            lambda functions, suffixes: {
                f"{f}{sfx}": (f, sfx) for sfx in (suffixes + [""]) for f in functions
            }
        )(AGG_FUNCTIONS, AGG_FUNCTIONS_SUFFIXES)

        FUNCTIONS_WITH_ALIASED_ARGS = {*parser.Parser.FUNCTIONS_WITH_ALIASED_ARGS, "TUPLE"}

        FUNCTION_PARSERS = {
            **parser.Parser.FUNCTION_PARSERS,
            "ARRAYJOIN": lambda self: self.expression(exp.Explode, this=self._parse_expression()),
            "QUANTILE": lambda self: self._parse_quantile(),
        }

        FUNCTION_PARSERS.pop("MATCH")

        NO_PAREN_FUNCTION_PARSERS = parser.Parser.NO_PAREN_FUNCTION_PARSERS.copy()
        NO_PAREN_FUNCTION_PARSERS.pop("ANY")

        RANGE_PARSERS = {
            **parser.Parser.RANGE_PARSERS,
            TokenType.GLOBAL: lambda self, this: self._match(TokenType.IN)
            and self._parse_in(this, is_global=True),
        }

        # The PLACEHOLDER entry is popped because 1) it doesn't affect Clickhouse (it corresponds to
        # the postgres-specific JSONBContains parser) and 2) it makes parsing the ternary op simpler.
        COLUMN_OPERATORS = parser.Parser.COLUMN_OPERATORS.copy()
        COLUMN_OPERATORS.pop(TokenType.PLACEHOLDER)

        JOIN_KINDS = {
            *parser.Parser.JOIN_KINDS,
            TokenType.ANY,
            TokenType.ASOF,
            TokenType.ARRAY,
        }

        TABLE_ALIAS_TOKENS = parser.Parser.TABLE_ALIAS_TOKENS - {
            TokenType.ANY,
            TokenType.ARRAY,
            TokenType.FINAL,
            TokenType.FORMAT,
            TokenType.SETTINGS,
        }

        ALIAS_TOKENS = parser.Parser.ALIAS_TOKENS - {
            TokenType.FORMAT,
        }

        LOG_DEFAULTS_TO_LN = True

        QUERY_MODIFIER_PARSERS = {
            **parser.Parser.QUERY_MODIFIER_PARSERS,
            TokenType.SETTINGS: lambda self: (
                "settings",
                self._advance() or self._parse_csv(self._parse_assignment),
            ),
            TokenType.FORMAT: lambda self: ("format", self._advance() or self._parse_id_var()),
        }

        CONSTRAINT_PARSERS = {
            **parser.Parser.CONSTRAINT_PARSERS,
            "INDEX": lambda self: self._parse_index_constraint(),
            "CODEC": lambda self: self._parse_compress(),
        }

        ALTER_PARSERS = {
            **parser.Parser.ALTER_PARSERS,
            "REPLACE": lambda self: self._parse_alter_table_replace(),
        }

        SCHEMA_UNNAMED_CONSTRAINTS = {
            *parser.Parser.SCHEMA_UNNAMED_CONSTRAINTS,
            "INDEX",
        }

        def _parse_create(self) -> exp.Create | exp.Command:
            create = super()._parse_create()

            # DATABASE in ClickHouse is the same as SCHEMA in other dialects
            if isinstance(create, exp.Create) and create.kind == "DATABASE":
                create.set("kind", "SCHEMA")

            return create

        def _parse_extract(self) -> exp.Extract | exp.Anonymous:
            index = self._index
            this = self._parse_bitwise()
            if self._match(TokenType.FROM):
                self._retreat(index)
                return super()._parse_extract()

            # We return Anonymous here because extract and regexpExtract have different semantics,
            # so parsing extract(foo, bar) into RegexpExtract can potentially break queries. E.g.,
            # `extract('foobar', 'b')` works, but ClickHouse crashes for `regexpExtract('foobar', 'b')`.
            #
            # TODO: can we somehow convert the former into an equivalent `regexpExtract` call?
            self._match(TokenType.COMMA)
            return self.expression(
                exp.Anonymous, this="extract", expressions=[this, self._parse_bitwise()]
            )

        def _parse_assignment(self) -> t.Optional[exp.Expression]:
            this = super()._parse_assignment()

            if self._match(TokenType.PLACEHOLDER):
                return self.expression(
                    exp.If,
                    this=this,
                    true=self._parse_assignment(),
                    false=self._match(TokenType.COLON) and self._parse_assignment(),
                )

            return this

        def _parse_placeholder(self) -> t.Optional[exp.Expression]:
            """
            Parse a placeholder expression like SELECT {abc: UInt32} or FROM {table: Identifier}
            https://clickhouse.com/docs/en/sql-reference/syntax#defining-and-using-query-parameters
            """
            if not self._match(TokenType.L_BRACE):
                return None

            this = self._parse_id_var()
            self._match(TokenType.COLON)
            kind = self._parse_types(check_func=False, allow_identifiers=False) or (
                self._match_text_seq("IDENTIFIER") and "Identifier"
            )

            if not kind:
                self.raise_error("Expecting a placeholder type or 'Identifier' for tables")
            elif not self._match(TokenType.R_BRACE):
                self.raise_error("Expecting }")

            return self.expression(exp.Placeholder, this=this, kind=kind)

        def _parse_in(self, this: t.Optional[exp.Expression], is_global: bool = False) -> exp.In:
            this = super()._parse_in(this)
            this.set("is_global", is_global)
            return this

        def _parse_table(
            self,
            schema: bool = False,
            joins: bool = False,
            alias_tokens: t.Optional[t.Collection[TokenType]] = None,
            parse_bracket: bool = False,
            is_db_reference: bool = False,
            parse_partition: bool = False,
        ) -> t.Optional[exp.Expression]:
            this = super()._parse_table(
                schema=schema,
                joins=joins,
                alias_tokens=alias_tokens,
                parse_bracket=parse_bracket,
                is_db_reference=is_db_reference,
            )

            if self._match(TokenType.FINAL):
                this = self.expression(exp.Final, this=this)

            return this

        def _parse_position(self, haystack_first: bool = False) -> exp.StrPosition:
            return super()._parse_position(haystack_first=True)

        # https://clickhouse.com/docs/en/sql-reference/statements/select/with/
        def _parse_cte(self) -> exp.CTE:
            # WITH <identifier> AS <subquery expression>
            cte: t.Optional[exp.CTE] = self._try_parse(super()._parse_cte)

            if not cte:
                # WITH <expression> AS <identifier>
                cte = self.expression(
                    exp.CTE,
                    this=self._parse_assignment(),
                    alias=self._parse_table_alias(),
                    scalar=True,
                )

            return cte

        def _parse_join_parts(
            self,
        ) -> t.Tuple[t.Optional[Token], t.Optional[Token], t.Optional[Token]]:
            is_global = self._match(TokenType.GLOBAL) and self._prev
            kind_pre = self._match_set(self.JOIN_KINDS, advance=False) and self._prev

            if kind_pre:
                kind = self._match_set(self.JOIN_KINDS) and self._prev
                side = self._match_set(self.JOIN_SIDES) and self._prev
                return is_global, side, kind

            return (
                is_global,
                self._match_set(self.JOIN_SIDES) and self._prev,
                self._match_set(self.JOIN_KINDS) and self._prev,
            )

        def _parse_join(
            self, skip_join_token: bool = False, parse_bracket: bool = False
        ) -> t.Optional[exp.Join]:
            join = super()._parse_join(skip_join_token=skip_join_token, parse_bracket=True)
            if join:
                join.set("global", join.args.pop("method", None))

            return join

        def _parse_function(
            self,
            functions: t.Optional[t.Dict[str, t.Callable]] = None,
            anonymous: bool = False,
            optional_parens: bool = True,
            any_token: bool = False,
        ) -> t.Optional[exp.Expression]:
            expr = super()._parse_function(
                functions=functions,
                anonymous=anonymous,
                optional_parens=optional_parens,
                any_token=any_token,
            )

            func = expr.this if isinstance(expr, exp.Window) else expr

            # Aggregate functions can be split in 2 parts: <func_name><suffix>
            parts = (
                self.AGG_FUNC_MAPPING.get(func.this) if isinstance(func, exp.Anonymous) else None
            )

            if parts:
                params = self._parse_func_params(func)

                kwargs = {
                    "this": func.this,
                    "expressions": func.expressions,
                }
                if parts[1]:
                    kwargs["parts"] = parts
                    exp_class = exp.CombinedParameterizedAgg if params else exp.CombinedAggFunc
                else:
                    exp_class = exp.ParameterizedAgg if params else exp.AnonymousAggFunc

                kwargs["exp_class"] = exp_class
                if params:
                    kwargs["params"] = params

                func = self.expression(**kwargs)

                if isinstance(expr, exp.Window):
                    # The window's func was parsed as Anonymous in base parser, fix its
                    # type to be ClickHouse style CombinedAnonymousAggFunc / AnonymousAggFunc
                    expr.set("this", func)
                elif params:
                    # Params have blocked super()._parse_function() from parsing the following window
                    # (if that exists) as they're standing between the function call and the window spec
                    expr = self._parse_window(func)
                else:
                    expr = func

            return expr

        def _parse_func_params(
            self, this: t.Optional[exp.Func] = None
        ) -> t.Optional[t.List[exp.Expression]]:
            if self._match_pair(TokenType.R_PAREN, TokenType.L_PAREN):
                return self._parse_csv(self._parse_lambda)

            if self._match(TokenType.L_PAREN):
                params = self._parse_csv(self._parse_lambda)
                self._match_r_paren(this)
                return params

            return None

        def _parse_quantile(self) -> exp.Quantile:
            this = self._parse_lambda()
            params = self._parse_func_params()
            if params:
                return self.expression(exp.Quantile, this=params[0], quantile=this)
            return self.expression(exp.Quantile, this=this, quantile=exp.Literal.number(0.5))

        def _parse_wrapped_id_vars(self, optional: bool = False) -> t.List[exp.Expression]:
            return super()._parse_wrapped_id_vars(optional=True)

        def _parse_primary_key(
            self, wrapped_optional: bool = False, in_props: bool = False
        ) -> exp.PrimaryKeyColumnConstraint | exp.PrimaryKey:
            return super()._parse_primary_key(
                wrapped_optional=wrapped_optional or in_props, in_props=in_props
            )

        def _parse_on_property(self) -> t.Optional[exp.Expression]:
            index = self._index
            if self._match_text_seq("CLUSTER"):
                this = self._parse_id_var()
                if this:
                    return self.expression(exp.OnCluster, this=this)
                else:
                    self._retreat(index)
            return None

        def _parse_index_constraint(
            self, kind: t.Optional[str] = None
        ) -> exp.IndexColumnConstraint:
            # INDEX name1 expr TYPE type1(args) GRANULARITY value
            this = self._parse_id_var()
            expression = self._parse_assignment()

            index_type = self._match_text_seq("TYPE") and (
                self._parse_function() or self._parse_var()
            )

            granularity = self._match_text_seq("GRANULARITY") and self._parse_term()

            return self.expression(
                exp.IndexColumnConstraint,
                this=this,
                expression=expression,
                index_type=index_type,
                granularity=granularity,
            )

        def _parse_partition(self) -> t.Optional[exp.Partition]:
            # https://clickhouse.com/docs/en/sql-reference/statements/alter/partition#how-to-set-partition-expression
            if not self._match(TokenType.PARTITION):
                return None

            if self._match_text_seq("ID"):
                # Corresponds to the PARTITION ID <string_value> syntax
                expressions: t.List[exp.Expression] = [
                    self.expression(exp.PartitionId, this=self._parse_string())
                ]
            else:
                expressions = self._parse_expressions()

            return self.expression(exp.Partition, expressions=expressions)

        def _parse_alter_table_replace(self) -> t.Optional[exp.Expression]:
            partition = self._parse_partition()

            if not partition or not self._match(TokenType.FROM):
                return None

            return self.expression(
                exp.ReplacePartition, expression=partition, source=self._parse_table_parts()
            )

        def _parse_projection_def(self) -> t.Optional[exp.ProjectionDef]:
            if not self._match_text_seq("PROJECTION"):
                return None

            return self.expression(
                exp.ProjectionDef,
                this=self._parse_id_var(),
                expression=self._parse_wrapped(self._parse_statement),
            )

        def _parse_constraint(self) -> t.Optional[exp.Expression]:
            return super()._parse_constraint() or self._parse_projection_def()
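A short sketch of the ClickHouse-specific constructs this parser accepts (the identifiers are made up for illustration):

    import sqlglot
    from sqlglot import exp

    # Parameterized aggregate: quantile(0.5)(latency) -> exp.Quantile
    ast = sqlglot.parse_one("SELECT quantile(0.5)(latency) FROM requests", read="clickhouse")
    print(ast.find(exp.Quantile))

    # Query parameter placeholder: {id: UInt32} -> exp.Placeholder
    ast = sqlglot.parse_one("SELECT * FROM users WHERE id = {id: UInt32}", read="clickhouse")
    print(ast.find(exp.Placeholder))

    # Scalar CTE form: WITH <expression> AS <identifier>
    print(sqlglot.parse_one("WITH 10 AS threshold SELECT threshold", read="clickhouse").sql("clickhouse"))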
Parser consumes a list of tokens produced by the Tokenizer and produces a parsed syntax tree.
Arguments:
- error_level: The desired error level. Default: ErrorLevel.IMMEDIATE
- error_message_context: The amount of context to capture from a query string when displaying the error message (in number of characters). Default: 100
- max_errors: Maximum number of error messages to include in a raised ParseError. This is only relevant if error_level is ErrorLevel.RAISE. Default: 3
Inherited Members
- sqlglot.parser.Parser
- Parser
- NO_PAREN_FUNCTIONS
- STRUCT_TYPE_TOKENS
- NESTED_TYPE_TOKENS
- ENUM_TYPE_TOKENS
- AGGREGATE_TYPE_TOKENS
- TYPE_TOKENS
- SIGNED_TO_UNSIGNED_TYPE_TOKEN
- SUBQUERY_PREDICATES
- DB_CREATABLES
- CREATABLES
- ALTERABLES
- INTERVAL_VARS
- ARRAY_CONSTRUCTORS
- COMMENT_TABLE_ALIAS_TOKENS
- UPDATE_ALIAS_TOKENS
- TRIM_TYPES
- CONJUNCTION
- ASSIGNMENT
- DISJUNCTION
- EQUALITY
- COMPARISON
- BITWISE
- TERM
- FACTOR
- EXPONENT
- TIMES
- TIMESTAMPS
- SET_OPERATIONS
- JOIN_METHODS
- JOIN_SIDES
- JOIN_HINTS
- LAMBDAS
- EXPRESSION_PARSERS
- STATEMENT_PARSERS
- UNARY_PARSERS
- STRING_PARSERS
- NUMERIC_PARSERS
- PRIMARY_PARSERS
- PLACEHOLDER_PARSERS
- PROPERTY_PARSERS
- ALTER_ALTER_PARSERS
- INVALID_FUNC_NAME_TOKENS
- KEY_VALUE_DEFINITIONS
- SET_PARSERS
- SHOW_PARSERS
- TYPE_LITERAL_PARSERS
- TYPE_CONVERTERS
- DDL_SELECT_TOKENS
- PRE_VOLATILE_TOKENS
- TRANSACTION_KIND
- TRANSACTION_CHARACTERISTICS
- CONFLICT_ACTIONS
- CREATE_SEQUENCE
- ISOLATED_LOADING_OPTIONS
- USABLES
- CAST_ACTIONS
- SCHEMA_BINDING_OPTIONS
- KEY_CONSTRAINT_OPTIONS
- INSERT_ALTERNATIVES
- CLONE_KEYWORDS
- HISTORICAL_DATA_PREFIX
- HISTORICAL_DATA_KIND
- OPCLASS_FOLLOW_KEYWORDS
- OPTYPE_FOLLOW_TOKENS
- TABLE_INDEX_HINT_TOKENS
- VIEW_ATTRIBUTES
- WINDOW_ALIAS_TOKENS
- WINDOW_BEFORE_PAREN_TOKENS
- WINDOW_SIDES
- JSON_KEY_VALUE_SEPARATOR_TOKENS
- FETCH_TOKENS
- ADD_CONSTRAINT_TOKENS
- DISTINCT_TOKENS
- NULL_TOKENS
- UNNEST_OFFSET_ALIAS_TOKENS
- SELECT_START_TOKENS
- COPY_INTO_VARLEN_OPTIONS
- STRICT_CAST
- PREFIXED_PIVOT_COLUMNS
- IDENTIFY_PIVOT_STRINGS
- ALTER_TABLE_ADD_REQUIRED_FOR_EACH_COLUMN
- TABLESAMPLE_CSV
- DEFAULT_SAMPLING_METHOD
- SET_REQUIRES_ASSIGNMENT_DELIMITER
- TRIM_PATTERN_FIRST
- STRING_ALIASES
- SET_OP_MODIFIERS
- NO_PAREN_IF_COMMANDS
- JSON_ARROWS_REQUIRE_JSON_TYPE
- COLON_IS_VARIANT_EXTRACT
- VALUES_FOLLOWED_BY_PAREN
- SUPPORTS_IMPLICIT_UNNEST
- SUPPORTS_PARTITION_SELECTION
- error_level
- error_message_context
- max_errors
- dialect
- reset
- parse
- parse_into
- check_errors
- raise_error
- expression
- validate_expression
- errors
- sql
    class Generator(generator.Generator):
        QUERY_HINTS = False
        STRUCT_DELIMITER = ("(", ")")
        NVL2_SUPPORTED = False
        TABLESAMPLE_REQUIRES_PARENS = False
        TABLESAMPLE_SIZE_IS_ROWS = False
        TABLESAMPLE_KEYWORDS = "SAMPLE"
        LAST_DAY_SUPPORTS_DATE_PART = False
        CAN_IMPLEMENT_ARRAY_ANY = True
        SUPPORTS_TO_NUMBER = False
        JOIN_HINTS = False
        TABLE_HINTS = False
        EXPLICIT_SET_OP = True
        GROUPINGS_SEP = ""
        SET_OP_MODIFIERS = False
        SUPPORTS_TABLE_ALIAS_COLUMNS = False
        VALUES_AS_TABLE = False

        STRING_TYPE_MAPPING = {
            exp.DataType.Type.CHAR: "String",
            exp.DataType.Type.LONGBLOB: "String",
            exp.DataType.Type.LONGTEXT: "String",
            exp.DataType.Type.MEDIUMBLOB: "String",
            exp.DataType.Type.MEDIUMTEXT: "String",
            exp.DataType.Type.TINYBLOB: "String",
            exp.DataType.Type.TINYTEXT: "String",
            exp.DataType.Type.TEXT: "String",
            exp.DataType.Type.VARBINARY: "String",
            exp.DataType.Type.VARCHAR: "String",
        }

        SUPPORTED_JSON_PATH_PARTS = {
            exp.JSONPathKey,
            exp.JSONPathRoot,
            exp.JSONPathSubscript,
        }

        TYPE_MAPPING = {
            **generator.Generator.TYPE_MAPPING,
            **STRING_TYPE_MAPPING,
            exp.DataType.Type.ARRAY: "Array",
            exp.DataType.Type.BIGINT: "Int64",
            exp.DataType.Type.DATE32: "Date32",
            exp.DataType.Type.DATETIME64: "DateTime64",
            exp.DataType.Type.DOUBLE: "Float64",
            exp.DataType.Type.ENUM: "Enum",
            exp.DataType.Type.ENUM8: "Enum8",
            exp.DataType.Type.ENUM16: "Enum16",
            exp.DataType.Type.FIXEDSTRING: "FixedString",
            exp.DataType.Type.FLOAT: "Float32",
            exp.DataType.Type.INT: "Int32",
            exp.DataType.Type.MEDIUMINT: "Int32",
            exp.DataType.Type.INT128: "Int128",
            exp.DataType.Type.INT256: "Int256",
            exp.DataType.Type.LOWCARDINALITY: "LowCardinality",
            exp.DataType.Type.MAP: "Map",
            exp.DataType.Type.NESTED: "Nested",
            exp.DataType.Type.NULLABLE: "Nullable",
            exp.DataType.Type.SMALLINT: "Int16",
            exp.DataType.Type.STRUCT: "Tuple",
            exp.DataType.Type.TINYINT: "Int8",
            exp.DataType.Type.UBIGINT: "UInt64",
            exp.DataType.Type.UINT: "UInt32",
            exp.DataType.Type.UINT128: "UInt128",
            exp.DataType.Type.UINT256: "UInt256",
            exp.DataType.Type.USMALLINT: "UInt16",
            exp.DataType.Type.UTINYINT: "UInt8",
            exp.DataType.Type.IPV4: "IPv4",
            exp.DataType.Type.IPV6: "IPv6",
            exp.DataType.Type.AGGREGATEFUNCTION: "AggregateFunction",
            exp.DataType.Type.SIMPLEAGGREGATEFUNCTION: "SimpleAggregateFunction",
        }

        TRANSFORMS = {
            **generator.Generator.TRANSFORMS,
            exp.AnyValue: rename_func("any"),
            exp.ApproxDistinct: rename_func("uniq"),
            exp.ArrayFilter: lambda self, e: self.func("arrayFilter", e.expression, e.this),
            exp.ArraySize: rename_func("LENGTH"),
            exp.ArraySum: rename_func("arraySum"),
            exp.ArgMax: arg_max_or_min_no_count("argMax"),
            exp.ArgMin: arg_max_or_min_no_count("argMin"),
            exp.Array: inline_array_sql,
            exp.CastToStrType: rename_func("CAST"),
            exp.CountIf: rename_func("countIf"),
            exp.CompressColumnConstraint: lambda self, e: f"CODEC({self.expressions(e, key='this', flat=True)})",
            exp.ComputedColumnConstraint: lambda self, e: f"{'MATERIALIZED' if e.args.get('persisted') else 'ALIAS'} {self.sql(e, 'this')}",
            exp.CurrentDate: lambda self, e: self.func("CURRENT_DATE"),
            exp.DateAdd: _datetime_delta_sql("DATE_ADD"),
            exp.DateDiff: _datetime_delta_sql("DATE_DIFF"),
            exp.DateStrToDate: rename_func("toDate"),
            exp.DateSub: _datetime_delta_sql("DATE_SUB"),
            exp.Explode: rename_func("arrayJoin"),
            exp.Final: lambda self, e: f"{self.sql(e, 'this')} FINAL",
            exp.IsNan: rename_func("isNaN"),
            exp.JSONExtract: json_extract_segments("JSONExtractString", quoted_index=False),
            exp.JSONExtractScalar: json_extract_segments("JSONExtractString", quoted_index=False),
            exp.JSONPathKey: json_path_key_only_name,
            exp.JSONPathRoot: lambda *_: "",
            exp.Map: lambda self, e: _lower_func(var_map_sql(self, e)),
            exp.Nullif: rename_func("nullIf"),
            exp.PartitionedByProperty: lambda self, e: f"PARTITION BY {self.sql(e, 'this')}",
            exp.Pivot: no_pivot_sql,
            exp.Quantile: _quantile_sql,
            exp.RegexpLike: lambda self, e: self.func("match", e.this, e.expression),
            exp.Rand: rename_func("randCanonical"),
            exp.StartsWith: rename_func("startsWith"),
            exp.StrPosition: lambda self, e: self.func(
                "position", e.this, e.args.get("substr"), e.args.get("position")
            ),
            exp.TimeToStr: lambda self, e: self.func(
                "DATE_FORMAT", e.this, self.format_time(e), e.args.get("timezone")
            ),
            exp.TimestampAdd: _datetime_delta_sql("TIMESTAMP_ADD"),
            exp.TimestampSub: _datetime_delta_sql("TIMESTAMP_SUB"),
            exp.VarMap: lambda self, e: _lower_func(var_map_sql(self, e)),
            exp.Xor: lambda self, e: self.func("xor", e.this, e.expression, *e.expressions),
            exp.MD5Digest: rename_func("MD5"),
            exp.MD5: lambda self, e: self.func("LOWER", self.func("HEX", self.func("MD5", e.this))),
            exp.SHA: rename_func("SHA1"),
            exp.SHA2: sha256_sql,
            exp.UnixToTime: _unix_to_time_sql,
            exp.TimestampTrunc: timestamptrunc_sql(zone=True),
            exp.Variance: rename_func("varSamp"),
            exp.SchemaCommentProperty: lambda self, e: self.naked_property(e),
            exp.Stddev: rename_func("stddevSamp"),
        }

        PROPERTIES_LOCATION = {
            **generator.Generator.PROPERTIES_LOCATION,
            exp.VolatileProperty: exp.Properties.Location.UNSUPPORTED,
            exp.PartitionedByProperty: exp.Properties.Location.POST_SCHEMA,
            exp.OnCluster: exp.Properties.Location.POST_NAME,
        }

        # there's no list in docs, but it can be found in Clickhouse code
        # see `ClickHouse/src/Parsers/ParserCreate*.cpp`
        ON_CLUSTER_TARGETS = {
            "DATABASE",
            "TABLE",
            "VIEW",
            "DICTIONARY",
            "INDEX",
            "FUNCTION",
            "NAMED COLLECTION",
        }

        # https://github.com/ClickHouse/ClickHouse/blob/275de04b8f6bb8c9334bf8070001afe2dab0b17d/src/Functions/FunctionsConversion.cpp#L2939-L2989
        TRY_CAST_TYPES = {
            "DATE",
            "DATE32",
            "DATETIME",
            "DATETIME64",
            "DECIMAL32",
            "DECIMAL64",
            "DECIMAL128",
            "DECIMAL256",
            "FLOAT32",
            "FLOAT64",
            "INT8",
            "INT16",
            "INT32",
            "INT64",
            "INT128",
            "INT256",
            "IPV4",
            "IPV6",
            "UINT8",
            "UINT16",
            "UINT32",
            "UINT64",
            "UINT128",
            "UINT256",
            "UUID",
        }

        def strtodate_sql(self, expression: exp.StrToDate) -> str:
            strtodate_sql = self.function_fallback_sql(expression)

            if not isinstance(expression.parent, exp.Cast):
                # StrToDate returns DATEs in other dialects (eg. postgres), so
                # this branch aims to improve the transpilation to clickhouse
                return f"CAST({strtodate_sql} AS DATE)"

            return strtodate_sql

        def cast_sql(self, expression: exp.Cast, safe_prefix: t.Optional[str] = None) -> str:
            this = expression.this

            if isinstance(this, exp.StrToDate) and expression.to == exp.DataType.build("datetime"):
                return self.sql(this)

            return super().cast_sql(expression, safe_prefix=safe_prefix)

        def trycast_sql(self, expression: exp.TryCast) -> str:
            target_type = self.sql(expression.to)
            if target_type.upper() in self.TRY_CAST_TYPES:
                return self.func(f"to{target_type}OrNull", expression.this)

            self.unsupported(f"There is no `to<Type>OrNull` for type {target_type}.")
            return super().cast_sql(expression)

        def _jsonpathsubscript_sql(self, expression: exp.JSONPathSubscript) -> str:
            this = self.json_path_part(expression.this)
            return str(int(this) + 1) if is_int(this) else this

        def likeproperty_sql(self, expression: exp.LikeProperty) -> str:
            return f"AS {self.sql(expression, 'this')}"

        def _any_to_has(
            self,
            expression: exp.EQ | exp.NEQ,
            default: t.Callable[[t.Any], str],
            prefix: str = "",
        ) -> str:
            if isinstance(expression.left, exp.Any):
                arr = expression.left
                this = expression.right
            elif isinstance(expression.right, exp.Any):
                arr = expression.right
                this = expression.left
            else:
                return default(expression)

            return prefix + self.func("has", arr.this.unnest(), this)

        def eq_sql(self, expression: exp.EQ) -> str:
            return self._any_to_has(expression, super().eq_sql)

        def neq_sql(self, expression: exp.NEQ) -> str:
            return self._any_to_has(expression, super().neq_sql, "NOT ")

        def regexpilike_sql(self, expression: exp.RegexpILike) -> str:
            # Manually add a flag to make the search case-insensitive
            regex = self.func("CONCAT", "'(?i)'", expression.expression)
            return self.func("match", expression.this, regex)

        def datatype_sql(self, expression: exp.DataType) -> str:
            # String is the standard ClickHouse type, every other variant is just an alias.
            # Additionally, any supplied length parameter will be ignored.
            #
            # https://clickhouse.com/docs/en/sql-reference/data-types/string
            if expression.this in self.STRING_TYPE_MAPPING:
                return "String"

            return super().datatype_sql(expression)

        def cte_sql(self, expression: exp.CTE) -> str:
            if expression.args.get("scalar"):
                this = self.sql(expression, "this")
                alias = self.sql(expression, "alias")
                return f"{this} AS {alias}"

            return super().cte_sql(expression)

        def after_limit_modifiers(self, expression: exp.Expression) -> t.List[str]:
            return super().after_limit_modifiers(expression) + [
                (
                    self.seg("SETTINGS ") + self.expressions(expression, key="settings", flat=True)
                    if expression.args.get("settings")
                    else ""
                ),
                (
                    self.seg("FORMAT ") + self.sql(expression, "format")
                    if expression.args.get("format")
                    else ""
                ),
            ]

        def parameterizedagg_sql(self, expression: exp.ParameterizedAgg) -> str:
            params = self.expressions(expression, key="params", flat=True)
            return self.func(expression.name, *expression.expressions) + f"({params})"

        def anonymousaggfunc_sql(self, expression: exp.AnonymousAggFunc) -> str:
            return self.func(expression.name, *expression.expressions)

        def combinedaggfunc_sql(self, expression: exp.CombinedAggFunc) -> str:
            return self.anonymousaggfunc_sql(expression)

        def combinedparameterizedagg_sql(self, expression: exp.CombinedParameterizedAgg) -> str:
            return self.parameterizedagg_sql(expression)

        def placeholder_sql(self, expression: exp.Placeholder) -> str:
            return f"{{{expression.name}: {self.sql(expression, 'kind')}}}"

        def oncluster_sql(self, expression: exp.OnCluster) -> str:
            return f"ON CLUSTER {self.sql(expression, 'this')}"

        def createable_sql(self, expression: exp.Create, locations: t.DefaultDict) -> str:
            if expression.kind in self.ON_CLUSTER_TARGETS and locations.get(
                exp.Properties.Location.POST_NAME
            ):
                this_name = self.sql(
                    expression.this if isinstance(expression.this, exp.Schema) else expression,
                    "this",
                )
                this_properties = " ".join(
                    [self.sql(prop) for prop in locations[exp.Properties.Location.POST_NAME]]
                )
                this_schema = self.schema_columns_sql(expression.this)
                return f"{this_name}{self.sep()}{this_properties}{self.sep()}{this_schema}"

            return super().createable_sql(expression, locations)

        def create_sql(self, expression: exp.Create) -> str:
            # The comment property comes last in CTAS statements, i.e. after the query
            query = expression.expression
            if isinstance(query, exp.Query):
                comment_prop = expression.find(exp.SchemaCommentProperty)
                if comment_prop:
                    comment_prop.pop()
                    query.replace(exp.paren(query))
            else:
                comment_prop = None

            # ClickHouse only has DATABASEs and objects under them, eg. TABLEs, VIEWs, etc
            if expression.kind == "SCHEMA":
                expression.set("kind", "DATABASE")

            create_sql = super().create_sql(expression)

            comment_sql = self.sql(comment_prop)
            comment_sql = f" {comment_sql}" if comment_sql else ""

            return f"{create_sql}{comment_sql}"

        def prewhere_sql(self, expression: exp.PreWhere) -> str:
            this = self.indent(self.sql(expression, "this"))
            return f"{self.seg('PREWHERE')}{self.sep()}{this}"

        def indexcolumnconstraint_sql(self, expression: exp.IndexColumnConstraint) -> str:
            this = self.sql(expression, "this")
            this = f" {this}" if this else ""
            expr = self.sql(expression, "expression")
            expr = f" {expr}" if expr else ""
            index_type = self.sql(expression, "index_type")
            index_type = f" TYPE {index_type}" if index_type else ""
            granularity = self.sql(expression, "granularity")
            granularity = f" GRANULARITY {granularity}" if granularity else ""

            return f"INDEX{this}{expr}{index_type}{granularity}"

        def partition_sql(self, expression: exp.Partition) -> str:
            return f"PARTITION {self.expressions(expression, flat=True)}"

        def partitionid_sql(self, expression: exp.PartitionId) -> str:
            return f"ID {self.sql(expression.this)}"

        def replacepartition_sql(self, expression: exp.ReplacePartition) -> str:
            return (
                f"REPLACE {self.sql(expression.expression)} FROM {self.sql(expression, 'source')}"
            )

        def projectiondef_sql(self, expression: exp.ProjectionDef) -> str:
            return f"PROJECTION {self.sql(expression.this)} {self.wrap(expression.expression)}"
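A small generation sketch against this class (identifiers are invented, and expected outputs are indicative rather than guaranteed byte-for-byte):

    import sqlglot

    # SETTINGS / FORMAT are re-emitted by after_limit_modifiers, and query parameters
    # by placeholder_sql, so a ClickHouse query should survive a parse/generate round trip.
    sql = "SELECT name FROM events WHERE id = {id: UInt32} LIMIT 10 SETTINGS max_threads = 4 FORMAT JSONEachRow"
    print(sqlglot.parse_one(sql, read="clickhouse").sql(dialect="clickhouse"))

    # trycast_sql: TRY_CAST(x AS INT) should become toInt32OrNull(x)
    print(sqlglot.transpile("SELECT TRY_CAST(x AS INT) FROM t", read="duckdb", write="clickhouse")[0])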
Generator converts a given syntax tree to the corresponding SQL string.
Arguments:
- pretty: Whether to format the produced SQL string. Default: False.
- identify: Determines when an identifier should be quoted. Possible values are: False (default): Never quote, except in cases where it's mandatory by the dialect. True or 'always': Always quote. 'safe': Only quote identifiers that are case insensitive.
- normalize: Whether to normalize identifiers to lowercase. Default: False.
- pad: The pad size in a formatted string. For example, this affects the indentation of a projection in a query, relative to its nesting level. Default: 2.
- indent: The indentation size in a formatted string. For example, this affects the indentation of subqueries and filters under a WHERE clause. Default: 2.
- normalize_functions: How to normalize function names. Possible values are: "upper" or True (default): Convert names to uppercase. "lower": Convert names to lowercase. False: Disables function name normalization.
- unsupported_level: Determines the generator's behavior when it encounters unsupported expressions. Default ErrorLevel.WARN.
- max_unsupported: Maximum number of unsupported messages to include in a raised UnsupportedError. This is only relevant if unsupported_level is ErrorLevel.RAISE. Default: 3
- leading_comma: Whether the comma is leading or trailing in select expressions. This is only relevant when generating in pretty mode. Default: False
- max_text_width: The max number of characters in a segment before creating new lines in pretty mode. The default is on the smaller end because the length only represents a segment and not the true line length. Default: 80
- comments: Whether to preserve comments in the output SQL code. Default: True
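The arguments above are forwarded when generating SQL from an expression, e.g. pretty printing (a sketch; formatting details can differ across sqlglot versions):

    import sqlglot

    ast = sqlglot.parse_one("SELECT id, count(*) AS c FROM t GROUP BY id HAVING c > 10", read="clickhouse")
    print(ast.sql(dialect="clickhouse", pretty=True))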
Inherited Members
- sqlglot.generator.Generator
- Generator
- NULL_ORDERING_SUPPORTED
- IGNORE_NULLS_IN_FUNC
- LOCKING_READS_SUPPORTED
- WRAP_DERIVED_VALUES
- CREATE_FUNCTION_RETURN_AS
- MATCHED_BY_SOURCE
- SINGLE_STRING_INTERVAL
- INTERVAL_ALLOWS_PLURAL_FORM
- LIMIT_FETCH
- LIMIT_ONLY_LITERALS
- RENAME_TABLE_WITH_DB
- INDEX_ON
- QUERY_HINT_SEP
- IS_BOOL_ALLOWED
- DUPLICATE_KEY_UPDATE_WITH_SET
- LIMIT_IS_TOP
- RETURNING_END
- EXTRACT_ALLOWS_QUOTES
- TZ_TO_WITH_TIME_ZONE
- ALTER_TABLE_INCLUDE_COLUMN_KEYWORD
- UNNEST_WITH_ORDINALITY
- AGGREGATE_FILTER_SUPPORTED
- SEMI_ANTI_JOIN_WITH_SIDE
- COMPUTED_COLUMN_WITH_TYPE
- SUPPORTS_TABLE_COPY
- TABLESAMPLE_WITH_METHOD
- TABLESAMPLE_SEED_KEYWORD
- COLLATE_IS_FUNC
- DATA_TYPE_SPECIFIERS_ALLOWED
- ENSURE_BOOLS
- CTE_RECURSIVE_KEYWORD_REQUIRED
- SUPPORTS_SINGLE_ARG_CONCAT
- UNPIVOT_ALIASES_ARE_IDENTIFIERS
- JSON_KEY_VALUE_PAIR_SEP
- INSERT_OVERWRITE
- SUPPORTS_SELECT_INTO
- SUPPORTS_UNLOGGED_TABLES
- SUPPORTS_CREATE_TABLE_LIKE
- LIKE_PROPERTY_INSIDE_SCHEMA
- MULTI_ARG_DISTINCT
- JSON_TYPE_REQUIRED_FOR_EXTRACTION
- JSON_PATH_BRACKETED_KEY_SUPPORTED
- JSON_PATH_SINGLE_QUOTE_ESCAPE
- COPY_PARAMS_ARE_WRAPPED
- COPY_PARAMS_EQ_REQUIRED
- COPY_HAS_INTO_KEYWORD
- STAR_EXCEPT
- HEX_FUNC
- WITH_PROPERTIES_PREFIX
- QUOTE_JSON_PATH
- PAD_FILL_PATTERN_IS_REQUIRED
- SUPPORTS_EXPLODING_PROJECTIONS
- ARRAY_CONCAT_IS_VAR_LEN
- SUPPORTS_CONVERT_TIMEZONE
- PARSE_JSON_NAME
- TIME_PART_SINGULARS
- TOKEN_MAPPING
- PARAMETER_TOKEN
- NAMED_PLACEHOLDER_TOKEN
- RESERVED_KEYWORDS
- WITH_SEPARATED_COMMENTS
- EXCLUDE_COMMENTS
- UNWRAPPED_INTERVAL_VALUES
- PARAMETERIZABLE_TEXT_TYPES
- EXPRESSIONS_WITHOUT_NESTED_CTES
- SENTINEL_LINE_BREAK
- pretty
- identify
- normalize
- pad
- unsupported_level
- max_unsupported
- leading_comma
- max_text_width
- comments
- dialect
- normalize_functions
- unsupported_messages
- generate
- preprocess
- unsupported
- sep
- seg
- pad_comment
- maybe_comment
- wrap
- no_identify
- normalize_func
- indent
- sql
- uncache_sql
- cache_sql
- characterset_sql
- column_parts
- column_sql
- columnposition_sql
- columndef_sql
- columnconstraint_sql
- computedcolumnconstraint_sql
- autoincrementcolumnconstraint_sql
- compresscolumnconstraint_sql
- generatedasidentitycolumnconstraint_sql
- generatedasrowcolumnconstraint_sql
- periodforsystemtimeconstraint_sql
- notnullcolumnconstraint_sql
- transformcolumnconstraint_sql
- primarykeycolumnconstraint_sql
- uniquecolumnconstraint_sql
- sequenceproperties_sql
- clone_sql
- describe_sql
- heredoc_sql
- prepend_ctes
- with_sql
- tablealias_sql
- bitstring_sql
- hexstring_sql
- bytestring_sql
- unicodestring_sql
- rawstring_sql
- datatypeparam_sql
- directory_sql
- delete_sql
- drop_sql
- except_sql
- except_op
- fetch_sql
- filter_sql
- hint_sql
- indexparameters_sql
- index_sql
- identifier_sql
- hex_sql
- lowerhex_sql
- inputoutputformat_sql
- national_sql
- properties_sql
- root_properties
- properties
- with_properties
- locate_properties
- property_name
- property_sql
- fallbackproperty_sql
- journalproperty_sql
- freespaceproperty_sql
- checksumproperty_sql
- mergeblockratioproperty_sql
- datablocksizeproperty_sql
- blockcompressionproperty_sql
- isolatedloadingproperty_sql
- partitionboundspec_sql
- partitionedofproperty_sql
- lockingproperty_sql
- withdataproperty_sql
- withsystemversioningproperty_sql
- insert_sql
- intersect_sql
- intersect_op
- introducer_sql
- kill_sql
- pseudotype_sql
- objectidentifier_sql
- onconflict_sql
- returning_sql
- rowformatdelimitedproperty_sql
- withtablehint_sql
- indextablehint_sql
- historicaldata_sql
- table_parts
- table_sql
- tablesample_sql
- pivot_sql
- version_sql
- tuple_sql
- update_sql
- values_sql
- var_sql
- into_sql
- from_sql
- group_sql
- having_sql
- connect_sql
- prior_sql
- join_sql
- lambda_sql
- lateral_op
- lateral_sql
- limit_sql
- offset_sql
- setitem_sql
- set_sql
- pragma_sql
- lock_sql
- literal_sql
- escape_str
- loaddata_sql
- null_sql
- boolean_sql
- order_sql
- withfill_sql
- cluster_sql
- distribute_sql
- sort_sql
- ordered_sql
- matchrecognizemeasure_sql
- matchrecognize_sql
- query_modifiers
- options_modifier
- queryoption_sql
- offset_limit_modifiers
- select_sql
- schema_sql
- schema_columns_sql
- star_sql
- parameter_sql
- sessionparameter_sql
- subquery_sql
- qualify_sql
- set_operations
- union_sql
- union_op
- unnest_sql
- where_sql
- window_sql
- partition_by_sql
- windowspec_sql
- withingroup_sql
- between_sql
- bracket_offset_expressions
- bracket_sql
- all_sql
- any_sql
- exists_sql
- case_sql
- constraint_sql
- nextvaluefor_sql
- extract_sql
- trim_sql
- convert_concat_args
- concat_sql
- concatws_sql
- check_sql
- foreignkey_sql
- primarykey_sql
- if_sql
- matchagainst_sql
- jsonkeyvalue_sql
- jsonpath_sql
- json_path_part
- formatjson_sql
- jsonobject_sql
- jsonobjectagg_sql
- jsonarray_sql
- jsonarrayagg_sql
- jsoncolumndef_sql
- jsonschema_sql
- jsontable_sql
- openjsoncolumndef_sql
- openjson_sql
- in_sql
- in_unnest_op
- interval_sql
- return_sql
- reference_sql
- anonymous_sql
- paren_sql
- neg_sql
- not_sql
- alias_sql
- pivotalias_sql
- aliases_sql
- atindex_sql
- attimezone_sql
- fromtimezone_sql
- add_sql
- and_sql
- or_sql
- xor_sql
- connector_sql
- bitwiseand_sql
- bitwiseleftshift_sql
- bitwisenot_sql
- bitwiseor_sql
- bitwiserightshift_sql
- bitwisexor_sql
- currentdate_sql
- collate_sql
- command_sql
- comment_sql
- mergetreettlaction_sql
- mergetreettl_sql
- transaction_sql
- commit_sql
- rollback_sql
- altercolumn_sql
- alterdiststyle_sql
- altersortkey_sql
- renametable_sql
- renamecolumn_sql
- alterset_sql
- alter_sql
- add_column_sql
- droppartition_sql
- addconstraint_sql
- distinct_sql
- ignorenulls_sql
- respectnulls_sql
- havingmax_sql
- intdiv_sql
- dpipe_sql
- div_sql
- overlaps_sql
- distance_sql
- dot_sql
- propertyeq_sql
- escape_sql
- glob_sql
- gt_sql
- gte_sql
- ilike_sql
- ilikeany_sql
- is_sql
- like_sql
- likeany_sql
- similarto_sql
- lt_sql
- lte_sql
- mod_sql
- mul_sql
- nullsafeeq_sql
- nullsafeneq_sql
- slice_sql
- sub_sql
- try_sql
- log_sql
- use_sql
- binary
- function_fallback_sql
- func
- format_args
- too_wide
- format_time
- expressions
- op_expressions
- naked_property
- tag_sql
- token_sql
- userdefinedfunction_sql
- joinhint_sql
- kwarg_sql
- when_sql
- merge_sql
- tochar_sql
- tonumber_sql
- dictproperty_sql
- dictrange_sql
- dictsubproperty_sql
- clusteredbyproperty_sql
- anyvalue_sql
- querytransform_sql
- indexconstraintoption_sql
- checkcolumnconstraint_sql
- nvl2_sql
- comprehension_sql
- columnprefix_sql
- opclass_sql
- predict_sql
- forin_sql
- refresh_sql
- operator_sql
- toarray_sql
- tsordstotime_sql
- tsordstotimestamp_sql
- tsordstodate_sql
- unixdate_sql
- lastday_sql
- dateadd_sql
- arrayany_sql
- struct_sql
- partitionrange_sql
- truncatetable_sql
- convert_sql
- copyparameter_sql
- credentials_sql
- copy_sql
- semicolon_sql
- datadeletionproperty_sql
- maskingpolicycolumnconstraint_sql
- gapfill_sql
- scope_resolution
- scoperesolution_sql
- parsejson_sql
- rand_sql
- changes_sql
- pad_sql
- summarize_sql
- explodinggenerateseries_sql
- arrayconcat_sql
- converttimezone_sql