sqlglot.parser

from __future__ import annotations

import logging
import typing as t
from collections import defaultdict

from sqlglot import exp
from sqlglot.errors import ErrorLevel, ParseError, concat_messages, merge_errors
from sqlglot.helper import apply_index_offset, ensure_list, seq_get
from sqlglot.time import format_time
from sqlglot.tokens import Token, Tokenizer, TokenType
from sqlglot.trie import TrieResult, in_trie, new_trie

if t.TYPE_CHECKING:
    from sqlglot._typing import E, Lit
    from sqlglot.dialects.dialect import Dialect, DialectType

    T = t.TypeVar("T")

logger = logging.getLogger("sqlglot")

OPTIONS_TYPE = t.Dict[str, t.Sequence[t.Union[t.Sequence[str], str]]]


def build_var_map(args: t.List) -> exp.StarMap | exp.VarMap:
    if len(args) == 1 and args[0].is_star:
        return exp.StarMap(this=args[0])

    keys = []
    values = []
    for i in range(0, len(args), 2):
        keys.append(args[i])
        values.append(args[i + 1])

    return exp.VarMap(keys=exp.array(*keys, copy=False), values=exp.array(*values, copy=False))


def build_like(args: t.List) -> exp.Escape | exp.Like:
    like = exp.Like(this=seq_get(args, 1), expression=seq_get(args, 0))
    return exp.Escape(this=like, expression=seq_get(args, 2)) if len(args) > 2 else like


def binary_range_parser(
    expr_type: t.Type[exp.Expression], reverse_args: bool = False
) -> t.Callable[[Parser, t.Optional[exp.Expression]], t.Optional[exp.Expression]]:
    def _parse_binary_range(
        self: Parser, this: t.Optional[exp.Expression]
    ) -> t.Optional[exp.Expression]:
        expression = self._parse_bitwise()
        if reverse_args:
            this, expression = expression, this
        return self._parse_escape(self.expression(expr_type, this=this, expression=expression))

    return _parse_binary_range


def build_logarithm(args: t.List, dialect: Dialect) -> exp.Func:
    # Default argument order is base, expression
    this = seq_get(args, 0)
    expression = seq_get(args, 1)

    if expression:
        if not dialect.LOG_BASE_FIRST:
            this, expression = expression, this
        return exp.Log(this=this, expression=expression)

    return (exp.Ln if dialect.parser_class.LOG_DEFAULTS_TO_LN else exp.Log)(this=this)


def build_hex(args: t.List, dialect: Dialect) -> exp.Hex | exp.LowerHex:
    arg = seq_get(args, 0)
    return exp.LowerHex(this=arg) if dialect.HEX_LOWERCASE else exp.Hex(this=arg)


def build_lower(args: t.List) -> exp.Lower | exp.Hex:
    # LOWER(HEX(..)) can be simplified to LowerHex to simplify its transpilation
    arg = seq_get(args, 0)
    return exp.LowerHex(this=arg.this) if isinstance(arg, exp.Hex) else exp.Lower(this=arg)


def build_upper(args: t.List) -> exp.Upper | exp.Hex:
    # UPPER(HEX(..)) can be simplified to Hex to simplify its transpilation
    arg = seq_get(args, 0)
    return exp.Hex(this=arg.this) if isinstance(arg, exp.Hex) else exp.Upper(this=arg)


def build_extract_json_with_path(expr_type: t.Type[E]) -> t.Callable[[t.List, Dialect], E]:
    def _builder(args: t.List, dialect: Dialect) -> E:
        expression = expr_type(
            this=seq_get(args, 0), expression=dialect.to_json_path(seq_get(args, 1))
        )
        if len(args) > 2 and expr_type is exp.JSONExtract:
            expression.set("expressions", args[2:])

        return expression

    return _builder
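

# The build_* helpers in this module are registered under SQL function names in
# Parser.FUNCTIONS below; the parser calls them with the already-parsed argument list.
# A rough illustration via the top-level API:
#
#   import sqlglot
#   sqlglot.parse_one("SELECT MOD(a + 1, 7)").sql()  # -> 'SELECT (a + 1) % 7' (see build_mod)
#   sqlglot.parse_one("SELECT LOG(2, 8)")            # routed through build_logarithm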


def build_mod(args: t.List) -> exp.Mod:
    this = seq_get(args, 0)
    expression = seq_get(args, 1)

    # Wrap the operands if they are binary nodes, e.g. MOD(a + 1, 7) -> (a + 1) % 7
    this = exp.Paren(this=this) if isinstance(this, exp.Binary) else this
    expression = exp.Paren(this=expression) if isinstance(expression, exp.Binary) else expression

    return exp.Mod(this=this, expression=expression)


def build_pad(args: t.List, is_left: bool = True):
    return exp.Pad(
        this=seq_get(args, 0),
        expression=seq_get(args, 1),
        fill_pattern=seq_get(args, 2),
        is_left=is_left,
    )


def build_array_constructor(
    exp_class: t.Type[E], args: t.List, bracket_kind: TokenType, dialect: Dialect
) -> exp.Expression:
    array_exp = exp_class(expressions=args)

    if exp_class == exp.Array and dialect.HAS_DISTINCT_ARRAY_CONSTRUCTORS:
        array_exp.set("bracket_notation", bracket_kind == TokenType.L_BRACKET)

    return array_exp


def build_convert_timezone(
    args: t.List, default_source_tz: t.Optional[str] = None
) -> t.Union[exp.ConvertTimezone, exp.Anonymous]:
    if len(args) == 2:
        source_tz = exp.Literal.string(default_source_tz) if default_source_tz else None
        return exp.ConvertTimezone(
            source_tz=source_tz, target_tz=seq_get(args, 0), timestamp=seq_get(args, 1)
        )

    return exp.ConvertTimezone.from_arg_list(args)


class _Parser(type):
    def __new__(cls, clsname, bases, attrs):
        klass = super().__new__(cls, clsname, bases, attrs)

        klass.SHOW_TRIE = new_trie(key.split(" ") for key in klass.SHOW_PARSERS)
        klass.SET_TRIE = new_trie(key.split(" ") for key in klass.SET_PARSERS)

        return klass
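

# The metaclass precomputes prefix tries over the multi-word keys of SHOW_PARSERS and
# SET_PARSERS (dialects register keys like "GLOBAL VARIABLES") so multi-token keywords can
# be matched incrementally with in_trie rather than by rescanning strings. Roughly:
#
#   new_trie(["GLOBAL VARIABLES".split(" "), "GLOBAL STATUS".split(" ")])
#   # -> {"GLOBAL": {"VARIABLES": {0: True}, "STATUS": {0: True}}}, with 0 marking a key end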


class Parser(metaclass=_Parser):
    """
    Parser consumes a list of tokens produced by the Tokenizer and produces a parsed syntax tree.

    Args:
        error_level: The desired error level.
            Default: ErrorLevel.IMMEDIATE
        error_message_context: The amount of context to capture from a query string when displaying
            the error message (in number of characters).
            Default: 100
        max_errors: Maximum number of error messages to include in a raised ParseError.
            This is only relevant if error_level is ErrorLevel.RAISE.
            Default: 3
    """

    FUNCTIONS: t.Dict[str, t.Callable] = {
        **{name: func.from_arg_list for name, func in exp.FUNCTION_BY_NAME.items()},
        "ARRAY": lambda args, dialect: exp.Array(expressions=args),
        "CONCAT": lambda args, dialect: exp.Concat(
            expressions=args,
            safe=not dialect.STRICT_STRING_CONCAT,
            coalesce=dialect.CONCAT_COALESCE,
        ),
        "CONCAT_WS": lambda args, dialect: exp.ConcatWs(
            expressions=args,
            safe=not dialect.STRICT_STRING_CONCAT,
            coalesce=dialect.CONCAT_COALESCE,
        ),
        "CONVERT_TIMEZONE": build_convert_timezone,
        "DATE_TO_DATE_STR": lambda args: exp.Cast(
            this=seq_get(args, 0),
            to=exp.DataType(this=exp.DataType.Type.TEXT),
        ),
        "GENERATE_DATE_ARRAY": lambda args: exp.GenerateDateArray(
            start=seq_get(args, 0),
            end=seq_get(args, 1),
            step=seq_get(args, 2) or exp.Interval(this=exp.Literal.number(1), unit=exp.var("DAY")),
        ),
        "GLOB": lambda args: exp.Glob(this=seq_get(args, 1), expression=seq_get(args, 0)),
        "HEX": build_hex,
        "JSON_EXTRACT": build_extract_json_with_path(exp.JSONExtract),
        "JSON_EXTRACT_SCALAR": build_extract_json_with_path(exp.JSONExtractScalar),
        "JSON_EXTRACT_PATH_TEXT": build_extract_json_with_path(exp.JSONExtractScalar),
        "LIKE": build_like,
        "LOG": build_logarithm,
        "LOG2": lambda args: exp.Log(this=exp.Literal.number(2), expression=seq_get(args, 0)),
        "LOG10": lambda args: exp.Log(this=exp.Literal.number(10), expression=seq_get(args, 0)),
        "LOWER": build_lower,
        "LPAD": lambda args: build_pad(args),
        "LEFTPAD": lambda args: build_pad(args),
        "MOD": build_mod,
        "RPAD": lambda args: build_pad(args, is_left=False),
        "RIGHTPAD": lambda args: build_pad(args, is_left=False),
        "SCOPE_RESOLUTION": lambda args: exp.ScopeResolution(expression=seq_get(args, 0))
        if len(args) != 2
        else exp.ScopeResolution(this=seq_get(args, 0), expression=seq_get(args, 1)),
        "TIME_TO_TIME_STR": lambda args: exp.Cast(
            this=seq_get(args, 0),
            to=exp.DataType(this=exp.DataType.Type.TEXT),
        ),
        "TO_HEX": build_hex,
        "TS_OR_DS_TO_DATE_STR": lambda args: exp.Substring(
            this=exp.Cast(
                this=seq_get(args, 0),
                to=exp.DataType(this=exp.DataType.Type.TEXT),
            ),
            start=exp.Literal.number(1),
            length=exp.Literal.number(10),
        ),
        "UNNEST": lambda args: exp.Unnest(expressions=ensure_list(seq_get(args, 0))),
        "UPPER": build_upper,
        "VAR_MAP": build_var_map,
    }

    NO_PAREN_FUNCTIONS = {
        TokenType.CURRENT_DATE: exp.CurrentDate,
        TokenType.CURRENT_DATETIME: exp.CurrentDatetime,
        TokenType.CURRENT_TIME: exp.CurrentTime,
        TokenType.CURRENT_TIMESTAMP: exp.CurrentTimestamp,
        TokenType.CURRENT_USER: exp.CurrentUser,
    }

    STRUCT_TYPE_TOKENS = {
        TokenType.NESTED,
        TokenType.OBJECT,
        TokenType.STRUCT,
    }

    NESTED_TYPE_TOKENS = {
        TokenType.ARRAY,
        TokenType.LIST,
        TokenType.LOWCARDINALITY,
        TokenType.MAP,
        TokenType.NULLABLE,
        *STRUCT_TYPE_TOKENS,
    }

    ENUM_TYPE_TOKENS = {
        TokenType.ENUM,
        TokenType.ENUM8,
        TokenType.ENUM16,
    }

    AGGREGATE_TYPE_TOKENS = {
        TokenType.AGGREGATEFUNCTION,
        TokenType.SIMPLEAGGREGATEFUNCTION,
    }

    TYPE_TOKENS = {
        TokenType.BIT,
        TokenType.BOOLEAN,
        TokenType.TINYINT,
        TokenType.UTINYINT,
        TokenType.SMALLINT,
        TokenType.USMALLINT,
        TokenType.INT,
        TokenType.UINT,
        TokenType.BIGINT,
        TokenType.UBIGINT,
        TokenType.INT128,
        TokenType.UINT128,
        TokenType.INT256,
        TokenType.UINT256,
        TokenType.MEDIUMINT,
        TokenType.UMEDIUMINT,
        TokenType.FIXEDSTRING,
        TokenType.FLOAT,
        TokenType.DOUBLE,
        TokenType.CHAR,
        TokenType.NCHAR,
        TokenType.VARCHAR,
        TokenType.NVARCHAR,
        TokenType.BPCHAR,
        TokenType.TEXT,
        TokenType.MEDIUMTEXT,
        TokenType.LONGTEXT,
        TokenType.MEDIUMBLOB,
        TokenType.LONGBLOB,
        TokenType.BINARY,
        TokenType.VARBINARY,
        TokenType.JSON,
        TokenType.JSONB,
        TokenType.INTERVAL,
        TokenType.TINYBLOB,
        TokenType.TINYTEXT,
        TokenType.TIME,
        TokenType.TIMETZ,
        TokenType.TIMESTAMP,
        TokenType.TIMESTAMP_S,
        TokenType.TIMESTAMP_MS,
        TokenType.TIMESTAMP_NS,
        TokenType.TIMESTAMPTZ,
        TokenType.TIMESTAMPLTZ,
        TokenType.TIMESTAMPNTZ,
        TokenType.DATETIME,
        TokenType.DATETIME64,
        TokenType.DATE,
        TokenType.DATE32,
        TokenType.INT4RANGE,
        TokenType.INT4MULTIRANGE,
        TokenType.INT8RANGE,
        TokenType.INT8MULTIRANGE,
        TokenType.NUMRANGE,
        TokenType.NUMMULTIRANGE,
        TokenType.TSRANGE,
        TokenType.TSMULTIRANGE,
        TokenType.TSTZRANGE,
        TokenType.TSTZMULTIRANGE,
        TokenType.DATERANGE,
        TokenType.DATEMULTIRANGE,
        TokenType.DECIMAL,
        TokenType.UDECIMAL,
        TokenType.BIGDECIMAL,
        TokenType.UUID,
        TokenType.GEOGRAPHY,
        TokenType.GEOMETRY,
        TokenType.HLLSKETCH,
        TokenType.HSTORE,
        TokenType.PSEUDO_TYPE,
        TokenType.SUPER,
        TokenType.SERIAL,
        TokenType.SMALLSERIAL,
        TokenType.BIGSERIAL,
        TokenType.XML,
        TokenType.YEAR,
        TokenType.UNIQUEIDENTIFIER,
        TokenType.USERDEFINED,
        TokenType.MONEY,
        TokenType.SMALLMONEY,
        TokenType.ROWVERSION,
        TokenType.IMAGE,
        TokenType.VARIANT,
        TokenType.VECTOR,
        TokenType.OBJECT,
        TokenType.OBJECT_IDENTIFIER,
        TokenType.INET,
        TokenType.IPADDRESS,
        TokenType.IPPREFIX,
        TokenType.IPV4,
        TokenType.IPV6,
        TokenType.UNKNOWN,
        TokenType.NULL,
        TokenType.NAME,
        TokenType.TDIGEST,
        *ENUM_TYPE_TOKENS,
        *NESTED_TYPE_TOKENS,
        *AGGREGATE_TYPE_TOKENS,
    }

    SIGNED_TO_UNSIGNED_TYPE_TOKEN = {
        TokenType.BIGINT: TokenType.UBIGINT,
        TokenType.INT: TokenType.UINT,
        TokenType.MEDIUMINT: TokenType.UMEDIUMINT,
        TokenType.SMALLINT: TokenType.USMALLINT,
        TokenType.TINYINT: TokenType.UTINYINT,
        TokenType.DECIMAL: TokenType.UDECIMAL,
    }

    SUBQUERY_PREDICATES = {
        TokenType.ANY: exp.Any,
        TokenType.ALL: exp.All,
        TokenType.EXISTS: exp.Exists,
        TokenType.SOME: exp.Any,
    }

    RESERVED_TOKENS = {
        *Tokenizer.SINGLE_TOKENS.values(),
        TokenType.SELECT,
    } - {TokenType.IDENTIFIER}

    DB_CREATABLES = {
        TokenType.DATABASE,
        TokenType.DICTIONARY,
        TokenType.MODEL,
        TokenType.SCHEMA,
        TokenType.SEQUENCE,
        TokenType.STORAGE_INTEGRATION,
        TokenType.TABLE,
        TokenType.TAG,
        TokenType.VIEW,
        TokenType.WAREHOUSE,
        TokenType.STREAMLIT,
    }

    CREATABLES = {
        TokenType.COLUMN,
        TokenType.CONSTRAINT,
        TokenType.FOREIGN_KEY,
        TokenType.FUNCTION,
        TokenType.INDEX,
        TokenType.PROCEDURE,
        *DB_CREATABLES,
    }

    ALTERABLES = {
        TokenType.TABLE,
        TokenType.VIEW,
    }

    # Tokens that can represent identifiers
    ID_VAR_TOKENS = {
        TokenType.ALL,
        TokenType.VAR,
        TokenType.ANTI,
        TokenType.APPLY,
        TokenType.ASC,
        TokenType.ASOF,
        TokenType.AUTO_INCREMENT,
        TokenType.BEGIN,
        TokenType.BPCHAR,
        TokenType.CACHE,
        TokenType.CASE,
        TokenType.COLLATE,
        TokenType.COMMAND,
        TokenType.COMMENT,
        TokenType.COMMIT,
        TokenType.CONSTRAINT,
        TokenType.COPY,
        TokenType.CUBE,
        TokenType.DEFAULT,
        TokenType.DELETE,
        TokenType.DESC,
        TokenType.DESCRIBE,
        TokenType.DICTIONARY,
        TokenType.DIV,
        TokenType.END,
        TokenType.EXECUTE,
        TokenType.ESCAPE,
        TokenType.FALSE,
        TokenType.FIRST,
        TokenType.FILTER,
        TokenType.FINAL,
        TokenType.FORMAT,
        TokenType.FULL,
        TokenType.IDENTIFIER,
        TokenType.IS,
        TokenType.ISNULL,
        TokenType.INTERVAL,
        TokenType.KEEP,
        TokenType.KILL,
        TokenType.LEFT,
        TokenType.LOAD,
        TokenType.MERGE,
        TokenType.NATURAL,
        TokenType.NEXT,
        TokenType.OFFSET,
        TokenType.OPERATOR,
        TokenType.ORDINALITY,
        TokenType.OVERLAPS,
        TokenType.OVERWRITE,
        TokenType.PARTITION,
        TokenType.PERCENT,
        TokenType.PIVOT,
        TokenType.PRAGMA,
        TokenType.RANGE,
        TokenType.RECURSIVE,
        TokenType.REFERENCES,
        TokenType.REFRESH,
        TokenType.RENAME,
        TokenType.REPLACE,
        TokenType.RIGHT,
        TokenType.ROLLUP,
        TokenType.ROW,
        TokenType.ROWS,
        TokenType.SEMI,
        TokenType.SET,
        TokenType.SETTINGS,
        TokenType.SHOW,
        TokenType.TEMPORARY,
        TokenType.TOP,
        TokenType.TRUE,
        TokenType.TRUNCATE,
        TokenType.UNIQUE,
        TokenType.UNNEST,
        TokenType.UNPIVOT,
        TokenType.UPDATE,
        TokenType.USE,
        TokenType.VOLATILE,
        TokenType.WINDOW,
        *CREATABLES,
        *SUBQUERY_PREDICATES,
        *TYPE_TOKENS,
        *NO_PAREN_FUNCTIONS,
    }

    INTERVAL_VARS = ID_VAR_TOKENS - {TokenType.END}

    TABLE_ALIAS_TOKENS = ID_VAR_TOKENS - {
        TokenType.ANTI,
        TokenType.APPLY,
        TokenType.ASOF,
        TokenType.FULL,
        TokenType.LEFT,
        TokenType.LOCK,
        TokenType.NATURAL,
        TokenType.OFFSET,
        TokenType.RIGHT,
        TokenType.SEMI,
        TokenType.WINDOW,
    }

    ALIAS_TOKENS = ID_VAR_TOKENS

    ARRAY_CONSTRUCTORS = {
        "ARRAY": exp.Array,
        "LIST": exp.List,
    }

    COMMENT_TABLE_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - {TokenType.IS}

    UPDATE_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - {TokenType.SET}

    TRIM_TYPES = {"LEADING", "TRAILING", "BOTH"}

    FUNC_TOKENS = {
        TokenType.COLLATE,
        TokenType.COMMAND,
        TokenType.CURRENT_DATE,
        TokenType.CURRENT_DATETIME,
        TokenType.CURRENT_TIMESTAMP,
        TokenType.CURRENT_TIME,
        TokenType.CURRENT_USER,
        TokenType.FILTER,
        TokenType.FIRST,
        TokenType.FORMAT,
        TokenType.GLOB,
        TokenType.IDENTIFIER,
        TokenType.INDEX,
        TokenType.ISNULL,
        TokenType.ILIKE,
        TokenType.INSERT,
        TokenType.LIKE,
        TokenType.MERGE,
        TokenType.OFFSET,
        TokenType.PRIMARY_KEY,
        TokenType.RANGE,
        TokenType.REPLACE,
        TokenType.RLIKE,
        TokenType.ROW,
        TokenType.UNNEST,
        TokenType.VAR,
        TokenType.LEFT,
        TokenType.RIGHT,
        TokenType.SEQUENCE,
        TokenType.DATE,
        TokenType.DATETIME,
        TokenType.TABLE,
        TokenType.TIMESTAMP,
        TokenType.TIMESTAMPTZ,
        TokenType.TRUNCATE,
        TokenType.WINDOW,
        TokenType.XOR,
        *TYPE_TOKENS,
        *SUBQUERY_PREDICATES,
    }

    CONJUNCTION: t.Dict[TokenType, t.Type[exp.Expression]] = {
        TokenType.AND: exp.And,
    }

    ASSIGNMENT: t.Dict[TokenType, t.Type[exp.Expression]] = {
        TokenType.COLON_EQ: exp.PropertyEQ,
    }

    DISJUNCTION: t.Dict[TokenType, t.Type[exp.Expression]] = {
        TokenType.OR: exp.Or,
    }

    EQUALITY = {
        TokenType.EQ: exp.EQ,
        TokenType.NEQ: exp.NEQ,
        TokenType.NULLSAFE_EQ: exp.NullSafeEQ,
    }
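
    # The operator tables in this group (ASSIGNMENT, DISJUNCTION, CONJUNCTION, EQUALITY,
    # and COMPARISON/BITWISE/TERM/FACTOR below) drive a precedence-climbing expression
    # parser: each level parses the next-tighter level as its operands, then folds any of
    # its own operators left-associatively. Roughly, "1 + 2 * 3" becomes
    # exp.Add(this=1, expression=exp.Mul(this=2, expression=3)), since STAR (FACTOR) binds
    # tighter than PLUS (TERM).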

    COMPARISON = {
        TokenType.GT: exp.GT,
        TokenType.GTE: exp.GTE,
        TokenType.LT: exp.LT,
        TokenType.LTE: exp.LTE,
    }

    BITWISE = {
        TokenType.AMP: exp.BitwiseAnd,
        TokenType.CARET: exp.BitwiseXor,
        TokenType.PIPE: exp.BitwiseOr,
    }

    TERM = {
        TokenType.DASH: exp.Sub,
        TokenType.PLUS: exp.Add,
        TokenType.MOD: exp.Mod,
        TokenType.COLLATE: exp.Collate,
    }

    FACTOR = {
        TokenType.DIV: exp.IntDiv,
        TokenType.LR_ARROW: exp.Distance,
        TokenType.SLASH: exp.Div,
        TokenType.STAR: exp.Mul,
    }

    EXPONENT: t.Dict[TokenType, t.Type[exp.Expression]] = {}

    TIMES = {
        TokenType.TIME,
        TokenType.TIMETZ,
    }

    TIMESTAMPS = {
        TokenType.TIMESTAMP,
        TokenType.TIMESTAMPTZ,
        TokenType.TIMESTAMPLTZ,
        *TIMES,
    }

    SET_OPERATIONS = {
        TokenType.UNION,
        TokenType.INTERSECT,
        TokenType.EXCEPT,
    }

    JOIN_METHODS = {
        TokenType.ASOF,
        TokenType.NATURAL,
        TokenType.POSITIONAL,
    }

    JOIN_SIDES = {
        TokenType.LEFT,
        TokenType.RIGHT,
        TokenType.FULL,
    }

    JOIN_KINDS = {
        TokenType.ANTI,
        TokenType.CROSS,
        TokenType.INNER,
        TokenType.OUTER,
        TokenType.SEMI,
        TokenType.STRAIGHT_JOIN,
    }

    JOIN_HINTS: t.Set[str] = set()

    LAMBDAS = {
        TokenType.ARROW: lambda self, expressions: self.expression(
            exp.Lambda,
            this=self._replace_lambda(
                self._parse_assignment(),
                expressions,
            ),
            expressions=expressions,
        ),
        TokenType.FARROW: lambda self, expressions: self.expression(
            exp.Kwarg,
            this=exp.var(expressions[0].name),
            expression=self._parse_assignment(),
        ),
    }

    COLUMN_OPERATORS = {
        TokenType.DOT: None,
        TokenType.DCOLON: lambda self, this, to: self.expression(
            exp.Cast if self.STRICT_CAST else exp.TryCast,
            this=this,
            to=to,
        ),
        TokenType.ARROW: lambda self, this, path: self.expression(
            exp.JSONExtract,
            this=this,
            expression=self.dialect.to_json_path(path),
            only_json_types=self.JSON_ARROWS_REQUIRE_JSON_TYPE,
        ),
        TokenType.DARROW: lambda self, this, path: self.expression(
            exp.JSONExtractScalar,
            this=this,
            expression=self.dialect.to_json_path(path),
            only_json_types=self.JSON_ARROWS_REQUIRE_JSON_TYPE,
        ),
        TokenType.HASH_ARROW: lambda self, this, path: self.expression(
            exp.JSONBExtract,
            this=this,
            expression=path,
        ),
        TokenType.DHASH_ARROW: lambda self, this, path: self.expression(
            exp.JSONBExtractScalar,
            this=this,
            expression=path,
        ),
        TokenType.PLACEHOLDER: lambda self, this, key: self.expression(
            exp.JSONBContains,
            this=this,
            expression=key,
        ),
    }
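
    # COLUMN_OPERATORS folds postfix operators into an already-parsed column. For example,
    # in dialects with JSON arrows (e.g. Postgres), "data -> 'x'" becomes roughly
    # exp.JSONExtract(this=data, expression=<json path>), and "x::INT" becomes exp.Cast
    # (or exp.TryCast when STRICT_CAST is False).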

    EXPRESSION_PARSERS = {
        exp.Cluster: lambda self: self._parse_sort(exp.Cluster, TokenType.CLUSTER_BY),
        exp.Column: lambda self: self._parse_column(),
        exp.Condition: lambda self: self._parse_assignment(),
        exp.DataType: lambda self: self._parse_types(allow_identifiers=False, schema=True),
        exp.Expression: lambda self: self._parse_expression(),
        exp.From: lambda self: self._parse_from(joins=True),
        exp.Group: lambda self: self._parse_group(),
        exp.Having: lambda self: self._parse_having(),
        exp.Identifier: lambda self: self._parse_id_var(),
        exp.Join: lambda self: self._parse_join(),
        exp.Lambda: lambda self: self._parse_lambda(),
        exp.Lateral: lambda self: self._parse_lateral(),
        exp.Limit: lambda self: self._parse_limit(),
        exp.Offset: lambda self: self._parse_offset(),
        exp.Order: lambda self: self._parse_order(),
        exp.Ordered: lambda self: self._parse_ordered(),
        exp.Properties: lambda self: self._parse_properties(),
        exp.Qualify: lambda self: self._parse_qualify(),
        exp.Returning: lambda self: self._parse_returning(),
        exp.Select: lambda self: self._parse_select(),
        exp.Sort: lambda self: self._parse_sort(exp.Sort, TokenType.SORT_BY),
        exp.Table: lambda self: self._parse_table_parts(),
        exp.TableAlias: lambda self: self._parse_table_alias(),
        exp.When: lambda self: seq_get(self._parse_when_matched(), 0),
        exp.Where: lambda self: self._parse_where(),
        exp.Window: lambda self: self._parse_named_window(),
        exp.With: lambda self: self._parse_with(),
        "JOIN_TYPE": lambda self: self._parse_join_parts(),
    }

    STATEMENT_PARSERS = {
        TokenType.ALTER: lambda self: self._parse_alter(),
        TokenType.BEGIN: lambda self: self._parse_transaction(),
        TokenType.CACHE: lambda self: self._parse_cache(),
        TokenType.COMMENT: lambda self: self._parse_comment(),
        TokenType.COMMIT: lambda self: self._parse_commit_or_rollback(),
        TokenType.COPY: lambda self: self._parse_copy(),
        TokenType.CREATE: lambda self: self._parse_create(),
        TokenType.DELETE: lambda self: self._parse_delete(),
        TokenType.DESC: lambda self: self._parse_describe(),
        TokenType.DESCRIBE: lambda self: self._parse_describe(),
        TokenType.DROP: lambda self: self._parse_drop(),
        TokenType.INSERT: lambda self: self._parse_insert(),
        TokenType.KILL: lambda self: self._parse_kill(),
        TokenType.LOAD: lambda self: self._parse_load(),
        TokenType.MERGE: lambda self: self._parse_merge(),
        TokenType.PIVOT: lambda self: self._parse_simplified_pivot(),
        TokenType.PRAGMA: lambda self: self.expression(exp.Pragma, this=self._parse_expression()),
        TokenType.REFRESH: lambda self: self._parse_refresh(),
        TokenType.ROLLBACK: lambda self: self._parse_commit_or_rollback(),
        TokenType.SET: lambda self: self._parse_set(),
        TokenType.TRUNCATE: lambda self: self._parse_truncate_table(),
        TokenType.UNCACHE: lambda self: self._parse_uncache(),
        TokenType.UPDATE: lambda self: self._parse_update(),
        TokenType.USE: lambda self: self.expression(
            exp.Use,
            kind=self._parse_var_from_options(self.USABLES, raise_unmatched=False),
            this=self._parse_table(schema=False),
        ),
        TokenType.SEMICOLON: lambda self: self.expression(exp.Semicolon),
    }

    UNARY_PARSERS = {
        TokenType.PLUS: lambda self: self._parse_unary(),  # Unary + is handled as a no-op
        TokenType.NOT: lambda self: self.expression(exp.Not, this=self._parse_equality()),
        TokenType.TILDA: lambda self: self.expression(exp.BitwiseNot, this=self._parse_unary()),
        TokenType.DASH: lambda self: self.expression(exp.Neg, this=self._parse_unary()),
        TokenType.PIPE_SLASH: lambda self: self.expression(exp.Sqrt, this=self._parse_unary()),
        TokenType.DPIPE_SLASH: lambda self: self.expression(exp.Cbrt, this=self._parse_unary()),
    }
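
    # STATEMENT_PARSERS is the top-level dispatch table used by _parse_statement: the first
    # token of each statement selects its parse method, e.g. roughly
    #
    #   TokenType.CREATE -> _parse_create() -> exp.Create(kind="TABLE", ...)
    #
    # for "CREATE TABLE ...". Unmatched tokens fall back to command or expression parsing.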

    STRING_PARSERS = {
        TokenType.HEREDOC_STRING: lambda self, token: self.expression(
            exp.RawString, this=token.text
        ),
        TokenType.NATIONAL_STRING: lambda self, token: self.expression(
            exp.National, this=token.text
        ),
        TokenType.RAW_STRING: lambda self, token: self.expression(exp.RawString, this=token.text),
        TokenType.STRING: lambda self, token: self.expression(
            exp.Literal, this=token.text, is_string=True
        ),
        TokenType.UNICODE_STRING: lambda self, token: self.expression(
            exp.UnicodeString,
            this=token.text,
            escape=self._match_text_seq("UESCAPE") and self._parse_string(),
        ),
    }

    NUMERIC_PARSERS = {
        TokenType.BIT_STRING: lambda self, token: self.expression(exp.BitString, this=token.text),
        TokenType.BYTE_STRING: lambda self, token: self.expression(exp.ByteString, this=token.text),
        TokenType.HEX_STRING: lambda self, token: self.expression(exp.HexString, this=token.text),
        TokenType.NUMBER: lambda self, token: self.expression(
            exp.Literal, this=token.text, is_string=False
        ),
    }

    PRIMARY_PARSERS = {
        **STRING_PARSERS,
        **NUMERIC_PARSERS,
        TokenType.INTRODUCER: lambda self, token: self._parse_introducer(token),
        TokenType.NULL: lambda self, _: self.expression(exp.Null),
        TokenType.TRUE: lambda self, _: self.expression(exp.Boolean, this=True),
        TokenType.FALSE: lambda self, _: self.expression(exp.Boolean, this=False),
        TokenType.SESSION_PARAMETER: lambda self, _: self._parse_session_parameter(),
        TokenType.STAR: lambda self, _: self.expression(
            exp.Star,
            **{
                "except": self._parse_star_op("EXCEPT", "EXCLUDE"),
                "replace": self._parse_star_op("REPLACE"),
                "rename": self._parse_star_op("RENAME"),
            },
        ),
    }

    PLACEHOLDER_PARSERS = {
        TokenType.PLACEHOLDER: lambda self: self.expression(exp.Placeholder),
        TokenType.PARAMETER: lambda self: self._parse_parameter(),
        TokenType.COLON: lambda self: (
            self.expression(exp.Placeholder, this=self._prev.text)
            if self._match_set(self.ID_VAR_TOKENS)
            else None
        ),
    }

    RANGE_PARSERS = {
        TokenType.BETWEEN: lambda self, this: self._parse_between(this),
        TokenType.GLOB: binary_range_parser(exp.Glob),
        TokenType.ILIKE: binary_range_parser(exp.ILike),
        TokenType.IN: lambda self, this: self._parse_in(this),
        TokenType.IRLIKE: binary_range_parser(exp.RegexpILike),
        TokenType.IS: lambda self, this: self._parse_is(this),
        TokenType.LIKE: binary_range_parser(exp.Like),
        TokenType.OVERLAPS: binary_range_parser(exp.Overlaps),
        TokenType.RLIKE: binary_range_parser(exp.RegexpLike),
        TokenType.SIMILAR_TO: binary_range_parser(exp.SimilarTo),
        TokenType.FOR: lambda self, this: self._parse_comprehension(this),
    }
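
    # RANGE_PARSERS handles postfix predicates on an already-parsed operand. Entries built
    # with binary_range_parser also pick up a trailing ESCAPE clause, e.g. roughly:
    #
    #   x LIKE 'a|%' ESCAPE '|'
    #   -> exp.Escape(this=exp.Like(this=x, expression='a|%'), expression='|')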
"DATABLOCKSIZE": lambda self, **kwargs: self._parse_datablocksize(**kwargs), 873 "DATA_DELETION": lambda self: self._parse_data_deletion_property(), 874 "DEFINER": lambda self: self._parse_definer(), 875 "DETERMINISTIC": lambda self: self.expression( 876 exp.StabilityProperty, this=exp.Literal.string("IMMUTABLE") 877 ), 878 "DYNAMIC": lambda self: self.expression(exp.DynamicProperty), 879 "DISTKEY": lambda self: self._parse_distkey(), 880 "DISTSTYLE": lambda self: self._parse_property_assignment(exp.DistStyleProperty), 881 "EMPTY": lambda self: self.expression(exp.EmptyProperty), 882 "ENGINE": lambda self: self._parse_property_assignment(exp.EngineProperty), 883 "EXECUTE": lambda self: self._parse_property_assignment(exp.ExecuteAsProperty), 884 "EXTERNAL": lambda self: self.expression(exp.ExternalProperty), 885 "FALLBACK": lambda self, **kwargs: self._parse_fallback(**kwargs), 886 "FORMAT": lambda self: self._parse_property_assignment(exp.FileFormatProperty), 887 "FREESPACE": lambda self: self._parse_freespace(), 888 "GLOBAL": lambda self: self.expression(exp.GlobalProperty), 889 "HEAP": lambda self: self.expression(exp.HeapProperty), 890 "ICEBERG": lambda self: self.expression(exp.IcebergProperty), 891 "IMMUTABLE": lambda self: self.expression( 892 exp.StabilityProperty, this=exp.Literal.string("IMMUTABLE") 893 ), 894 "INHERITS": lambda self: self.expression( 895 exp.InheritsProperty, expressions=self._parse_wrapped_csv(self._parse_table) 896 ), 897 "INPUT": lambda self: self.expression(exp.InputModelProperty, this=self._parse_schema()), 898 "JOURNAL": lambda self, **kwargs: self._parse_journal(**kwargs), 899 "LANGUAGE": lambda self: self._parse_property_assignment(exp.LanguageProperty), 900 "LAYOUT": lambda self: self._parse_dict_property(this="LAYOUT"), 901 "LIFETIME": lambda self: self._parse_dict_range(this="LIFETIME"), 902 "LIKE": lambda self: self._parse_create_like(), 903 "LOCATION": lambda self: self._parse_property_assignment(exp.LocationProperty), 904 "LOCK": lambda self: self._parse_locking(), 905 "LOCKING": lambda self: self._parse_locking(), 906 "LOG": lambda self, **kwargs: self._parse_log(**kwargs), 907 "MATERIALIZED": lambda self: self.expression(exp.MaterializedProperty), 908 "MERGEBLOCKRATIO": lambda self, **kwargs: self._parse_mergeblockratio(**kwargs), 909 "MODIFIES": lambda self: self._parse_modifies_property(), 910 "MULTISET": lambda self: self.expression(exp.SetProperty, multi=True), 911 "NO": lambda self: self._parse_no_property(), 912 "ON": lambda self: self._parse_on_property(), 913 "ORDER BY": lambda self: self._parse_order(skip_order_token=True), 914 "OUTPUT": lambda self: self.expression(exp.OutputModelProperty, this=self._parse_schema()), 915 "PARTITION": lambda self: self._parse_partitioned_of(), 916 "PARTITION BY": lambda self: self._parse_partitioned_by(), 917 "PARTITIONED BY": lambda self: self._parse_partitioned_by(), 918 "PARTITIONED_BY": lambda self: self._parse_partitioned_by(), 919 "PRIMARY KEY": lambda self: self._parse_primary_key(in_props=True), 920 "RANGE": lambda self: self._parse_dict_range(this="RANGE"), 921 "READS": lambda self: self._parse_reads_property(), 922 "REMOTE": lambda self: self._parse_remote_with_connection(), 923 "RETURNS": lambda self: self._parse_returns(), 924 "STRICT": lambda self: self.expression(exp.StrictProperty), 925 "STREAMING": lambda self: self.expression(exp.StreamingTableProperty), 926 "ROW": lambda self: self._parse_row(), 927 "ROW_FORMAT": lambda self: self._parse_property_assignment(exp.RowFormatProperty), 928 
"SAMPLE": lambda self: self.expression( 929 exp.SampleProperty, this=self._match_text_seq("BY") and self._parse_bitwise() 930 ), 931 "SECURE": lambda self: self.expression(exp.SecureProperty), 932 "SET": lambda self: self.expression(exp.SetProperty, multi=False), 933 "SETTINGS": lambda self: self._parse_settings_property(), 934 "SHARING": lambda self: self._parse_property_assignment(exp.SharingProperty), 935 "SORTKEY": lambda self: self._parse_sortkey(), 936 "SOURCE": lambda self: self._parse_dict_property(this="SOURCE"), 937 "STABLE": lambda self: self.expression( 938 exp.StabilityProperty, this=exp.Literal.string("STABLE") 939 ), 940 "STORED": lambda self: self._parse_stored(), 941 "SYSTEM_VERSIONING": lambda self: self._parse_system_versioning_property(), 942 "TBLPROPERTIES": lambda self: self._parse_wrapped_properties(), 943 "TEMP": lambda self: self.expression(exp.TemporaryProperty), 944 "TEMPORARY": lambda self: self.expression(exp.TemporaryProperty), 945 "TO": lambda self: self._parse_to_table(), 946 "TRANSIENT": lambda self: self.expression(exp.TransientProperty), 947 "TRANSFORM": lambda self: self.expression( 948 exp.TransformModelProperty, expressions=self._parse_wrapped_csv(self._parse_expression) 949 ), 950 "TTL": lambda self: self._parse_ttl(), 951 "USING": lambda self: self._parse_property_assignment(exp.FileFormatProperty), 952 "UNLOGGED": lambda self: self.expression(exp.UnloggedProperty), 953 "VOLATILE": lambda self: self._parse_volatile_property(), 954 "WITH": lambda self: self._parse_with_property(), 955 } 956 957 CONSTRAINT_PARSERS = { 958 "AUTOINCREMENT": lambda self: self._parse_auto_increment(), 959 "AUTO_INCREMENT": lambda self: self._parse_auto_increment(), 960 "CASESPECIFIC": lambda self: self.expression(exp.CaseSpecificColumnConstraint, not_=False), 961 "CHARACTER SET": lambda self: self.expression( 962 exp.CharacterSetColumnConstraint, this=self._parse_var_or_string() 963 ), 964 "CHECK": lambda self: self.expression( 965 exp.CheckColumnConstraint, 966 this=self._parse_wrapped(self._parse_assignment), 967 enforced=self._match_text_seq("ENFORCED"), 968 ), 969 "COLLATE": lambda self: self.expression( 970 exp.CollateColumnConstraint, 971 this=self._parse_identifier() or self._parse_column(), 972 ), 973 "COMMENT": lambda self: self.expression( 974 exp.CommentColumnConstraint, this=self._parse_string() 975 ), 976 "COMPRESS": lambda self: self._parse_compress(), 977 "CLUSTERED": lambda self: self.expression( 978 exp.ClusteredColumnConstraint, this=self._parse_wrapped_csv(self._parse_ordered) 979 ), 980 "NONCLUSTERED": lambda self: self.expression( 981 exp.NonClusteredColumnConstraint, this=self._parse_wrapped_csv(self._parse_ordered) 982 ), 983 "DEFAULT": lambda self: self.expression( 984 exp.DefaultColumnConstraint, this=self._parse_bitwise() 985 ), 986 "ENCODE": lambda self: self.expression(exp.EncodeColumnConstraint, this=self._parse_var()), 987 "EPHEMERAL": lambda self: self.expression( 988 exp.EphemeralColumnConstraint, this=self._parse_bitwise() 989 ), 990 "EXCLUDE": lambda self: self.expression( 991 exp.ExcludeColumnConstraint, this=self._parse_index_params() 992 ), 993 "FOREIGN KEY": lambda self: self._parse_foreign_key(), 994 "FORMAT": lambda self: self.expression( 995 exp.DateFormatColumnConstraint, this=self._parse_var_or_string() 996 ), 997 "GENERATED": lambda self: self._parse_generated_as_identity(), 998 "IDENTITY": lambda self: self._parse_auto_increment(), 999 "INLINE": lambda self: self._parse_inline(), 1000 "LIKE": lambda self: 
        "NOT": lambda self: self._parse_not_constraint(),
        "NULL": lambda self: self.expression(exp.NotNullColumnConstraint, allow_null=True),
        "ON": lambda self: (
            self._match(TokenType.UPDATE)
            and self.expression(exp.OnUpdateColumnConstraint, this=self._parse_function())
        )
        or self.expression(exp.OnProperty, this=self._parse_id_var()),
        "PATH": lambda self: self.expression(exp.PathColumnConstraint, this=self._parse_string()),
        "PERIOD": lambda self: self._parse_period_for_system_time(),
        "PRIMARY KEY": lambda self: self._parse_primary_key(),
        "REFERENCES": lambda self: self._parse_references(match=False),
        "TITLE": lambda self: self.expression(
            exp.TitleColumnConstraint, this=self._parse_var_or_string()
        ),
        "TTL": lambda self: self.expression(exp.MergeTreeTTL, expressions=[self._parse_bitwise()]),
        "UNIQUE": lambda self: self._parse_unique(),
        "UPPERCASE": lambda self: self.expression(exp.UppercaseColumnConstraint),
        "WITH": lambda self: self.expression(
            exp.Properties, expressions=self._parse_wrapped_properties()
        ),
    }

    ALTER_PARSERS = {
        "ADD": lambda self: self._parse_alter_table_add(),
        "ALTER": lambda self: self._parse_alter_table_alter(),
        "CLUSTER BY": lambda self: self._parse_cluster(wrapped=True),
        "DELETE": lambda self: self.expression(exp.Delete, where=self._parse_where()),
        "DROP": lambda self: self._parse_alter_table_drop(),
        "RENAME": lambda self: self._parse_alter_table_rename(),
        "SET": lambda self: self._parse_alter_table_set(),
        "AS": lambda self: self._parse_select(),
    }

    ALTER_ALTER_PARSERS = {
        "DISTKEY": lambda self: self._parse_alter_diststyle(),
        "DISTSTYLE": lambda self: self._parse_alter_diststyle(),
        "SORTKEY": lambda self: self._parse_alter_sortkey(),
        "COMPOUND": lambda self: self._parse_alter_sortkey(compound=True),
    }

    SCHEMA_UNNAMED_CONSTRAINTS = {
        "CHECK",
        "EXCLUDE",
        "FOREIGN KEY",
        "LIKE",
        "PERIOD",
        "PRIMARY KEY",
        "UNIQUE",
    }

    NO_PAREN_FUNCTION_PARSERS = {
        "ANY": lambda self: self.expression(exp.Any, this=self._parse_bitwise()),
        "CASE": lambda self: self._parse_case(),
        "CONNECT_BY_ROOT": lambda self: self.expression(
            exp.ConnectByRoot, this=self._parse_column()
        ),
        "IF": lambda self: self._parse_if(),
        "NEXT": lambda self: self._parse_next_value_for(),
    }

    INVALID_FUNC_NAME_TOKENS = {
        TokenType.IDENTIFIER,
        TokenType.STRING,
    }

    FUNCTIONS_WITH_ALIASED_ARGS = {"STRUCT"}

    KEY_VALUE_DEFINITIONS = (exp.Alias, exp.EQ, exp.PropertyEQ, exp.Slice)

    FUNCTION_PARSERS = {
        "CAST": lambda self: self._parse_cast(self.STRICT_CAST),
        "CONVERT": lambda self: self._parse_convert(self.STRICT_CAST),
        "DECODE": lambda self: self._parse_decode(),
        "EXTRACT": lambda self: self._parse_extract(),
        "GAP_FILL": lambda self: self._parse_gap_fill(),
        "JSON_OBJECT": lambda self: self._parse_json_object(),
        "JSON_OBJECTAGG": lambda self: self._parse_json_object(agg=True),
        "JSON_TABLE": lambda self: self._parse_json_table(),
        "MATCH": lambda self: self._parse_match_against(),
        "OPENJSON": lambda self: self._parse_open_json(),
        "POSITION": lambda self: self._parse_position(),
        "PREDICT": lambda self: self._parse_predict(),
        "SAFE_CAST": lambda self: self._parse_cast(False, safe=True),
        "STRING_AGG": lambda self: self._parse_string_agg(),
        "SUBSTRING": lambda self: self._parse_substring(),
        "TRIM": lambda self: self._parse_trim(),
        "TRY_CAST": lambda self: self._parse_cast(False, safe=True),
        "TRY_CONVERT": lambda self: self._parse_convert(False, safe=True),
    }

    QUERY_MODIFIER_PARSERS = {
        TokenType.MATCH_RECOGNIZE: lambda self: ("match", self._parse_match_recognize()),
        TokenType.PREWHERE: lambda self: ("prewhere", self._parse_prewhere()),
        TokenType.WHERE: lambda self: ("where", self._parse_where()),
        TokenType.GROUP_BY: lambda self: ("group", self._parse_group()),
        TokenType.HAVING: lambda self: ("having", self._parse_having()),
        TokenType.QUALIFY: lambda self: ("qualify", self._parse_qualify()),
        TokenType.WINDOW: lambda self: ("windows", self._parse_window_clause()),
        TokenType.ORDER_BY: lambda self: ("order", self._parse_order()),
        TokenType.LIMIT: lambda self: ("limit", self._parse_limit()),
        TokenType.FETCH: lambda self: ("limit", self._parse_limit()),
        TokenType.OFFSET: lambda self: ("offset", self._parse_offset()),
        TokenType.FOR: lambda self: ("locks", self._parse_locks()),
        TokenType.LOCK: lambda self: ("locks", self._parse_locks()),
        TokenType.TABLE_SAMPLE: lambda self: ("sample", self._parse_table_sample(as_modifier=True)),
        TokenType.USING: lambda self: ("sample", self._parse_table_sample(as_modifier=True)),
        TokenType.CLUSTER_BY: lambda self: (
            "cluster",
            self._parse_sort(exp.Cluster, TokenType.CLUSTER_BY),
        ),
        TokenType.DISTRIBUTE_BY: lambda self: (
            "distribute",
            self._parse_sort(exp.Distribute, TokenType.DISTRIBUTE_BY),
        ),
        TokenType.SORT_BY: lambda self: ("sort", self._parse_sort(exp.Sort, TokenType.SORT_BY)),
        TokenType.CONNECT_BY: lambda self: ("connect", self._parse_connect(skip_start_token=True)),
        TokenType.START_WITH: lambda self: ("connect", self._parse_connect()),
    }

    SET_PARSERS = {
        "GLOBAL": lambda self: self._parse_set_item_assignment("GLOBAL"),
        "LOCAL": lambda self: self._parse_set_item_assignment("LOCAL"),
        "SESSION": lambda self: self._parse_set_item_assignment("SESSION"),
        "TRANSACTION": lambda self: self._parse_set_transaction(),
    }

    SHOW_PARSERS: t.Dict[str, t.Callable] = {}

    TYPE_LITERAL_PARSERS = {
        exp.DataType.Type.JSON: lambda self, this, _: self.expression(exp.ParseJSON, this=this),
    }

    TYPE_CONVERTERS: t.Dict[exp.DataType.Type, t.Callable[[exp.DataType], exp.DataType]] = {}

    DDL_SELECT_TOKENS = {TokenType.SELECT, TokenType.WITH, TokenType.L_PAREN}

    PRE_VOLATILE_TOKENS = {TokenType.CREATE, TokenType.REPLACE, TokenType.UNIQUE}

    TRANSACTION_KIND = {"DEFERRED", "IMMEDIATE", "EXCLUSIVE"}
    TRANSACTION_CHARACTERISTICS: OPTIONS_TYPE = {
        "ISOLATION": (
            ("LEVEL", "REPEATABLE", "READ"),
            ("LEVEL", "READ", "COMMITTED"),
            ("LEVEL", "READ", "UNCOMMITTED"),
            ("LEVEL", "SERIALIZABLE"),
        ),
        "READ": ("WRITE", "ONLY"),
    }

    CONFLICT_ACTIONS: OPTIONS_TYPE = dict.fromkeys(
        ("ABORT", "FAIL", "IGNORE", "REPLACE", "ROLLBACK", "UPDATE"), tuple()
    )
    CONFLICT_ACTIONS["DO"] = ("NOTHING", "UPDATE")

    CREATE_SEQUENCE: OPTIONS_TYPE = {
        "SCALE": ("EXTEND", "NOEXTEND"),
        "SHARD": ("EXTEND", "NOEXTEND"),
        "NO": ("CYCLE", "CACHE", "MAXVALUE", "MINVALUE"),
        **dict.fromkeys(
            (
                "SESSION",
                "GLOBAL",
                "KEEP",
                "NOKEEP",
                "ORDER",
                "NOORDER",
1167 "NOCACHE", 1168 "CYCLE", 1169 "NOCYCLE", 1170 "NOMINVALUE", 1171 "NOMAXVALUE", 1172 "NOSCALE", 1173 "NOSHARD", 1174 ), 1175 tuple(), 1176 ), 1177 } 1178 1179 ISOLATED_LOADING_OPTIONS: OPTIONS_TYPE = {"FOR": ("ALL", "INSERT", "NONE")} 1180 1181 USABLES: OPTIONS_TYPE = dict.fromkeys( 1182 ("ROLE", "WAREHOUSE", "DATABASE", "SCHEMA", "CATALOG"), tuple() 1183 ) 1184 1185 CAST_ACTIONS: OPTIONS_TYPE = dict.fromkeys(("RENAME", "ADD"), ("FIELDS",)) 1186 1187 SCHEMA_BINDING_OPTIONS: OPTIONS_TYPE = { 1188 "TYPE": ("EVOLUTION",), 1189 **dict.fromkeys(("BINDING", "COMPENSATION", "EVOLUTION"), tuple()), 1190 } 1191 1192 KEY_CONSTRAINT_OPTIONS: OPTIONS_TYPE = { 1193 "NOT": ("ENFORCED",), 1194 "MATCH": ( 1195 "FULL", 1196 "PARTIAL", 1197 "SIMPLE", 1198 ), 1199 "INITIALLY": ("DEFERRED", "IMMEDIATE"), 1200 **dict.fromkeys(("DEFERRABLE", "NORELY"), tuple()), 1201 } 1202 1203 INSERT_ALTERNATIVES = {"ABORT", "FAIL", "IGNORE", "REPLACE", "ROLLBACK"} 1204 1205 CLONE_KEYWORDS = {"CLONE", "COPY"} 1206 HISTORICAL_DATA_PREFIX = {"AT", "BEFORE", "END"} 1207 HISTORICAL_DATA_KIND = {"TIMESTAMP", "OFFSET", "STATEMENT", "STREAM"} 1208 1209 OPCLASS_FOLLOW_KEYWORDS = {"ASC", "DESC", "NULLS", "WITH"} 1210 1211 OPTYPE_FOLLOW_TOKENS = {TokenType.COMMA, TokenType.R_PAREN} 1212 1213 TABLE_INDEX_HINT_TOKENS = {TokenType.FORCE, TokenType.IGNORE, TokenType.USE} 1214 1215 VIEW_ATTRIBUTES = {"ENCRYPTION", "SCHEMABINDING", "VIEW_METADATA"} 1216 1217 WINDOW_ALIAS_TOKENS = ID_VAR_TOKENS - {TokenType.ROWS} 1218 WINDOW_BEFORE_PAREN_TOKENS = {TokenType.OVER} 1219 WINDOW_SIDES = {"FOLLOWING", "PRECEDING"} 1220 1221 JSON_KEY_VALUE_SEPARATOR_TOKENS = {TokenType.COLON, TokenType.COMMA, TokenType.IS} 1222 1223 FETCH_TOKENS = ID_VAR_TOKENS - {TokenType.ROW, TokenType.ROWS, TokenType.PERCENT} 1224 1225 ADD_CONSTRAINT_TOKENS = { 1226 TokenType.CONSTRAINT, 1227 TokenType.FOREIGN_KEY, 1228 TokenType.INDEX, 1229 TokenType.KEY, 1230 TokenType.PRIMARY_KEY, 1231 TokenType.UNIQUE, 1232 } 1233 1234 DISTINCT_TOKENS = {TokenType.DISTINCT} 1235 1236 NULL_TOKENS = {TokenType.NULL} 1237 1238 UNNEST_OFFSET_ALIAS_TOKENS = ID_VAR_TOKENS - SET_OPERATIONS 1239 1240 SELECT_START_TOKENS = {TokenType.L_PAREN, TokenType.WITH, TokenType.SELECT} 1241 1242 COPY_INTO_VARLEN_OPTIONS = {"FILE_FORMAT", "COPY_OPTIONS", "FORMAT_OPTIONS", "CREDENTIAL"} 1243 1244 STRICT_CAST = True 1245 1246 PREFIXED_PIVOT_COLUMNS = False 1247 IDENTIFY_PIVOT_STRINGS = False 1248 1249 LOG_DEFAULTS_TO_LN = False 1250 1251 # Whether ADD is present for each column added by ALTER TABLE 1252 ALTER_TABLE_ADD_REQUIRED_FOR_EACH_COLUMN = True 1253 1254 # Whether the table sample clause expects CSV syntax 1255 TABLESAMPLE_CSV = False 1256 1257 # The default method used for table sampling 1258 DEFAULT_SAMPLING_METHOD: t.Optional[str] = None 1259 1260 # Whether the SET command needs a delimiter (e.g. "=") for assignments 1261 SET_REQUIRES_ASSIGNMENT_DELIMITER = True 1262 1263 # Whether the TRIM function expects the characters to trim as its first argument 1264 TRIM_PATTERN_FIRST = False 1265 1266 # Whether string aliases are supported `SELECT COUNT(*) 'count'` 1267 STRING_ALIASES = False 1268 1269 # Whether query modifiers such as LIMIT are attached to the UNION node (vs its right operand) 1270 MODIFIERS_ATTACHED_TO_SET_OP = True 1271 SET_OP_MODIFIERS = {"order", "limit", "offset"} 1272 1273 # Whether to parse IF statements that aren't followed by a left parenthesis as commands 1274 NO_PAREN_IF_COMMANDS = True 1275 1276 # Whether the -> and ->> operators expect documents of type JSON (e.g. 

    # Whether the -> and ->> operators expect documents of type JSON (e.g. Postgres)
    JSON_ARROWS_REQUIRE_JSON_TYPE = False

    # Whether the `:` operator is used to extract a value from a VARIANT column
    COLON_IS_VARIANT_EXTRACT = False

    # Whether or not a VALUES keyword needs to be followed by '(' to form a VALUES clause.
    # If this is True and '(' is not found, the keyword will be treated as an identifier
    VALUES_FOLLOWED_BY_PAREN = True

    # Whether implicit unnesting is supported, e.g. SELECT 1 FROM y.z AS z, z.a (Redshift)
    SUPPORTS_IMPLICIT_UNNEST = False

    # Whether or not interval spans are supported, INTERVAL 1 YEAR TO MONTHS
    INTERVAL_SPANS = True

    # Whether a PARTITION clause can follow a table reference
    SUPPORTS_PARTITION_SELECTION = False

    __slots__ = (
        "error_level",
        "error_message_context",
        "max_errors",
        "dialect",
        "sql",
        "errors",
        "_tokens",
        "_index",
        "_curr",
        "_next",
        "_prev",
        "_prev_comments",
    )

    # Autofilled
    SHOW_TRIE: t.Dict = {}
    SET_TRIE: t.Dict = {}

    def __init__(
        self,
        error_level: t.Optional[ErrorLevel] = None,
        error_message_context: int = 100,
        max_errors: int = 3,
        dialect: DialectType = None,
    ):
        from sqlglot.dialects import Dialect

        self.error_level = error_level or ErrorLevel.IMMEDIATE
        self.error_message_context = error_message_context
        self.max_errors = max_errors
        self.dialect = Dialect.get_or_raise(dialect)
        self.reset()

    def reset(self):
        self.sql = ""
        self.errors = []
        self._tokens = []
        self._index = 0
        self._curr = None
        self._next = None
        self._prev = None
        self._prev_comments = None

    def parse(
        self, raw_tokens: t.List[Token], sql: t.Optional[str] = None
    ) -> t.List[t.Optional[exp.Expression]]:
        """
        Parses a list of tokens and returns a list of syntax trees, one tree
        per parsed SQL statement.

        Args:
            raw_tokens: The list of tokens.
            sql: The original SQL string, used to produce helpful debug messages.

        Returns:
            The list of the produced syntax trees.
        """
        return self._parse(
            parse_method=self.__class__._parse_statement, raw_tokens=raw_tokens, sql=sql
        )
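
    # A hedged usage sketch of parse (using the Tokenizer imported above):
    #
    #   sql = "SELECT a FROM b"
    #   expressions = Parser().parse(Tokenizer().tokenize(sql), sql=sql)
    #   # -> [exp.Select(...)]; passing sql lets error messages show source context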
1375 """ 1376 errors = [] 1377 for expression_type in ensure_list(expression_types): 1378 parser = self.EXPRESSION_PARSERS.get(expression_type) 1379 if not parser: 1380 raise TypeError(f"No parser registered for {expression_type}") 1381 1382 try: 1383 return self._parse(parser, raw_tokens, sql) 1384 except ParseError as e: 1385 e.errors[0]["into_expression"] = expression_type 1386 errors.append(e) 1387 1388 raise ParseError( 1389 f"Failed to parse '{sql or raw_tokens}' into {expression_types}", 1390 errors=merge_errors(errors), 1391 ) from errors[-1] 1392 1393 def _parse( 1394 self, 1395 parse_method: t.Callable[[Parser], t.Optional[exp.Expression]], 1396 raw_tokens: t.List[Token], 1397 sql: t.Optional[str] = None, 1398 ) -> t.List[t.Optional[exp.Expression]]: 1399 self.reset() 1400 self.sql = sql or "" 1401 1402 total = len(raw_tokens) 1403 chunks: t.List[t.List[Token]] = [[]] 1404 1405 for i, token in enumerate(raw_tokens): 1406 if token.token_type == TokenType.SEMICOLON: 1407 if token.comments: 1408 chunks.append([token]) 1409 1410 if i < total - 1: 1411 chunks.append([]) 1412 else: 1413 chunks[-1].append(token) 1414 1415 expressions = [] 1416 1417 for tokens in chunks: 1418 self._index = -1 1419 self._tokens = tokens 1420 self._advance() 1421 1422 expressions.append(parse_method(self)) 1423 1424 if self._index < len(self._tokens): 1425 self.raise_error("Invalid expression / Unexpected token") 1426 1427 self.check_errors() 1428 1429 return expressions 1430 1431 def check_errors(self) -> None: 1432 """Logs or raises any found errors, depending on the chosen error level setting.""" 1433 if self.error_level == ErrorLevel.WARN: 1434 for error in self.errors: 1435 logger.error(str(error)) 1436 elif self.error_level == ErrorLevel.RAISE and self.errors: 1437 raise ParseError( 1438 concat_messages(self.errors, self.max_errors), 1439 errors=merge_errors(self.errors), 1440 ) 1441 1442 def raise_error(self, message: str, token: t.Optional[Token] = None) -> None: 1443 """ 1444 Appends an error in the list of recorded errors or raises it, depending on the chosen 1445 error level setting. 1446 """ 1447 token = token or self._curr or self._prev or Token.string("") 1448 start = token.start 1449 end = token.end + 1 1450 start_context = self.sql[max(start - self.error_message_context, 0) : start] 1451 highlight = self.sql[start:end] 1452 end_context = self.sql[end : end + self.error_message_context] 1453 1454 error = ParseError.new( 1455 f"{message}. Line {token.line}, Col: {token.col}.\n" 1456 f" {start_context}\033[4m{highlight}\033[0m{end_context}", 1457 description=message, 1458 line=token.line, 1459 col=token.col, 1460 start_context=start_context, 1461 highlight=highlight, 1462 end_context=end_context, 1463 ) 1464 1465 if self.error_level == ErrorLevel.IMMEDIATE: 1466 raise error 1467 1468 self.errors.append(error) 1469 1470 def expression( 1471 self, exp_class: t.Type[E], comments: t.Optional[t.List[str]] = None, **kwargs 1472 ) -> E: 1473 """ 1474 Creates a new, validated Expression. 1475 1476 Args: 1477 exp_class: The expression class to instantiate. 1478 comments: An optional list of comments to attach to the expression. 1479 kwargs: The arguments to set for the expression along with their respective values. 1480 1481 Returns: 1482 The target expression. 
1483 """ 1484 instance = exp_class(**kwargs) 1485 instance.add_comments(comments) if comments else self._add_comments(instance) 1486 return self.validate_expression(instance) 1487 1488 def _add_comments(self, expression: t.Optional[exp.Expression]) -> None: 1489 if expression and self._prev_comments: 1490 expression.add_comments(self._prev_comments) 1491 self._prev_comments = None 1492 1493 def validate_expression(self, expression: E, args: t.Optional[t.List] = None) -> E: 1494 """ 1495 Validates an Expression, making sure that all its mandatory arguments are set. 1496 1497 Args: 1498 expression: The expression to validate. 1499 args: An optional list of items that was used to instantiate the expression, if it's a Func. 1500 1501 Returns: 1502 The validated expression. 1503 """ 1504 if self.error_level != ErrorLevel.IGNORE: 1505 for error_message in expression.error_messages(args): 1506 self.raise_error(error_message) 1507 1508 return expression 1509 1510 def _find_sql(self, start: Token, end: Token) -> str: 1511 return self.sql[start.start : end.end + 1] 1512 1513 def _is_connected(self) -> bool: 1514 return self._prev and self._curr and self._prev.end + 1 == self._curr.start 1515 1516 def _advance(self, times: int = 1) -> None: 1517 self._index += times 1518 self._curr = seq_get(self._tokens, self._index) 1519 self._next = seq_get(self._tokens, self._index + 1) 1520 1521 if self._index > 0: 1522 self._prev = self._tokens[self._index - 1] 1523 self._prev_comments = self._prev.comments 1524 else: 1525 self._prev = None 1526 self._prev_comments = None 1527 1528 def _retreat(self, index: int) -> None: 1529 if index != self._index: 1530 self._advance(index - self._index) 1531 1532 def _warn_unsupported(self) -> None: 1533 if len(self._tokens) <= 1: 1534 return 1535 1536 # We use _find_sql because self.sql may comprise multiple chunks, and we're only 1537 # interested in emitting a warning for the one being currently processed. 1538 sql = self._find_sql(self._tokens[0], self._tokens[-1])[: self.error_message_context] 1539 1540 logger.warning( 1541 f"'{sql}' contains unsupported syntax. Falling back to parsing as a 'Command'." 1542 ) 1543 1544 def _parse_command(self) -> exp.Command: 1545 self._warn_unsupported() 1546 return self.expression( 1547 exp.Command, 1548 comments=self._prev_comments, 1549 this=self._prev.text.upper(), 1550 expression=self._parse_string(), 1551 ) 1552 1553 def _try_parse(self, parse_method: t.Callable[[], T], retreat: bool = False) -> t.Optional[T]: 1554 """ 1555 Attemps to backtrack if a parse function that contains a try/catch internally raises an error. 

    def _find_sql(self, start: Token, end: Token) -> str:
        return self.sql[start.start : end.end + 1]

    def _is_connected(self) -> bool:
        return self._prev and self._curr and self._prev.end + 1 == self._curr.start

    def _advance(self, times: int = 1) -> None:
        self._index += times
        self._curr = seq_get(self._tokens, self._index)
        self._next = seq_get(self._tokens, self._index + 1)

        if self._index > 0:
            self._prev = self._tokens[self._index - 1]
            self._prev_comments = self._prev.comments
        else:
            self._prev = None
            self._prev_comments = None

    def _retreat(self, index: int) -> None:
        if index != self._index:
            self._advance(index - self._index)

    def _warn_unsupported(self) -> None:
        if len(self._tokens) <= 1:
            return

        # We use _find_sql because self.sql may comprise multiple chunks, and we're only
        # interested in emitting a warning for the one being currently processed.
        sql = self._find_sql(self._tokens[0], self._tokens[-1])[: self.error_message_context]

        logger.warning(
            f"'{sql}' contains unsupported syntax. Falling back to parsing as a 'Command'."
        )

    def _parse_command(self) -> exp.Command:
        self._warn_unsupported()
        return self.expression(
            exp.Command,
            comments=self._prev_comments,
            this=self._prev.text.upper(),
            expression=self._parse_string(),
        )

    def _try_parse(self, parse_method: t.Callable[[], T], retreat: bool = False) -> t.Optional[T]:
        """
        Attempts to backtrack if a parse function that contains a try/catch internally raises an error.
        This behavior can be different depending on the user-set ErrorLevel, so _try_parse aims to
        solve this by setting & resetting the parser state accordingly.
        """
        index = self._index
        error_level = self.error_level

        self.error_level = ErrorLevel.IMMEDIATE
        try:
            this = parse_method()
        except ParseError:
            this = None
        finally:
            if not this or retreat:
                self._retreat(index)
            self.error_level = error_level

        return this
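
    # The parser's cursor is just an integer index into self._tokens, so speculative parsing
    # is cheap: _try_parse records self._index up front and _retreat restores it (via a
    # negative _advance) when the attempted parse fails or a retreat is requested.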
1649     def _parse_statement(self) -> t.Optional[exp.Expression]:
1650         if self._curr is None:
1651             return None
1652
1653         if self._match_set(self.STATEMENT_PARSERS):
1654             return self.STATEMENT_PARSERS[self._prev.token_type](self)
1655
1656         if self._match_set(self.dialect.tokenizer.COMMANDS):
1657             return self._parse_command()
1658
1659         expression = self._parse_expression()
1660         expression = self._parse_set_operations(expression) if expression else self._parse_select()
1661         return self._parse_query_modifiers(expression)
1662
1663     def _parse_drop(self, exists: bool = False) -> exp.Drop | exp.Command:
1664         start = self._prev
1665         temporary = self._match(TokenType.TEMPORARY)
1666         materialized = self._match_text_seq("MATERIALIZED")
1667
1668         kind = self._match_set(self.CREATABLES) and self._prev.text
1669         if not kind:
1670             return self._parse_as_command(start)
1671
1672         if_exists = exists or self._parse_exists()
1673         table = self._parse_table_parts(
1674             schema=True, is_db_reference=self._prev.token_type == TokenType.SCHEMA
1675         )
1676
1677         cluster = self._parse_on_property() if self._match(TokenType.ON) else None
1678
1679         if self._match(TokenType.L_PAREN, advance=False):
1680             expressions = self._parse_wrapped_csv(self._parse_types)
1681         else:
1682             expressions = None
1683
1684         return self.expression(
1685             exp.Drop,
1686             comments=start.comments,
1687             exists=if_exists,
1688             this=table,
1689             expressions=expressions,
1690             kind=kind.upper(),
1691             temporary=temporary,
1692             materialized=materialized,
1693             cascade=self._match_text_seq("CASCADE"),
1694             constraints=self._match_text_seq("CONSTRAINTS"),
1695             purge=self._match_text_seq("PURGE"),
1696             cluster=cluster,
1697         )
1698
1699     def _parse_exists(self, not_: bool = False) -> t.Optional[bool]:
1700         return (
1701             self._match_text_seq("IF")
1702             and (not not_ or self._match(TokenType.NOT))
1703             and self._match(TokenType.EXISTS)
1704         )
1705
1706     def _parse_create(self) -> exp.Create | exp.Command:
1707         # Note: this can't be None because we've matched a statement parser
1708         start = self._prev
1709         comments = self._prev_comments
1710
1711         replace = (
1712             start.token_type == TokenType.REPLACE
1713             or self._match_pair(TokenType.OR, TokenType.REPLACE)
1714             or self._match_pair(TokenType.OR, TokenType.ALTER)
1715         )
1716         refresh = self._match_pair(TokenType.OR, TokenType.REFRESH)
1717
1718         unique = self._match(TokenType.UNIQUE)
1719
1720         if self._match_text_seq("CLUSTERED", "COLUMNSTORE"):
1721             clustered = True
1722         elif self._match_text_seq("NONCLUSTERED", "COLUMNSTORE") or self._match_text_seq(
1723             "COLUMNSTORE"
1724         ):
1725             clustered = False
1726         else:
1727             clustered = None
1728
1729         if self._match_pair(TokenType.TABLE, TokenType.FUNCTION, advance=False):
1730             self._advance()
1731
1732         properties = None
1733         create_token = self._match_set(self.CREATABLES) and self._prev
1734
1735         if not create_token:
1736             # exp.Properties.Location.POST_CREATE
1737             properties = self._parse_properties()
1738             create_token = self._match_set(self.CREATABLES) and self._prev
1739
1740             if not properties or not create_token:
1741                 return self._parse_as_command(start)
1742
1743         concurrently = self._match_text_seq("CONCURRENTLY")
1744         exists = self._parse_exists(not_=True)
1745         this = None
1746         expression: t.Optional[exp.Expression] = None
1747         indexes = None
1748         no_schema_binding = None
1749         begin = None
1750         end = None
1751         clone = None
1752
1753         def extend_props(temp_props: t.Optional[exp.Properties]) -> None:
1754             nonlocal properties
1755             if properties and temp_props:
1756                 properties.expressions.extend(temp_props.expressions)
1757             elif temp_props:
1758                 properties = temp_props
1759
1760         if create_token.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE):
1761             this = self._parse_user_defined_function(kind=create_token.token_type)
1762
1763             # exp.Properties.Location.POST_SCHEMA ("schema" here is the UDF's type signature)
1764             extend_props(self._parse_properties())
1765
1766             expression = self._match(TokenType.ALIAS) and self._parse_heredoc()
1767             extend_props(self._parse_properties())
1768
1769             if not expression:
1770                 if self._match(TokenType.COMMAND):
1771                     expression = self._parse_as_command(self._prev)
1772                 else:
1773                     begin = self._match(TokenType.BEGIN)
1774                     return_ = self._match_text_seq("RETURN")
1775
1776                     if self._match(TokenType.STRING, advance=False):
1777                         # Takes care of BigQuery's JavaScript UDF definitions that end in an OPTIONS property
1778                         # https://cloud.google.com/bigquery/docs/reference/standard-sql/data-definition-language#create_function_statement
1779                         expression = self._parse_string()
1780                         extend_props(self._parse_properties())
1781                     else:
1782                         expression = self._parse_statement()
1783
1784                     end = self._match_text_seq("END")
1785
1786                     if return_:
1787                         expression = self.expression(exp.Return, this=expression)
1788         elif create_token.token_type == TokenType.INDEX:
1789             # Postgres allows anonymous indexes, e.g. CREATE INDEX IF NOT EXISTS ON t(c)
1790             if not self._match(TokenType.ON):
1791                 index = self._parse_id_var()
1792                 anonymous = False
1793             else:
1794                 index = None
1795                 anonymous = True
1796
1797             this = self._parse_index(index=index, anonymous=anonymous)
1798         elif create_token.token_type in self.DB_CREATABLES:
1799             table_parts = self._parse_table_parts(
1800                 schema=True, is_db_reference=create_token.token_type == TokenType.SCHEMA
1801             )
1802
1803             # exp.Properties.Location.POST_NAME
1804             self._match(TokenType.COMMA)
1805             extend_props(self._parse_properties(before=True))
1806
1807             this = self._parse_schema(this=table_parts)
1808
1809             # exp.Properties.Location.POST_SCHEMA and POST_WITH
1810             extend_props(self._parse_properties())
1811
1812             self._match(TokenType.ALIAS)
1813             if not self._match_set(self.DDL_SELECT_TOKENS, advance=False):
1814                 # exp.Properties.Location.POST_ALIAS
1815                 extend_props(self._parse_properties())
1816
1817             if create_token.token_type == TokenType.SEQUENCE:
1818                 expression = self._parse_types()
1819                 extend_props(self._parse_properties())
1820             else:
1821                 expression = self._parse_ddl_select()
1822
1823             if create_token.token_type == TokenType.TABLE:
1824                 # exp.Properties.Location.POST_EXPRESSION
1825                 extend_props(self._parse_properties())
1826
1827                 indexes = []
1828                 while True:
1829                     index = self._parse_index()
1830
1831                     # exp.Properties.Location.POST_INDEX
1832                     extend_props(self._parse_properties())
1833                     if not index:
1834                         break
1835                     else:
1836                         self._match(TokenType.COMMA)
1837                         indexes.append(index)
1838             elif create_token.token_type == TokenType.VIEW:
1839                 if self._match_text_seq("WITH", "NO", "SCHEMA", "BINDING"):
1840                     no_schema_binding = True
1841
1842             shallow = self._match_text_seq("SHALLOW")
1843
1844             if self._match_texts(self.CLONE_KEYWORDS):
1845                 copy = self._prev.text.lower() == "copy"
1846                 clone = self.expression(
1847                     exp.Clone, this=self._parse_table(schema=True), shallow=shallow, copy=copy
1848                 )
1849
1850         if self._curr and not self._match_set((TokenType.R_PAREN, TokenType.COMMA), advance=False):
1851             return self._parse_as_command(start)
1852
1853         return self.expression(
1854             exp.Create,
1855             comments=comments,
1856             this=this,
1857             kind=create_token.text.upper(),
1858             replace=replace,
1859             refresh=refresh,
1860             unique=unique,
1861             expression=expression,
1862             exists=exists,
1863             properties=properties,
1864             indexes=indexes,
1865             no_schema_binding=no_schema_binding,
1866             begin=begin,
1867             end=end,
1868             clone=clone,
1869             concurrently=concurrently,
1870             clustered=clustered,
1871         )
1872
1873     def _parse_sequence_properties(self) -> t.Optional[exp.SequenceProperties]:
1874         seq = exp.SequenceProperties()
1875
1876         options = []
1877         index = self._index
1878
1879         while self._curr:
1880             self._match(TokenType.COMMA)
1881             if
self._match_text_seq("INCREMENT"): 1882 self._match_text_seq("BY") 1883 self._match_text_seq("=") 1884 seq.set("increment", self._parse_term()) 1885 elif self._match_text_seq("MINVALUE"): 1886 seq.set("minvalue", self._parse_term()) 1887 elif self._match_text_seq("MAXVALUE"): 1888 seq.set("maxvalue", self._parse_term()) 1889 elif self._match(TokenType.START_WITH) or self._match_text_seq("START"): 1890 self._match_text_seq("=") 1891 seq.set("start", self._parse_term()) 1892 elif self._match_text_seq("CACHE"): 1893 # T-SQL allows empty CACHE which is initialized dynamically 1894 seq.set("cache", self._parse_number() or True) 1895 elif self._match_text_seq("OWNED", "BY"): 1896 # "OWNED BY NONE" is the default 1897 seq.set("owned", None if self._match_text_seq("NONE") else self._parse_column()) 1898 else: 1899 opt = self._parse_var_from_options(self.CREATE_SEQUENCE, raise_unmatched=False) 1900 if opt: 1901 options.append(opt) 1902 else: 1903 break 1904 1905 seq.set("options", options if options else None) 1906 return None if self._index == index else seq 1907 1908 def _parse_property_before(self) -> t.Optional[exp.Expression]: 1909 # only used for teradata currently 1910 self._match(TokenType.COMMA) 1911 1912 kwargs = { 1913 "no": self._match_text_seq("NO"), 1914 "dual": self._match_text_seq("DUAL"), 1915 "before": self._match_text_seq("BEFORE"), 1916 "default": self._match_text_seq("DEFAULT"), 1917 "local": (self._match_text_seq("LOCAL") and "LOCAL") 1918 or (self._match_text_seq("NOT", "LOCAL") and "NOT LOCAL"), 1919 "after": self._match_text_seq("AFTER"), 1920 "minimum": self._match_texts(("MIN", "MINIMUM")), 1921 "maximum": self._match_texts(("MAX", "MAXIMUM")), 1922 } 1923 1924 if self._match_texts(self.PROPERTY_PARSERS): 1925 parser = self.PROPERTY_PARSERS[self._prev.text.upper()] 1926 try: 1927 return parser(self, **{k: v for k, v in kwargs.items() if v}) 1928 except TypeError: 1929 self.raise_error(f"Cannot parse property '{self._prev.text}'") 1930 1931 return None 1932 1933 def _parse_wrapped_properties(self) -> t.List[exp.Expression]: 1934 return self._parse_wrapped_csv(self._parse_property) 1935 1936 def _parse_property(self) -> t.Optional[exp.Expression]: 1937 if self._match_texts(self.PROPERTY_PARSERS): 1938 return self.PROPERTY_PARSERS[self._prev.text.upper()](self) 1939 1940 if self._match(TokenType.DEFAULT) and self._match_texts(self.PROPERTY_PARSERS): 1941 return self.PROPERTY_PARSERS[self._prev.text.upper()](self, default=True) 1942 1943 if self._match_text_seq("COMPOUND", "SORTKEY"): 1944 return self._parse_sortkey(compound=True) 1945 1946 if self._match_text_seq("SQL", "SECURITY"): 1947 return self.expression(exp.SqlSecurityProperty, definer=self._match_text_seq("DEFINER")) 1948 1949 index = self._index 1950 key = self._parse_column() 1951 1952 if not self._match(TokenType.EQ): 1953 self._retreat(index) 1954 return self._parse_sequence_properties() 1955 1956 # Transform the key to exp.Dot if it's dotted identifiers wrapped in exp.Column or to exp.Var otherwise 1957 if isinstance(key, exp.Column): 1958 key = key.to_dot() if len(key.parts) > 1 else exp.var(key.name) 1959 1960 value = self._parse_bitwise() or self._parse_var(any_token=True) 1961 1962 # Transform the value to exp.Var if it was parsed as exp.Column(exp.Identifier()) 1963 if isinstance(value, exp.Column): 1964 value = exp.var(value.name) 1965 1966 return self.expression(exp.Property, this=key, value=value) 1967 1968 def _parse_stored(self) -> exp.FileFormatProperty: 1969 self._match(TokenType.ALIAS) 1970 1971 
input_format = self._parse_string() if self._match_text_seq("INPUTFORMAT") else None 1972 output_format = self._parse_string() if self._match_text_seq("OUTPUTFORMAT") else None 1973 1974 return self.expression( 1975 exp.FileFormatProperty, 1976 this=( 1977 self.expression( 1978 exp.InputOutputFormat, input_format=input_format, output_format=output_format 1979 ) 1980 if input_format or output_format 1981 else self._parse_var_or_string() or self._parse_number() or self._parse_id_var() 1982 ), 1983 ) 1984 1985 def _parse_unquoted_field(self) -> t.Optional[exp.Expression]: 1986 field = self._parse_field() 1987 if isinstance(field, exp.Identifier) and not field.quoted: 1988 field = exp.var(field) 1989 1990 return field 1991 1992 def _parse_property_assignment(self, exp_class: t.Type[E], **kwargs: t.Any) -> E: 1993 self._match(TokenType.EQ) 1994 self._match(TokenType.ALIAS) 1995 1996 return self.expression(exp_class, this=self._parse_unquoted_field(), **kwargs) 1997 1998 def _parse_properties(self, before: t.Optional[bool] = None) -> t.Optional[exp.Properties]: 1999 properties = [] 2000 while True: 2001 if before: 2002 prop = self._parse_property_before() 2003 else: 2004 prop = self._parse_property() 2005 if not prop: 2006 break 2007 for p in ensure_list(prop): 2008 properties.append(p) 2009 2010 if properties: 2011 return self.expression(exp.Properties, expressions=properties) 2012 2013 return None 2014 2015 def _parse_fallback(self, no: bool = False) -> exp.FallbackProperty: 2016 return self.expression( 2017 exp.FallbackProperty, no=no, protection=self._match_text_seq("PROTECTION") 2018 ) 2019 2020 def _parse_settings_property(self) -> exp.SettingsProperty: 2021 return self.expression( 2022 exp.SettingsProperty, expressions=self._parse_csv(self._parse_assignment) 2023 ) 2024 2025 def _parse_volatile_property(self) -> exp.VolatileProperty | exp.StabilityProperty: 2026 if self._index >= 2: 2027 pre_volatile_token = self._tokens[self._index - 2] 2028 else: 2029 pre_volatile_token = None 2030 2031 if pre_volatile_token and pre_volatile_token.token_type in self.PRE_VOLATILE_TOKENS: 2032 return exp.VolatileProperty() 2033 2034 return self.expression(exp.StabilityProperty, this=exp.Literal.string("VOLATILE")) 2035 2036 def _parse_retention_period(self) -> exp.Var: 2037 # Parse TSQL's HISTORY_RETENTION_PERIOD: {INFINITE | <number> DAY | DAYS | MONTH ...} 2038 number = self._parse_number() 2039 number_str = f"{number} " if number else "" 2040 unit = self._parse_var(any_token=True) 2041 return exp.var(f"{number_str}{unit}") 2042 2043 def _parse_system_versioning_property( 2044 self, with_: bool = False 2045 ) -> exp.WithSystemVersioningProperty: 2046 self._match(TokenType.EQ) 2047 prop = self.expression( 2048 exp.WithSystemVersioningProperty, 2049 **{ # type: ignore 2050 "on": True, 2051 "with": with_, 2052 }, 2053 ) 2054 2055 if self._match_text_seq("OFF"): 2056 prop.set("on", False) 2057 return prop 2058 2059 self._match(TokenType.ON) 2060 if self._match(TokenType.L_PAREN): 2061 while self._curr and not self._match(TokenType.R_PAREN): 2062 if self._match_text_seq("HISTORY_TABLE", "="): 2063 prop.set("this", self._parse_table_parts()) 2064 elif self._match_text_seq("DATA_CONSISTENCY_CHECK", "="): 2065 prop.set("data_consistency", self._advance_any() and self._prev.text.upper()) 2066 elif self._match_text_seq("HISTORY_RETENTION_PERIOD", "="): 2067 prop.set("retention_period", self._parse_retention_period()) 2068 2069 self._match(TokenType.COMMA) 2070 2071 return prop 2072 2073 def 
_parse_data_deletion_property(self) -> exp.DataDeletionProperty: 2074 self._match(TokenType.EQ) 2075 on = self._match_text_seq("ON") or not self._match_text_seq("OFF") 2076 prop = self.expression(exp.DataDeletionProperty, on=on) 2077 2078 if self._match(TokenType.L_PAREN): 2079 while self._curr and not self._match(TokenType.R_PAREN): 2080 if self._match_text_seq("FILTER_COLUMN", "="): 2081 prop.set("filter_column", self._parse_column()) 2082 elif self._match_text_seq("RETENTION_PERIOD", "="): 2083 prop.set("retention_period", self._parse_retention_period()) 2084 2085 self._match(TokenType.COMMA) 2086 2087 return prop 2088 2089 def _parse_with_property(self) -> t.Optional[exp.Expression] | t.List[exp.Expression]: 2090 if self._match_text_seq("(", "SYSTEM_VERSIONING"): 2091 prop = self._parse_system_versioning_property(with_=True) 2092 self._match_r_paren() 2093 return prop 2094 2095 if self._match(TokenType.L_PAREN, advance=False): 2096 return self._parse_wrapped_properties() 2097 2098 if self._match_text_seq("JOURNAL"): 2099 return self._parse_withjournaltable() 2100 2101 if self._match_texts(self.VIEW_ATTRIBUTES): 2102 return self.expression(exp.ViewAttributeProperty, this=self._prev.text.upper()) 2103 2104 if self._match_text_seq("DATA"): 2105 return self._parse_withdata(no=False) 2106 elif self._match_text_seq("NO", "DATA"): 2107 return self._parse_withdata(no=True) 2108 2109 if self._match(TokenType.SERDE_PROPERTIES, advance=False): 2110 return self._parse_serde_properties(with_=True) 2111 2112 if self._match(TokenType.SCHEMA): 2113 return self.expression( 2114 exp.WithSchemaBindingProperty, 2115 this=self._parse_var_from_options(self.SCHEMA_BINDING_OPTIONS), 2116 ) 2117 2118 if not self._next: 2119 return None 2120 2121 return self._parse_withisolatedloading() 2122 2123 # https://dev.mysql.com/doc/refman/8.0/en/create-view.html 2124 def _parse_definer(self) -> t.Optional[exp.DefinerProperty]: 2125 self._match(TokenType.EQ) 2126 2127 user = self._parse_id_var() 2128 self._match(TokenType.PARAMETER) 2129 host = self._parse_id_var() or (self._match(TokenType.MOD) and self._prev.text) 2130 2131 if not user or not host: 2132 return None 2133 2134 return exp.DefinerProperty(this=f"{user}@{host}") 2135 2136 def _parse_withjournaltable(self) -> exp.WithJournalTableProperty: 2137 self._match(TokenType.TABLE) 2138 self._match(TokenType.EQ) 2139 return self.expression(exp.WithJournalTableProperty, this=self._parse_table_parts()) 2140 2141 def _parse_log(self, no: bool = False) -> exp.LogProperty: 2142 return self.expression(exp.LogProperty, no=no) 2143 2144 def _parse_journal(self, **kwargs) -> exp.JournalProperty: 2145 return self.expression(exp.JournalProperty, **kwargs) 2146 2147 def _parse_checksum(self) -> exp.ChecksumProperty: 2148 self._match(TokenType.EQ) 2149 2150 on = None 2151 if self._match(TokenType.ON): 2152 on = True 2153 elif self._match_text_seq("OFF"): 2154 on = False 2155 2156 return self.expression(exp.ChecksumProperty, on=on, default=self._match(TokenType.DEFAULT)) 2157 2158 def _parse_cluster(self, wrapped: bool = False) -> exp.Cluster: 2159 return self.expression( 2160 exp.Cluster, 2161 expressions=( 2162 self._parse_wrapped_csv(self._parse_ordered) 2163 if wrapped 2164 else self._parse_csv(self._parse_ordered) 2165 ), 2166 ) 2167 2168 def _parse_clustered_by(self) -> exp.ClusteredByProperty: 2169 self._match_text_seq("BY") 2170 2171 self._match_l_paren() 2172 expressions = self._parse_csv(self._parse_column) 2173 self._match_r_paren() 2174 2175 if 
self._match_text_seq("SORTED", "BY"): 2176 self._match_l_paren() 2177 sorted_by = self._parse_csv(self._parse_ordered) 2178 self._match_r_paren() 2179 else: 2180 sorted_by = None 2181 2182 self._match(TokenType.INTO) 2183 buckets = self._parse_number() 2184 self._match_text_seq("BUCKETS") 2185 2186 return self.expression( 2187 exp.ClusteredByProperty, 2188 expressions=expressions, 2189 sorted_by=sorted_by, 2190 buckets=buckets, 2191 ) 2192 2193 def _parse_copy_property(self) -> t.Optional[exp.CopyGrantsProperty]: 2194 if not self._match_text_seq("GRANTS"): 2195 self._retreat(self._index - 1) 2196 return None 2197 2198 return self.expression(exp.CopyGrantsProperty) 2199 2200 def _parse_freespace(self) -> exp.FreespaceProperty: 2201 self._match(TokenType.EQ) 2202 return self.expression( 2203 exp.FreespaceProperty, this=self._parse_number(), percent=self._match(TokenType.PERCENT) 2204 ) 2205 2206 def _parse_mergeblockratio( 2207 self, no: bool = False, default: bool = False 2208 ) -> exp.MergeBlockRatioProperty: 2209 if self._match(TokenType.EQ): 2210 return self.expression( 2211 exp.MergeBlockRatioProperty, 2212 this=self._parse_number(), 2213 percent=self._match(TokenType.PERCENT), 2214 ) 2215 2216 return self.expression(exp.MergeBlockRatioProperty, no=no, default=default) 2217 2218 def _parse_datablocksize( 2219 self, 2220 default: t.Optional[bool] = None, 2221 minimum: t.Optional[bool] = None, 2222 maximum: t.Optional[bool] = None, 2223 ) -> exp.DataBlocksizeProperty: 2224 self._match(TokenType.EQ) 2225 size = self._parse_number() 2226 2227 units = None 2228 if self._match_texts(("BYTES", "KBYTES", "KILOBYTES")): 2229 units = self._prev.text 2230 2231 return self.expression( 2232 exp.DataBlocksizeProperty, 2233 size=size, 2234 units=units, 2235 default=default, 2236 minimum=minimum, 2237 maximum=maximum, 2238 ) 2239 2240 def _parse_blockcompression(self) -> exp.BlockCompressionProperty: 2241 self._match(TokenType.EQ) 2242 always = self._match_text_seq("ALWAYS") 2243 manual = self._match_text_seq("MANUAL") 2244 never = self._match_text_seq("NEVER") 2245 default = self._match_text_seq("DEFAULT") 2246 2247 autotemp = None 2248 if self._match_text_seq("AUTOTEMP"): 2249 autotemp = self._parse_schema() 2250 2251 return self.expression( 2252 exp.BlockCompressionProperty, 2253 always=always, 2254 manual=manual, 2255 never=never, 2256 default=default, 2257 autotemp=autotemp, 2258 ) 2259 2260 def _parse_withisolatedloading(self) -> t.Optional[exp.IsolatedLoadingProperty]: 2261 index = self._index 2262 no = self._match_text_seq("NO") 2263 concurrent = self._match_text_seq("CONCURRENT") 2264 2265 if not self._match_text_seq("ISOLATED", "LOADING"): 2266 self._retreat(index) 2267 return None 2268 2269 target = self._parse_var_from_options(self.ISOLATED_LOADING_OPTIONS, raise_unmatched=False) 2270 return self.expression( 2271 exp.IsolatedLoadingProperty, no=no, concurrent=concurrent, target=target 2272 ) 2273 2274 def _parse_locking(self) -> exp.LockingProperty: 2275 if self._match(TokenType.TABLE): 2276 kind = "TABLE" 2277 elif self._match(TokenType.VIEW): 2278 kind = "VIEW" 2279 elif self._match(TokenType.ROW): 2280 kind = "ROW" 2281 elif self._match_text_seq("DATABASE"): 2282 kind = "DATABASE" 2283 else: 2284 kind = None 2285 2286 if kind in ("DATABASE", "TABLE", "VIEW"): 2287 this = self._parse_table_parts() 2288 else: 2289 this = None 2290 2291 if self._match(TokenType.FOR): 2292 for_or_in = "FOR" 2293 elif self._match(TokenType.IN): 2294 for_or_in = "IN" 2295 else: 2296 for_or_in = None 2297 2298 
if self._match_text_seq("ACCESS"): 2299 lock_type = "ACCESS" 2300 elif self._match_texts(("EXCL", "EXCLUSIVE")): 2301 lock_type = "EXCLUSIVE" 2302 elif self._match_text_seq("SHARE"): 2303 lock_type = "SHARE" 2304 elif self._match_text_seq("READ"): 2305 lock_type = "READ" 2306 elif self._match_text_seq("WRITE"): 2307 lock_type = "WRITE" 2308 elif self._match_text_seq("CHECKSUM"): 2309 lock_type = "CHECKSUM" 2310 else: 2311 lock_type = None 2312 2313 override = self._match_text_seq("OVERRIDE") 2314 2315 return self.expression( 2316 exp.LockingProperty, 2317 this=this, 2318 kind=kind, 2319 for_or_in=for_or_in, 2320 lock_type=lock_type, 2321 override=override, 2322 ) 2323 2324 def _parse_partition_by(self) -> t.List[exp.Expression]: 2325 if self._match(TokenType.PARTITION_BY): 2326 return self._parse_csv(self._parse_assignment) 2327 return [] 2328 2329 def _parse_partition_bound_spec(self) -> exp.PartitionBoundSpec: 2330 def _parse_partition_bound_expr() -> t.Optional[exp.Expression]: 2331 if self._match_text_seq("MINVALUE"): 2332 return exp.var("MINVALUE") 2333 if self._match_text_seq("MAXVALUE"): 2334 return exp.var("MAXVALUE") 2335 return self._parse_bitwise() 2336 2337 this: t.Optional[exp.Expression | t.List[exp.Expression]] = None 2338 expression = None 2339 from_expressions = None 2340 to_expressions = None 2341 2342 if self._match(TokenType.IN): 2343 this = self._parse_wrapped_csv(self._parse_bitwise) 2344 elif self._match(TokenType.FROM): 2345 from_expressions = self._parse_wrapped_csv(_parse_partition_bound_expr) 2346 self._match_text_seq("TO") 2347 to_expressions = self._parse_wrapped_csv(_parse_partition_bound_expr) 2348 elif self._match_text_seq("WITH", "(", "MODULUS"): 2349 this = self._parse_number() 2350 self._match_text_seq(",", "REMAINDER") 2351 expression = self._parse_number() 2352 self._match_r_paren() 2353 else: 2354 self.raise_error("Failed to parse partition bound spec.") 2355 2356 return self.expression( 2357 exp.PartitionBoundSpec, 2358 this=this, 2359 expression=expression, 2360 from_expressions=from_expressions, 2361 to_expressions=to_expressions, 2362 ) 2363 2364 # https://www.postgresql.org/docs/current/sql-createtable.html 2365 def _parse_partitioned_of(self) -> t.Optional[exp.PartitionedOfProperty]: 2366 if not self._match_text_seq("OF"): 2367 self._retreat(self._index - 1) 2368 return None 2369 2370 this = self._parse_table(schema=True) 2371 2372 if self._match(TokenType.DEFAULT): 2373 expression: exp.Var | exp.PartitionBoundSpec = exp.var("DEFAULT") 2374 elif self._match_text_seq("FOR", "VALUES"): 2375 expression = self._parse_partition_bound_spec() 2376 else: 2377 self.raise_error("Expecting either DEFAULT or FOR VALUES clause.") 2378 2379 return self.expression(exp.PartitionedOfProperty, this=this, expression=expression) 2380 2381 def _parse_partitioned_by(self) -> exp.PartitionedByProperty: 2382 self._match(TokenType.EQ) 2383 return self.expression( 2384 exp.PartitionedByProperty, 2385 this=self._parse_schema() or self._parse_bracket(self._parse_field()), 2386 ) 2387 2388 def _parse_withdata(self, no: bool = False) -> exp.WithDataProperty: 2389 if self._match_text_seq("AND", "STATISTICS"): 2390 statistics = True 2391 elif self._match_text_seq("AND", "NO", "STATISTICS"): 2392 statistics = False 2393 else: 2394 statistics = None 2395 2396 return self.expression(exp.WithDataProperty, no=no, statistics=statistics) 2397 2398 def _parse_contains_property(self) -> t.Optional[exp.SqlReadWriteProperty]: 2399 if self._match_text_seq("SQL"): 2400 return 
self.expression(exp.SqlReadWriteProperty, this="CONTAINS SQL") 2401 return None 2402 2403 def _parse_modifies_property(self) -> t.Optional[exp.SqlReadWriteProperty]: 2404 if self._match_text_seq("SQL", "DATA"): 2405 return self.expression(exp.SqlReadWriteProperty, this="MODIFIES SQL DATA") 2406 return None 2407 2408 def _parse_no_property(self) -> t.Optional[exp.Expression]: 2409 if self._match_text_seq("PRIMARY", "INDEX"): 2410 return exp.NoPrimaryIndexProperty() 2411 if self._match_text_seq("SQL"): 2412 return self.expression(exp.SqlReadWriteProperty, this="NO SQL") 2413 return None 2414 2415 def _parse_on_property(self) -> t.Optional[exp.Expression]: 2416 if self._match_text_seq("COMMIT", "PRESERVE", "ROWS"): 2417 return exp.OnCommitProperty() 2418 if self._match_text_seq("COMMIT", "DELETE", "ROWS"): 2419 return exp.OnCommitProperty(delete=True) 2420 return self.expression(exp.OnProperty, this=self._parse_schema(self._parse_id_var())) 2421 2422 def _parse_reads_property(self) -> t.Optional[exp.SqlReadWriteProperty]: 2423 if self._match_text_seq("SQL", "DATA"): 2424 return self.expression(exp.SqlReadWriteProperty, this="READS SQL DATA") 2425 return None 2426 2427 def _parse_distkey(self) -> exp.DistKeyProperty: 2428 return self.expression(exp.DistKeyProperty, this=self._parse_wrapped(self._parse_id_var)) 2429 2430 def _parse_create_like(self) -> t.Optional[exp.LikeProperty]: 2431 table = self._parse_table(schema=True) 2432 2433 options = [] 2434 while self._match_texts(("INCLUDING", "EXCLUDING")): 2435 this = self._prev.text.upper() 2436 2437 id_var = self._parse_id_var() 2438 if not id_var: 2439 return None 2440 2441 options.append( 2442 self.expression(exp.Property, this=this, value=exp.var(id_var.this.upper())) 2443 ) 2444 2445 return self.expression(exp.LikeProperty, this=table, expressions=options) 2446 2447 def _parse_sortkey(self, compound: bool = False) -> exp.SortKeyProperty: 2448 return self.expression( 2449 exp.SortKeyProperty, this=self._parse_wrapped_id_vars(), compound=compound 2450 ) 2451 2452 def _parse_character_set(self, default: bool = False) -> exp.CharacterSetProperty: 2453 self._match(TokenType.EQ) 2454 return self.expression( 2455 exp.CharacterSetProperty, this=self._parse_var_or_string(), default=default 2456 ) 2457 2458 def _parse_remote_with_connection(self) -> exp.RemoteWithConnectionModelProperty: 2459 self._match_text_seq("WITH", "CONNECTION") 2460 return self.expression( 2461 exp.RemoteWithConnectionModelProperty, this=self._parse_table_parts() 2462 ) 2463 2464 def _parse_returns(self) -> exp.ReturnsProperty: 2465 value: t.Optional[exp.Expression] 2466 null = None 2467 is_table = self._match(TokenType.TABLE) 2468 2469 if is_table: 2470 if self._match(TokenType.LT): 2471 value = self.expression( 2472 exp.Schema, 2473 this="TABLE", 2474 expressions=self._parse_csv(self._parse_struct_types), 2475 ) 2476 if not self._match(TokenType.GT): 2477 self.raise_error("Expecting >") 2478 else: 2479 value = self._parse_schema(exp.var("TABLE")) 2480 elif self._match_text_seq("NULL", "ON", "NULL", "INPUT"): 2481 null = True 2482 value = None 2483 else: 2484 value = self._parse_types() 2485 2486 return self.expression(exp.ReturnsProperty, this=value, is_table=is_table, null=null) 2487 2488 def _parse_describe(self) -> exp.Describe: 2489 kind = self._match_set(self.CREATABLES) and self._prev.text 2490 style = self._match_texts(("EXTENDED", "FORMATTED", "HISTORY")) and self._prev.text.upper() 2491 if self._match(TokenType.DOT): 2492 style = None 2493 self._retreat(self._index 
- 2) 2494 this = self._parse_table(schema=True) 2495 properties = self._parse_properties() 2496 expressions = properties.expressions if properties else None 2497 return self.expression( 2498 exp.Describe, this=this, style=style, kind=kind, expressions=expressions 2499 ) 2500 2501 def _parse_insert(self) -> exp.Insert: 2502 comments = ensure_list(self._prev_comments) 2503 hint = self._parse_hint() 2504 overwrite = self._match(TokenType.OVERWRITE) 2505 ignore = self._match(TokenType.IGNORE) 2506 local = self._match_text_seq("LOCAL") 2507 alternative = None 2508 is_function = None 2509 2510 if self._match_text_seq("DIRECTORY"): 2511 this: t.Optional[exp.Expression] = self.expression( 2512 exp.Directory, 2513 this=self._parse_var_or_string(), 2514 local=local, 2515 row_format=self._parse_row_format(match_row=True), 2516 ) 2517 else: 2518 if self._match(TokenType.OR): 2519 alternative = self._match_texts(self.INSERT_ALTERNATIVES) and self._prev.text 2520 2521 self._match(TokenType.INTO) 2522 comments += ensure_list(self._prev_comments) 2523 self._match(TokenType.TABLE) 2524 is_function = self._match(TokenType.FUNCTION) 2525 2526 this = ( 2527 self._parse_table(schema=True, parse_partition=True) 2528 if not is_function 2529 else self._parse_function() 2530 ) 2531 2532 returning = self._parse_returning() 2533 2534 return self.expression( 2535 exp.Insert, 2536 comments=comments, 2537 hint=hint, 2538 is_function=is_function, 2539 this=this, 2540 stored=self._match_text_seq("STORED") and self._parse_stored(), 2541 by_name=self._match_text_seq("BY", "NAME"), 2542 exists=self._parse_exists(), 2543 where=self._match_pair(TokenType.REPLACE, TokenType.WHERE) and self._parse_assignment(), 2544 partition=self._match(TokenType.PARTITION_BY) and self._parse_partitioned_by(), 2545 settings=self._match_text_seq("SETTINGS") and self._parse_settings_property(), 2546 expression=self._parse_derived_table_values() or self._parse_ddl_select(), 2547 conflict=self._parse_on_conflict(), 2548 returning=returning or self._parse_returning(), 2549 overwrite=overwrite, 2550 alternative=alternative, 2551 ignore=ignore, 2552 ) 2553 2554 def _parse_kill(self) -> exp.Kill: 2555 kind = exp.var(self._prev.text) if self._match_texts(("CONNECTION", "QUERY")) else None 2556 2557 return self.expression( 2558 exp.Kill, 2559 this=self._parse_primary(), 2560 kind=kind, 2561 ) 2562 2563 def _parse_on_conflict(self) -> t.Optional[exp.OnConflict]: 2564 conflict = self._match_text_seq("ON", "CONFLICT") 2565 duplicate = self._match_text_seq("ON", "DUPLICATE", "KEY") 2566 2567 if not conflict and not duplicate: 2568 return None 2569 2570 conflict_keys = None 2571 constraint = None 2572 2573 if conflict: 2574 if self._match_text_seq("ON", "CONSTRAINT"): 2575 constraint = self._parse_id_var() 2576 elif self._match(TokenType.L_PAREN): 2577 conflict_keys = self._parse_csv(self._parse_id_var) 2578 self._match_r_paren() 2579 2580 action = self._parse_var_from_options(self.CONFLICT_ACTIONS) 2581 if self._prev.token_type == TokenType.UPDATE: 2582 self._match(TokenType.SET) 2583 expressions = self._parse_csv(self._parse_equality) 2584 else: 2585 expressions = None 2586 2587 return self.expression( 2588 exp.OnConflict, 2589 duplicate=duplicate, 2590 expressions=expressions, 2591 action=action, 2592 conflict_keys=conflict_keys, 2593 constraint=constraint, 2594 ) 2595 2596 def _parse_returning(self) -> t.Optional[exp.Returning]: 2597 if not self._match(TokenType.RETURNING): 2598 return None 2599 return self.expression( 2600 exp.Returning, 2601 
             expressions=self._parse_csv(self._parse_expression),
2602             into=self._match(TokenType.INTO) and self._parse_table_part(),
2603         )
2604
2605     def _parse_row(self) -> t.Optional[exp.RowFormatSerdeProperty | exp.RowFormatDelimitedProperty]:
2606         if not self._match(TokenType.FORMAT):
2607             return None
2608         return self._parse_row_format()
2609
2610     def _parse_serde_properties(self, with_: bool = False) -> t.Optional[exp.SerdeProperties]:
2611         index = self._index
2612         with_ = with_ or self._match_text_seq("WITH")
2613
2614         if not self._match(TokenType.SERDE_PROPERTIES):
2615             self._retreat(index)
2616             return None
2617         return self.expression(
2618             exp.SerdeProperties,
2619             **{  # type: ignore
2620                 "expressions": self._parse_wrapped_properties(),
2621                 "with": with_,
2622             },
2623         )
2624
2625     def _parse_row_format(
2626         self, match_row: bool = False
2627     ) -> t.Optional[exp.RowFormatSerdeProperty | exp.RowFormatDelimitedProperty]:
2628         if match_row and not self._match_pair(TokenType.ROW, TokenType.FORMAT):
2629             return None
2630
2631         if self._match_text_seq("SERDE"):
2632             this = self._parse_string()
2633
2634             serde_properties = self._parse_serde_properties()
2635
2636             return self.expression(
2637                 exp.RowFormatSerdeProperty, this=this, serde_properties=serde_properties
2638             )
2639
2640         self._match_text_seq("DELIMITED")
2641
2642         kwargs = {}
2643
2644         if self._match_text_seq("FIELDS", "TERMINATED", "BY"):
2645             kwargs["fields"] = self._parse_string()
2646         if self._match_text_seq("ESCAPED", "BY"):
2647             kwargs["escaped"] = self._parse_string()
2648         if self._match_text_seq("COLLECTION", "ITEMS", "TERMINATED", "BY"):
2649             kwargs["collection_items"] = self._parse_string()
2650         if self._match_text_seq("MAP", "KEYS", "TERMINATED", "BY"):
2651             kwargs["map_keys"] = self._parse_string()
2652         if self._match_text_seq("LINES", "TERMINATED", "BY"):
2653             kwargs["lines"] = self._parse_string()
2654         if self._match_text_seq("NULL", "DEFINED", "AS"):
2655             kwargs["null"] = self._parse_string()
2656
2657         return self.expression(exp.RowFormatDelimitedProperty, **kwargs)  # type: ignore
2658
2659     def _parse_load(self) -> exp.LoadData | exp.Command:
2660         if self._match_text_seq("DATA"):
2661             local = self._match_text_seq("LOCAL")
2662             self._match_text_seq("INPATH")
2663             inpath = self._parse_string()
2664             overwrite = self._match(TokenType.OVERWRITE)
2665             self._match_pair(TokenType.INTO, TokenType.TABLE)
2666
2667             return self.expression(
2668                 exp.LoadData,
2669                 this=self._parse_table(schema=True),
2670                 local=local,
2671                 overwrite=overwrite,
2672                 inpath=inpath,
2673                 partition=self._parse_partition(),
2674                 input_format=self._match_text_seq("INPUTFORMAT") and self._parse_string(),
2675                 serde=self._match_text_seq("SERDE") and self._parse_string(),
2676             )
2677         return self._parse_as_command(self._prev)
2678
2679     def _parse_delete(self) -> exp.Delete:
2680         # This handles MySQL's "Multiple-Table Syntax"
2681         # https://dev.mysql.com/doc/refman/8.0/en/delete.html
2682         tables = None
2683         comments = self._prev_comments
2684         if not self._match(TokenType.FROM, advance=False):
2685             tables = self._parse_csv(self._parse_table) or None
2686
2687         returning = self._parse_returning()
2688
2689         return self.expression(
2690             exp.Delete,
2691             comments=comments,
2692             tables=tables,
2693             this=self._match(TokenType.FROM) and self._parse_table(joins=True),
2694             using=self._match(TokenType.USING) and self._parse_table(joins=True),
2695             where=self._parse_where(),
2696             returning=returning or self._parse_returning(),
2697             limit=self._parse_limit(),
2698         )
2699
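A short sketch of the multiple-table syntax referenced above (an addition, not part of the source; it assumes the mysql reader):

import sqlglot
from sqlglot import exp

# In "DELETE t1, t2 FROM ...", the target tables precede FROM, so they land
# in the Delete node's "tables" arg rather than in "this".
sql = "DELETE t1, t2 FROM t1 INNER JOIN t2 ON t1.id = t2.id WHERE t1.id > 10"
delete = sqlglot.parse_one(sql, read="mysql")
assert isinstance(delete, exp.Delete)
print([t.name for t in delete.args["tables"]])  # ['t1', 't2']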
2700     def _parse_update(self) -> exp.Update:
2701         comments = self._prev_comments
2702         this = self._parse_table(joins=True, alias_tokens=self.UPDATE_ALIAS_TOKENS)
2703         expressions = self._match(TokenType.SET) and self._parse_csv(self._parse_equality)
2704         returning = self._parse_returning()
2705         return self.expression(
2706             exp.Update,
2707             comments=comments,
2708             **{  # type: ignore
2709                 "this": this,
2710                 "expressions": expressions,
2711                 "from": self._parse_from(joins=True),
2712                 "where": self._parse_where(),
2713                 "returning": returning or self._parse_returning(),
2714                 "order": self._parse_order(),
2715                 "limit": self._parse_limit(),
2716             },
2717         )
2718
2719     def _parse_uncache(self) -> exp.Uncache:
2720         if not self._match(TokenType.TABLE):
2721             self.raise_error("Expecting TABLE after UNCACHE")
2722
2723         return self.expression(
2724             exp.Uncache, exists=self._parse_exists(), this=self._parse_table(schema=True)
2725         )
2726
2727     def _parse_cache(self) -> exp.Cache:
2728         lazy = self._match_text_seq("LAZY")
2729         self._match(TokenType.TABLE)
2730         table = self._parse_table(schema=True)
2731
2732         options = []
2733         if self._match_text_seq("OPTIONS"):
2734             self._match_l_paren()
2735             k = self._parse_string()
2736             self._match(TokenType.EQ)
2737             v = self._parse_string()
2738             options = [k, v]
2739             self._match_r_paren()
2740
2741         self._match(TokenType.ALIAS)
2742         return self.expression(
2743             exp.Cache,
2744             this=table,
2745             lazy=lazy,
2746             options=options,
2747             expression=self._parse_select(nested=True),
2748         )
2749
2750     def _parse_partition(self) -> t.Optional[exp.Partition]:
2751         if not self._match(TokenType.PARTITION):
2752             return None
2753
2754         return self.expression(
2755             exp.Partition, expressions=self._parse_wrapped_csv(self._parse_assignment)
2756         )
2757
2758     def _parse_value(self) -> t.Optional[exp.Tuple]:
2759         if self._match(TokenType.L_PAREN):
2760             expressions = self._parse_csv(self._parse_expression)
2761             self._match_r_paren()
2762             return self.expression(exp.Tuple, expressions=expressions)
2763
2764         # In some dialects we can have VALUES 1, 2 which results in 1 column & 2 rows.
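        # (Illustrative addition, not in the source: assuming the default dialect
        # accepts the bare form, each scalar becomes its own single-column row, e.g.
        #   parse_one("INSERT INTO t VALUES 1, 2").args["expression"].expressions
        #   -> [Tuple(expressions=[Literal(...)]), Tuple(expressions=[Literal(...)])]
        # which is exactly what the fallback below produces.)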
2765 expression = self._parse_expression() 2766 if expression: 2767 return self.expression(exp.Tuple, expressions=[expression]) 2768 return None 2769 2770 def _parse_projections(self) -> t.List[exp.Expression]: 2771 return self._parse_expressions() 2772 2773 def _parse_select( 2774 self, 2775 nested: bool = False, 2776 table: bool = False, 2777 parse_subquery_alias: bool = True, 2778 parse_set_operation: bool = True, 2779 ) -> t.Optional[exp.Expression]: 2780 cte = self._parse_with() 2781 2782 if cte: 2783 this = self._parse_statement() 2784 2785 if not this: 2786 self.raise_error("Failed to parse any statement following CTE") 2787 return cte 2788 2789 if "with" in this.arg_types: 2790 this.set("with", cte) 2791 else: 2792 self.raise_error(f"{this.key} does not support CTE") 2793 this = cte 2794 2795 return this 2796 2797 # duckdb supports leading with FROM x 2798 from_ = self._parse_from() if self._match(TokenType.FROM, advance=False) else None 2799 2800 if self._match(TokenType.SELECT): 2801 comments = self._prev_comments 2802 2803 hint = self._parse_hint() 2804 2805 if self._next and not self._next.token_type == TokenType.DOT: 2806 all_ = self._match(TokenType.ALL) 2807 distinct = self._match_set(self.DISTINCT_TOKENS) 2808 else: 2809 all_, distinct = None, None 2810 2811 kind = ( 2812 self._match(TokenType.ALIAS) 2813 and self._match_texts(("STRUCT", "VALUE")) 2814 and self._prev.text.upper() 2815 ) 2816 2817 if distinct: 2818 distinct = self.expression( 2819 exp.Distinct, 2820 on=self._parse_value() if self._match(TokenType.ON) else None, 2821 ) 2822 2823 if all_ and distinct: 2824 self.raise_error("Cannot specify both ALL and DISTINCT after SELECT") 2825 2826 limit = self._parse_limit(top=True) 2827 projections = self._parse_projections() 2828 2829 this = self.expression( 2830 exp.Select, 2831 kind=kind, 2832 hint=hint, 2833 distinct=distinct, 2834 expressions=projections, 2835 limit=limit, 2836 ) 2837 this.comments = comments 2838 2839 into = self._parse_into() 2840 if into: 2841 this.set("into", into) 2842 2843 if not from_: 2844 from_ = self._parse_from() 2845 2846 if from_: 2847 this.set("from", from_) 2848 2849 this = self._parse_query_modifiers(this) 2850 elif (table or nested) and self._match(TokenType.L_PAREN): 2851 if self._match(TokenType.PIVOT): 2852 this = self._parse_simplified_pivot() 2853 elif self._match(TokenType.FROM): 2854 this = exp.select("*").from_( 2855 t.cast(exp.From, self._parse_from(skip_from_token=True)) 2856 ) 2857 else: 2858 this = ( 2859 self._parse_table() 2860 if table 2861 else self._parse_select(nested=True, parse_set_operation=False) 2862 ) 2863 this = self._parse_query_modifiers(self._parse_set_operations(this)) 2864 2865 self._match_r_paren() 2866 2867 # We return early here so that the UNION isn't attached to the subquery by the 2868 # following call to _parse_set_operations, but instead becomes the parent node 2869 return self._parse_subquery(this, parse_alias=parse_subquery_alias) 2870 elif self._match(TokenType.VALUES, advance=False): 2871 this = self._parse_derived_table_values() 2872 elif from_: 2873 this = exp.select("*").from_(from_.this, copy=False) 2874 elif self._match(TokenType.SUMMARIZE): 2875 table = self._match(TokenType.TABLE) 2876 this = self._parse_select() or self._parse_string() or self._parse_table() 2877 return self.expression(exp.Summarize, this=this, table=table) 2878 elif self._match(TokenType.DESCRIBE): 2879 this = self._parse_describe() 2880 elif self._match_text_seq("STREAM"): 2881 this = self.expression(exp.Stream, 
this=self._parse_function()) 2882 else: 2883 this = None 2884 2885 return self._parse_set_operations(this) if parse_set_operation else this 2886 2887 def _parse_with(self, skip_with_token: bool = False) -> t.Optional[exp.With]: 2888 if not skip_with_token and not self._match(TokenType.WITH): 2889 return None 2890 2891 comments = self._prev_comments 2892 recursive = self._match(TokenType.RECURSIVE) 2893 2894 expressions = [] 2895 while True: 2896 expressions.append(self._parse_cte()) 2897 2898 if not self._match(TokenType.COMMA) and not self._match(TokenType.WITH): 2899 break 2900 else: 2901 self._match(TokenType.WITH) 2902 2903 return self.expression( 2904 exp.With, comments=comments, expressions=expressions, recursive=recursive 2905 ) 2906 2907 def _parse_cte(self) -> exp.CTE: 2908 alias = self._parse_table_alias(self.ID_VAR_TOKENS) 2909 if not alias or not alias.this: 2910 self.raise_error("Expected CTE to have alias") 2911 2912 self._match(TokenType.ALIAS) 2913 comments = self._prev_comments 2914 2915 if self._match_text_seq("NOT", "MATERIALIZED"): 2916 materialized = False 2917 elif self._match_text_seq("MATERIALIZED"): 2918 materialized = True 2919 else: 2920 materialized = None 2921 2922 return self.expression( 2923 exp.CTE, 2924 this=self._parse_wrapped(self._parse_statement), 2925 alias=alias, 2926 materialized=materialized, 2927 comments=comments, 2928 ) 2929 2930 def _parse_table_alias( 2931 self, alias_tokens: t.Optional[t.Collection[TokenType]] = None 2932 ) -> t.Optional[exp.TableAlias]: 2933 any_token = self._match(TokenType.ALIAS) 2934 alias = ( 2935 self._parse_id_var(any_token=any_token, tokens=alias_tokens or self.TABLE_ALIAS_TOKENS) 2936 or self._parse_string_as_identifier() 2937 ) 2938 2939 index = self._index 2940 if self._match(TokenType.L_PAREN): 2941 columns = self._parse_csv(self._parse_function_parameter) 2942 self._match_r_paren() if columns else self._retreat(index) 2943 else: 2944 columns = None 2945 2946 if not alias and not columns: 2947 return None 2948 2949 table_alias = self.expression(exp.TableAlias, this=alias, columns=columns) 2950 2951 # We bubble up comments from the Identifier to the TableAlias 2952 if isinstance(alias, exp.Identifier): 2953 table_alias.add_comments(alias.pop_comments()) 2954 2955 return table_alias 2956 2957 def _parse_subquery( 2958 self, this: t.Optional[exp.Expression], parse_alias: bool = True 2959 ) -> t.Optional[exp.Subquery]: 2960 if not this: 2961 return None 2962 2963 return self.expression( 2964 exp.Subquery, 2965 this=this, 2966 pivots=self._parse_pivots(), 2967 alias=self._parse_table_alias() if parse_alias else None, 2968 ) 2969 2970 def _implicit_unnests_to_explicit(self, this: E) -> E: 2971 from sqlglot.optimizer.normalize_identifiers import normalize_identifiers as _norm 2972 2973 refs = {_norm(this.args["from"].this.copy(), dialect=self.dialect).alias_or_name} 2974 for i, join in enumerate(this.args.get("joins") or []): 2975 table = join.this 2976 normalized_table = table.copy() 2977 normalized_table.meta["maybe_column"] = True 2978 normalized_table = _norm(normalized_table, dialect=self.dialect) 2979 2980 if isinstance(table, exp.Table) and not join.args.get("on"): 2981 if normalized_table.parts[0].name in refs: 2982 table_as_column = table.to_column() 2983 unnest = exp.Unnest(expressions=[table_as_column]) 2984 2985 # Table.to_column creates a parent Alias node that we want to convert to 2986 # a TableAlias and attach to the Unnest, so it matches the parser's output 2987 if isinstance(table.args.get("alias"), 
exp.TableAlias): 2988 table_as_column.replace(table_as_column.this) 2989 exp.alias_(unnest, None, table=[table.args["alias"].this], copy=False) 2990 2991 table.replace(unnest) 2992 2993 refs.add(normalized_table.alias_or_name) 2994 2995 return this 2996 2997 def _parse_query_modifiers( 2998 self, this: t.Optional[exp.Expression] 2999 ) -> t.Optional[exp.Expression]: 3000 if isinstance(this, (exp.Query, exp.Table)): 3001 for join in self._parse_joins(): 3002 this.append("joins", join) 3003 for lateral in iter(self._parse_lateral, None): 3004 this.append("laterals", lateral) 3005 3006 while True: 3007 if self._match_set(self.QUERY_MODIFIER_PARSERS, advance=False): 3008 parser = self.QUERY_MODIFIER_PARSERS[self._curr.token_type] 3009 key, expression = parser(self) 3010 3011 if expression: 3012 this.set(key, expression) 3013 if key == "limit": 3014 offset = expression.args.pop("offset", None) 3015 3016 if offset: 3017 offset = exp.Offset(expression=offset) 3018 this.set("offset", offset) 3019 3020 limit_by_expressions = expression.expressions 3021 expression.set("expressions", None) 3022 offset.set("expressions", limit_by_expressions) 3023 continue 3024 break 3025 3026 if self.SUPPORTS_IMPLICIT_UNNEST and this and this.args.get("from"): 3027 this = self._implicit_unnests_to_explicit(this) 3028 3029 return this 3030 3031 def _parse_hint(self) -> t.Optional[exp.Hint]: 3032 if self._match(TokenType.HINT): 3033 hints = [] 3034 for hint in iter( 3035 lambda: self._parse_csv( 3036 lambda: self._parse_function() or self._parse_var(upper=True) 3037 ), 3038 [], 3039 ): 3040 hints.extend(hint) 3041 3042 if not self._match_pair(TokenType.STAR, TokenType.SLASH): 3043 self.raise_error("Expected */ after HINT") 3044 3045 return self.expression(exp.Hint, expressions=hints) 3046 3047 return None 3048 3049 def _parse_into(self) -> t.Optional[exp.Into]: 3050 if not self._match(TokenType.INTO): 3051 return None 3052 3053 temp = self._match(TokenType.TEMPORARY) 3054 unlogged = self._match_text_seq("UNLOGGED") 3055 self._match(TokenType.TABLE) 3056 3057 return self.expression( 3058 exp.Into, this=self._parse_table(schema=True), temporary=temp, unlogged=unlogged 3059 ) 3060 3061 def _parse_from( 3062 self, joins: bool = False, skip_from_token: bool = False 3063 ) -> t.Optional[exp.From]: 3064 if not skip_from_token and not self._match(TokenType.FROM): 3065 return None 3066 3067 return self.expression( 3068 exp.From, comments=self._prev_comments, this=self._parse_table(joins=joins) 3069 ) 3070 3071 def _parse_match_recognize_measure(self) -> exp.MatchRecognizeMeasure: 3072 return self.expression( 3073 exp.MatchRecognizeMeasure, 3074 window_frame=self._match_texts(("FINAL", "RUNNING")) and self._prev.text.upper(), 3075 this=self._parse_expression(), 3076 ) 3077 3078 def _parse_match_recognize(self) -> t.Optional[exp.MatchRecognize]: 3079 if not self._match(TokenType.MATCH_RECOGNIZE): 3080 return None 3081 3082 self._match_l_paren() 3083 3084 partition = self._parse_partition_by() 3085 order = self._parse_order() 3086 3087 measures = ( 3088 self._parse_csv(self._parse_match_recognize_measure) 3089 if self._match_text_seq("MEASURES") 3090 else None 3091 ) 3092 3093 if self._match_text_seq("ONE", "ROW", "PER", "MATCH"): 3094 rows = exp.var("ONE ROW PER MATCH") 3095 elif self._match_text_seq("ALL", "ROWS", "PER", "MATCH"): 3096 text = "ALL ROWS PER MATCH" 3097 if self._match_text_seq("SHOW", "EMPTY", "MATCHES"): 3098 text += " SHOW EMPTY MATCHES" 3099 elif self._match_text_seq("OMIT", "EMPTY", "MATCHES"): 3100 text += " 
OMIT EMPTY MATCHES" 3101 elif self._match_text_seq("WITH", "UNMATCHED", "ROWS"): 3102 text += " WITH UNMATCHED ROWS" 3103 rows = exp.var(text) 3104 else: 3105 rows = None 3106 3107 if self._match_text_seq("AFTER", "MATCH", "SKIP"): 3108 text = "AFTER MATCH SKIP" 3109 if self._match_text_seq("PAST", "LAST", "ROW"): 3110 text += " PAST LAST ROW" 3111 elif self._match_text_seq("TO", "NEXT", "ROW"): 3112 text += " TO NEXT ROW" 3113 elif self._match_text_seq("TO", "FIRST"): 3114 text += f" TO FIRST {self._advance_any().text}" # type: ignore 3115 elif self._match_text_seq("TO", "LAST"): 3116 text += f" TO LAST {self._advance_any().text}" # type: ignore 3117 after = exp.var(text) 3118 else: 3119 after = None 3120 3121 if self._match_text_seq("PATTERN"): 3122 self._match_l_paren() 3123 3124 if not self._curr: 3125 self.raise_error("Expecting )", self._curr) 3126 3127 paren = 1 3128 start = self._curr 3129 3130 while self._curr and paren > 0: 3131 if self._curr.token_type == TokenType.L_PAREN: 3132 paren += 1 3133 if self._curr.token_type == TokenType.R_PAREN: 3134 paren -= 1 3135 3136 end = self._prev 3137 self._advance() 3138 3139 if paren > 0: 3140 self.raise_error("Expecting )", self._curr) 3141 3142 pattern = exp.var(self._find_sql(start, end)) 3143 else: 3144 pattern = None 3145 3146 define = ( 3147 self._parse_csv(self._parse_name_as_expression) 3148 if self._match_text_seq("DEFINE") 3149 else None 3150 ) 3151 3152 self._match_r_paren() 3153 3154 return self.expression( 3155 exp.MatchRecognize, 3156 partition_by=partition, 3157 order=order, 3158 measures=measures, 3159 rows=rows, 3160 after=after, 3161 pattern=pattern, 3162 define=define, 3163 alias=self._parse_table_alias(), 3164 ) 3165 3166 def _parse_lateral(self) -> t.Optional[exp.Lateral]: 3167 cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY) 3168 if not cross_apply and self._match_pair(TokenType.OUTER, TokenType.APPLY): 3169 cross_apply = False 3170 3171 if cross_apply is not None: 3172 this = self._parse_select(table=True) 3173 view = None 3174 outer = None 3175 elif self._match(TokenType.LATERAL): 3176 this = self._parse_select(table=True) 3177 view = self._match(TokenType.VIEW) 3178 outer = self._match(TokenType.OUTER) 3179 else: 3180 return None 3181 3182 if not this: 3183 this = ( 3184 self._parse_unnest() 3185 or self._parse_function() 3186 or self._parse_id_var(any_token=False) 3187 ) 3188 3189 while self._match(TokenType.DOT): 3190 this = exp.Dot( 3191 this=this, 3192 expression=self._parse_function() or self._parse_id_var(any_token=False), 3193 ) 3194 3195 if view: 3196 table = self._parse_id_var(any_token=False) 3197 columns = self._parse_csv(self._parse_id_var) if self._match(TokenType.ALIAS) else [] 3198 table_alias: t.Optional[exp.TableAlias] = self.expression( 3199 exp.TableAlias, this=table, columns=columns 3200 ) 3201 elif isinstance(this, (exp.Subquery, exp.Unnest)) and this.alias: 3202 # We move the alias from the lateral's child node to the lateral itself 3203 table_alias = this.args["alias"].pop() 3204 else: 3205 table_alias = self._parse_table_alias() 3206 3207 return self.expression( 3208 exp.Lateral, 3209 this=this, 3210 view=view, 3211 outer=outer, 3212 alias=table_alias, 3213 cross_apply=cross_apply, 3214 ) 3215 3216 def _parse_join_parts( 3217 self, 3218 ) -> t.Tuple[t.Optional[Token], t.Optional[Token], t.Optional[Token]]: 3219 return ( 3220 self._match_set(self.JOIN_METHODS) and self._prev, 3221 self._match_set(self.JOIN_SIDES) and self._prev, 3222 self._match_set(self.JOIN_KINDS) and 
self._prev, 3223 ) 3224 3225 def _parse_using_identifiers(self) -> t.List[exp.Expression]: 3226 def _parse_column_as_identifier() -> t.Optional[exp.Expression]: 3227 this = self._parse_column() 3228 if isinstance(this, exp.Column): 3229 return this.this 3230 return this 3231 3232 return self._parse_wrapped_csv(_parse_column_as_identifier, optional=True) 3233 3234 def _parse_join( 3235 self, skip_join_token: bool = False, parse_bracket: bool = False 3236 ) -> t.Optional[exp.Join]: 3237 if self._match(TokenType.COMMA): 3238 return self.expression(exp.Join, this=self._parse_table()) 3239 3240 index = self._index 3241 method, side, kind = self._parse_join_parts() 3242 hint = self._prev.text if self._match_texts(self.JOIN_HINTS) else None 3243 join = self._match(TokenType.JOIN) or (kind and kind.token_type == TokenType.STRAIGHT_JOIN) 3244 3245 if not skip_join_token and not join: 3246 self._retreat(index) 3247 kind = None 3248 method = None 3249 side = None 3250 3251 outer_apply = self._match_pair(TokenType.OUTER, TokenType.APPLY, False) 3252 cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY, False) 3253 3254 if not skip_join_token and not join and not outer_apply and not cross_apply: 3255 return None 3256 3257 kwargs: t.Dict[str, t.Any] = {"this": self._parse_table(parse_bracket=parse_bracket)} 3258 3259 if method: 3260 kwargs["method"] = method.text 3261 if side: 3262 kwargs["side"] = side.text 3263 if kind: 3264 kwargs["kind"] = kind.text 3265 if hint: 3266 kwargs["hint"] = hint 3267 3268 if self._match(TokenType.MATCH_CONDITION): 3269 kwargs["match_condition"] = self._parse_wrapped(self._parse_comparison) 3270 3271 if self._match(TokenType.ON): 3272 kwargs["on"] = self._parse_assignment() 3273 elif self._match(TokenType.USING): 3274 kwargs["using"] = self._parse_using_identifiers() 3275 elif ( 3276 not (outer_apply or cross_apply) 3277 and not isinstance(kwargs["this"], exp.Unnest) 3278 and not (kind and kind.token_type == TokenType.CROSS) 3279 ): 3280 index = self._index 3281 joins: t.Optional[list] = list(self._parse_joins()) 3282 3283 if joins and self._match(TokenType.ON): 3284 kwargs["on"] = self._parse_assignment() 3285 elif joins and self._match(TokenType.USING): 3286 kwargs["using"] = self._parse_using_identifiers() 3287 else: 3288 joins = None 3289 self._retreat(index) 3290 3291 kwargs["this"].set("joins", joins if joins else None) 3292 3293 comments = [c for token in (method, side, kind) if token for c in token.comments] 3294 return self.expression(exp.Join, comments=comments, **kwargs) 3295 3296 def _parse_opclass(self) -> t.Optional[exp.Expression]: 3297 this = self._parse_assignment() 3298 3299 if self._match_texts(self.OPCLASS_FOLLOW_KEYWORDS, advance=False): 3300 return this 3301 3302 if not self._match_set(self.OPTYPE_FOLLOW_TOKENS, advance=False): 3303 return self.expression(exp.Opclass, this=this, expression=self._parse_table_parts()) 3304 3305 return this 3306 3307 def _parse_index_params(self) -> exp.IndexParameters: 3308 using = self._parse_var(any_token=True) if self._match(TokenType.USING) else None 3309 3310 if self._match(TokenType.L_PAREN, advance=False): 3311 columns = self._parse_wrapped_csv(self._parse_with_operator) 3312 else: 3313 columns = None 3314 3315 include = self._parse_wrapped_id_vars() if self._match_text_seq("INCLUDE") else None 3316 partition_by = self._parse_partition_by() 3317 with_storage = self._match(TokenType.WITH) and self._parse_wrapped_properties() 3318 tablespace = ( 3319 self._parse_var(any_token=True) 3320 if 
self._match_text_seq("USING", "INDEX", "TABLESPACE") 3321 else None 3322 ) 3323 where = self._parse_where() 3324 3325 on = self._parse_field() if self._match(TokenType.ON) else None 3326 3327 return self.expression( 3328 exp.IndexParameters, 3329 using=using, 3330 columns=columns, 3331 include=include, 3332 partition_by=partition_by, 3333 where=where, 3334 with_storage=with_storage, 3335 tablespace=tablespace, 3336 on=on, 3337 ) 3338 3339 def _parse_index( 3340 self, index: t.Optional[exp.Expression] = None, anonymous: bool = False 3341 ) -> t.Optional[exp.Index]: 3342 if index or anonymous: 3343 unique = None 3344 primary = None 3345 amp = None 3346 3347 self._match(TokenType.ON) 3348 self._match(TokenType.TABLE) # hive 3349 table = self._parse_table_parts(schema=True) 3350 else: 3351 unique = self._match(TokenType.UNIQUE) 3352 primary = self._match_text_seq("PRIMARY") 3353 amp = self._match_text_seq("AMP") 3354 3355 if not self._match(TokenType.INDEX): 3356 return None 3357 3358 index = self._parse_id_var() 3359 table = None 3360 3361 params = self._parse_index_params() 3362 3363 return self.expression( 3364 exp.Index, 3365 this=index, 3366 table=table, 3367 unique=unique, 3368 primary=primary, 3369 amp=amp, 3370 params=params, 3371 ) 3372 3373 def _parse_table_hints(self) -> t.Optional[t.List[exp.Expression]]: 3374 hints: t.List[exp.Expression] = [] 3375 if self._match_pair(TokenType.WITH, TokenType.L_PAREN): 3376 # https://learn.microsoft.com/en-us/sql/t-sql/queries/hints-transact-sql-table?view=sql-server-ver16 3377 hints.append( 3378 self.expression( 3379 exp.WithTableHint, 3380 expressions=self._parse_csv( 3381 lambda: self._parse_function() or self._parse_var(any_token=True) 3382 ), 3383 ) 3384 ) 3385 self._match_r_paren() 3386 else: 3387 # https://dev.mysql.com/doc/refman/8.0/en/index-hints.html 3388 while self._match_set(self.TABLE_INDEX_HINT_TOKENS): 3389 hint = exp.IndexTableHint(this=self._prev.text.upper()) 3390 3391 self._match_set((TokenType.INDEX, TokenType.KEY)) 3392 if self._match(TokenType.FOR): 3393 hint.set("target", self._advance_any() and self._prev.text.upper()) 3394 3395 hint.set("expressions", self._parse_wrapped_id_vars()) 3396 hints.append(hint) 3397 3398 return hints or None 3399 3400 def _parse_table_part(self, schema: bool = False) -> t.Optional[exp.Expression]: 3401 return ( 3402 (not schema and self._parse_function(optional_parens=False)) 3403 or self._parse_id_var(any_token=False) 3404 or self._parse_string_as_identifier() 3405 or self._parse_placeholder() 3406 ) 3407 3408 def _parse_table_parts( 3409 self, schema: bool = False, is_db_reference: bool = False, wildcard: bool = False 3410 ) -> exp.Table: 3411 catalog = None 3412 db = None 3413 table: t.Optional[exp.Expression | str] = self._parse_table_part(schema=schema) 3414 3415 while self._match(TokenType.DOT): 3416 if catalog: 3417 # This allows nesting the table in arbitrarily many dot expressions if needed 3418 table = self.expression( 3419 exp.Dot, this=table, expression=self._parse_table_part(schema=schema) 3420 ) 3421 else: 3422 catalog = db 3423 db = table 3424 # "" used for tsql FROM a..b case 3425 table = self._parse_table_part(schema=schema) or "" 3426 3427 if ( 3428 wildcard 3429 and self._is_connected() 3430 and (isinstance(table, exp.Identifier) or not table) 3431 and self._match(TokenType.STAR) 3432 ): 3433 if isinstance(table, exp.Identifier): 3434 table.args["this"] += "*" 3435 else: 3436 table = exp.Identifier(this="*") 3437 3438 # We bubble up comments from the Identifier to the Table 
3439 comments = table.pop_comments() if isinstance(table, exp.Expression) else None 3440 3441 if is_db_reference: 3442 catalog = db 3443 db = table 3444 table = None 3445 3446 if not table and not is_db_reference: 3447 self.raise_error(f"Expected table name but got {self._curr}") 3448 if not db and is_db_reference: 3449 self.raise_error(f"Expected database name but got {self._curr}") 3450 3451 table = self.expression( 3452 exp.Table, 3453 comments=comments, 3454 this=table, 3455 db=db, 3456 catalog=catalog, 3457 ) 3458 3459 changes = self._parse_changes() 3460 if changes: 3461 table.set("changes", changes) 3462 3463 at_before = self._parse_historical_data() 3464 if at_before: 3465 table.set("when", at_before) 3466 3467 pivots = self._parse_pivots() 3468 if pivots: 3469 table.set("pivots", pivots) 3470 3471 return table 3472 3473 def _parse_table( 3474 self, 3475 schema: bool = False, 3476 joins: bool = False, 3477 alias_tokens: t.Optional[t.Collection[TokenType]] = None, 3478 parse_bracket: bool = False, 3479 is_db_reference: bool = False, 3480 parse_partition: bool = False, 3481 ) -> t.Optional[exp.Expression]: 3482 lateral = self._parse_lateral() 3483 if lateral: 3484 return lateral 3485 3486 unnest = self._parse_unnest() 3487 if unnest: 3488 return unnest 3489 3490 values = self._parse_derived_table_values() 3491 if values: 3492 return values 3493 3494 subquery = self._parse_select(table=True) 3495 if subquery: 3496 if not subquery.args.get("pivots"): 3497 subquery.set("pivots", self._parse_pivots()) 3498 return subquery 3499 3500 bracket = parse_bracket and self._parse_bracket(None) 3501 bracket = self.expression(exp.Table, this=bracket) if bracket else None 3502 3503 rows_from = self._match_text_seq("ROWS", "FROM") and self._parse_wrapped_csv( 3504 self._parse_table 3505 ) 3506 rows_from = self.expression(exp.Table, rows_from=rows_from) if rows_from else None 3507 3508 only = self._match(TokenType.ONLY) 3509 3510 this = t.cast( 3511 exp.Expression, 3512 bracket 3513 or rows_from 3514 or self._parse_bracket( 3515 self._parse_table_parts(schema=schema, is_db_reference=is_db_reference) 3516 ), 3517 ) 3518 3519 if only: 3520 this.set("only", only) 3521 3522 # Postgres supports a wildcard (table) suffix operator, which is a no-op in this context 3523 self._match_text_seq("*") 3524 3525 parse_partition = parse_partition or self.SUPPORTS_PARTITION_SELECTION 3526 if parse_partition and self._match(TokenType.PARTITION, advance=False): 3527 this.set("partition", self._parse_partition()) 3528 3529 if schema: 3530 return self._parse_schema(this=this) 3531 3532 version = self._parse_version() 3533 3534 if version: 3535 this.set("version", version) 3536 3537 if self.dialect.ALIAS_POST_TABLESAMPLE: 3538 table_sample = self._parse_table_sample() 3539 3540 alias = self._parse_table_alias(alias_tokens=alias_tokens or self.TABLE_ALIAS_TOKENS) 3541 if alias: 3542 this.set("alias", alias) 3543 3544 if isinstance(this, exp.Table) and self._match_text_seq("AT"): 3545 return self.expression( 3546 exp.AtIndex, this=this.to_column(copy=False), expression=self._parse_id_var() 3547 ) 3548 3549 this.set("hints", self._parse_table_hints()) 3550 3551 if not this.args.get("pivots"): 3552 this.set("pivots", self._parse_pivots()) 3553 3554 if not self.dialect.ALIAS_POST_TABLESAMPLE: 3555 table_sample = self._parse_table_sample() 3556 3557 if table_sample: 3558 table_sample.set("this", this) 3559 this = table_sample 3560 3561 if joins: 3562 for join in self._parse_joins(): 3563 this.append("joins", join) 3564 3565 if 
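# Illustrative sketch of _parse_table_parts, assuming the public parse_one API: a
# dotted reference is split right-to-left into table, db and catalog.
import sqlglot
from sqlglot import exp

table = sqlglot.parse_one("SELECT * FROM prod.analytics.events").find(exp.Table)
assert (table.catalog, table.db, table.name) == ("prod", "analytics", "events")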
self._match_pair(TokenType.WITH, TokenType.ORDINALITY): 3566 this.set("ordinality", True) 3567 this.set("alias", self._parse_table_alias()) 3568 3569 return this 3570 3571 def _parse_version(self) -> t.Optional[exp.Version]: 3572 if self._match(TokenType.TIMESTAMP_SNAPSHOT): 3573 this = "TIMESTAMP" 3574 elif self._match(TokenType.VERSION_SNAPSHOT): 3575 this = "VERSION" 3576 else: 3577 return None 3578 3579 if self._match_set((TokenType.FROM, TokenType.BETWEEN)): 3580 kind = self._prev.text.upper() 3581 start = self._parse_bitwise() 3582 self._match_texts(("TO", "AND")) 3583 end = self._parse_bitwise() 3584 expression: t.Optional[exp.Expression] = self.expression( 3585 exp.Tuple, expressions=[start, end] 3586 ) 3587 elif self._match_text_seq("CONTAINED", "IN"): 3588 kind = "CONTAINED IN" 3589 expression = self.expression( 3590 exp.Tuple, expressions=self._parse_wrapped_csv(self._parse_bitwise) 3591 ) 3592 elif self._match(TokenType.ALL): 3593 kind = "ALL" 3594 expression = None 3595 else: 3596 self._match_text_seq("AS", "OF") 3597 kind = "AS OF" 3598 expression = self._parse_type() 3599 3600 return self.expression(exp.Version, this=this, expression=expression, kind=kind) 3601 3602 def _parse_historical_data(self) -> t.Optional[exp.HistoricalData]: 3603 # https://docs.snowflake.com/en/sql-reference/constructs/at-before 3604 index = self._index 3605 historical_data = None 3606 if self._match_texts(self.HISTORICAL_DATA_PREFIX): 3607 this = self._prev.text.upper() 3608 kind = ( 3609 self._match(TokenType.L_PAREN) 3610 and self._match_texts(self.HISTORICAL_DATA_KIND) 3611 and self._prev.text.upper() 3612 ) 3613 expression = self._match(TokenType.FARROW) and self._parse_bitwise() 3614 3615 if expression: 3616 self._match_r_paren() 3617 historical_data = self.expression( 3618 exp.HistoricalData, this=this, kind=kind, expression=expression 3619 ) 3620 else: 3621 self._retreat(index) 3622 3623 return historical_data 3624 3625 def _parse_changes(self) -> t.Optional[exp.Changes]: 3626 if not self._match_text_seq("CHANGES", "(", "INFORMATION", "=>"): 3627 return None 3628 3629 information = self._parse_var(any_token=True) 3630 self._match_r_paren() 3631 3632 return self.expression( 3633 exp.Changes, 3634 information=information, 3635 at_before=self._parse_historical_data(), 3636 end=self._parse_historical_data(), 3637 ) 3638 3639 def _parse_unnest(self, with_alias: bool = True) -> t.Optional[exp.Unnest]: 3640 if not self._match(TokenType.UNNEST): 3641 return None 3642 3643 expressions = self._parse_wrapped_csv(self._parse_equality) 3644 offset = self._match_pair(TokenType.WITH, TokenType.ORDINALITY) 3645 3646 alias = self._parse_table_alias() if with_alias else None 3647 3648 if alias: 3649 if self.dialect.UNNEST_COLUMN_ONLY: 3650 if alias.args.get("columns"): 3651 self.raise_error("Unexpected extra column alias in unnest.") 3652 3653 alias.set("columns", [alias.this]) 3654 alias.set("this", None) 3655 3656 columns = alias.args.get("columns") or [] 3657 if offset and len(expressions) < len(columns): 3658 offset = columns.pop() 3659 3660 if not offset and self._match_pair(TokenType.WITH, TokenType.OFFSET): 3661 self._match(TokenType.ALIAS) 3662 offset = self._parse_id_var( 3663 any_token=False, tokens=self.UNNEST_OFFSET_ALIAS_TOKENS 3664 ) or exp.to_identifier("offset") 3665 3666 return self.expression(exp.Unnest, expressions=expressions, alias=alias, offset=offset) 3667 3668 def _parse_derived_table_values(self) -> t.Optional[exp.Values]: 3669 is_derived = self._match_pair(TokenType.L_PAREN, 
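# Illustrative sketch of _parse_unnest, assuming the bigquery dialect: WITH OFFSET
# lands in the "offset" arg, defaulting to an identifier named "offset" when no
# explicit alias is written (see exp.to_identifier("offset") above).
import sqlglot
from sqlglot import exp

sql = "SELECT * FROM UNNEST([1, 2]) AS x WITH OFFSET"
unnest = sqlglot.parse_one(sql, read="bigquery").find(exp.Unnest)
assert unnest.args["offset"].name == "offset"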
TokenType.VALUES) 3670 if not is_derived and not ( 3671 # ClickHouse's `FORMAT Values` is equivalent to `VALUES` 3672 self._match_text_seq("VALUES") or self._match_text_seq("FORMAT", "VALUES") 3673 ): 3674 return None 3675 3676 expressions = self._parse_csv(self._parse_value) 3677 alias = self._parse_table_alias() 3678 3679 if is_derived: 3680 self._match_r_paren() 3681 3682 return self.expression( 3683 exp.Values, expressions=expressions, alias=alias or self._parse_table_alias() 3684 ) 3685 3686 def _parse_table_sample(self, as_modifier: bool = False) -> t.Optional[exp.TableSample]: 3687 if not self._match(TokenType.TABLE_SAMPLE) and not ( 3688 as_modifier and self._match_text_seq("USING", "SAMPLE") 3689 ): 3690 return None 3691 3692 bucket_numerator = None 3693 bucket_denominator = None 3694 bucket_field = None 3695 percent = None 3696 size = None 3697 seed = None 3698 3699 method = self._parse_var(tokens=(TokenType.ROW,), upper=True) 3700 matched_l_paren = self._match(TokenType.L_PAREN) 3701 3702 if self.TABLESAMPLE_CSV: 3703 num = None 3704 expressions = self._parse_csv(self._parse_primary) 3705 else: 3706 expressions = None 3707 num = ( 3708 self._parse_factor() 3709 if self._match(TokenType.NUMBER, advance=False) 3710 else self._parse_primary() or self._parse_placeholder() 3711 ) 3712 3713 if self._match_text_seq("BUCKET"): 3714 bucket_numerator = self._parse_number() 3715 self._match_text_seq("OUT", "OF") 3716 bucket_denominator = self._parse_number() 3717 self._match(TokenType.ON) 3718 bucket_field = self._parse_field() 3719 elif self._match_set((TokenType.PERCENT, TokenType.MOD)): 3720 percent = num 3721 elif self._match(TokenType.ROWS) or not self.dialect.TABLESAMPLE_SIZE_IS_PERCENT: 3722 size = num 3723 else: 3724 percent = num 3725 3726 if matched_l_paren: 3727 self._match_r_paren() 3728 3729 if self._match(TokenType.L_PAREN): 3730 method = self._parse_var(upper=True) 3731 seed = self._match(TokenType.COMMA) and self._parse_number() 3732 self._match_r_paren() 3733 elif self._match_texts(("SEED", "REPEATABLE")): 3734 seed = self._parse_wrapped(self._parse_number) 3735 3736 if not method and self.DEFAULT_SAMPLING_METHOD: 3737 method = exp.var(self.DEFAULT_SAMPLING_METHOD) 3738 3739 return self.expression( 3740 exp.TableSample, 3741 expressions=expressions, 3742 method=method, 3743 bucket_numerator=bucket_numerator, 3744 bucket_denominator=bucket_denominator, 3745 bucket_field=bucket_field, 3746 percent=percent, 3747 size=size, 3748 seed=seed, 3749 ) 3750 3751 def _parse_pivots(self) -> t.Optional[t.List[exp.Pivot]]: 3752 return list(iter(self._parse_pivot, None)) or None 3753 3754 def _parse_joins(self) -> t.Iterator[exp.Join]: 3755 return iter(self._parse_join, None) 3756 3757 # https://duckdb.org/docs/sql/statements/pivot 3758 def _parse_simplified_pivot(self) -> exp.Pivot: 3759 def _parse_on() -> t.Optional[exp.Expression]: 3760 this = self._parse_bitwise() 3761 return self._parse_in(this) if self._match(TokenType.IN) else this 3762 3763 this = self._parse_table() 3764 expressions = self._match(TokenType.ON) and self._parse_csv(_parse_on) 3765 using = self._match(TokenType.USING) and self._parse_csv( 3766 lambda: self._parse_alias(self._parse_function()) 3767 ) 3768 group = self._parse_group() 3769 return self.expression( 3770 exp.Pivot, this=this, expressions=expressions, using=using, group=group 3771 ) 3772 3773 def _parse_pivot_in(self) -> exp.In | exp.PivotAny: 3774 def _parse_aliased_expression() -> t.Optional[exp.Expression]: 3775 this =
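# Illustrative sketch of _parse_table_sample, assuming the tsql dialect where
# TABLESAMPLE (10 PERCENT) is native: the PERCENT branch stores num under "percent".
import sqlglot
from sqlglot import exp

sql = "SELECT * FROM t TABLESAMPLE (10 PERCENT)"
sample = sqlglot.parse_one(sql, read="tsql").find(exp.TableSample)
assert sample.args["percent"] is not None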
self._parse_select_or_expression() 3776 3777 self._match(TokenType.ALIAS) 3778 alias = self._parse_field() 3779 if alias: 3780 return self.expression(exp.PivotAlias, this=this, alias=alias) 3781 3782 return this 3783 3784 value = self._parse_column() 3785 3786 if not self._match_pair(TokenType.IN, TokenType.L_PAREN): 3787 self.raise_error("Expecting IN (") 3788 3789 if self._match(TokenType.ANY): 3790 expr: exp.PivotAny | exp.In = self.expression(exp.PivotAny, this=self._parse_order()) 3791 else: 3792 aliased_expressions = self._parse_csv(_parse_aliased_expression) 3793 expr = self.expression(exp.In, this=value, expressions=aliased_expressions) 3794 3795 self._match_r_paren() 3796 return expr 3797 3798 def _parse_pivot(self) -> t.Optional[exp.Pivot]: 3799 index = self._index 3800 include_nulls = None 3801 3802 if self._match(TokenType.PIVOT): 3803 unpivot = False 3804 elif self._match(TokenType.UNPIVOT): 3805 unpivot = True 3806 3807 # https://docs.databricks.com/en/sql/language-manual/sql-ref-syntax-qry-select-unpivot.html#syntax 3808 if self._match_text_seq("INCLUDE", "NULLS"): 3809 include_nulls = True 3810 elif self._match_text_seq("EXCLUDE", "NULLS"): 3811 include_nulls = False 3812 else: 3813 return None 3814 3815 expressions = [] 3816 3817 if not self._match(TokenType.L_PAREN): 3818 self._retreat(index) 3819 return None 3820 3821 if unpivot: 3822 expressions = self._parse_csv(self._parse_column) 3823 else: 3824 expressions = self._parse_csv(lambda: self._parse_alias(self._parse_function())) 3825 3826 if not expressions: 3827 self.raise_error("Failed to parse PIVOT's aggregation list") 3828 3829 if not self._match(TokenType.FOR): 3830 self.raise_error("Expecting FOR") 3831 3832 field = self._parse_pivot_in() 3833 default_on_null = self._match_text_seq("DEFAULT", "ON", "NULL") and self._parse_wrapped( 3834 self._parse_bitwise 3835 ) 3836 3837 self._match_r_paren() 3838 3839 pivot = self.expression( 3840 exp.Pivot, 3841 expressions=expressions, 3842 field=field, 3843 unpivot=unpivot, 3844 include_nulls=include_nulls, 3845 default_on_null=default_on_null, 3846 ) 3847 3848 if not self._match_set((TokenType.PIVOT, TokenType.UNPIVOT), advance=False): 3849 pivot.set("alias", self._parse_table_alias()) 3850 3851 if not unpivot: 3852 names = self._pivot_column_names(t.cast(t.List[exp.Expression], expressions)) 3853 3854 columns: t.List[exp.Expression] = [] 3855 for fld in pivot.args["field"].expressions: 3856 field_name = fld.sql() if self.IDENTIFY_PIVOT_STRINGS else fld.alias_or_name 3857 for name in names: 3858 if self.PREFIXED_PIVOT_COLUMNS: 3859 name = f"{name}_{field_name}" if name else field_name 3860 else: 3861 name = f"{field_name}_{name}" if name else field_name 3862 3863 columns.append(exp.to_identifier(name)) 3864 3865 pivot.set("columns", columns) 3866 3867 return pivot 3868 3869 def _pivot_column_names(self, aggregations: t.List[exp.Expression]) -> t.List[str]: 3870 return [agg.alias for agg in aggregations] 3871 3872 def _parse_prewhere(self, skip_where_token: bool = False) -> t.Optional[exp.PreWhere]: 3873 if not skip_where_token and not self._match(TokenType.PREWHERE): 3874 return None 3875 3876 return self.expression( 3877 exp.PreWhere, comments=self._prev_comments, this=self._parse_assignment() 3878 ) 3879 3880 def _parse_where(self, skip_where_token: bool = False) -> t.Optional[exp.Where]: 3881 if not skip_where_token and not self._match(TokenType.WHERE): 3882 return None 3883 3884 return self.expression( 3885 exp.Where, comments=self._prev_comments, 
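# Illustrative sketch of _parse_pivot, assuming Snowflake-style PIVOT syntax: the FOR
# column and its IN list end up under the "field" arg as an exp.In node.
import sqlglot
from sqlglot import exp

sql = "SELECT * FROM sales PIVOT(SUM(amount) FOR month IN ('JAN', 'FEB'))"
pivot = sqlglot.parse_one(sql, read="snowflake").find(exp.Pivot)
assert isinstance(pivot.args["field"], exp.In) and len(pivot.args["field"].expressions) == 2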
this=self._parse_assignment() 3886 ) 3887 3888 def _parse_group(self, skip_group_by_token: bool = False) -> t.Optional[exp.Group]: 3889 if not skip_group_by_token and not self._match(TokenType.GROUP_BY): 3890 return None 3891 3892 elements: t.Dict[str, t.Any] = defaultdict(list) 3893 3894 if self._match(TokenType.ALL): 3895 elements["all"] = True 3896 elif self._match(TokenType.DISTINCT): 3897 elements["all"] = False 3898 3899 while True: 3900 expressions = self._parse_csv( 3901 lambda: None 3902 if self._match_set((TokenType.CUBE, TokenType.ROLLUP), advance=False) 3903 else self._parse_assignment() 3904 ) 3905 if expressions: 3906 elements["expressions"].extend(expressions) 3907 3908 grouping_sets = self._parse_grouping_sets() 3909 if grouping_sets: 3910 elements["grouping_sets"].extend(grouping_sets) 3911 3912 rollup = None 3913 cube = None 3914 totals = None 3915 3916 index = self._index 3917 with_ = self._match(TokenType.WITH) 3918 if self._match(TokenType.ROLLUP): 3919 rollup = with_ or self._parse_wrapped_csv(self._parse_column) 3920 elements["rollup"].extend(ensure_list(rollup)) 3921 3922 if self._match(TokenType.CUBE): 3923 cube = with_ or self._parse_wrapped_csv(self._parse_column) 3924 elements["cube"].extend(ensure_list(cube)) 3925 3926 if self._match_text_seq("TOTALS"): 3927 totals = True 3928 elements["totals"] = True # type: ignore 3929 3930 if not (grouping_sets or rollup or cube or totals): 3931 if with_: 3932 self._retreat(index) 3933 break 3934 3935 return self.expression(exp.Group, **elements) # type: ignore 3936 3937 def _parse_grouping_sets(self) -> t.Optional[t.List[exp.Expression]]: 3938 if not self._match(TokenType.GROUPING_SETS): 3939 return None 3940 3941 return self._parse_wrapped_csv(self._parse_grouping_set) 3942 3943 def _parse_grouping_set(self) -> t.Optional[exp.Expression]: 3944 if self._match(TokenType.L_PAREN): 3945 grouping_set = self._parse_csv(self._parse_column) 3946 self._match_r_paren() 3947 return self.expression(exp.Tuple, expressions=grouping_set) 3948 3949 return self._parse_column() 3950 3951 def _parse_having(self, skip_having_token: bool = False) -> t.Optional[exp.Having]: 3952 if not skip_having_token and not self._match(TokenType.HAVING): 3953 return None 3954 return self.expression(exp.Having, this=self._parse_assignment()) 3955 3956 def _parse_qualify(self) -> t.Optional[exp.Qualify]: 3957 if not self._match(TokenType.QUALIFY): 3958 return None 3959 return self.expression(exp.Qualify, this=self._parse_assignment()) 3960 3961 def _parse_connect(self, skip_start_token: bool = False) -> t.Optional[exp.Connect]: 3962 if skip_start_token: 3963 start = None 3964 elif self._match(TokenType.START_WITH): 3965 start = self._parse_assignment() 3966 else: 3967 return None 3968 3969 self._match(TokenType.CONNECT_BY) 3970 nocycle = self._match_text_seq("NOCYCLE") 3971 self.NO_PAREN_FUNCTION_PARSERS["PRIOR"] = lambda self: self.expression( 3972 exp.Prior, this=self._parse_bitwise() 3973 ) 3974 connect = self._parse_assignment() 3975 self.NO_PAREN_FUNCTION_PARSERS.pop("PRIOR") 3976 3977 if not start and self._match(TokenType.START_WITH): 3978 start = self._parse_assignment() 3979 3980 return self.expression(exp.Connect, start=start, connect=connect, nocycle=nocycle) 3981 3982 def _parse_name_as_expression(self) -> exp.Alias: 3983 return self.expression( 3984 exp.Alias, 3985 alias=self._parse_id_var(any_token=True), 3986 this=self._match(TokenType.ALIAS) and self._parse_assignment(), 3987 ) 3988 3989 def _parse_interpolate(self) -> 
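# Illustrative sketch of _parse_group, assuming the default dialect: plain keys,
# ROLLUP, CUBE and GROUPING SETS are collected into separate args of exp.Group.
import sqlglot
from sqlglot import exp

group = sqlglot.parse_one("SELECT a, b, SUM(c) FROM t GROUP BY ROLLUP (a, b)").find(exp.Group)
assert group.args.get("rollup") and not group.args.get("expressions")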
t.Optional[t.List[exp.Expression]]: 3990 if self._match_text_seq("INTERPOLATE"): 3991 return self._parse_wrapped_csv(self._parse_name_as_expression) 3992 return None 3993 3994 def _parse_order( 3995 self, this: t.Optional[exp.Expression] = None, skip_order_token: bool = False 3996 ) -> t.Optional[exp.Expression]: 3997 siblings = None 3998 if not skip_order_token and not self._match(TokenType.ORDER_BY): 3999 if not self._match(TokenType.ORDER_SIBLINGS_BY): 4000 return this 4001 4002 siblings = True 4003 4004 return self.expression( 4005 exp.Order, 4006 this=this, 4007 expressions=self._parse_csv(self._parse_ordered), 4008 interpolate=self._parse_interpolate(), 4009 siblings=siblings, 4010 ) 4011 4012 def _parse_sort(self, exp_class: t.Type[E], token: TokenType) -> t.Optional[E]: 4013 if not self._match(token): 4014 return None 4015 return self.expression(exp_class, expressions=self._parse_csv(self._parse_ordered)) 4016 4017 def _parse_ordered( 4018 self, parse_method: t.Optional[t.Callable] = None 4019 ) -> t.Optional[exp.Ordered]: 4020 this = parse_method() if parse_method else self._parse_assignment() 4021 if not this: 4022 return None 4023 4024 if this.name.upper() == "ALL" and self.dialect.SUPPORTS_ORDER_BY_ALL: 4025 this = exp.var("ALL") 4026 4027 asc = self._match(TokenType.ASC) 4028 desc = self._match(TokenType.DESC) or (asc and False) 4029 4030 is_nulls_first = self._match_text_seq("NULLS", "FIRST") 4031 is_nulls_last = self._match_text_seq("NULLS", "LAST") 4032 4033 nulls_first = is_nulls_first or False 4034 explicitly_null_ordered = is_nulls_first or is_nulls_last 4035 4036 if ( 4037 not explicitly_null_ordered 4038 and ( 4039 (not desc and self.dialect.NULL_ORDERING == "nulls_are_small") 4040 or (desc and self.dialect.NULL_ORDERING != "nulls_are_small") 4041 ) 4042 and self.dialect.NULL_ORDERING != "nulls_are_last" 4043 ): 4044 nulls_first = True 4045 4046 if self._match_text_seq("WITH", "FILL"): 4047 with_fill = self.expression( 4048 exp.WithFill, 4049 **{ # type: ignore 4050 "from": self._match(TokenType.FROM) and self._parse_bitwise(), 4051 "to": self._match_text_seq("TO") and self._parse_bitwise(), 4052 "step": self._match_text_seq("STEP") and self._parse_bitwise(), 4053 }, 4054 ) 4055 else: 4056 with_fill = None 4057 4058 return self.expression( 4059 exp.Ordered, this=this, desc=desc, nulls_first=nulls_first, with_fill=with_fill 4060 ) 4061 4062 def _parse_limit( 4063 self, 4064 this: t.Optional[exp.Expression] = None, 4065 top: bool = False, 4066 skip_limit_token: bool = False, 4067 ) -> t.Optional[exp.Expression]: 4068 if skip_limit_token or self._match(TokenType.TOP if top else TokenType.LIMIT): 4069 comments = self._prev_comments 4070 if top: 4071 limit_paren = self._match(TokenType.L_PAREN) 4072 expression = self._parse_term() if limit_paren else self._parse_number() 4073 4074 if limit_paren: 4075 self._match_r_paren() 4076 else: 4077 expression = self._parse_term() 4078 4079 if self._match(TokenType.COMMA): 4080 offset = expression 4081 expression = self._parse_term() 4082 else: 4083 offset = None 4084 4085 limit_exp = self.expression( 4086 exp.Limit, 4087 this=this, 4088 expression=expression, 4089 offset=offset, 4090 comments=comments, 4091 expressions=self._parse_limit_by(), 4092 ) 4093 4094 return limit_exp 4095 4096 if self._match(TokenType.FETCH): 4097 direction = self._match_set((TokenType.FIRST, TokenType.NEXT)) 4098 direction = self._prev.text.upper() if direction else "FIRST" 4099 4100 count = self._parse_field(tokens=self.FETCH_TOKENS) 4101 percent = 
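# Illustrative sketch of _parse_ordered, assuming the default dialect: an explicit
# NULLS LAST pins nulls_first to False, bypassing the NULL_ORDERING inference above.
import sqlglot
from sqlglot import exp

ordered = sqlglot.parse_one("SELECT * FROM t ORDER BY a DESC NULLS LAST").find(exp.Ordered)
assert ordered.args["desc"] is True and ordered.args["nulls_first"] is False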
self._match(TokenType.PERCENT) 4102 4103 self._match_set((TokenType.ROW, TokenType.ROWS)) 4104 4105 only = self._match_text_seq("ONLY") 4106 with_ties = self._match_text_seq("WITH", "TIES") 4107 4108 if only and with_ties: 4109 self.raise_error("Cannot specify both ONLY and WITH TIES in FETCH clause") 4110 4111 return self.expression( 4112 exp.Fetch, 4113 direction=direction, 4114 count=count, 4115 percent=percent, 4116 with_ties=with_ties, 4117 ) 4118 4119 return this 4120 4121 def _parse_offset(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 4122 if not self._match(TokenType.OFFSET): 4123 return this 4124 4125 count = self._parse_term() 4126 self._match_set((TokenType.ROW, TokenType.ROWS)) 4127 4128 return self.expression( 4129 exp.Offset, this=this, expression=count, expressions=self._parse_limit_by() 4130 ) 4131 4132 def _parse_limit_by(self) -> t.Optional[t.List[exp.Expression]]: 4133 return self._match_text_seq("BY") and self._parse_csv(self._parse_bitwise) 4134 4135 def _parse_locks(self) -> t.List[exp.Lock]: 4136 locks = [] 4137 while True: 4138 if self._match_text_seq("FOR", "UPDATE"): 4139 update = True 4140 elif self._match_text_seq("FOR", "SHARE") or self._match_text_seq( 4141 "LOCK", "IN", "SHARE", "MODE" 4142 ): 4143 update = False 4144 else: 4145 break 4146 4147 expressions = None 4148 if self._match_text_seq("OF"): 4149 expressions = self._parse_csv(lambda: self._parse_table(schema=True)) 4150 4151 wait: t.Optional[bool | exp.Expression] = None 4152 if self._match_text_seq("NOWAIT"): 4153 wait = True 4154 elif self._match_text_seq("WAIT"): 4155 wait = self._parse_primary() 4156 elif self._match_text_seq("SKIP", "LOCKED"): 4157 wait = False 4158 4159 locks.append( 4160 self.expression(exp.Lock, update=update, expressions=expressions, wait=wait) 4161 ) 4162 4163 return locks 4164 4165 def _parse_set_operations(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 4166 while this and self._match_set(self.SET_OPERATIONS): 4167 token_type = self._prev.token_type 4168 4169 if token_type == TokenType.UNION: 4170 operation: t.Type[exp.SetOperation] = exp.Union 4171 elif token_type == TokenType.EXCEPT: 4172 operation = exp.Except 4173 else: 4174 operation = exp.Intersect 4175 4176 comments = self._prev.comments 4177 distinct = self._match(TokenType.DISTINCT) or not self._match(TokenType.ALL) 4178 by_name = self._match_text_seq("BY", "NAME") 4179 expression = self._parse_select(nested=True, parse_set_operation=False) 4180 4181 this = self.expression( 4182 operation, 4183 comments=comments, 4184 this=this, 4185 distinct=distinct, 4186 by_name=by_name, 4187 expression=expression, 4188 ) 4189 4190 if isinstance(this, exp.SetOperation) and self.MODIFIERS_ATTACHED_TO_SET_OP: 4191 expression = this.expression 4192 4193 if expression: 4194 for arg in self.SET_OP_MODIFIERS: 4195 expr = expression.args.get(arg) 4196 if expr: 4197 this.set(arg, expr.pop()) 4198 4199 return this 4200 4201 def _parse_expression(self) -> t.Optional[exp.Expression]: 4202 return self._parse_alias(self._parse_assignment()) 4203 4204 def _parse_assignment(self) -> t.Optional[exp.Expression]: 4205 this = self._parse_disjunction() 4206 if not this and self._next and self._next.token_type in self.ASSIGNMENT: 4207 # This allows us to parse <non-identifier token> := <expr> 4208 this = exp.column( 4209 t.cast(str, self._advance_any(ignore_reserved=True) and self._prev.text) 4210 ) 4211 4212 while self._match_set(self.ASSIGNMENT): 4213 this = self.expression( 4214 
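# Illustrative sketch of _parse_set_operations: UNION is distinct unless ALL follows,
# and the flag is stored on the resulting exp.Union node.
import sqlglot
from sqlglot import exp

union = sqlglot.parse_one("SELECT 1 UNION ALL SELECT 2")
assert isinstance(union, exp.Union) and union.args["distinct"] is False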
self.ASSIGNMENT[self._prev.token_type], 4215 this=this, 4216 comments=self._prev_comments, 4217 expression=self._parse_assignment(), 4218 ) 4219 4220 return this 4221 4222 def _parse_disjunction(self) -> t.Optional[exp.Expression]: 4223 return self._parse_tokens(self._parse_conjunction, self.DISJUNCTION) 4224 4225 def _parse_conjunction(self) -> t.Optional[exp.Expression]: 4226 return self._parse_tokens(self._parse_equality, self.CONJUNCTION) 4227 4228 def _parse_equality(self) -> t.Optional[exp.Expression]: 4229 return self._parse_tokens(self._parse_comparison, self.EQUALITY) 4230 4231 def _parse_comparison(self) -> t.Optional[exp.Expression]: 4232 return self._parse_tokens(self._parse_range, self.COMPARISON) 4233 4234 def _parse_range(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 4235 this = this or self._parse_bitwise() 4236 negate = self._match(TokenType.NOT) 4237 4238 if self._match_set(self.RANGE_PARSERS): 4239 expression = self.RANGE_PARSERS[self._prev.token_type](self, this) 4240 if not expression: 4241 return this 4242 4243 this = expression 4244 elif self._match(TokenType.ISNULL): 4245 this = self.expression(exp.Is, this=this, expression=exp.Null()) 4246 4247 # Postgres supports ISNULL and NOTNULL for conditions. 4248 # https://blog.andreiavram.ro/postgresql-null-composite-type/ 4249 if self._match(TokenType.NOTNULL): 4250 this = self.expression(exp.Is, this=this, expression=exp.Null()) 4251 this = self.expression(exp.Not, this=this) 4252 4253 if negate: 4254 this = self._negate_range(this) 4255 4256 if self._match(TokenType.IS): 4257 this = self._parse_is(this) 4258 4259 return this 4260 4261 def _negate_range(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 4262 if not this: 4263 return this 4264 4265 return self.expression(exp.Not, this=this) 4266 4267 def _parse_is(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 4268 index = self._index - 1 4269 negate = self._match(TokenType.NOT) 4270 4271 if self._match_text_seq("DISTINCT", "FROM"): 4272 klass = exp.NullSafeEQ if negate else exp.NullSafeNEQ 4273 return self.expression(klass, this=this, expression=self._parse_bitwise()) 4274 4275 expression = self._parse_null() or self._parse_boolean() 4276 if not expression: 4277 self._retreat(index) 4278 return None 4279 4280 this = self.expression(exp.Is, this=this, expression=expression) 4281 return self.expression(exp.Not, this=this) if negate else this 4282 4283 def _parse_in(self, this: t.Optional[exp.Expression], alias: bool = False) -> exp.In: 4284 unnest = self._parse_unnest(with_alias=False) 4285 if unnest: 4286 this = self.expression(exp.In, this=this, unnest=unnest) 4287 elif self._match_set((TokenType.L_PAREN, TokenType.L_BRACKET)): 4288 matched_l_paren = self._prev.token_type == TokenType.L_PAREN 4289 expressions = self._parse_csv(lambda: self._parse_select_or_expression(alias=alias)) 4290 4291 if len(expressions) == 1 and isinstance(expressions[0], exp.Query): 4292 this = self.expression(exp.In, this=this, query=expressions[0].subquery(copy=False)) 4293 else: 4294 this = self.expression(exp.In, this=this, expressions=expressions) 4295 4296 if matched_l_paren: 4297 self._match_r_paren(this) 4298 elif not self._match(TokenType.R_BRACKET, expression=this): 4299 self.raise_error("Expecting ]") 4300 else: 4301 this = self.expression(exp.In, this=this, field=self._parse_field()) 4302 4303 return this 4304 4305 def _parse_between(self, this: t.Optional[exp.Expression]) -> exp.Between: 4306 low = 
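# Illustrative sketch of _parse_is, assuming the default dialect: IS NOT DISTINCT FROM
# maps to null-safe equality, IS DISTINCT FROM to its negation.
import sqlglot
from sqlglot import exp

assert sqlglot.parse_one("SELECT a IS NOT DISTINCT FROM b FROM t").find(exp.NullSafeEQ)
assert sqlglot.parse_one("SELECT a IS DISTINCT FROM b FROM t").find(exp.NullSafeNEQ)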
self._parse_bitwise() 4307 self._match(TokenType.AND) 4308 high = self._parse_bitwise() 4309 return self.expression(exp.Between, this=this, low=low, high=high) 4310 4311 def _parse_escape(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 4312 if not self._match(TokenType.ESCAPE): 4313 return this 4314 return self.expression(exp.Escape, this=this, expression=self._parse_string()) 4315 4316 def _parse_interval(self, match_interval: bool = True) -> t.Optional[exp.Add | exp.Interval]: 4317 index = self._index 4318 4319 if not self._match(TokenType.INTERVAL) and match_interval: 4320 return None 4321 4322 if self._match(TokenType.STRING, advance=False): 4323 this = self._parse_primary() 4324 else: 4325 this = self._parse_term() 4326 4327 if not this or ( 4328 isinstance(this, exp.Column) 4329 and not this.table 4330 and not this.this.quoted 4331 and this.name.upper() == "IS" 4332 ): 4333 self._retreat(index) 4334 return None 4335 4336 unit = self._parse_function() or ( 4337 not self._match(TokenType.ALIAS, advance=False) 4338 and self._parse_var(any_token=True, upper=True) 4339 ) 4340 4341 # Most dialects support, e.g., the form INTERVAL '5' day, thus we try to parse 4342 # each INTERVAL expression into this canonical form so it's easy to transpile 4343 if this and this.is_number: 4344 this = exp.Literal.string(this.to_py()) 4345 elif this and this.is_string: 4346 parts = exp.INTERVAL_STRING_RE.findall(this.name) 4347 if len(parts) == 1: 4348 if unit: 4349 # Unconsume the eagerly-parsed unit, since the real unit was part of the string 4350 self._retreat(self._index - 1) 4351 4352 this = exp.Literal.string(parts[0][0]) 4353 unit = self.expression(exp.Var, this=parts[0][1].upper()) 4354 4355 if self.INTERVAL_SPANS and self._match_text_seq("TO"): 4356 unit = self.expression( 4357 exp.IntervalSpan, this=unit, expression=self._parse_var(any_token=True, upper=True) 4358 ) 4359 4360 interval = self.expression(exp.Interval, this=this, unit=unit) 4361 4362 index = self._index 4363 self._match(TokenType.PLUS) 4364 4365 # Convert INTERVAL 'val_1' unit_1 [+] ... 
[+] 'val_n' unit_n into a sum of intervals 4366 if self._match_set((TokenType.STRING, TokenType.NUMBER), advance=False): 4367 return self.expression( 4368 exp.Add, this=interval, expression=self._parse_interval(match_interval=False) 4369 ) 4370 4371 self._retreat(index) 4372 return interval 4373 4374 def _parse_bitwise(self) -> t.Optional[exp.Expression]: 4375 this = self._parse_term() 4376 4377 while True: 4378 if self._match_set(self.BITWISE): 4379 this = self.expression( 4380 self.BITWISE[self._prev.token_type], 4381 this=this, 4382 expression=self._parse_term(), 4383 ) 4384 elif self.dialect.DPIPE_IS_STRING_CONCAT and self._match(TokenType.DPIPE): 4385 this = self.expression( 4386 exp.DPipe, 4387 this=this, 4388 expression=self._parse_term(), 4389 safe=not self.dialect.STRICT_STRING_CONCAT, 4390 ) 4391 elif self._match(TokenType.DQMARK): 4392 this = self.expression(exp.Coalesce, this=this, expressions=self._parse_term()) 4393 elif self._match_pair(TokenType.LT, TokenType.LT): 4394 this = self.expression( 4395 exp.BitwiseLeftShift, this=this, expression=self._parse_term() 4396 ) 4397 elif self._match_pair(TokenType.GT, TokenType.GT): 4398 this = self.expression( 4399 exp.BitwiseRightShift, this=this, expression=self._parse_term() 4400 ) 4401 else: 4402 break 4403 4404 return this 4405 4406 def _parse_term(self) -> t.Optional[exp.Expression]: 4407 this = self._parse_factor() 4408 4409 while self._match_set(self.TERM): 4410 klass = self.TERM[self._prev.token_type] 4411 comments = self._prev_comments 4412 expression = self._parse_factor() 4413 4414 this = self.expression(klass, this=this, comments=comments, expression=expression) 4415 4416 if isinstance(this, exp.Collate): 4417 expr = this.expression 4418 4419 # Preserve collations such as pg_catalog."default" (Postgres) as columns, otherwise 4420 # fallback to Identifier / Var 4421 if isinstance(expr, exp.Column) and len(expr.parts) == 1: 4422 ident = expr.this 4423 if isinstance(ident, exp.Identifier): 4424 this.set("expression", ident if ident.quoted else exp.var(ident.name)) 4425 4426 return this 4427 4428 def _parse_factor(self) -> t.Optional[exp.Expression]: 4429 parse_method = self._parse_exponent if self.EXPONENT else self._parse_unary 4430 this = parse_method() 4431 4432 while self._match_set(self.FACTOR): 4433 klass = self.FACTOR[self._prev.token_type] 4434 comments = self._prev_comments 4435 expression = parse_method() 4436 4437 if not expression and klass is exp.IntDiv and self._prev.text.isalpha(): 4438 self._retreat(self._index - 1) 4439 return this 4440 4441 this = self.expression(klass, this=this, comments=comments, expression=expression) 4442 4443 if isinstance(this, exp.Div): 4444 this.args["typed"] = self.dialect.TYPED_DIVISION 4445 this.args["safe"] = self.dialect.SAFE_DIVISION 4446 4447 return this 4448 4449 def _parse_exponent(self) -> t.Optional[exp.Expression]: 4450 return self._parse_tokens(self._parse_unary, self.EXPONENT) 4451 4452 def _parse_unary(self) -> t.Optional[exp.Expression]: 4453 if self._match_set(self.UNARY_PARSERS): 4454 return self.UNARY_PARSERS[self._prev.token_type](self) 4455 return self._parse_at_time_zone(self._parse_type()) 4456 4457 def _parse_type( 4458 self, parse_interval: bool = True, fallback_to_identifier: bool = False 4459 ) -> t.Optional[exp.Expression]: 4460 interval = parse_interval and self._parse_interval() 4461 if interval: 4462 return interval 4463 4464 index = self._index 4465 data_type = self._parse_types(check_func=True, allow_identifiers=False) 4466 4467 # parse_types() 
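# Illustrative sketch of _parse_interval's canonicalization, assuming the default
# dialect: the unit is pulled out of the string, so INTERVAL '5 days' and
# INTERVAL '5' DAYS produce the same tree.
import sqlglot
from sqlglot import exp

interval = sqlglot.parse_one("SELECT INTERVAL '5 days'").find(exp.Interval)
assert interval.this.name == "5" and interval.args["unit"].name == "DAYS"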
returns a Cast if we parsed BQ's inline constructor <type>(<values>) e.g. 4468 # STRUCT<a INT, b STRING>(1, 'foo'), which is canonicalized to CAST(<values> AS <type>) 4469 if isinstance(data_type, exp.Cast): 4470 # This constructor can contain ops directly after it, for instance struct unnesting: 4471 # STRUCT<a INT, b STRING>(1, 'foo').* --> CAST(STRUCT(1, 'foo') AS STRUCT<a INT, b STRING>).* 4472 return self._parse_column_ops(data_type) 4473 4474 if data_type: 4475 index2 = self._index 4476 this = self._parse_primary() 4477 4478 if isinstance(this, exp.Literal): 4479 parser = self.TYPE_LITERAL_PARSERS.get(data_type.this) 4480 if parser: 4481 return parser(self, this, data_type) 4482 4483 return self.expression(exp.Cast, this=this, to=data_type) 4484 4485 # The expressions arg gets set by the parser when we have something like DECIMAL(38, 0) 4486 # in the input SQL. In that case, we'll produce these tokens: DECIMAL ( 38 , 0 ) 4487 # 4488 # If the index difference here is greater than 1, that means the parser itself must have 4489 # consumed additional tokens such as the DECIMAL scale and precision in the above example. 4490 # 4491 # If it's not greater than 1, then it must be 1, because we've consumed at least the type 4492 # keyword, meaning that the expressions arg of the DataType must have gotten set by a 4493 # callable in the TYPE_CONVERTERS mapping. For example, Snowflake converts DECIMAL to 4494 # DECIMAL(38, 0) in order to facilitate the data type's transpilation. 4495 # 4496 # In these cases, we don't really want to return the converted type, but instead retreat 4497 # and try to parse a Column or Identifier in the section below. 4498 if data_type.expressions and index2 - index > 1: 4499 self._retreat(index2) 4500 return self._parse_column_ops(data_type) 4501 4502 self._retreat(index) 4503 4504 if fallback_to_identifier: 4505 return self._parse_id_var() 4506 4507 this = self._parse_column() 4508 return this and self._parse_column_ops(this) 4509 4510 def _parse_type_size(self) -> t.Optional[exp.DataTypeParam]: 4511 this = self._parse_type() 4512 if not this: 4513 return None 4514 4515 if isinstance(this, exp.Column) and not this.table: 4516 this = exp.var(this.name.upper()) 4517 4518 return self.expression( 4519 exp.DataTypeParam, this=this, expression=self._parse_var(any_token=True) 4520 ) 4521 4522 def _parse_types( 4523 self, check_func: bool = False, schema: bool = False, allow_identifiers: bool = True 4524 ) -> t.Optional[exp.Expression]: 4525 index = self._index 4526 4527 this: t.Optional[exp.Expression] = None 4528 prefix = self._match_text_seq("SYSUDTLIB", ".") 4529 4530 if not self._match_set(self.TYPE_TOKENS): 4531 identifier = allow_identifiers and self._parse_id_var( 4532 any_token=False, tokens=(TokenType.VAR,) 4533 ) 4534 if isinstance(identifier, exp.Identifier): 4535 tokens = self.dialect.tokenize(identifier.sql(dialect=self.dialect)) 4536 4537 if len(tokens) != 1: 4538 self.raise_error("Unexpected identifier", self._prev) 4539 4540 if tokens[0].token_type in self.TYPE_TOKENS: 4541 self._prev = tokens[0] 4542 elif self.dialect.SUPPORTS_USER_DEFINED_TYPES: 4543 type_name = identifier.name 4544 4545 while self._match(TokenType.DOT): 4546 type_name = f"{type_name}.{self._advance_any() and self._prev.text}" 4547 4548 this = exp.DataType.build(type_name, udt=True) 4549 else: 4550 self._retreat(self._index - 1) 4551 return None 4552 else: 4553 return None 4554 4555 type_token = self._prev.token_type 4556 4557 if type_token == TokenType.PSEUDO_TYPE: 4558 return
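# Illustrative sketch of the Cast branch above, assuming the bigquery dialect: a typed
# constructor parses as CAST(<values> AS <type>).
import sqlglot
from sqlglot import exp

cast = sqlglot.parse_one("SELECT STRUCT<a INT64>(1)", read="bigquery").find(exp.Cast)
assert cast is not None and cast.to.is_type("struct")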
self.expression(exp.PseudoType, this=self._prev.text.upper()) 4559 4560 if type_token == TokenType.OBJECT_IDENTIFIER: 4561 return self.expression(exp.ObjectIdentifier, this=self._prev.text.upper()) 4562 4563 # https://materialize.com/docs/sql/types/map/ 4564 if type_token == TokenType.MAP and self._match(TokenType.L_BRACKET): 4565 key_type = self._parse_types( 4566 check_func=check_func, schema=schema, allow_identifiers=allow_identifiers 4567 ) 4568 if not self._match(TokenType.FARROW): 4569 self._retreat(index) 4570 return None 4571 4572 value_type = self._parse_types( 4573 check_func=check_func, schema=schema, allow_identifiers=allow_identifiers 4574 ) 4575 if not self._match(TokenType.R_BRACKET): 4576 self._retreat(index) 4577 return None 4578 4579 return exp.DataType( 4580 this=exp.DataType.Type.MAP, 4581 expressions=[key_type, value_type], 4582 nested=True, 4583 prefix=prefix, 4584 ) 4585 4586 nested = type_token in self.NESTED_TYPE_TOKENS 4587 is_struct = type_token in self.STRUCT_TYPE_TOKENS 4588 is_aggregate = type_token in self.AGGREGATE_TYPE_TOKENS 4589 expressions = None 4590 maybe_func = False 4591 4592 if self._match(TokenType.L_PAREN): 4593 if is_struct: 4594 expressions = self._parse_csv(lambda: self._parse_struct_types(type_required=True)) 4595 elif nested: 4596 expressions = self._parse_csv( 4597 lambda: self._parse_types( 4598 check_func=check_func, schema=schema, allow_identifiers=allow_identifiers 4599 ) 4600 ) 4601 elif type_token in self.ENUM_TYPE_TOKENS: 4602 expressions = self._parse_csv(self._parse_equality) 4603 elif is_aggregate: 4604 func_or_ident = self._parse_function(anonymous=True) or self._parse_id_var( 4605 any_token=False, tokens=(TokenType.VAR,) 4606 ) 4607 if not func_or_ident or not self._match(TokenType.COMMA): 4608 return None 4609 expressions = self._parse_csv( 4610 lambda: self._parse_types( 4611 check_func=check_func, schema=schema, allow_identifiers=allow_identifiers 4612 ) 4613 ) 4614 expressions.insert(0, func_or_ident) 4615 else: 4616 expressions = self._parse_csv(self._parse_type_size) 4617 4618 # https://docs.snowflake.com/en/sql-reference/data-types-vector 4619 if type_token == TokenType.VECTOR and len(expressions) == 2: 4620 expressions[0] = exp.DataType.build(expressions[0].name, dialect=self.dialect) 4621 4622 if not expressions or not self._match(TokenType.R_PAREN): 4623 self._retreat(index) 4624 return None 4625 4626 maybe_func = True 4627 4628 values: t.Optional[t.List[exp.Expression]] = None 4629 4630 if nested and self._match(TokenType.LT): 4631 if is_struct: 4632 expressions = self._parse_csv(lambda: self._parse_struct_types(type_required=True)) 4633 else: 4634 expressions = self._parse_csv( 4635 lambda: self._parse_types( 4636 check_func=check_func, schema=schema, allow_identifiers=allow_identifiers 4637 ) 4638 ) 4639 4640 if not self._match(TokenType.GT): 4641 self.raise_error("Expecting >") 4642 4643 if self._match_set((TokenType.L_BRACKET, TokenType.L_PAREN)): 4644 values = self._parse_csv(self._parse_assignment) 4645 self._match_set((TokenType.R_BRACKET, TokenType.R_PAREN)) 4646 4647 if type_token in self.TIMESTAMPS: 4648 if self._match_text_seq("WITH", "TIME", "ZONE"): 4649 maybe_func = False 4650 tz_type = ( 4651 exp.DataType.Type.TIMETZ 4652 if type_token in self.TIMES 4653 else exp.DataType.Type.TIMESTAMPTZ 4654 ) 4655 this = exp.DataType(this=tz_type, expressions=expressions) 4656 elif self._match_text_seq("WITH", "LOCAL", "TIME", "ZONE"): 4657 maybe_func = False 4658 this = 
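# Illustrative sketch of the WITH TIME ZONE handling in _parse_types, assuming the
# default dialect: the spelled-out modifier collapses into the TIMESTAMPTZ type.
import sqlglot
from sqlglot import exp

dt = sqlglot.parse_one("CREATE TABLE t (ts TIMESTAMP WITH TIME ZONE)").find(exp.DataType)
assert dt.is_type("timestamptz")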
exp.DataType(this=exp.DataType.Type.TIMESTAMPLTZ, expressions=expressions) 4659 elif self._match_text_seq("WITHOUT", "TIME", "ZONE"): 4660 maybe_func = False 4661 elif type_token == TokenType.INTERVAL: 4662 unit = self._parse_var(upper=True) 4663 if unit: 4664 if self._match_text_seq("TO"): 4665 unit = exp.IntervalSpan(this=unit, expression=self._parse_var(upper=True)) 4666 4667 this = self.expression(exp.DataType, this=self.expression(exp.Interval, unit=unit)) 4668 else: 4669 this = self.expression(exp.DataType, this=exp.DataType.Type.INTERVAL) 4670 4671 if maybe_func and check_func: 4672 index2 = self._index 4673 peek = self._parse_string() 4674 4675 if not peek: 4676 self._retreat(index) 4677 return None 4678 4679 self._retreat(index2) 4680 4681 if not this: 4682 if self._match_text_seq("UNSIGNED"): 4683 unsigned_type_token = self.SIGNED_TO_UNSIGNED_TYPE_TOKEN.get(type_token) 4684 if not unsigned_type_token: 4685 self.raise_error(f"Cannot convert {type_token.value} to unsigned.") 4686 4687 type_token = unsigned_type_token or type_token 4688 4689 this = exp.DataType( 4690 this=exp.DataType.Type[type_token.value], 4691 expressions=expressions, 4692 nested=nested, 4693 prefix=prefix, 4694 ) 4695 4696 # Empty arrays/structs are allowed 4697 if values is not None: 4698 cls = exp.Struct if is_struct else exp.Array 4699 this = exp.cast(cls(expressions=values), this, copy=False) 4700 4701 elif expressions: 4702 this.set("expressions", expressions) 4703 4704 # https://materialize.com/docs/sql/types/list/#type-name 4705 while self._match(TokenType.LIST): 4706 this = exp.DataType(this=exp.DataType.Type.LIST, expressions=[this], nested=True) 4707 4708 index = self._index 4709 4710 # Postgres supports the INT ARRAY[3] syntax as a synonym for INT[3] 4711 matched_array = self._match(TokenType.ARRAY) 4712 4713 while self._curr: 4714 datatype_token = self._prev.token_type 4715 matched_l_bracket = self._match(TokenType.L_BRACKET) 4716 if not matched_l_bracket and not matched_array: 4717 break 4718 4719 matched_array = False 4720 values = self._parse_csv(self._parse_assignment) or None 4721 if ( 4722 values 4723 and not schema 4724 and ( 4725 not self.dialect.SUPPORTS_FIXED_SIZE_ARRAYS or datatype_token == TokenType.ARRAY 4726 ) 4727 ): 4728 # Retreating here means that we should not parse the following values as part of the data type, e.g. in DuckDB 4729 # ARRAY[1] should retreat and instead be parsed into exp.Array in contrast to INT[x][y] which denotes a fixed-size array data type 4730 self._retreat(index) 4731 break 4732 4733 this = exp.DataType( 4734 this=exp.DataType.Type.ARRAY, expressions=[this], values=values, nested=True 4735 ) 4736 self._match(TokenType.R_BRACKET) 4737 4738 if self.TYPE_CONVERTERS and isinstance(this.this, exp.DataType.Type): 4739 converter = self.TYPE_CONVERTERS.get(this.this) 4740 if converter: 4741 this = converter(t.cast(exp.DataType, this)) 4742 4743 return this 4744 4745 def _parse_struct_types(self, type_required: bool = False) -> t.Optional[exp.Expression]: 4746 index = self._index 4747 4748 if ( 4749 self._curr 4750 and self._next 4751 and self._curr.token_type in self.TYPE_TOKENS 4752 and self._next.token_type in self.TYPE_TOKENS 4753 ): 4754 # Takes care of special cases like `STRUCT<list ARRAY<...>>` where the identifier is also a 4755 # type token. 
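# Illustrative sketch of the bracket loop above, assuming the postgres dialect: INT[]
# nests the element type inside an ARRAY DataType.
import sqlglot
from sqlglot import exp

dt = sqlglot.parse_one("CREATE TABLE t (xs INT[])", read="postgres").find(exp.DataType)
assert dt.is_type("array") and dt.expressions[0].is_type("int")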
Without this, the list will be parsed as a type and we'll eventually crash 4756 this = self._parse_id_var() 4757 else: 4758 this = ( 4759 self._parse_type(parse_interval=False, fallback_to_identifier=True) 4760 or self._parse_id_var() 4761 ) 4762 4763 self._match(TokenType.COLON) 4764 4765 if ( 4766 type_required 4767 and not isinstance(this, exp.DataType) 4768 and not self._match_set(self.TYPE_TOKENS, advance=False) 4769 ): 4770 self._retreat(index) 4771 return self._parse_types() 4772 4773 return self._parse_column_def(this) 4774 4775 def _parse_at_time_zone(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 4776 if not self._match_text_seq("AT", "TIME", "ZONE"): 4777 return this 4778 return self.expression(exp.AtTimeZone, this=this, zone=self._parse_unary()) 4779 4780 def _parse_column(self) -> t.Optional[exp.Expression]: 4781 this = self._parse_column_reference() 4782 column = self._parse_column_ops(this) if this else self._parse_bracket(this) 4783 4784 if self.dialect.SUPPORTS_COLUMN_JOIN_MARKS and column: 4785 column.set("join_mark", self._match(TokenType.JOIN_MARKER)) 4786 4787 return column 4788 4789 def _parse_column_reference(self) -> t.Optional[exp.Expression]: 4790 this = self._parse_field() 4791 if ( 4792 not this 4793 and self._match(TokenType.VALUES, advance=False) 4794 and self.VALUES_FOLLOWED_BY_PAREN 4795 and (not self._next or self._next.token_type != TokenType.L_PAREN) 4796 ): 4797 this = self._parse_id_var() 4798 4799 if isinstance(this, exp.Identifier): 4800 # We bubble up comments from the Identifier to the Column 4801 this = self.expression(exp.Column, comments=this.pop_comments(), this=this) 4802 4803 return this 4804 4805 def _parse_colon_as_variant_extract( 4806 self, this: t.Optional[exp.Expression] 4807 ) -> t.Optional[exp.Expression]: 4808 casts = [] 4809 json_path = [] 4810 4811 while self._match(TokenType.COLON): 4812 start_index = self._index 4813 4814 # Snowflake allows reserved keywords as json keys but advance_any() excludes TokenType.SELECT from any_tokens=True 4815 path = self._parse_column_ops( 4816 self._parse_field(any_token=True, tokens=(TokenType.SELECT,)) 4817 ) 4818 4819 # The cast :: operator has a lower precedence than the extraction operator :, so 4820 # we rearrange the AST appropriately to avoid casting the JSON path 4821 while isinstance(path, exp.Cast): 4822 casts.append(path.to) 4823 path = path.this 4824 4825 if casts: 4826 dcolon_offset = next( 4827 i 4828 for i, t in enumerate(self._tokens[start_index:]) 4829 if t.token_type == TokenType.DCOLON 4830 ) 4831 end_token = self._tokens[start_index + dcolon_offset - 1] 4832 else: 4833 end_token = self._prev 4834 4835 if path: 4836 json_path.append(self._find_sql(self._tokens[start_index], end_token)) 4837 4838 # The VARIANT extract in Snowflake/Databricks is parsed as a JSONExtract; Snowflake uses the json_path in GET_PATH() while 4839 # Databricks transforms it back to the colon/dot notation 4840 if json_path: 4841 this = self.expression( 4842 exp.JSONExtract, 4843 this=this, 4844 expression=self.dialect.to_json_path(exp.Literal.string(".".join(json_path))), 4845 variant_extract=True, 4846 ) 4847 4848 while casts: 4849 this = self.expression(exp.Cast, this=this, to=casts.pop()) 4850 4851 return this 4852 4853 def _parse_dcolon(self) -> t.Optional[exp.Expression]: 4854 return self._parse_types() 4855 4856 def _parse_column_ops(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 4857 this = self._parse_bracket(this) 4858 4859 while 
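# Illustrative sketch of _parse_colon_as_variant_extract, assuming the snowflake
# dialect: the :: cast is rearranged to wrap the JSON extraction, not the path.
import sqlglot
from sqlglot import exp

expr = sqlglot.parse_one("SELECT v:a.b::INT FROM t", read="snowflake").expressions[0]
assert isinstance(expr, exp.Cast) and isinstance(expr.this, exp.JSONExtract)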
self._match_set(self.COLUMN_OPERATORS): 4860 op_token = self._prev.token_type 4861 op = self.COLUMN_OPERATORS.get(op_token) 4862 4863 if op_token == TokenType.DCOLON: 4864 field = self._parse_dcolon() 4865 if not field: 4866 self.raise_error("Expected type") 4867 elif op and self._curr: 4868 field = self._parse_column_reference() 4869 else: 4870 field = self._parse_field(any_token=True, anonymous_func=True) 4871 4872 if isinstance(field, exp.Func) and this: 4873 # bigquery allows function calls like x.y.count(...) 4874 # SAFE.SUBSTR(...) 4875 # https://cloud.google.com/bigquery/docs/reference/standard-sql/functions-reference#function_call_rules 4876 this = exp.replace_tree( 4877 this, 4878 lambda n: ( 4879 self.expression(exp.Dot, this=n.args.get("table"), expression=n.this) 4880 if n.table 4881 else n.this 4882 ) 4883 if isinstance(n, exp.Column) 4884 else n, 4885 ) 4886 4887 if op: 4888 this = op(self, this, field) 4889 elif isinstance(this, exp.Column) and not this.args.get("catalog"): 4890 this = self.expression( 4891 exp.Column, 4892 this=field, 4893 table=this.this, 4894 db=this.args.get("table"), 4895 catalog=this.args.get("db"), 4896 ) 4897 else: 4898 this = self.expression(exp.Dot, this=this, expression=field) 4899 4900 this = self._parse_bracket(this) 4901 4902 return self._parse_colon_as_variant_extract(this) if self.COLON_IS_VARIANT_EXTRACT else this 4903 4904 def _parse_primary(self) -> t.Optional[exp.Expression]: 4905 if self._match_set(self.PRIMARY_PARSERS): 4906 token_type = self._prev.token_type 4907 primary = self.PRIMARY_PARSERS[token_type](self, self._prev) 4908 4909 if token_type == TokenType.STRING: 4910 expressions = [primary] 4911 while self._match(TokenType.STRING): 4912 expressions.append(exp.Literal.string(self._prev.text)) 4913 4914 if len(expressions) > 1: 4915 return self.expression(exp.Concat, expressions=expressions) 4916 4917 return primary 4918 4919 if self._match_pair(TokenType.DOT, TokenType.NUMBER): 4920 return exp.Literal.number(f"0.{self._prev.text}") 4921 4922 if self._match(TokenType.L_PAREN): 4923 comments = self._prev_comments 4924 query = self._parse_select() 4925 4926 if query: 4927 expressions = [query] 4928 else: 4929 expressions = self._parse_expressions() 4930 4931 this = self._parse_query_modifiers(seq_get(expressions, 0)) 4932 4933 if not this and self._match(TokenType.R_PAREN, advance=False): 4934 this = self.expression(exp.Tuple) 4935 elif isinstance(this, exp.UNWRAPPED_QUERIES): 4936 this = self._parse_subquery(this=this, parse_alias=False) 4937 elif isinstance(this, exp.Subquery): 4938 this = self._parse_subquery( 4939 this=self._parse_set_operations(this), parse_alias=False 4940 ) 4941 elif len(expressions) > 1 or self._prev.token_type == TokenType.COMMA: 4942 this = self.expression(exp.Tuple, expressions=expressions) 4943 else: 4944 this = self.expression(exp.Paren, this=this) 4945 4946 if this: 4947 this.add_comments(comments) 4948 4949 self._match_r_paren(expression=this) 4950 return this 4951 4952 return None 4953 4954 def _parse_field( 4955 self, 4956 any_token: bool = False, 4957 tokens: t.Optional[t.Collection[TokenType]] = None, 4958 anonymous_func: bool = False, 4959 ) -> t.Optional[exp.Expression]: 4960 if anonymous_func: 4961 field = ( 4962 self._parse_function(anonymous=anonymous_func, any_token=any_token) 4963 or self._parse_primary() 4964 ) 4965 else: 4966 field = self._parse_primary() or self._parse_function( 4967 anonymous=anonymous_func, any_token=any_token 4968 ) 4969 return field or 
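# Illustrative sketch of _parse_primary's string folding, assuming the default
# dialect: consecutive string literals collapse into a single exp.Concat.
import sqlglot
from sqlglot import exp

expr = sqlglot.parse_one("SELECT 'a' 'b' 'c'").expressions[0]
assert isinstance(expr, exp.Concat) and len(expr.expressions) == 3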
self._parse_id_var(any_token=any_token, tokens=tokens) 4970 4971 def _parse_function( 4972 self, 4973 functions: t.Optional[t.Dict[str, t.Callable]] = None, 4974 anonymous: bool = False, 4975 optional_parens: bool = True, 4976 any_token: bool = False, 4977 ) -> t.Optional[exp.Expression]: 4978 # This allows us to also parse {fn <function>} syntax (Snowflake, MySQL support this) 4979 # See: https://community.snowflake.com/s/article/SQL-Escape-Sequences 4980 fn_syntax = False 4981 if ( 4982 self._match(TokenType.L_BRACE, advance=False) 4983 and self._next 4984 and self._next.text.upper() == "FN" 4985 ): 4986 self._advance(2) 4987 fn_syntax = True 4988 4989 func = self._parse_function_call( 4990 functions=functions, 4991 anonymous=anonymous, 4992 optional_parens=optional_parens, 4993 any_token=any_token, 4994 ) 4995 4996 if fn_syntax: 4997 self._match(TokenType.R_BRACE) 4998 4999 return func 5000 5001 def _parse_function_call( 5002 self, 5003 functions: t.Optional[t.Dict[str, t.Callable]] = None, 5004 anonymous: bool = False, 5005 optional_parens: bool = True, 5006 any_token: bool = False, 5007 ) -> t.Optional[exp.Expression]: 5008 if not self._curr: 5009 return None 5010 5011 comments = self._curr.comments 5012 token_type = self._curr.token_type 5013 this = self._curr.text 5014 upper = this.upper() 5015 5016 parser = self.NO_PAREN_FUNCTION_PARSERS.get(upper) 5017 if optional_parens and parser and token_type not in self.INVALID_FUNC_NAME_TOKENS: 5018 self._advance() 5019 return self._parse_window(parser(self)) 5020 5021 if not self._next or self._next.token_type != TokenType.L_PAREN: 5022 if optional_parens and token_type in self.NO_PAREN_FUNCTIONS: 5023 self._advance() 5024 return self.expression(self.NO_PAREN_FUNCTIONS[token_type]) 5025 5026 return None 5027 5028 if any_token: 5029 if token_type in self.RESERVED_TOKENS: 5030 return None 5031 elif token_type not in self.FUNC_TOKENS: 5032 return None 5033 5034 self._advance(2) 5035 5036 parser = self.FUNCTION_PARSERS.get(upper) 5037 if parser and not anonymous: 5038 this = parser(self) 5039 else: 5040 subquery_predicate = self.SUBQUERY_PREDICATES.get(token_type) 5041 5042 if subquery_predicate and self._curr.token_type in (TokenType.SELECT, TokenType.WITH): 5043 this = self.expression(subquery_predicate, this=self._parse_select()) 5044 self._match_r_paren() 5045 return this 5046 5047 if functions is None: 5048 functions = self.FUNCTIONS 5049 5050 function = functions.get(upper) 5051 5052 alias = upper in self.FUNCTIONS_WITH_ALIASED_ARGS 5053 args = self._parse_csv(lambda: self._parse_lambda(alias=alias)) 5054 5055 if alias: 5056 args = self._kv_to_prop_eq(args) 5057 5058 if function and not anonymous: 5059 if "dialect" in function.__code__.co_varnames: 5060 func = function(args, dialect=self.dialect) 5061 else: 5062 func = function(args) 5063 5064 func = self.validate_expression(func, args) 5065 if not self.dialect.NORMALIZE_FUNCTIONS: 5066 func.meta["name"] = this 5067 5068 this = func 5069 else: 5070 if token_type == TokenType.IDENTIFIER: 5071 this = exp.Identifier(this=this, quoted=True) 5072 this = self.expression(exp.Anonymous, this=this, expressions=args) 5073 5074 if isinstance(this, exp.Expression): 5075 this.add_comments(comments) 5076 5077 self._match_r_paren(this) 5078 return self._parse_window(this) 5079 5080 def _kv_to_prop_eq(self, expressions: t.List[exp.Expression]) -> t.List[exp.Expression]: 5081 transformed = [] 5082 5083 for e in expressions: 5084 if isinstance(e, self.KEY_VALUE_DEFINITIONS): 5085 if isinstance(e, 
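# Illustrative sketch of the NO_PAREN_FUNCTIONS path in _parse_function_call, assuming
# the default dialect: parenless builtins resolve to dedicated expression nodes.
import sqlglot
from sqlglot import exp

assert isinstance(sqlglot.parse_one("SELECT CURRENT_DATE").expressions[0], exp.CurrentDate)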
exp.Alias): 5086 e = self.expression(exp.PropertyEQ, this=e.args.get("alias"), expression=e.this) 5087 5088 if not isinstance(e, exp.PropertyEQ): 5089 e = self.expression( 5090 exp.PropertyEQ, this=exp.to_identifier(e.this.name), expression=e.expression 5091 ) 5092 5093 if isinstance(e.this, exp.Column): 5094 e.this.replace(e.this.this) 5095 5096 transformed.append(e) 5097 5098 return transformed 5099 5100 def _parse_function_parameter(self) -> t.Optional[exp.Expression]: 5101 return self._parse_column_def(self._parse_id_var()) 5102 5103 def _parse_user_defined_function( 5104 self, kind: t.Optional[TokenType] = None 5105 ) -> t.Optional[exp.Expression]: 5106 this = self._parse_id_var() 5107 5108 while self._match(TokenType.DOT): 5109 this = self.expression(exp.Dot, this=this, expression=self._parse_id_var()) 5110 5111 if not self._match(TokenType.L_PAREN): 5112 return this 5113 5114 expressions = self._parse_csv(self._parse_function_parameter) 5115 self._match_r_paren() 5116 return self.expression( 5117 exp.UserDefinedFunction, this=this, expressions=expressions, wrapped=True 5118 ) 5119 5120 def _parse_introducer(self, token: Token) -> exp.Introducer | exp.Identifier: 5121 literal = self._parse_primary() 5122 if literal: 5123 return self.expression(exp.Introducer, this=token.text, expression=literal) 5124 5125 return self.expression(exp.Identifier, this=token.text) 5126 5127 def _parse_session_parameter(self) -> exp.SessionParameter: 5128 kind = None 5129 this = self._parse_id_var() or self._parse_primary() 5130 5131 if this and self._match(TokenType.DOT): 5132 kind = this.name 5133 this = self._parse_var() or self._parse_primary() 5134 5135 return self.expression(exp.SessionParameter, this=this, kind=kind) 5136 5137 def _parse_lambda_arg(self) -> t.Optional[exp.Expression]: 5138 return self._parse_id_var() 5139 5140 def _parse_lambda(self, alias: bool = False) -> t.Optional[exp.Expression]: 5141 index = self._index 5142 5143 if self._match(TokenType.L_PAREN): 5144 expressions = t.cast( 5145 t.List[t.Optional[exp.Expression]], self._parse_csv(self._parse_lambda_arg) 5146 ) 5147 5148 if not self._match(TokenType.R_PAREN): 5149 self._retreat(index) 5150 else: 5151 expressions = [self._parse_lambda_arg()] 5152 5153 if self._match_set(self.LAMBDAS): 5154 return self.LAMBDAS[self._prev.token_type](self, expressions) 5155 5156 self._retreat(index) 5157 5158 this: t.Optional[exp.Expression] 5159 5160 if self._match(TokenType.DISTINCT): 5161 this = self.expression( 5162 exp.Distinct, expressions=self._parse_csv(self._parse_assignment) 5163 ) 5164 else: 5165 this = self._parse_select_or_expression(alias=alias) 5166 5167 return self._parse_limit( 5168 self._parse_order(self._parse_having_max(self._parse_respect_or_ignore_nulls(this))) 5169 ) 5170 5171 def _parse_schema(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 5172 index = self._index 5173 if not self._match(TokenType.L_PAREN): 5174 return this 5175 5176 # Disambiguate between schema and subquery/CTE, e.g. 
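# Illustrative sketch of _parse_lambda, assuming the duckdb dialect: the arrow form
# becomes an exp.Lambda whose "expressions" are the parameter identifiers.
import sqlglot
from sqlglot import exp

sql = "SELECT LIST_TRANSFORM([1, 2], x -> x + 1)"
lam = sqlglot.parse_one(sql, read="duckdb").find(exp.Lambda)
assert lam is not None and lam.expressions[0].name == "x"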

    def _parse_schema(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]:
        index = self._index
        if not self._match(TokenType.L_PAREN):
            return this

        # Disambiguate between schema and subquery/CTE, e.g. in INSERT INTO table (<expr>),
        # expr can be of both types
        if self._match_set(self.SELECT_START_TOKENS):
            self._retreat(index)
            return this
        args = self._parse_csv(lambda: self._parse_constraint() or self._parse_field_def())
        self._match_r_paren()
        return self.expression(exp.Schema, this=this, expressions=args)

    def _parse_field_def(self) -> t.Optional[exp.Expression]:
        return self._parse_column_def(self._parse_field(any_token=True))

    def _parse_column_def(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        # column defs are not really columns, they're identifiers
        if isinstance(this, exp.Column):
            this = this.this

        kind = self._parse_types(schema=True)

        if self._match_text_seq("FOR", "ORDINALITY"):
            return self.expression(exp.ColumnDef, this=this, ordinality=True)

        constraints: t.List[exp.Expression] = []

        if (not kind and self._match(TokenType.ALIAS)) or self._match_texts(
            ("ALIAS", "MATERIALIZED")
        ):
            persisted = self._prev.text.upper() == "MATERIALIZED"
            constraints.append(
                self.expression(
                    exp.ComputedColumnConstraint,
                    this=self._parse_assignment(),
                    persisted=persisted or self._match_text_seq("PERSISTED"),
                    not_null=self._match_pair(TokenType.NOT, TokenType.NULL),
                )
            )
        elif kind and self._match_pair(TokenType.ALIAS, TokenType.L_PAREN, advance=False):
            self._match(TokenType.ALIAS)
            constraints.append(
                self.expression(exp.TransformColumnConstraint, this=self._parse_field())
            )

        while True:
            constraint = self._parse_column_constraint()
            if not constraint:
                break
            constraints.append(constraint)

        if not kind and not constraints:
            return this

        return self.expression(exp.ColumnDef, this=this, kind=kind, constraints=constraints)

    def _parse_auto_increment(
        self,
    ) -> exp.GeneratedAsIdentityColumnConstraint | exp.AutoIncrementColumnConstraint:
        start = None
        increment = None

        if self._match(TokenType.L_PAREN, advance=False):
            args = self._parse_wrapped_csv(self._parse_bitwise)
            start = seq_get(args, 0)
            increment = seq_get(args, 1)
        elif self._match_text_seq("START"):
            start = self._parse_bitwise()
            self._match_text_seq("INCREMENT")
            increment = self._parse_bitwise()

        if start and increment:
            return exp.GeneratedAsIdentityColumnConstraint(start=start, increment=increment)

        return exp.AutoIncrementColumnConstraint()

    def _parse_auto_property(self) -> t.Optional[exp.AutoRefreshProperty]:
        if not self._match_text_seq("REFRESH"):
            self._retreat(self._index - 1)
            return None
        return self.expression(exp.AutoRefreshProperty, this=self._parse_var(upper=True))

    def _parse_compress(self) -> exp.CompressColumnConstraint:
        if self._match(TokenType.L_PAREN, advance=False):
            return self.expression(
                exp.CompressColumnConstraint, this=self._parse_wrapped_csv(self._parse_bitwise)
            )

        return self.expression(exp.CompressColumnConstraint, this=self._parse_bitwise())
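
    # Sketch of the disambiguation above (hypothetical statements):
    #   INSERT INTO t (a, b) ...        -> (a, b) is parsed as an exp.Schema
    #   INSERT INTO t (SELECT * FROM s) -> the paren opens a subquery, so
    #                                      _parse_schema retreats and returns.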

    def _parse_generated_as_identity(
        self,
    ) -> (
        exp.GeneratedAsIdentityColumnConstraint
        | exp.ComputedColumnConstraint
        | exp.GeneratedAsRowColumnConstraint
    ):
        if self._match_text_seq("BY", "DEFAULT"):
            on_null = self._match_pair(TokenType.ON, TokenType.NULL)
            this = self.expression(
                exp.GeneratedAsIdentityColumnConstraint, this=False, on_null=on_null
            )
        else:
            self._match_text_seq("ALWAYS")
            this = self.expression(exp.GeneratedAsIdentityColumnConstraint, this=True)

        self._match(TokenType.ALIAS)

        if self._match_text_seq("ROW"):
            start = self._match_text_seq("START")
            if not start:
                self._match(TokenType.END)
            hidden = self._match_text_seq("HIDDEN")
            return self.expression(exp.GeneratedAsRowColumnConstraint, start=start, hidden=hidden)

        identity = self._match_text_seq("IDENTITY")

        if self._match(TokenType.L_PAREN):
            if self._match(TokenType.START_WITH):
                this.set("start", self._parse_bitwise())
            if self._match_text_seq("INCREMENT", "BY"):
                this.set("increment", self._parse_bitwise())
            if self._match_text_seq("MINVALUE"):
                this.set("minvalue", self._parse_bitwise())
            if self._match_text_seq("MAXVALUE"):
                this.set("maxvalue", self._parse_bitwise())

            if self._match_text_seq("CYCLE"):
                this.set("cycle", True)
            elif self._match_text_seq("NO", "CYCLE"):
                this.set("cycle", False)

            if not identity:
                this.set("expression", self._parse_range())
            elif not this.args.get("start") and self._match(TokenType.NUMBER, advance=False):
                args = self._parse_csv(self._parse_bitwise)
                this.set("start", seq_get(args, 0))
                this.set("increment", seq_get(args, 1))

            self._match_r_paren()

        return this

    def _parse_inline(self) -> exp.InlineLengthColumnConstraint:
        self._match_text_seq("LENGTH")
        return self.expression(exp.InlineLengthColumnConstraint, this=self._parse_bitwise())

    def _parse_not_constraint(self) -> t.Optional[exp.Expression]:
        if self._match_text_seq("NULL"):
            return self.expression(exp.NotNullColumnConstraint)
        if self._match_text_seq("CASESPECIFIC"):
            return self.expression(exp.CaseSpecificColumnConstraint, not_=True)
        if self._match_text_seq("FOR", "REPLICATION"):
            return self.expression(exp.NotForReplicationColumnConstraint)
        return None

    def _parse_column_constraint(self) -> t.Optional[exp.Expression]:
        if self._match(TokenType.CONSTRAINT):
            this = self._parse_id_var()
        else:
            this = None

        if self._match_texts(self.CONSTRAINT_PARSERS):
            return self.expression(
                exp.ColumnConstraint,
                this=this,
                kind=self.CONSTRAINT_PARSERS[self._prev.text.upper()](self),
            )

        return this

    def _parse_constraint(self) -> t.Optional[exp.Expression]:
        if not self._match(TokenType.CONSTRAINT):
            return self._parse_unnamed_constraint(constraints=self.SCHEMA_UNNAMED_CONSTRAINTS)

        return self.expression(
            exp.Constraint,
            this=self._parse_id_var(),
            expressions=self._parse_unnamed_constraints(),
        )

    def _parse_unnamed_constraints(self) -> t.List[exp.Expression]:
        constraints = []
        while True:
            constraint = self._parse_unnamed_constraint() or self._parse_function()
            if not constraint:
                break
            constraints.append(constraint)

        return constraints
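
    # Sketch (hypothetical DDL) of what _parse_generated_as_identity recognizes:
    #   id INT GENERATED BY DEFAULT AS IDENTITY (START WITH 1 INCREMENT BY 2)
    # yields exp.GeneratedAsIdentityColumnConstraint(this=False, start=1, increment=2).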

    def _parse_unnamed_constraint(
        self, constraints: t.Optional[t.Collection[str]] = None
    ) -> t.Optional[exp.Expression]:
        if self._match(TokenType.IDENTIFIER, advance=False) or not self._match_texts(
            constraints or self.CONSTRAINT_PARSERS
        ):
            return None

        constraint = self._prev.text.upper()
        if constraint not in self.CONSTRAINT_PARSERS:
            self.raise_error(f"No parser found for schema constraint {constraint}.")

        return self.CONSTRAINT_PARSERS[constraint](self)

    def _parse_unique_key(self) -> t.Optional[exp.Expression]:
        return self._parse_id_var(any_token=False)

    def _parse_unique(self) -> exp.UniqueColumnConstraint:
        self._match_text_seq("KEY")
        return self.expression(
            exp.UniqueColumnConstraint,
            nulls=self._match_text_seq("NULLS", "NOT", "DISTINCT"),
            this=self._parse_schema(self._parse_unique_key()),
            index_type=self._match(TokenType.USING) and self._advance_any() and self._prev.text,
            on_conflict=self._parse_on_conflict(),
        )

    def _parse_key_constraint_options(self) -> t.List[str]:
        options = []
        while True:
            if not self._curr:
                break

            if self._match(TokenType.ON):
                action = None
                on = self._advance_any() and self._prev.text

                if self._match_text_seq("NO", "ACTION"):
                    action = "NO ACTION"
                elif self._match_text_seq("CASCADE"):
                    action = "CASCADE"
                elif self._match_text_seq("RESTRICT"):
                    action = "RESTRICT"
                elif self._match_pair(TokenType.SET, TokenType.NULL):
                    action = "SET NULL"
                elif self._match_pair(TokenType.SET, TokenType.DEFAULT):
                    action = "SET DEFAULT"
                else:
                    self.raise_error("Invalid key constraint")

                options.append(f"ON {on} {action}")
            else:
                var = self._parse_var_from_options(
                    self.KEY_CONSTRAINT_OPTIONS, raise_unmatched=False
                )
                if not var:
                    break
                options.append(var.name)

        return options

    def _parse_references(self, match: bool = True) -> t.Optional[exp.Reference]:
        if match and not self._match(TokenType.REFERENCES):
            return None

        expressions = None
        this = self._parse_table(schema=True)
        options = self._parse_key_constraint_options()
        return self.expression(exp.Reference, this=this, expressions=expressions, options=options)

    def _parse_foreign_key(self) -> exp.ForeignKey:
        expressions = self._parse_wrapped_id_vars()
        reference = self._parse_references()
        options = {}

        while self._match(TokenType.ON):
            if not self._match_set((TokenType.DELETE, TokenType.UPDATE)):
                self.raise_error("Expected DELETE or UPDATE")

            kind = self._prev.text.lower()

            if self._match_text_seq("NO", "ACTION"):
                action = "NO ACTION"
            elif self._match(TokenType.SET):
                self._match_set((TokenType.NULL, TokenType.DEFAULT))
                action = "SET " + self._prev.text.upper()
            else:
                self._advance()
                action = self._prev.text.upper()

            options[kind] = action

        return self.expression(
            exp.ForeignKey,
            expressions=expressions,
            reference=reference,
            **options,  # type: ignore
        )

    def _parse_primary_key_part(self) -> t.Optional[exp.Expression]:
        return self._parse_field()

    def _parse_period_for_system_time(self) -> t.Optional[exp.PeriodForSystemTimeConstraint]:
        if not self._match(TokenType.TIMESTAMP_SNAPSHOT):
            self._retreat(self._index - 1)
            return None

        id_vars = self._parse_wrapped_id_vars()
        return self.expression(
            exp.PeriodForSystemTimeConstraint,
            this=seq_get(id_vars, 0),
            expression=seq_get(id_vars, 1),
        )
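
    # Sketch (hypothetical DDL): FOREIGN KEY (a) REFERENCES t(b) ON DELETE CASCADE
    # is parsed into exp.ForeignKey with options {"delete": "CASCADE"}, which is
    # splatted into the expression as a keyword argument above.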

    def _parse_primary_key(
        self, wrapped_optional: bool = False, in_props: bool = False
    ) -> exp.PrimaryKeyColumnConstraint | exp.PrimaryKey:
        desc = (
            self._match_set((TokenType.ASC, TokenType.DESC))
            and self._prev.token_type == TokenType.DESC
        )

        if not in_props and not self._match(TokenType.L_PAREN, advance=False):
            return self.expression(exp.PrimaryKeyColumnConstraint, desc=desc)

        expressions = self._parse_wrapped_csv(
            self._parse_primary_key_part, optional=wrapped_optional
        )
        options = self._parse_key_constraint_options()
        return self.expression(exp.PrimaryKey, expressions=expressions, options=options)

    def _parse_bracket_key_value(self, is_map: bool = False) -> t.Optional[exp.Expression]:
        return self._parse_slice(self._parse_alias(self._parse_assignment(), explicit=True))

    def _parse_bracket(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]:
        if not self._match_set((TokenType.L_BRACKET, TokenType.L_BRACE)):
            return this

        bracket_kind = self._prev.token_type
        expressions = self._parse_csv(
            lambda: self._parse_bracket_key_value(is_map=bracket_kind == TokenType.L_BRACE)
        )

        if bracket_kind == TokenType.L_BRACKET and not self._match(TokenType.R_BRACKET):
            self.raise_error("Expected ]")
        elif bracket_kind == TokenType.L_BRACE and not self._match(TokenType.R_BRACE):
            self.raise_error("Expected }")

        # https://duckdb.org/docs/sql/data_types/struct.html#creating-structs
        if bracket_kind == TokenType.L_BRACE:
            this = self.expression(exp.Struct, expressions=self._kv_to_prop_eq(expressions))
        elif not this:
            this = build_array_constructor(
                exp.Array, args=expressions, bracket_kind=bracket_kind, dialect=self.dialect
            )
        else:
            constructor_type = self.ARRAY_CONSTRUCTORS.get(this.name.upper())
            if constructor_type:
                return build_array_constructor(
                    constructor_type,
                    args=expressions,
                    bracket_kind=bracket_kind,
                    dialect=self.dialect,
                )

            expressions = apply_index_offset(this, expressions, -self.dialect.INDEX_OFFSET)
            this = self.expression(exp.Bracket, this=this, expressions=expressions)

        self._add_comments(this)
        return self._parse_bracket(this)

    def _parse_slice(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        if self._match(TokenType.COLON):
            return self.expression(exp.Slice, this=this, expression=self._parse_assignment())
        return this

    def _parse_case(self) -> t.Optional[exp.Expression]:
        ifs = []
        default = None

        comments = self._prev_comments
        expression = self._parse_assignment()

        while self._match(TokenType.WHEN):
            this = self._parse_assignment()
            self._match(TokenType.THEN)
            then = self._parse_assignment()
            ifs.append(self.expression(exp.If, this=this, true=then))

        if self._match(TokenType.ELSE):
            default = self._parse_assignment()

        if not self._match(TokenType.END):
            if isinstance(default, exp.Interval) and default.this.sql().upper() == "END":
                default = exp.column("interval")
            else:
                self.raise_error("Expected END after CASE", self._prev)

        return self.expression(
            exp.Case, comments=comments, this=expression, ifs=ifs, default=default
        )
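
    # Sketch of the tail recursion in _parse_bracket (hypothetical expression):
    #   col[1][2] parses as exp.Bracket(exp.Bracket(col, [1]), [2]); each closing
    #   bracket re-enters _parse_bracket with the node built so far as `this`.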

    def _parse_if(self) -> t.Optional[exp.Expression]:
        if self._match(TokenType.L_PAREN):
            args = self._parse_csv(self._parse_assignment)
            this = self.validate_expression(exp.If.from_arg_list(args), args)
            self._match_r_paren()
        else:
            index = self._index - 1

            if self.NO_PAREN_IF_COMMANDS and index == 0:
                return self._parse_as_command(self._prev)

            condition = self._parse_assignment()

            if not condition:
                self._retreat(index)
                return None

            self._match(TokenType.THEN)
            true = self._parse_assignment()
            false = self._parse_assignment() if self._match(TokenType.ELSE) else None
            self._match(TokenType.END)
            this = self.expression(exp.If, this=condition, true=true, false=false)

        return this

    def _parse_next_value_for(self) -> t.Optional[exp.Expression]:
        if not self._match_text_seq("VALUE", "FOR"):
            self._retreat(self._index - 1)
            return None

        return self.expression(
            exp.NextValueFor,
            this=self._parse_column(),
            order=self._match(TokenType.OVER) and self._parse_wrapped(self._parse_order),
        )

    def _parse_extract(self) -> exp.Extract | exp.Anonymous:
        this = self._parse_function() or self._parse_var_or_string(upper=True)

        if self._match(TokenType.FROM):
            return self.expression(exp.Extract, this=this, expression=self._parse_bitwise())

        if not self._match(TokenType.COMMA):
            self.raise_error("Expected FROM or comma after EXTRACT", self._prev)

        return self.expression(exp.Extract, this=this, expression=self._parse_bitwise())

    def _parse_gap_fill(self) -> exp.GapFill:
        self._match(TokenType.TABLE)
        this = self._parse_table()

        self._match(TokenType.COMMA)
        args = [this, *self._parse_csv(self._parse_lambda)]

        gap_fill = exp.GapFill.from_arg_list(args)
        return self.validate_expression(gap_fill, args)

    def _parse_cast(self, strict: bool, safe: t.Optional[bool] = None) -> exp.Expression:
        this = self._parse_assignment()

        if not self._match(TokenType.ALIAS):
            if self._match(TokenType.COMMA):
                return self.expression(exp.CastToStrType, this=this, to=self._parse_string())

            self.raise_error("Expected AS after CAST")

        fmt = None
        to = self._parse_types()

        if self._match(TokenType.FORMAT):
            fmt_string = self._parse_string()
            fmt = self._parse_at_time_zone(fmt_string)

            if not to:
                to = exp.DataType.build(exp.DataType.Type.UNKNOWN)
            if to.this in exp.DataType.TEMPORAL_TYPES:
                this = self.expression(
                    exp.StrToDate if to.this == exp.DataType.Type.DATE else exp.StrToTime,
                    this=this,
                    format=exp.Literal.string(
                        format_time(
                            fmt_string.this if fmt_string else "",
                            self.dialect.FORMAT_MAPPING or self.dialect.TIME_MAPPING,
                            self.dialect.FORMAT_TRIE or self.dialect.TIME_TRIE,
                        )
                    ),
                    safe=safe,
                )

                if isinstance(fmt, exp.AtTimeZone) and isinstance(this, exp.StrToTime):
                    this.set("zone", fmt.args["zone"])
                return this
        elif not to:
            self.raise_error("Expected TYPE after CAST")
        elif isinstance(to, exp.Identifier):
            to = exp.DataType.build(to.name, udt=True)
        elif to.this == exp.DataType.Type.CHAR:
            if self._match(TokenType.CHARACTER_SET):
                to = self.expression(exp.CharacterSet, this=self._parse_var_or_string())

        return self.expression(
            exp.Cast if strict else exp.TryCast,
            this=this,
            to=to,
            format=fmt,
            safe=safe,
            action=self._parse_var_from_options(self.CAST_ACTIONS, raise_unmatched=False),
        )
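
    # Sketch (hypothetical Teradata/Oracle-style input):
    #   CAST(x AS DATE FORMAT 'YYYY-MM-DD')
    # is rewritten into exp.StrToDate with the format string translated through the
    # dialect's FORMAT_MAPPING, instead of being kept as a plain exp.Cast.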

    def _parse_string_agg(self) -> exp.Expression:
        if self._match(TokenType.DISTINCT):
            args: t.List[t.Optional[exp.Expression]] = [
                self.expression(exp.Distinct, expressions=[self._parse_assignment()])
            ]
            if self._match(TokenType.COMMA):
                args.extend(self._parse_csv(self._parse_assignment))
        else:
            args = self._parse_csv(self._parse_assignment)  # type: ignore

        index = self._index
        if not self._match(TokenType.R_PAREN) and args:
            # postgres: STRING_AGG([DISTINCT] expression, separator [ORDER BY expression1 {ASC | DESC} [, ...]])
            # bigquery: STRING_AGG([DISTINCT] expression [, separator] [ORDER BY key [{ASC | DESC}] [, ... ]] [LIMIT n])
            args[-1] = self._parse_limit(this=self._parse_order(this=args[-1]))
            return self.expression(exp.GroupConcat, this=args[0], separator=seq_get(args, 1))

        # Checks if we can parse an order clause: WITHIN GROUP (ORDER BY <order_by_expression_list> [ASC | DESC]).
        # This is done "manually", instead of letting _parse_window parse it into an exp.WithinGroup node, so that
        # the STRING_AGG call is parsed like in MySQL / SQLite and can thus be transpiled more easily to them.
        if not self._match_text_seq("WITHIN", "GROUP"):
            self._retreat(index)
            return self.validate_expression(exp.GroupConcat.from_arg_list(args), args)

        self._match_l_paren()  # The corresponding match_r_paren will be called in parse_function (caller)
        order = self._parse_order(this=seq_get(args, 0))
        return self.expression(exp.GroupConcat, this=order, separator=seq_get(args, 1))

    def _parse_convert(
        self, strict: bool, safe: t.Optional[bool] = None
    ) -> t.Optional[exp.Expression]:
        this = self._parse_bitwise()

        if self._match(TokenType.USING):
            to: t.Optional[exp.Expression] = self.expression(
                exp.CharacterSet, this=self._parse_var()
            )
        elif self._match(TokenType.COMMA):
            to = self._parse_types()
        else:
            to = None

        return self.expression(exp.Cast if strict else exp.TryCast, this=this, to=to, safe=safe)
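
    # Sketch (hypothetical Postgres-style calls):
    #   STRING_AGG(x, ',' ORDER BY y) and STRING_AGG(x, ',') WITHIN GROUP (ORDER BY y)
    # both normalize to exp.GroupConcat, which keeps transpilation to MySQL's
    # GROUP_CONCAT straightforward.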

    def _parse_decode(self) -> t.Optional[exp.Decode | exp.Case]:
        """
        There are generally two variants of the DECODE function:

        - DECODE(bin, charset)
        - DECODE(expression, search, result [, search, result] ... [, default])

        The second variant will always be parsed into a CASE expression. Note that NULL
        needs special treatment, since we need to explicitly check for it with `IS NULL`,
        instead of relying on pattern matching.
        """
        args = self._parse_csv(self._parse_assignment)

        if len(args) < 3:
            return self.expression(exp.Decode, this=seq_get(args, 0), charset=seq_get(args, 1))

        expression, *expressions = args
        if not expression:
            return None

        ifs = []
        for search, result in zip(expressions[::2], expressions[1::2]):
            if not search or not result:
                return None

            if isinstance(search, exp.Literal):
                ifs.append(
                    exp.If(this=exp.EQ(this=expression.copy(), expression=search), true=result)
                )
            elif isinstance(search, exp.Null):
                ifs.append(
                    exp.If(this=exp.Is(this=expression.copy(), expression=exp.Null()), true=result)
                )
            else:
                cond = exp.or_(
                    exp.EQ(this=expression.copy(), expression=search),
                    exp.and_(
                        exp.Is(this=expression.copy(), expression=exp.Null()),
                        exp.Is(this=search.copy(), expression=exp.Null()),
                        copy=False,
                    ),
                    copy=False,
                )
                ifs.append(exp.If(this=cond, true=result))

        return exp.Case(ifs=ifs, default=expressions[-1] if len(expressions) % 2 == 1 else None)

    def _parse_json_key_value(self) -> t.Optional[exp.JSONKeyValue]:
        self._match_text_seq("KEY")
        key = self._parse_column()
        self._match_set(self.JSON_KEY_VALUE_SEPARATOR_TOKENS)
        self._match_text_seq("VALUE")
        value = self._parse_bitwise()

        if not key and not value:
            return None
        return self.expression(exp.JSONKeyValue, this=key, expression=value)

    def _parse_format_json(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        if not this or not self._match_text_seq("FORMAT", "JSON"):
            return this

        return self.expression(exp.FormatJson, this=this)

    def _parse_on_handling(self, on: str, *values: str) -> t.Optional[str]:
        # Parses the "X ON Y" syntax, i.e. NULL ON NULL (Oracle, T-SQL)
        for value in values:
            if self._match_text_seq(value, "ON", on):
                return f"{value} ON {on}"

        return None
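
    # Sketch of the CASE expansion (hypothetical Oracle-style call):
    #   DECODE(x, 1, 'one', NULL, 'none', 'other')
    # becomes CASE WHEN x = 1 THEN 'one' WHEN x IS NULL THEN 'none' ELSE 'other' END,
    # with the explicit IS NULL check described in the docstring above.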

    @t.overload
    def _parse_json_object(self, agg: Lit[False]) -> exp.JSONObject: ...

    @t.overload
    def _parse_json_object(self, agg: Lit[True]) -> exp.JSONObjectAgg: ...

    def _parse_json_object(self, agg=False):
        star = self._parse_star()
        expressions = (
            [star]
            if star
            else self._parse_csv(lambda: self._parse_format_json(self._parse_json_key_value()))
        )
        null_handling = self._parse_on_handling("NULL", "NULL", "ABSENT")

        unique_keys = None
        if self._match_text_seq("WITH", "UNIQUE"):
            unique_keys = True
        elif self._match_text_seq("WITHOUT", "UNIQUE"):
            unique_keys = False

        self._match_text_seq("KEYS")

        return_type = self._match_text_seq("RETURNING") and self._parse_format_json(
            self._parse_type()
        )
        encoding = self._match_text_seq("ENCODING") and self._parse_var()

        return self.expression(
            exp.JSONObjectAgg if agg else exp.JSONObject,
            expressions=expressions,
            null_handling=null_handling,
            unique_keys=unique_keys,
            return_type=return_type,
            encoding=encoding,
        )

    # Note: this is currently incomplete; it only implements the "JSON_value_column" part
    def _parse_json_column_def(self) -> exp.JSONColumnDef:
        if not self._match_text_seq("NESTED"):
            this = self._parse_id_var()
            kind = self._parse_types(allow_identifiers=False)
            nested = None
        else:
            this = None
            kind = None
            nested = True

        path = self._match_text_seq("PATH") and self._parse_string()
        nested_schema = nested and self._parse_json_schema()

        return self.expression(
            exp.JSONColumnDef,
            this=this,
            kind=kind,
            path=path,
            nested_schema=nested_schema,
        )

    def _parse_json_schema(self) -> exp.JSONSchema:
        self._match_text_seq("COLUMNS")
        return self.expression(
            exp.JSONSchema,
            expressions=self._parse_wrapped_csv(self._parse_json_column_def, optional=True),
        )

    def _parse_json_table(self) -> exp.JSONTable:
        this = self._parse_format_json(self._parse_bitwise())
        path = self._match(TokenType.COMMA) and self._parse_string()
        error_handling = self._parse_on_handling("ERROR", "ERROR", "NULL")
        empty_handling = self._parse_on_handling("EMPTY", "ERROR", "NULL")
        schema = self._parse_json_schema()

        return exp.JSONTable(
            this=this,
            schema=schema,
            path=path,
            error_handling=error_handling,
            empty_handling=empty_handling,
        )

    def _parse_match_against(self) -> exp.MatchAgainst:
        expressions = self._parse_csv(self._parse_column)

        self._match_text_seq(")", "AGAINST", "(")

        this = self._parse_string()

        if self._match_text_seq("IN", "NATURAL", "LANGUAGE", "MODE"):
            modifier = "IN NATURAL LANGUAGE MODE"
            if self._match_text_seq("WITH", "QUERY", "EXPANSION"):
                modifier = f"{modifier} WITH QUERY EXPANSION"
        elif self._match_text_seq("IN", "BOOLEAN", "MODE"):
            modifier = "IN BOOLEAN MODE"
        elif self._match_text_seq("WITH", "QUERY", "EXPANSION"):
            modifier = "WITH QUERY EXPANSION"
        else:
            modifier = None

        return self.expression(
            exp.MatchAgainst, this=this, expressions=expressions, modifier=modifier
        )
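
    # Sketch (hypothetical Oracle-style call):
    #   JSON_TABLE(payload, '$.items[*]' COLUMNS (name VARCHAR2 PATH '$.name'))
    # produces an exp.JSONTable whose schema comes from _parse_json_schema above.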

    # https://learn.microsoft.com/en-us/sql/t-sql/functions/openjson-transact-sql?view=sql-server-ver16
    def _parse_open_json(self) -> exp.OpenJSON:
        this = self._parse_bitwise()
        path = self._match(TokenType.COMMA) and self._parse_string()

        def _parse_open_json_column_def() -> exp.OpenJSONColumnDef:
            this = self._parse_field(any_token=True)
            kind = self._parse_types()
            path = self._parse_string()
            as_json = self._match_pair(TokenType.ALIAS, TokenType.JSON)

            return self.expression(
                exp.OpenJSONColumnDef, this=this, kind=kind, path=path, as_json=as_json
            )

        expressions = None
        if self._match_pair(TokenType.R_PAREN, TokenType.WITH):
            self._match_l_paren()
            expressions = self._parse_csv(_parse_open_json_column_def)

        return self.expression(exp.OpenJSON, this=this, path=path, expressions=expressions)

    def _parse_position(self, haystack_first: bool = False) -> exp.StrPosition:
        args = self._parse_csv(self._parse_bitwise)

        if self._match(TokenType.IN):
            return self.expression(
                exp.StrPosition, this=self._parse_bitwise(), substr=seq_get(args, 0)
            )

        if haystack_first:
            haystack = seq_get(args, 0)
            needle = seq_get(args, 1)
        else:
            needle = seq_get(args, 0)
            haystack = seq_get(args, 1)

        return self.expression(
            exp.StrPosition, this=haystack, substr=needle, position=seq_get(args, 2)
        )

    def _parse_predict(self) -> exp.Predict:
        self._match_text_seq("MODEL")
        this = self._parse_table()

        self._match(TokenType.COMMA)
        self._match_text_seq("TABLE")

        return self.expression(
            exp.Predict,
            this=this,
            expression=self._parse_table(),
            params_struct=self._match(TokenType.COMMA) and self._parse_bitwise(),
        )

    def _parse_join_hint(self, func_name: str) -> exp.JoinHint:
        args = self._parse_csv(self._parse_table)
        return exp.JoinHint(this=func_name.upper(), expressions=args)

    def _parse_substring(self) -> exp.Substring:
        # Postgres supports the form: substring(string [from int] [for int])
        # https://www.postgresql.org/docs/9.1/functions-string.html @ Table 9-6

        args = t.cast(t.List[t.Optional[exp.Expression]], self._parse_csv(self._parse_bitwise))

        if self._match(TokenType.FROM):
            args.append(self._parse_bitwise())
        if self._match(TokenType.FOR):
            if len(args) == 1:
                args.append(exp.Literal.number(1))
            args.append(self._parse_bitwise())

        return self.validate_expression(exp.Substring.from_arg_list(args), args)

    def _parse_trim(self) -> exp.Trim:
        # https://www.w3resource.com/sql/character-functions/trim.php
        # https://docs.oracle.com/javadb/10.8.3.0/ref/rreftrimfunc.html

        position = None
        collation = None
        expression = None

        if self._match_texts(self.TRIM_TYPES):
            position = self._prev.text.upper()

        this = self._parse_bitwise()
        if self._match_set((TokenType.FROM, TokenType.COMMA)):
            invert_order = self._prev.token_type == TokenType.FROM or self.TRIM_PATTERN_FIRST
            expression = self._parse_bitwise()

            if invert_order:
                this, expression = expression, this

        if self._match(TokenType.COLLATE):
            collation = self._parse_bitwise()

        return self.expression(
            exp.Trim, this=this, position=position, expression=expression, collation=collation
        )

    def _parse_window_clause(self) -> t.Optional[t.List[exp.Expression]]:
        return self._match(TokenType.WINDOW) and self._parse_csv(self._parse_named_window)

    def _parse_named_window(self) -> t.Optional[exp.Expression]:
        return self._parse_window(self._parse_id_var(), alias=True)
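
    # Sketch (hypothetical ANSI input): TRIM(BOTH 'x' FROM name) arrives with the
    # pattern first, so invert_order above swaps the operands into
    # exp.Trim(this=name, expression='x', position='BOTH').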

    def _parse_respect_or_ignore_nulls(
        self, this: t.Optional[exp.Expression]
    ) -> t.Optional[exp.Expression]:
        if self._match_text_seq("IGNORE", "NULLS"):
            return self.expression(exp.IgnoreNulls, this=this)
        if self._match_text_seq("RESPECT", "NULLS"):
            return self.expression(exp.RespectNulls, this=this)
        return this

    def _parse_having_max(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        if self._match(TokenType.HAVING):
            self._match_texts(("MAX", "MIN"))
            max = self._prev.text.upper() != "MIN"
            return self.expression(
                exp.HavingMax, this=this, expression=self._parse_column(), max=max
            )

        return this
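
    # Sketch (hypothetical Snowflake/Oracle-style input):
    #   FIRST_VALUE(col) IGNORE NULLS OVER (ORDER BY ts)
    # _parse_window below detects the IgnoreNulls node nested inside the aggregate
    # and re-wraps the whole aggregate, so both placements normalize identically.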

    def _parse_window(
        self, this: t.Optional[exp.Expression], alias: bool = False
    ) -> t.Optional[exp.Expression]:
        func = this
        comments = func.comments if isinstance(func, exp.Expression) else None

        if self._match_pair(TokenType.FILTER, TokenType.L_PAREN):
            self._match(TokenType.WHERE)
            this = self.expression(
                exp.Filter, this=this, expression=self._parse_where(skip_where_token=True)
            )
            self._match_r_paren()

        # T-SQL allows the OVER (...) syntax after WITHIN GROUP.
        # https://learn.microsoft.com/en-us/sql/t-sql/functions/percentile-disc-transact-sql?view=sql-server-ver16
        if self._match_text_seq("WITHIN", "GROUP"):
            order = self._parse_wrapped(self._parse_order)
            this = self.expression(exp.WithinGroup, this=this, expression=order)

        # The SQL spec defines an optional [ { IGNORE | RESPECT } NULLS ] before OVER.
        # Some dialects choose to implement it and some do not.
        # https://dev.mysql.com/doc/refman/8.0/en/window-function-descriptions.html

        # There is some code above in _parse_lambda that handles
        # SELECT FIRST_VALUE(TABLE.COLUMN IGNORE|RESPECT NULLS) OVER ...

        # The code below handles
        # SELECT FIRST_VALUE(TABLE.COLUMN) IGNORE|RESPECT NULLS OVER ...

        # Oracle allows both formats
        # (https://docs.oracle.com/en/database/oracle/oracle-database/19/sqlrf/img_text/first_value.html)
        # and Snowflake chose to do the same for familiarity
        # https://docs.snowflake.com/en/sql-reference/functions/first_value.html#usage-notes
        if isinstance(this, exp.AggFunc):
            ignore_respect = this.find(exp.IgnoreNulls, exp.RespectNulls)

            if ignore_respect and ignore_respect is not this:
                ignore_respect.replace(ignore_respect.this)
                this = self.expression(ignore_respect.__class__, this=this)

        this = self._parse_respect_or_ignore_nulls(this)

        # bigquery select from window x AS (partition by ...)
        if alias:
            over = None
            self._match(TokenType.ALIAS)
        elif not self._match_set(self.WINDOW_BEFORE_PAREN_TOKENS):
            return this
        else:
            over = self._prev.text.upper()

        if comments and isinstance(func, exp.Expression):
            func.pop_comments()

        if not self._match(TokenType.L_PAREN):
            return self.expression(
                exp.Window,
                comments=comments,
                this=this,
                alias=self._parse_id_var(False),
                over=over,
            )

        window_alias = self._parse_id_var(any_token=False, tokens=self.WINDOW_ALIAS_TOKENS)

        first = self._match(TokenType.FIRST)
        if self._match_text_seq("LAST"):
            first = False

        partition, order = self._parse_partition_and_order()
        kind = self._match_set((TokenType.ROWS, TokenType.RANGE)) and self._prev.text

        if kind:
            self._match(TokenType.BETWEEN)
            start = self._parse_window_spec()
            self._match(TokenType.AND)
            end = self._parse_window_spec()

            spec = self.expression(
                exp.WindowSpec,
                kind=kind,
                start=start["value"],
                start_side=start["side"],
                end=end["value"],
                end_side=end["side"],
            )
        else:
            spec = None

        self._match_r_paren()

        window = self.expression(
            exp.Window,
            comments=comments,
            this=this,
            partition_by=partition,
            order=order,
            spec=spec,
            alias=window_alias,
            over=over,
            first=first,
        )

        # This covers Oracle's FIRST/LAST syntax: aggregate KEEP (...) OVER (...)
        if self._match_set(self.WINDOW_BEFORE_PAREN_TOKENS, advance=False):
            return self._parse_window(window, alias=alias)

        return window

    def _parse_partition_and_order(
        self,
    ) -> t.Tuple[t.List[exp.Expression], t.Optional[exp.Expression]]:
        return self._parse_partition_by(), self._parse_order()

    def _parse_window_spec(self) -> t.Dict[str, t.Optional[str | exp.Expression]]:
        self._match(TokenType.BETWEEN)

        return {
            "value": (
                (self._match_text_seq("UNBOUNDED") and "UNBOUNDED")
                or (self._match_text_seq("CURRENT", "ROW") and "CURRENT ROW")
                or self._parse_bitwise()
            ),
            "side": self._match_texts(self.WINDOW_SIDES) and self._prev.text,
        }

    def _parse_alias(
        self, this: t.Optional[exp.Expression], explicit: bool = False
    ) -> t.Optional[exp.Expression]:
        any_token = self._match(TokenType.ALIAS)
        comments = self._prev_comments or []

        if explicit and not any_token:
            return this

        if self._match(TokenType.L_PAREN):
            aliases = self.expression(
                exp.Aliases,
                comments=comments,
                this=this,
                expressions=self._parse_csv(lambda: self._parse_id_var(any_token)),
            )
            self._match_r_paren(aliases)
            return aliases

        alias = self._parse_id_var(any_token, tokens=self.ALIAS_TOKENS) or (
            self.STRING_ALIASES and self._parse_string_as_identifier()
        )

        if alias:
            comments.extend(alias.pop_comments())
            this = self.expression(exp.Alias, comments=comments, this=this, alias=alias)
            column = this.this

            # Moves the comment next to the alias in `expr /* comment */ AS alias`
            if not this.comments and column and column.comments:
                this.comments = column.pop_comments()

        return this
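
    # Sketch (hypothetical input):
    #   SUM(x) OVER (ORDER BY d ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW)
    # fills exp.WindowSpec with kind="ROWS", start="UNBOUNDED"/start_side="PRECEDING"
    # and end="CURRENT ROW" via the two _parse_window_spec calls above.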

    def _parse_id_var(
        self,
        any_token: bool = True,
        tokens: t.Optional[t.Collection[TokenType]] = None,
    ) -> t.Optional[exp.Expression]:
        expression = self._parse_identifier()
        if not expression and (
            (any_token and self._advance_any()) or self._match_set(tokens or self.ID_VAR_TOKENS)
        ):
            quoted = self._prev.token_type == TokenType.STRING
            expression = self.expression(exp.Identifier, this=self._prev.text, quoted=quoted)

        return expression

    def _parse_string(self) -> t.Optional[exp.Expression]:
        if self._match_set(self.STRING_PARSERS):
            return self.STRING_PARSERS[self._prev.token_type](self, self._prev)
        return self._parse_placeholder()

    def _parse_string_as_identifier(self) -> t.Optional[exp.Identifier]:
        return exp.to_identifier(self._match(TokenType.STRING) and self._prev.text, quoted=True)

    def _parse_number(self) -> t.Optional[exp.Expression]:
        if self._match_set(self.NUMERIC_PARSERS):
            return self.NUMERIC_PARSERS[self._prev.token_type](self, self._prev)
        return self._parse_placeholder()

    def _parse_identifier(self) -> t.Optional[exp.Expression]:
        if self._match(TokenType.IDENTIFIER):
            return self.expression(exp.Identifier, this=self._prev.text, quoted=True)
        return self._parse_placeholder()

    def _parse_var(
        self,
        any_token: bool = False,
        tokens: t.Optional[t.Collection[TokenType]] = None,
        upper: bool = False,
    ) -> t.Optional[exp.Expression]:
        if (
            (any_token and self._advance_any())
            or self._match(TokenType.VAR)
            or (self._match_set(tokens) if tokens else False)
        ):
            return self.expression(
                exp.Var, this=self._prev.text.upper() if upper else self._prev.text
            )
        return self._parse_placeholder()

    def _advance_any(self, ignore_reserved: bool = False) -> t.Optional[Token]:
        if self._curr and (ignore_reserved or self._curr.token_type not in self.RESERVED_TOKENS):
            self._advance()
            return self._prev
        return None

    def _parse_var_or_string(self, upper: bool = False) -> t.Optional[exp.Expression]:
        return self._parse_string() or self._parse_var(any_token=True, upper=upper)

    def _parse_primary_or_var(self) -> t.Optional[exp.Expression]:
        return self._parse_primary() or self._parse_var(any_token=True)

    def _parse_null(self) -> t.Optional[exp.Expression]:
        if self._match_set(self.NULL_TOKENS):
            return self.PRIMARY_PARSERS[TokenType.NULL](self, self._prev)
        return self._parse_placeholder()

    def _parse_boolean(self) -> t.Optional[exp.Expression]:
        if self._match(TokenType.TRUE):
            return self.PRIMARY_PARSERS[TokenType.TRUE](self, self._prev)
        if self._match(TokenType.FALSE):
            return self.PRIMARY_PARSERS[TokenType.FALSE](self, self._prev)
        return self._parse_placeholder()

    def _parse_star(self) -> t.Optional[exp.Expression]:
        if self._match(TokenType.STAR):
            return self.PRIMARY_PARSERS[TokenType.STAR](self, self._prev)
        return self._parse_placeholder()

    def _parse_parameter(self) -> exp.Parameter:
        this = self._parse_identifier() or self._parse_primary_or_var()
        return self.expression(exp.Parameter, this=this)

    def _parse_placeholder(self) -> t.Optional[exp.Expression]:
        if self._match_set(self.PLACEHOLDER_PARSERS):
            placeholder = self.PLACEHOLDER_PARSERS[self._prev.token_type](self)
            if placeholder:
                return placeholder
            self._advance(-1)
        return None
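
    # Sketch of the backtracking above: a token such as "?" matches
    # PLACEHOLDER_PARSERS, but if its sub-parser returns None the cursor is rewound
    # one token (self._advance(-1)) so other parsers can still try it.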

    def _parse_star_op(self, *keywords: str) -> t.Optional[t.List[exp.Expression]]:
        if not self._match_texts(keywords):
            return None
        if self._match(TokenType.L_PAREN, advance=False):
            return self._parse_wrapped_csv(self._parse_expression)

        expression = self._parse_expression()
        return [expression] if expression else None

    def _parse_csv(
        self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA
    ) -> t.List[exp.Expression]:
        parse_result = parse_method()
        items = [parse_result] if parse_result is not None else []

        while self._match(sep):
            self._add_comments(parse_result)
            parse_result = parse_method()
            if parse_result is not None:
                items.append(parse_result)

        return items

    def _parse_tokens(
        self, parse_method: t.Callable, expressions: t.Dict
    ) -> t.Optional[exp.Expression]:
        this = parse_method()

        while self._match_set(expressions):
            this = self.expression(
                expressions[self._prev.token_type],
                this=this,
                comments=self._prev_comments,
                expression=parse_method(),
            )

        return this

    def _parse_wrapped_id_vars(self, optional: bool = False) -> t.List[exp.Expression]:
        return self._parse_wrapped_csv(self._parse_id_var, optional=optional)

    def _parse_wrapped_csv(
        self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA, optional: bool = False
    ) -> t.List[exp.Expression]:
        return self._parse_wrapped(
            lambda: self._parse_csv(parse_method, sep=sep), optional=optional
        )

    def _parse_wrapped(self, parse_method: t.Callable, optional: bool = False) -> t.Any:
        wrapped = self._match(TokenType.L_PAREN)
        if not wrapped and not optional:
            self.raise_error("Expecting (")
        parse_result = parse_method()
        if wrapped:
            self._match_r_paren()
        return parse_result

    def _parse_expressions(self) -> t.List[exp.Expression]:
        return self._parse_csv(self._parse_expression)

    def _parse_select_or_expression(self, alias: bool = False) -> t.Optional[exp.Expression]:
        return self._parse_select() or self._parse_set_operations(
            self._parse_expression() if alias else self._parse_assignment()
        )

    def _parse_ddl_select(self) -> t.Optional[exp.Expression]:
        return self._parse_query_modifiers(
            self._parse_set_operations(self._parse_select(nested=True, parse_subquery_alias=False))
        )

    def _parse_transaction(self) -> exp.Transaction | exp.Command:
        this = None
        if self._match_texts(self.TRANSACTION_KIND):
            this = self._prev.text

        self._match_texts(("TRANSACTION", "WORK"))

        modes = []
        while True:
            mode = []
            while self._match(TokenType.VAR):
                mode.append(self._prev.text)

            if mode:
                modes.append(" ".join(mode))
            if not self._match(TokenType.COMMA):
                break

        return self.expression(exp.Transaction, this=this, modes=modes)

    def _parse_commit_or_rollback(self) -> exp.Commit | exp.Rollback:
        chain = None
        savepoint = None
        is_rollback = self._prev.token_type == TokenType.ROLLBACK

        self._match_texts(("TRANSACTION", "WORK"))

        if self._match_text_seq("TO"):
            self._match_text_seq("SAVEPOINT")
            savepoint = self._parse_id_var()

        if self._match(TokenType.AND):
            chain = not self._match_text_seq("NO")
            self._match_text_seq("CHAIN")

        if is_rollback:
            return self.expression(exp.Rollback, savepoint=savepoint)

        return self.expression(exp.Commit, chain=chain)
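
    # Sketch of _parse_tokens: given parse_method=_parse_term and an expressions map
    # like {TokenType.PLUS: exp.Add} (hypothetical), the input "a + b + c" folds
    # left-associatively into exp.Add(exp.Add(a, b), c).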

    def _parse_refresh(self) -> exp.Refresh:
        self._match(TokenType.TABLE)
        return self.expression(exp.Refresh, this=self._parse_string() or self._parse_table())

    def _parse_add_column(self) -> t.Optional[exp.Expression]:
        if not self._match_text_seq("ADD"):
            return None

        self._match(TokenType.COLUMN)
        exists_column = self._parse_exists(not_=True)
        expression = self._parse_field_def()

        if expression:
            expression.set("exists", exists_column)

            # https://docs.databricks.com/delta/update-schema.html#explicitly-update-schema-to-add-columns
            if self._match_texts(("FIRST", "AFTER")):
                position = self._prev.text
                column_position = self.expression(
                    exp.ColumnPosition, this=self._parse_column(), position=position
                )
                expression.set("position", column_position)

        return expression

    def _parse_drop_column(self) -> t.Optional[exp.Drop | exp.Command]:
        drop = self._match(TokenType.DROP) and self._parse_drop()
        if drop and not isinstance(drop, exp.Command):
            drop.set("kind", drop.args.get("kind", "COLUMN"))
        return drop

    # https://docs.aws.amazon.com/athena/latest/ug/alter-table-drop-partition.html
    def _parse_drop_partition(self, exists: t.Optional[bool] = None) -> exp.DropPartition:
        return self.expression(
            exp.DropPartition, expressions=self._parse_csv(self._parse_partition), exists=exists
        )

    def _parse_alter_table_add(self) -> t.List[exp.Expression]:
        index = self._index - 1

        if self._match_set(self.ADD_CONSTRAINT_TOKENS, advance=False):
            return self._parse_csv(
                lambda: self.expression(
                    exp.AddConstraint, expressions=self._parse_csv(self._parse_constraint)
                )
            )

        self._retreat(index)
        if not self.ALTER_TABLE_ADD_REQUIRED_FOR_EACH_COLUMN and self._match_text_seq("ADD"):
            return self._parse_wrapped_csv(self._parse_field_def, optional=True)

        if self._match_text_seq("ADD", "COLUMNS"):
            schema = self._parse_schema()
            if schema:
                return [schema]
            return []

        return self._parse_wrapped_csv(self._parse_add_column, optional=True)
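
    # Sketch (hypothetical Databricks-style DDL):
    #   ALTER TABLE t ADD COLUMN c INT AFTER b
    # goes through _parse_add_column, attaching an exp.ColumnPosition so the
    # generator can re-emit the AFTER clause.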

    def _parse_alter_table_alter(self) -> t.Optional[exp.Expression]:
        if self._match_texts(self.ALTER_ALTER_PARSERS):
            return self.ALTER_ALTER_PARSERS[self._prev.text.upper()](self)

        # Many dialects support the ALTER [COLUMN] syntax, so if there is no
        # keyword after ALTER we default to parsing this statement
        self._match(TokenType.COLUMN)
        column = self._parse_field(any_token=True)

        if self._match_pair(TokenType.DROP, TokenType.DEFAULT):
            return self.expression(exp.AlterColumn, this=column, drop=True)
        if self._match_pair(TokenType.SET, TokenType.DEFAULT):
            return self.expression(exp.AlterColumn, this=column, default=self._parse_assignment())
        if self._match(TokenType.COMMENT):
            return self.expression(exp.AlterColumn, this=column, comment=self._parse_string())
        if self._match_text_seq("DROP", "NOT", "NULL"):
            return self.expression(
                exp.AlterColumn,
                this=column,
                drop=True,
                allow_null=True,
            )
        if self._match_text_seq("SET", "NOT", "NULL"):
            return self.expression(
                exp.AlterColumn,
                this=column,
                allow_null=False,
            )
        self._match_text_seq("SET", "DATA")
        self._match_text_seq("TYPE")
        return self.expression(
            exp.AlterColumn,
            this=column,
            dtype=self._parse_types(),
            collate=self._match(TokenType.COLLATE) and self._parse_term(),
            using=self._match(TokenType.USING) and self._parse_assignment(),
        )

    def _parse_alter_diststyle(self) -> exp.AlterDistStyle:
        if self._match_texts(("ALL", "EVEN", "AUTO")):
            return self.expression(exp.AlterDistStyle, this=exp.var(self._prev.text.upper()))

        self._match_text_seq("KEY", "DISTKEY")
        return self.expression(exp.AlterDistStyle, this=self._parse_column())

    def _parse_alter_sortkey(self, compound: t.Optional[bool] = None) -> exp.AlterSortKey:
        if compound:
            self._match_text_seq("SORTKEY")

        if self._match(TokenType.L_PAREN, advance=False):
            return self.expression(
                exp.AlterSortKey, expressions=self._parse_wrapped_id_vars(), compound=compound
            )

        self._match_texts(("AUTO", "NONE"))
        return self.expression(
            exp.AlterSortKey, this=exp.var(self._prev.text.upper()), compound=compound
        )

    def _parse_alter_table_drop(self) -> t.List[exp.Expression]:
        index = self._index - 1

        partition_exists = self._parse_exists()
        if self._match(TokenType.PARTITION, advance=False):
            return self._parse_csv(lambda: self._parse_drop_partition(exists=partition_exists))

        self._retreat(index)
        return self._parse_csv(self._parse_drop_column)

    def _parse_alter_table_rename(self) -> t.Optional[exp.RenameTable | exp.RenameColumn]:
        if self._match(TokenType.COLUMN):
            exists = self._parse_exists()
            old_column = self._parse_column()
            to = self._match_text_seq("TO")
            new_column = self._parse_column()

            if old_column is None or to is None or new_column is None:
                return None

            return self.expression(exp.RenameColumn, this=old_column, to=new_column, exists=exists)

        self._match_text_seq("TO")
        return self.expression(exp.RenameTable, this=self._parse_table(schema=True))
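
    # Sketch (hypothetical Postgres-style DDL):
    #   ALTER TABLE t ALTER COLUMN c SET DATA TYPE BIGINT USING c::BIGINT
    # falls through to the final branch above, producing exp.AlterColumn with
    # dtype and using populated.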

    def _parse_alter_table_set(self) -> exp.AlterSet:
        alter_set = self.expression(exp.AlterSet)

        if self._match(TokenType.L_PAREN, advance=False) or self._match_text_seq(
            "TABLE", "PROPERTIES"
        ):
            alter_set.set("expressions", self._parse_wrapped_csv(self._parse_assignment))
        elif self._match_text_seq("FILESTREAM_ON", advance=False):
            alter_set.set("expressions", [self._parse_assignment()])
        elif self._match_texts(("LOGGED", "UNLOGGED")):
            alter_set.set("option", exp.var(self._prev.text.upper()))
        elif self._match_text_seq("WITHOUT") and self._match_texts(("CLUSTER", "OIDS")):
            alter_set.set("option", exp.var(f"WITHOUT {self._prev.text.upper()}"))
        elif self._match_text_seq("LOCATION"):
            alter_set.set("location", self._parse_field())
        elif self._match_text_seq("ACCESS", "METHOD"):
            alter_set.set("access_method", self._parse_field())
        elif self._match_text_seq("TABLESPACE"):
            alter_set.set("tablespace", self._parse_field())
        elif self._match_text_seq("FILE", "FORMAT") or self._match_text_seq("FILEFORMAT"):
            alter_set.set("file_format", [self._parse_field()])
        elif self._match_text_seq("STAGE_FILE_FORMAT"):
            alter_set.set("file_format", self._parse_wrapped_options())
        elif self._match_text_seq("STAGE_COPY_OPTIONS"):
            alter_set.set("copy_options", self._parse_wrapped_options())
        elif self._match_text_seq("TAG") or self._match_text_seq("TAGS"):
            alter_set.set("tag", self._parse_csv(self._parse_assignment))
        else:
            if self._match_text_seq("SERDE"):
                alter_set.set("serde", self._parse_field())

            alter_set.set("expressions", [self._parse_properties()])

        return alter_set

    def _parse_alter(self) -> exp.Alter | exp.Command:
        start = self._prev

        alter_token = self._match_set(self.ALTERABLES) and self._prev
        if not alter_token:
            return self._parse_as_command(start)

        exists = self._parse_exists()
        only = self._match_text_seq("ONLY")
        this = self._parse_table(schema=True)
        cluster = self._parse_on_property() if self._match(TokenType.ON) else None

        if self._next:
            self._advance()

        parser = self.ALTER_PARSERS.get(self._prev.text.upper()) if self._prev else None
        if parser:
            actions = ensure_list(parser(self))
            options = self._parse_csv(self._parse_property)

            if not self._curr and actions:
                return self.expression(
                    exp.Alter,
                    this=this,
                    kind=alter_token.text.upper(),
                    exists=exists,
                    actions=actions,
                    only=only,
                    options=options,
                    cluster=cluster,
                )

        return self._parse_as_command(start)

    def _parse_merge(self) -> exp.Merge:
        self._match(TokenType.INTO)
        target = self._parse_table()

        if target and self._match(TokenType.ALIAS, advance=False):
            target.set("alias", self._parse_table_alias())

        self._match(TokenType.USING)
        using = self._parse_table()

        self._match(TokenType.ON)
        on = self._parse_assignment()

        return self.expression(
            exp.Merge,
            this=target,
            using=using,
            on=on,
            expressions=self._parse_when_matched(),
        )

    def _parse_when_matched(self) -> t.List[exp.When]:
        whens = []

        while self._match(TokenType.WHEN):
            matched = not self._match(TokenType.NOT)
            self._match_text_seq("MATCHED")
            source = (
                False
                if self._match_text_seq("BY", "TARGET")
                else self._match_text_seq("BY", "SOURCE")
            )
            condition = self._parse_assignment() if self._match(TokenType.AND) else None

            self._match(TokenType.THEN)

            if self._match(TokenType.INSERT):
                _this = self._parse_star()
                if _this:
                    then: t.Optional[exp.Expression] = self.expression(exp.Insert, this=_this)
                else:
                    then = self.expression(
                        exp.Insert,
                        this=self._parse_value(),
                        expression=self._match_text_seq("VALUES") and self._parse_value(),
                    )
            elif self._match(TokenType.UPDATE):
                expressions = self._parse_star()
                if expressions:
                    then = self.expression(exp.Update, expressions=expressions)
                else:
                    then = self.expression(
                        exp.Update,
                        expressions=self._match(TokenType.SET)
                        and self._parse_csv(self._parse_equality),
                    )
            elif self._match(TokenType.DELETE):
                then = self.expression(exp.Var, this=self._prev.text)
            else:
                then = None

            whens.append(
                self.expression(
                    exp.When,
                    matched=matched,
                    source=source,
                    condition=condition,
                    then=then,
                )
            )
        return whens

    def _parse_show(self) -> t.Optional[exp.Expression]:
        parser = self._find_parser(self.SHOW_PARSERS, self.SHOW_TRIE)
        if parser:
            return parser(self)
        return self._parse_as_command(self._prev)

    def _parse_set_item_assignment(
        self, kind: t.Optional[str] = None
    ) -> t.Optional[exp.Expression]:
        index = self._index

        if kind in ("GLOBAL", "SESSION") and self._match_text_seq("TRANSACTION"):
            return self._parse_set_transaction(global_=kind == "GLOBAL")

        left = self._parse_primary() or self._parse_column()
        assignment_delimiter = self._match_texts(("=", "TO"))

        if not left or (self.SET_REQUIRES_ASSIGNMENT_DELIMITER and not assignment_delimiter):
            self._retreat(index)
            return None

        right = self._parse_statement() or self._parse_id_var()
        if isinstance(right, (exp.Column, exp.Identifier)):
            right = exp.var(right.name)

        this = self.expression(exp.EQ, this=left, expression=right)
        return self.expression(exp.SetItem, this=this, kind=kind)

    def _parse_set_transaction(self, global_: bool = False) -> exp.Expression:
        self._match_text_seq("TRANSACTION")
        characteristics = self._parse_csv(
            lambda: self._parse_var_from_options(self.TRANSACTION_CHARACTERISTICS)
        )
        return self.expression(
            exp.SetItem,
            expressions=characteristics,
            kind="TRANSACTION",
            **{"global": global_},  # type: ignore
        )

    def _parse_set_item(self) -> t.Optional[exp.Expression]:
        parser = self._find_parser(self.SET_PARSERS, self.SET_TRIE)
        return parser(self) if parser else self._parse_set_item_assignment(kind=None)

    def _parse_set(self, unset: bool = False, tag: bool = False) -> exp.Set | exp.Command:
        index = self._index
        set_ = self.expression(
            exp.Set, expressions=self._parse_csv(self._parse_set_item), unset=unset, tag=tag
        )

        if self._curr:
            self._retreat(index)
            return self._parse_as_command(self._prev)

        return set_

    def _parse_var_from_options(
        self, options: OPTIONS_TYPE, raise_unmatched: bool = True
    ) -> t.Optional[exp.Var]:
        start = self._curr
        if not start:
            return None

        option = start.text.upper()
        continuations = options.get(option)

        index = self._index
        self._advance()
        for keywords in continuations or []:
            if isinstance(keywords, str):
                keywords = (keywords,)

            if self._match_text_seq(*keywords):
                option = f"{option} {' '.join(keywords)}"
                break
        else:
            if continuations or continuations is None:
                if raise_unmatched:
                    self.raise_error(f"Unknown option {option}")

                self._retreat(index)
                return None

        return exp.var(option)

    def _parse_as_command(self, start: Token) -> exp.Command:
        while self._curr:
            self._advance()
        text = self._find_sql(start, self._prev)
        size = len(start.text)
        self._warn_unsupported()
        return exp.Command(this=text[:size], expression=text[size:])

    def _parse_dict_property(self, this: str) -> exp.DictProperty:
        settings = []

        self._match_l_paren()
        kind = self._parse_id_var()

        if self._match(TokenType.L_PAREN):
            while True:
                key = self._parse_id_var()
                value = self._parse_primary()

                if not key and value is None:
                    break
                settings.append(self.expression(exp.DictSubProperty, this=key, value=value))
            self._match(TokenType.R_PAREN)

        self._match_r_paren()

        return self.expression(
            exp.DictProperty,
            this=this,
            kind=kind.this if kind else None,
            settings=settings,
        )

    def _parse_dict_range(self, this: str) -> exp.DictRange:
        self._match_l_paren()
        has_min = self._match_text_seq("MIN")
        if has_min:
            min = self._parse_var() or self._parse_primary()
            self._match_text_seq("MAX")
            max = self._parse_var() or self._parse_primary()
        else:
            max = self._parse_var() or self._parse_primary()
            min = exp.Literal.number(0)
        self._match_r_paren()
        return self.expression(exp.DictRange, this=this, min=min, max=max)

    def _parse_comprehension(
        self, this: t.Optional[exp.Expression]
    ) -> t.Optional[exp.Comprehension]:
        index = self._index
        expression = self._parse_column()
        if not self._match(TokenType.IN):
            self._retreat(index - 1)
            return None
        iterator = self._parse_column()
        condition = self._parse_assignment() if self._match_text_seq("IF") else None
        return self.expression(
            exp.Comprehension,
            this=this,
            expression=expression,
            iterator=iterator,
            condition=condition,
        )

    def _parse_heredoc(self) -> t.Optional[exp.Heredoc]:
        if self._match(TokenType.HEREDOC_STRING):
            return self.expression(exp.Heredoc, this=self._prev.text)

        if not self._match_text_seq("$"):
            return None

        tags = ["$"]
        tag_text = None

        if self._is_connected():
            self._advance()
            tags.append(self._prev.text.upper())
        else:
            self.raise_error("No closing $ found")

        if tags[-1] != "$":
            if self._is_connected() and self._match_text_seq("$"):
                tag_text = tags[-1]
                tags.append("$")
            else:
                self.raise_error("No closing $ found")

        heredoc_start = self._curr

        while self._curr:
            if self._match_text_seq(*tags, advance=False):
                this = self._find_sql(heredoc_start, self._prev)
                self._advance(len(tags))
                return self.expression(exp.Heredoc, this=this, tag=tag_text)

            self._advance()

        self.raise_error(f"No closing {''.join(tags)} found")
        return None

    def _find_parser(
        self, parsers: t.Dict[str, t.Callable], trie: t.Dict
    ) -> t.Optional[t.Callable]:
        if not self._curr:
            return None

        index = self._index
        this = []
        while True:
            # The current token might be multiple words
            curr = self._curr.text.upper()
            key = curr.split(" ")
            this.append(curr)

            self._advance()
            result, trie = in_trie(trie, key)
            if result == TrieResult.FAILED:
                break

            if result == TrieResult.EXISTS:
                subparser = parsers[" ".join(this)]
                return subparser

        self._retreat(index)
        return None

    def _match(self, token_type, advance=True, expression=None):
        if not self._curr:
            return None

        if self._curr.token_type == token_type:
            if advance:
                self._advance()
            self._add_comments(expression)
            return True

        return None

    def _match_set(self, types, advance=True):
        if not self._curr:
            return None

        if self._curr.token_type in types:
            if advance:
                self._advance()
            return True

        return None

    def _match_pair(self, token_type_a, token_type_b, advance=True):
        if not self._curr or not self._next:
            return None

        if self._curr.token_type == token_type_a and self._next.token_type == token_type_b:
            if advance:
                self._advance(2)
            return True

        return None

    def _match_l_paren(self, expression: t.Optional[exp.Expression] = None) -> None:
        if not self._match(TokenType.L_PAREN, expression=expression):
            self.raise_error("Expecting (")

    def _match_r_paren(self, expression: t.Optional[exp.Expression] = None) -> None:
        if not self._match(TokenType.R_PAREN, expression=expression):
            self.raise_error("Expecting )")

    def _match_texts(self, texts, advance=True):
        if self._curr and self._curr.text.upper() in texts:
            if advance:
                self._advance()
            return True
        return None
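
    # Sketch (hypothetical Postgres-style dollar quoting): $tag$ body $tag$ scans
    # token by token until the opening tag sequence ["$", "TAG", "$"] reappears,
    # then wraps the enclosed SQL in exp.Heredoc(tag="TAG").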
6921 6922 def _match_text_seq(self, *texts, advance=True): 6923 index = self._index 6924 for text in texts: 6925 if self._curr and self._curr.text.upper() == text: 6926 self._advance() 6927 else: 6928 self._retreat(index) 6929 return None 6930 6931 if not advance: 6932 self._retreat(index) 6933 6934 return True 6935 6936 def _replace_lambda( 6937 self, node: t.Optional[exp.Expression], expressions: t.List[exp.Expression] 6938 ) -> t.Optional[exp.Expression]: 6939 if not node: 6940 return node 6941 6942 lambda_types = {e.name: e.args.get("to") or False for e in expressions} 6943 6944 for column in node.find_all(exp.Column): 6945 typ = lambda_types.get(column.parts[0].name) 6946 if typ is not None: 6947 dot_or_id = column.to_dot() if column.table else column.this 6948 6949 if typ: 6950 dot_or_id = self.expression( 6951 exp.Cast, 6952 this=dot_or_id, 6953 to=typ, 6954 ) 6955 6956 parent = column.parent 6957 6958 while isinstance(parent, exp.Dot): 6959 if not isinstance(parent.parent, exp.Dot): 6960 parent.replace(dot_or_id) 6961 break 6962 parent = parent.parent 6963 else: 6964 if column is node: 6965 node = dot_or_id 6966 else: 6967 column.replace(dot_or_id) 6968 return node 6969 6970 def _parse_truncate_table(self) -> t.Optional[exp.TruncateTable] | exp.Expression: 6971 start = self._prev 6972 6973 # Not to be confused with TRUNCATE(number, decimals) function call 6974 if self._match(TokenType.L_PAREN): 6975 self._retreat(self._index - 2) 6976 return self._parse_function() 6977 6978 # Clickhouse supports TRUNCATE DATABASE as well 6979 is_database = self._match(TokenType.DATABASE) 6980 6981 self._match(TokenType.TABLE) 6982 6983 exists = self._parse_exists(not_=False) 6984 6985 expressions = self._parse_csv( 6986 lambda: self._parse_table(schema=True, is_db_reference=is_database) 6987 ) 6988 6989 cluster = self._parse_on_property() if self._match(TokenType.ON) else None 6990 6991 if self._match_text_seq("RESTART", "IDENTITY"): 6992 identity = "RESTART" 6993 elif self._match_text_seq("CONTINUE", "IDENTITY"): 6994 identity = "CONTINUE" 6995 else: 6996 identity = None 6997 6998 if self._match_text_seq("CASCADE") or self._match_text_seq("RESTRICT"): 6999 option = self._prev.text 7000 else: 7001 option = None 7002 7003 partition = self._parse_partition() 7004 7005 # Fallback case 7006 if self._curr: 7007 return self._parse_as_command(start) 7008 7009 return self.expression( 7010 exp.TruncateTable, 7011 expressions=expressions, 7012 is_database=is_database, 7013 exists=exists, 7014 cluster=cluster, 7015 identity=identity, 7016 option=option, 7017 partition=partition, 7018 ) 7019 7020 def _parse_with_operator(self) -> t.Optional[exp.Expression]: 7021 this = self._parse_ordered(self._parse_opclass) 7022 7023 if not self._match(TokenType.WITH): 7024 return this 7025 7026 op = self._parse_var(any_token=True) 7027 7028 return self.expression(exp.WithOperator, this=this, op=op) 7029 7030 def _parse_wrapped_options(self) -> t.List[t.Optional[exp.Expression]]: 7031 self._match(TokenType.EQ) 7032 self._match(TokenType.L_PAREN) 7033 7034 opts: t.List[t.Optional[exp.Expression]] = [] 7035 while self._curr and not self._match(TokenType.R_PAREN): 7036 if self._match_text_seq("FORMAT_NAME", "="): 7037 # The FORMAT_NAME can be set to an identifier for Snowflake and T-SQL, 7038 # so we parse it separately to use _parse_field() 7039 prop = self.expression( 7040 exp.Property, this=exp.var("FORMAT_NAME"), value=self._parse_field() 7041 ) 7042 opts.append(prop) 7043 else: 7044 opts.append(self._parse_property()) 7045 
7046 self._match(TokenType.COMMA) 7047 7048 return opts 7049 7050 def _parse_copy_parameters(self) -> t.List[exp.CopyParameter]: 7051 sep = TokenType.COMMA if self.dialect.COPY_PARAMS_ARE_CSV else None 7052 7053 options = [] 7054 while self._curr and not self._match(TokenType.R_PAREN, advance=False): 7055 option = self._parse_var(any_token=True) 7056 prev = self._prev.text.upper() 7057 7058 # Different dialects might separate options and values by white space, "=" and "AS" 7059 self._match(TokenType.EQ) 7060 self._match(TokenType.ALIAS) 7061 7062 param = self.expression(exp.CopyParameter, this=option) 7063 7064 if prev in self.COPY_INTO_VARLEN_OPTIONS and self._match( 7065 TokenType.L_PAREN, advance=False 7066 ): 7067 # Snowflake FILE_FORMAT case, Databricks COPY & FORMAT options 7068 param.set("expressions", self._parse_wrapped_options()) 7069 elif prev == "FILE_FORMAT": 7070 # T-SQL's external file format case 7071 param.set("expression", self._parse_field()) 7072 else: 7073 param.set("expression", self._parse_unquoted_field()) 7074 7075 options.append(param) 7076 self._match(sep) 7077 7078 return options 7079 7080 def _parse_credentials(self) -> t.Optional[exp.Credentials]: 7081 expr = self.expression(exp.Credentials) 7082 7083 if self._match_text_seq("STORAGE_INTEGRATION", "="): 7084 expr.set("storage", self._parse_field()) 7085 if self._match_text_seq("CREDENTIALS"): 7086 # Snowflake case: CREDENTIALS = (...), Redshift case: CREDENTIALS <string> 7087 creds = ( 7088 self._parse_wrapped_options() if self._match(TokenType.EQ) else self._parse_field() 7089 ) 7090 expr.set("credentials", creds) 7091 if self._match_text_seq("ENCRYPTION"): 7092 expr.set("encryption", self._parse_wrapped_options()) 7093 if self._match_text_seq("IAM_ROLE"): 7094 expr.set("iam_role", self._parse_field()) 7095 if self._match_text_seq("REGION"): 7096 expr.set("region", self._parse_field()) 7097 7098 return expr 7099 7100 def _parse_file_location(self) -> t.Optional[exp.Expression]: 7101 return self._parse_field() 7102 7103 def _parse_copy(self) -> exp.Copy | exp.Command: 7104 start = self._prev 7105 7106 self._match(TokenType.INTO) 7107 7108 this = ( 7109 self._parse_select(nested=True, parse_subquery_alias=False) 7110 if self._match(TokenType.L_PAREN, advance=False) 7111 else self._parse_table(schema=True) 7112 ) 7113 7114 kind = self._match(TokenType.FROM) or not self._match_text_seq("TO") 7115 7116 files = self._parse_csv(self._parse_file_location) 7117 credentials = self._parse_credentials() 7118 7119 self._match_text_seq("WITH") 7120 7121 params = self._parse_wrapped(self._parse_copy_parameters, optional=True) 7122 7123 # Fallback case 7124 if self._curr: 7125 return self._parse_as_command(start) 7126 7127 return self.expression( 7128 exp.Copy, 7129 this=this, 7130 kind=kind, 7131 credentials=credentials, 7132 files=files, 7133 params=params, 7134 )
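Usage sketch (our own example, not part of the module): a COPY statement that the grammar fully consumes comes back as a structured exp.Copy node, while trailing, unrecognized syntax takes the "Fallback case" branch above and degrades gracefully to an opaque exp.Command.

import sqlglot
from sqlglot import exp

ast = sqlglot.parse_one("COPY INTO t FROM 'data.csv'")
print(isinstance(ast, exp.Copy))  # expected: True on the default dialect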
26def build_var_map(args: t.List) -> exp.StarMap | exp.VarMap: 27 if len(args) == 1 and args[0].is_star: 28 return exp.StarMap(this=args[0]) 29 30 keys = [] 31 values = [] 32 for i in range(0, len(args), 2): 33 keys.append(args[i]) 34 values.append(args[i + 1]) 35 36 return exp.VarMap(keys=exp.array(*keys, copy=False), values=exp.array(*values, copy=False))
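For illustration (hypothetical snippet), feeding build_var_map an alternating key/value argument list, the way a MAP(...) call site would:

from sqlglot import exp
from sqlglot.parser import build_var_map

# Arguments alternate key, value, key, value, e.g. MAP('a', 1, 'b', 2)
args = [
    exp.Literal.string("a"),
    exp.Literal.number(1),
    exp.Literal.string("b"),
    exp.Literal.number(2),
]
var_map = build_var_map(args)
print(repr(var_map.args["keys"]))    # an exp.Array holding the keys
print(repr(var_map.args["values"]))  # an exp.Array holding the values

# A single star argument short-circuits to StarMap:
print(repr(build_var_map([exp.Star()])))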
44def binary_range_parser( 45 expr_type: t.Type[exp.Expression], reverse_args: bool = False 46) -> t.Callable[[Parser, t.Optional[exp.Expression]], t.Optional[exp.Expression]]: 47 def _parse_binary_range( 48 self: Parser, this: t.Optional[exp.Expression] 49 ) -> t.Optional[exp.Expression]: 50 expression = self._parse_bitwise() 51 if reverse_args: 52 this, expression = expression, this 53 return self._parse_escape(self.expression(expr_type, this=this, expression=expression)) 54 55 return _parse_binary_range
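As a usage note (our example): Parser.RANGE_PARSERS wires tokens such as LIKE to binary_range_parser, so an optional trailing ESCAPE clause is folded in by _parse_escape:

import sqlglot
from sqlglot import exp

ast = sqlglot.parse_one("SELECT c FROM t WHERE c LIKE 'a!%%' ESCAPE '!'")
print(ast.find(exp.Escape).sql())  # c LIKE 'a!%%' ESCAPE '!'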
58def build_logarithm(args: t.List, dialect: Dialect) -> exp.Func: 59 # Default argument order is base, expression 60 this = seq_get(args, 0) 61 expression = seq_get(args, 1) 62 63 if expression: 64 if not dialect.LOG_BASE_FIRST: 65 this, expression = expression, this 66 return exp.Log(this=this, expression=expression) 67 68 return (exp.Ln if dialect.parser_class.LOG_DEFAULTS_TO_LN else exp.Log)(this=this)
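A direct illustration of both code paths (hypothetical snippet, using the default dialect, whose parser has LOG_DEFAULTS_TO_LN = False):

from sqlglot import exp
from sqlglot.dialects import Dialect
from sqlglot.parser import build_logarithm

dialect = Dialect.get_or_raise(None)  # default dialect

# Two arguments: base comes first unless the dialect sets LOG_BASE_FIRST = False
print(repr(build_logarithm([exp.Literal.number(10), exp.column("x")], dialect)))

# One argument: stays exp.Log, since LOG_DEFAULTS_TO_LN is False by default
print(repr(build_logarithm([exp.column("x")], dialect)))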
88def build_extract_json_with_path(expr_type: t.Type[E]) -> t.Callable[[t.List, Dialect], E]: 89 def _builder(args: t.List, dialect: Dialect) -> E: 90 expression = expr_type( 91 this=seq_get(args, 0), expression=dialect.to_json_path(seq_get(args, 1)) 92 ) 93 if len(args) > 2 and expr_type is exp.JSONExtract: 94 expression.set("expressions", args[2:]) 95 96 return expression 97 98 return _builder
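For example (our snippet, default dialect): the second argument is handed to the dialect's to_json_path hook, so the path comes back as a structured node rather than a bare string:

import sqlglot
from sqlglot import exp

ast = sqlglot.parse_one("SELECT JSON_EXTRACT(doc, '$.user.name') FROM t")
extract = ast.find(exp.JSONExtract)
print(repr(extract.expression))  # the parsed JSON path node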
101def build_mod(args: t.List) -> exp.Mod: 102 this = seq_get(args, 0) 103 expression = seq_get(args, 1) 104 105 # Wrap the operands if they are binary nodes, e.g. MOD(a + 1, 7) -> (a + 1) % 7 106 this = exp.Paren(this=this) if isinstance(this, exp.Binary) else this 107 expression = exp.Paren(this=expression) if isinstance(expression, exp.Binary) else expression 108 109 return exp.Mod(this=this, expression=expression)
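The wrapping matters when the function form is rendered with the % operator; without the parentheses, operator precedence would change the meaning. For instance (our snippet, default dialect):

import sqlglot

# MOD(a + 1, 7) should round-trip to (a + 1) % 7, not a + 1 % 7
print(sqlglot.transpile("SELECT MOD(a + 1, 7)")[0])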
121def build_array_constructor( 122 exp_class: t.Type[E], args: t.List, bracket_kind: TokenType, dialect: Dialect 123) -> exp.Expression: 124 array_exp = exp_class(expressions=args) 125 126 if exp_class == exp.Array and dialect.HAS_DISTINCT_ARRAY_CONSTRUCTORS: 127 array_exp.set("bracket_notation", bracket_kind == TokenType.L_BRACKET) 128 129 return array_exp
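Called directly (hypothetical snippet): the bracket kind is only recorded for dialects that declare HAS_DISTINCT_ARRAY_CONSTRUCTORS, which the default dialect does not, so no bracket_notation flag is set here:

from sqlglot import exp
from sqlglot.dialects import Dialect
from sqlglot.parser import build_array_constructor
from sqlglot.tokens import TokenType

arr = build_array_constructor(
    exp.Array,
    [exp.Literal.number(1), exp.Literal.number(2)],
    TokenType.L_BRACKET,
    Dialect.get_or_raise(None),  # default dialect
)
print(repr(arr))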
132def build_convert_timezone( 133 args: t.List, default_source_tz: t.Optional[str] = None 134) -> t.Union[exp.ConvertTimezone, exp.Anonymous]: 135 if len(args) == 2: 136 source_tz = exp.Literal.string(default_source_tz) if default_source_tz else None 137 return exp.ConvertTimezone( 138 source_tz=source_tz, target_tz=seq_get(args, 0), timestamp=seq_get(args, 1) 139 ) 140 141 return exp.ConvertTimezone.from_arg_list(args)
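A small illustration (our snippet): with exactly two arguments the source timezone falls back to the supplied default, while three arguments defer to ConvertTimezone.from_arg_list:

from sqlglot import exp
from sqlglot.parser import build_convert_timezone

node = build_convert_timezone(
    [exp.Literal.string("America/New_York"), exp.column("created_at")],
    default_source_tz="UTC",
)
print(repr(node.args["source_tz"]))  # the injected 'UTC' string literal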
154class Parser(metaclass=_Parser): 155 """ 156 Parser consumes a list of tokens produced by the Tokenizer and produces a parsed syntax tree. 157 158 Args: 159 error_level: The desired error level. 160 Default: ErrorLevel.IMMEDIATE 161 error_message_context: The amount of context to capture from a query string when displaying 162 the error message (in number of characters). 163 Default: 100 164 max_errors: Maximum number of error messages to include in a raised ParseError. 165 This is only relevant if error_level is ErrorLevel.RAISE. 166 Default: 3 167 """ 168 169 FUNCTIONS: t.Dict[str, t.Callable] = { 170 **{name: func.from_arg_list for name, func in exp.FUNCTION_BY_NAME.items()}, 171 "ARRAY": lambda args, dialect: exp.Array(expressions=args), 172 "CONCAT": lambda args, dialect: exp.Concat( 173 expressions=args, 174 safe=not dialect.STRICT_STRING_CONCAT, 175 coalesce=dialect.CONCAT_COALESCE, 176 ), 177 "CONCAT_WS": lambda args, dialect: exp.ConcatWs( 178 expressions=args, 179 safe=not dialect.STRICT_STRING_CONCAT, 180 coalesce=dialect.CONCAT_COALESCE, 181 ), 182 "CONVERT_TIMEZONE": build_convert_timezone, 183 "DATE_TO_DATE_STR": lambda args: exp.Cast( 184 this=seq_get(args, 0), 185 to=exp.DataType(this=exp.DataType.Type.TEXT), 186 ), 187 "GENERATE_DATE_ARRAY": lambda args: exp.GenerateDateArray( 188 start=seq_get(args, 0), 189 end=seq_get(args, 1), 190 step=seq_get(args, 2) or exp.Interval(this=exp.Literal.number(1), unit=exp.var("DAY")), 191 ), 192 "GLOB": lambda args: exp.Glob(this=seq_get(args, 1), expression=seq_get(args, 0)), 193 "HEX": build_hex, 194 "JSON_EXTRACT": build_extract_json_with_path(exp.JSONExtract), 195 "JSON_EXTRACT_SCALAR": build_extract_json_with_path(exp.JSONExtractScalar), 196 "JSON_EXTRACT_PATH_TEXT": build_extract_json_with_path(exp.JSONExtractScalar), 197 "LIKE": build_like, 198 "LOG": build_logarithm, 199 "LOG2": lambda args: exp.Log(this=exp.Literal.number(2), expression=seq_get(args, 0)), 200 "LOG10": lambda args: exp.Log(this=exp.Literal.number(10), expression=seq_get(args, 0)), 201 "LOWER": build_lower, 202 "LPAD": lambda args: build_pad(args), 203 "LEFTPAD": lambda args: build_pad(args), 204 "MOD": build_mod, 205 "RPAD": lambda args: build_pad(args, is_left=False), 206 "RIGHTPAD": lambda args: build_pad(args, is_left=False), 207 "SCOPE_RESOLUTION": lambda args: exp.ScopeResolution(expression=seq_get(args, 0)) 208 if len(args) != 2 209 else exp.ScopeResolution(this=seq_get(args, 0), expression=seq_get(args, 1)), 210 "TIME_TO_TIME_STR": lambda args: exp.Cast( 211 this=seq_get(args, 0), 212 to=exp.DataType(this=exp.DataType.Type.TEXT), 213 ), 214 "TO_HEX": build_hex, 215 "TS_OR_DS_TO_DATE_STR": lambda args: exp.Substring( 216 this=exp.Cast( 217 this=seq_get(args, 0), 218 to=exp.DataType(this=exp.DataType.Type.TEXT), 219 ), 220 start=exp.Literal.number(1), 221 length=exp.Literal.number(10), 222 ), 223 "UNNEST": lambda args: exp.Unnest(expressions=ensure_list(seq_get(args, 0))), 224 "UPPER": build_upper, 225 "VAR_MAP": build_var_map, 226 } 227 228 NO_PAREN_FUNCTIONS = { 229 TokenType.CURRENT_DATE: exp.CurrentDate, 230 TokenType.CURRENT_DATETIME: exp.CurrentDatetime, 231 TokenType.CURRENT_TIME: exp.CurrentTime, 232 TokenType.CURRENT_TIMESTAMP: exp.CurrentTimestamp, 233 TokenType.CURRENT_USER: exp.CurrentUser, 234 } 235 236 STRUCT_TYPE_TOKENS = { 237 TokenType.NESTED, 238 TokenType.OBJECT, 239 TokenType.STRUCT, 240 } 241 242 NESTED_TYPE_TOKENS = { 243 TokenType.ARRAY, 244 TokenType.LIST, 245 TokenType.LOWCARDINALITY, 246 TokenType.MAP, 247 TokenType.NULLABLE, 248
*STRUCT_TYPE_TOKENS, 249 } 250 251 ENUM_TYPE_TOKENS = { 252 TokenType.ENUM, 253 TokenType.ENUM8, 254 TokenType.ENUM16, 255 } 256 257 AGGREGATE_TYPE_TOKENS = { 258 TokenType.AGGREGATEFUNCTION, 259 TokenType.SIMPLEAGGREGATEFUNCTION, 260 } 261 262 TYPE_TOKENS = { 263 TokenType.BIT, 264 TokenType.BOOLEAN, 265 TokenType.TINYINT, 266 TokenType.UTINYINT, 267 TokenType.SMALLINT, 268 TokenType.USMALLINT, 269 TokenType.INT, 270 TokenType.UINT, 271 TokenType.BIGINT, 272 TokenType.UBIGINT, 273 TokenType.INT128, 274 TokenType.UINT128, 275 TokenType.INT256, 276 TokenType.UINT256, 277 TokenType.MEDIUMINT, 278 TokenType.UMEDIUMINT, 279 TokenType.FIXEDSTRING, 280 TokenType.FLOAT, 281 TokenType.DOUBLE, 282 TokenType.CHAR, 283 TokenType.NCHAR, 284 TokenType.VARCHAR, 285 TokenType.NVARCHAR, 286 TokenType.BPCHAR, 287 TokenType.TEXT, 288 TokenType.MEDIUMTEXT, 289 TokenType.LONGTEXT, 290 TokenType.MEDIUMBLOB, 291 TokenType.LONGBLOB, 292 TokenType.BINARY, 293 TokenType.VARBINARY, 294 TokenType.JSON, 295 TokenType.JSONB, 296 TokenType.INTERVAL, 297 TokenType.TINYBLOB, 298 TokenType.TINYTEXT, 299 TokenType.TIME, 300 TokenType.TIMETZ, 301 TokenType.TIMESTAMP, 302 TokenType.TIMESTAMP_S, 303 TokenType.TIMESTAMP_MS, 304 TokenType.TIMESTAMP_NS, 305 TokenType.TIMESTAMPTZ, 306 TokenType.TIMESTAMPLTZ, 307 TokenType.TIMESTAMPNTZ, 308 TokenType.DATETIME, 309 TokenType.DATETIME64, 310 TokenType.DATE, 311 TokenType.DATE32, 312 TokenType.INT4RANGE, 313 TokenType.INT4MULTIRANGE, 314 TokenType.INT8RANGE, 315 TokenType.INT8MULTIRANGE, 316 TokenType.NUMRANGE, 317 TokenType.NUMMULTIRANGE, 318 TokenType.TSRANGE, 319 TokenType.TSMULTIRANGE, 320 TokenType.TSTZRANGE, 321 TokenType.TSTZMULTIRANGE, 322 TokenType.DATERANGE, 323 TokenType.DATEMULTIRANGE, 324 TokenType.DECIMAL, 325 TokenType.UDECIMAL, 326 TokenType.BIGDECIMAL, 327 TokenType.UUID, 328 TokenType.GEOGRAPHY, 329 TokenType.GEOMETRY, 330 TokenType.HLLSKETCH, 331 TokenType.HSTORE, 332 TokenType.PSEUDO_TYPE, 333 TokenType.SUPER, 334 TokenType.SERIAL, 335 TokenType.SMALLSERIAL, 336 TokenType.BIGSERIAL, 337 TokenType.XML, 338 TokenType.YEAR, 339 TokenType.UNIQUEIDENTIFIER, 340 TokenType.USERDEFINED, 341 TokenType.MONEY, 342 TokenType.SMALLMONEY, 343 TokenType.ROWVERSION, 344 TokenType.IMAGE, 345 TokenType.VARIANT, 346 TokenType.VECTOR, 347 TokenType.OBJECT, 348 TokenType.OBJECT_IDENTIFIER, 349 TokenType.INET, 350 TokenType.IPADDRESS, 351 TokenType.IPPREFIX, 352 TokenType.IPV4, 353 TokenType.IPV6, 354 TokenType.UNKNOWN, 355 TokenType.NULL, 356 TokenType.NAME, 357 TokenType.TDIGEST, 358 *ENUM_TYPE_TOKENS, 359 *NESTED_TYPE_TOKENS, 360 *AGGREGATE_TYPE_TOKENS, 361 } 362 363 SIGNED_TO_UNSIGNED_TYPE_TOKEN = { 364 TokenType.BIGINT: TokenType.UBIGINT, 365 TokenType.INT: TokenType.UINT, 366 TokenType.MEDIUMINT: TokenType.UMEDIUMINT, 367 TokenType.SMALLINT: TokenType.USMALLINT, 368 TokenType.TINYINT: TokenType.UTINYINT, 369 TokenType.DECIMAL: TokenType.UDECIMAL, 370 } 371 372 SUBQUERY_PREDICATES = { 373 TokenType.ANY: exp.Any, 374 TokenType.ALL: exp.All, 375 TokenType.EXISTS: exp.Exists, 376 TokenType.SOME: exp.Any, 377 } 378 379 RESERVED_TOKENS = { 380 *Tokenizer.SINGLE_TOKENS.values(), 381 TokenType.SELECT, 382 } - {TokenType.IDENTIFIER} 383 384 DB_CREATABLES = { 385 TokenType.DATABASE, 386 TokenType.DICTIONARY, 387 TokenType.MODEL, 388 TokenType.SCHEMA, 389 TokenType.SEQUENCE, 390 TokenType.STORAGE_INTEGRATION, 391 TokenType.TABLE, 392 TokenType.TAG, 393 TokenType.VIEW, 394 TokenType.WAREHOUSE, 395 TokenType.STREAMLIT, 396 } 397 398 CREATABLES = { 399 TokenType.COLUMN, 400 
TokenType.CONSTRAINT, 401 TokenType.FOREIGN_KEY, 402 TokenType.FUNCTION, 403 TokenType.INDEX, 404 TokenType.PROCEDURE, 405 *DB_CREATABLES, 406 } 407 408 ALTERABLES = { 409 TokenType.TABLE, 410 TokenType.VIEW, 411 } 412 413 # Tokens that can represent identifiers 414 ID_VAR_TOKENS = { 415 TokenType.ALL, 416 TokenType.VAR, 417 TokenType.ANTI, 418 TokenType.APPLY, 419 TokenType.ASC, 420 TokenType.ASOF, 421 TokenType.AUTO_INCREMENT, 422 TokenType.BEGIN, 423 TokenType.BPCHAR, 424 TokenType.CACHE, 425 TokenType.CASE, 426 TokenType.COLLATE, 427 TokenType.COMMAND, 428 TokenType.COMMENT, 429 TokenType.COMMIT, 430 TokenType.CONSTRAINT, 431 TokenType.COPY, 432 TokenType.CUBE, 433 TokenType.DEFAULT, 434 TokenType.DELETE, 435 TokenType.DESC, 436 TokenType.DESCRIBE, 437 TokenType.DICTIONARY, 438 TokenType.DIV, 439 TokenType.END, 440 TokenType.EXECUTE, 441 TokenType.ESCAPE, 442 TokenType.FALSE, 443 TokenType.FIRST, 444 TokenType.FILTER, 445 TokenType.FINAL, 446 TokenType.FORMAT, 447 TokenType.FULL, 448 TokenType.IDENTIFIER, 449 TokenType.IS, 450 TokenType.ISNULL, 451 TokenType.INTERVAL, 452 TokenType.KEEP, 453 TokenType.KILL, 454 TokenType.LEFT, 455 TokenType.LOAD, 456 TokenType.MERGE, 457 TokenType.NATURAL, 458 TokenType.NEXT, 459 TokenType.OFFSET, 460 TokenType.OPERATOR, 461 TokenType.ORDINALITY, 462 TokenType.OVERLAPS, 463 TokenType.OVERWRITE, 464 TokenType.PARTITION, 465 TokenType.PERCENT, 466 TokenType.PIVOT, 467 TokenType.PRAGMA, 468 TokenType.RANGE, 469 TokenType.RECURSIVE, 470 TokenType.REFERENCES, 471 TokenType.REFRESH, 472 TokenType.RENAME, 473 TokenType.REPLACE, 474 TokenType.RIGHT, 475 TokenType.ROLLUP, 476 TokenType.ROW, 477 TokenType.ROWS, 478 TokenType.SEMI, 479 TokenType.SET, 480 TokenType.SETTINGS, 481 TokenType.SHOW, 482 TokenType.TEMPORARY, 483 TokenType.TOP, 484 TokenType.TRUE, 485 TokenType.TRUNCATE, 486 TokenType.UNIQUE, 487 TokenType.UNNEST, 488 TokenType.UNPIVOT, 489 TokenType.UPDATE, 490 TokenType.USE, 491 TokenType.VOLATILE, 492 TokenType.WINDOW, 493 *CREATABLES, 494 *SUBQUERY_PREDICATES, 495 *TYPE_TOKENS, 496 *NO_PAREN_FUNCTIONS, 497 } 498 499 INTERVAL_VARS = ID_VAR_TOKENS - {TokenType.END} 500 501 TABLE_ALIAS_TOKENS = ID_VAR_TOKENS - { 502 TokenType.ANTI, 503 TokenType.APPLY, 504 TokenType.ASOF, 505 TokenType.FULL, 506 TokenType.LEFT, 507 TokenType.LOCK, 508 TokenType.NATURAL, 509 TokenType.OFFSET, 510 TokenType.RIGHT, 511 TokenType.SEMI, 512 TokenType.WINDOW, 513 } 514 515 ALIAS_TOKENS = ID_VAR_TOKENS 516 517 ARRAY_CONSTRUCTORS = { 518 "ARRAY": exp.Array, 519 "LIST": exp.List, 520 } 521 522 COMMENT_TABLE_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - {TokenType.IS} 523 524 UPDATE_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - {TokenType.SET} 525 526 TRIM_TYPES = {"LEADING", "TRAILING", "BOTH"} 527 528 FUNC_TOKENS = { 529 TokenType.COLLATE, 530 TokenType.COMMAND, 531 TokenType.CURRENT_DATE, 532 TokenType.CURRENT_DATETIME, 533 TokenType.CURRENT_TIMESTAMP, 534 TokenType.CURRENT_TIME, 535 TokenType.CURRENT_USER, 536 TokenType.FILTER, 537 TokenType.FIRST, 538 TokenType.FORMAT, 539 TokenType.GLOB, 540 TokenType.IDENTIFIER, 541 TokenType.INDEX, 542 TokenType.ISNULL, 543 TokenType.ILIKE, 544 TokenType.INSERT, 545 TokenType.LIKE, 546 TokenType.MERGE, 547 TokenType.OFFSET, 548 TokenType.PRIMARY_KEY, 549 TokenType.RANGE, 550 TokenType.REPLACE, 551 TokenType.RLIKE, 552 TokenType.ROW, 553 TokenType.UNNEST, 554 TokenType.VAR, 555 TokenType.LEFT, 556 TokenType.RIGHT, 557 TokenType.SEQUENCE, 558 TokenType.DATE, 559 TokenType.DATETIME, 560 TokenType.TABLE, 561 TokenType.TIMESTAMP, 562 TokenType.TIMESTAMPTZ, 563 
TokenType.TRUNCATE, 564 TokenType.WINDOW, 565 TokenType.XOR, 566 *TYPE_TOKENS, 567 *SUBQUERY_PREDICATES, 568 } 569 570 CONJUNCTION: t.Dict[TokenType, t.Type[exp.Expression]] = { 571 TokenType.AND: exp.And, 572 } 573 574 ASSIGNMENT: t.Dict[TokenType, t.Type[exp.Expression]] = { 575 TokenType.COLON_EQ: exp.PropertyEQ, 576 } 577 578 DISJUNCTION: t.Dict[TokenType, t.Type[exp.Expression]] = { 579 TokenType.OR: exp.Or, 580 } 581 582 EQUALITY = { 583 TokenType.EQ: exp.EQ, 584 TokenType.NEQ: exp.NEQ, 585 TokenType.NULLSAFE_EQ: exp.NullSafeEQ, 586 } 587 588 COMPARISON = { 589 TokenType.GT: exp.GT, 590 TokenType.GTE: exp.GTE, 591 TokenType.LT: exp.LT, 592 TokenType.LTE: exp.LTE, 593 } 594 595 BITWISE = { 596 TokenType.AMP: exp.BitwiseAnd, 597 TokenType.CARET: exp.BitwiseXor, 598 TokenType.PIPE: exp.BitwiseOr, 599 } 600 601 TERM = { 602 TokenType.DASH: exp.Sub, 603 TokenType.PLUS: exp.Add, 604 TokenType.MOD: exp.Mod, 605 TokenType.COLLATE: exp.Collate, 606 } 607 608 FACTOR = { 609 TokenType.DIV: exp.IntDiv, 610 TokenType.LR_ARROW: exp.Distance, 611 TokenType.SLASH: exp.Div, 612 TokenType.STAR: exp.Mul, 613 } 614 615 EXPONENT: t.Dict[TokenType, t.Type[exp.Expression]] = {} 616 617 TIMES = { 618 TokenType.TIME, 619 TokenType.TIMETZ, 620 } 621 622 TIMESTAMPS = { 623 TokenType.TIMESTAMP, 624 TokenType.TIMESTAMPTZ, 625 TokenType.TIMESTAMPLTZ, 626 *TIMES, 627 } 628 629 SET_OPERATIONS = { 630 TokenType.UNION, 631 TokenType.INTERSECT, 632 TokenType.EXCEPT, 633 } 634 635 JOIN_METHODS = { 636 TokenType.ASOF, 637 TokenType.NATURAL, 638 TokenType.POSITIONAL, 639 } 640 641 JOIN_SIDES = { 642 TokenType.LEFT, 643 TokenType.RIGHT, 644 TokenType.FULL, 645 } 646 647 JOIN_KINDS = { 648 TokenType.ANTI, 649 TokenType.CROSS, 650 TokenType.INNER, 651 TokenType.OUTER, 652 TokenType.SEMI, 653 TokenType.STRAIGHT_JOIN, 654 } 655 656 JOIN_HINTS: t.Set[str] = set() 657 658 LAMBDAS = { 659 TokenType.ARROW: lambda self, expressions: self.expression( 660 exp.Lambda, 661 this=self._replace_lambda( 662 self._parse_assignment(), 663 expressions, 664 ), 665 expressions=expressions, 666 ), 667 TokenType.FARROW: lambda self, expressions: self.expression( 668 exp.Kwarg, 669 this=exp.var(expressions[0].name), 670 expression=self._parse_assignment(), 671 ), 672 } 673 674 COLUMN_OPERATORS = { 675 TokenType.DOT: None, 676 TokenType.DCOLON: lambda self, this, to: self.expression( 677 exp.Cast if self.STRICT_CAST else exp.TryCast, 678 this=this, 679 to=to, 680 ), 681 TokenType.ARROW: lambda self, this, path: self.expression( 682 exp.JSONExtract, 683 this=this, 684 expression=self.dialect.to_json_path(path), 685 only_json_types=self.JSON_ARROWS_REQUIRE_JSON_TYPE, 686 ), 687 TokenType.DARROW: lambda self, this, path: self.expression( 688 exp.JSONExtractScalar, 689 this=this, 690 expression=self.dialect.to_json_path(path), 691 only_json_types=self.JSON_ARROWS_REQUIRE_JSON_TYPE, 692 ), 693 TokenType.HASH_ARROW: lambda self, this, path: self.expression( 694 exp.JSONBExtract, 695 this=this, 696 expression=path, 697 ), 698 TokenType.DHASH_ARROW: lambda self, this, path: self.expression( 699 exp.JSONBExtractScalar, 700 this=this, 701 expression=path, 702 ), 703 TokenType.PLACEHOLDER: lambda self, this, key: self.expression( 704 exp.JSONBContains, 705 this=this, 706 expression=key, 707 ), 708 } 709 710 EXPRESSION_PARSERS = { 711 exp.Cluster: lambda self: self._parse_sort(exp.Cluster, TokenType.CLUSTER_BY), 712 exp.Column: lambda self: self._parse_column(), 713 exp.Condition: lambda self: self._parse_assignment(), 714 exp.DataType: lambda self: 
self._parse_types(allow_identifiers=False, schema=True), 715 exp.Expression: lambda self: self._parse_expression(), 716 exp.From: lambda self: self._parse_from(joins=True), 717 exp.Group: lambda self: self._parse_group(), 718 exp.Having: lambda self: self._parse_having(), 719 exp.Identifier: lambda self: self._parse_id_var(), 720 exp.Join: lambda self: self._parse_join(), 721 exp.Lambda: lambda self: self._parse_lambda(), 722 exp.Lateral: lambda self: self._parse_lateral(), 723 exp.Limit: lambda self: self._parse_limit(), 724 exp.Offset: lambda self: self._parse_offset(), 725 exp.Order: lambda self: self._parse_order(), 726 exp.Ordered: lambda self: self._parse_ordered(), 727 exp.Properties: lambda self: self._parse_properties(), 728 exp.Qualify: lambda self: self._parse_qualify(), 729 exp.Returning: lambda self: self._parse_returning(), 730 exp.Select: lambda self: self._parse_select(), 731 exp.Sort: lambda self: self._parse_sort(exp.Sort, TokenType.SORT_BY), 732 exp.Table: lambda self: self._parse_table_parts(), 733 exp.TableAlias: lambda self: self._parse_table_alias(), 734 exp.When: lambda self: seq_get(self._parse_when_matched(), 0), 735 exp.Where: lambda self: self._parse_where(), 736 exp.Window: lambda self: self._parse_named_window(), 737 exp.With: lambda self: self._parse_with(), 738 "JOIN_TYPE": lambda self: self._parse_join_parts(), 739 } 740 741 STATEMENT_PARSERS = { 742 TokenType.ALTER: lambda self: self._parse_alter(), 743 TokenType.BEGIN: lambda self: self._parse_transaction(), 744 TokenType.CACHE: lambda self: self._parse_cache(), 745 TokenType.COMMENT: lambda self: self._parse_comment(), 746 TokenType.COMMIT: lambda self: self._parse_commit_or_rollback(), 747 TokenType.COPY: lambda self: self._parse_copy(), 748 TokenType.CREATE: lambda self: self._parse_create(), 749 TokenType.DELETE: lambda self: self._parse_delete(), 750 TokenType.DESC: lambda self: self._parse_describe(), 751 TokenType.DESCRIBE: lambda self: self._parse_describe(), 752 TokenType.DROP: lambda self: self._parse_drop(), 753 TokenType.INSERT: lambda self: self._parse_insert(), 754 TokenType.KILL: lambda self: self._parse_kill(), 755 TokenType.LOAD: lambda self: self._parse_load(), 756 TokenType.MERGE: lambda self: self._parse_merge(), 757 TokenType.PIVOT: lambda self: self._parse_simplified_pivot(), 758 TokenType.PRAGMA: lambda self: self.expression(exp.Pragma, this=self._parse_expression()), 759 TokenType.REFRESH: lambda self: self._parse_refresh(), 760 TokenType.ROLLBACK: lambda self: self._parse_commit_or_rollback(), 761 TokenType.SET: lambda self: self._parse_set(), 762 TokenType.TRUNCATE: lambda self: self._parse_truncate_table(), 763 TokenType.UNCACHE: lambda self: self._parse_uncache(), 764 TokenType.UPDATE: lambda self: self._parse_update(), 765 TokenType.USE: lambda self: self.expression( 766 exp.Use, 767 kind=self._parse_var_from_options(self.USABLES, raise_unmatched=False), 768 this=self._parse_table(schema=False), 769 ), 770 TokenType.SEMICOLON: lambda self: self.expression(exp.Semicolon), 771 } 772 773 UNARY_PARSERS = { 774 TokenType.PLUS: lambda self: self._parse_unary(), # Unary + is handled as a no-op 775 TokenType.NOT: lambda self: self.expression(exp.Not, this=self._parse_equality()), 776 TokenType.TILDA: lambda self: self.expression(exp.BitwiseNot, this=self._parse_unary()), 777 TokenType.DASH: lambda self: self.expression(exp.Neg, this=self._parse_unary()), 778 TokenType.PIPE_SLASH: lambda self: self.expression(exp.Sqrt, this=self._parse_unary()), 779 TokenType.DPIPE_SLASH: lambda self: 
self.expression(exp.Cbrt, this=self._parse_unary()), 780 } 781 782 STRING_PARSERS = { 783 TokenType.HEREDOC_STRING: lambda self, token: self.expression( 784 exp.RawString, this=token.text 785 ), 786 TokenType.NATIONAL_STRING: lambda self, token: self.expression( 787 exp.National, this=token.text 788 ), 789 TokenType.RAW_STRING: lambda self, token: self.expression(exp.RawString, this=token.text), 790 TokenType.STRING: lambda self, token: self.expression( 791 exp.Literal, this=token.text, is_string=True 792 ), 793 TokenType.UNICODE_STRING: lambda self, token: self.expression( 794 exp.UnicodeString, 795 this=token.text, 796 escape=self._match_text_seq("UESCAPE") and self._parse_string(), 797 ), 798 } 799 800 NUMERIC_PARSERS = { 801 TokenType.BIT_STRING: lambda self, token: self.expression(exp.BitString, this=token.text), 802 TokenType.BYTE_STRING: lambda self, token: self.expression(exp.ByteString, this=token.text), 803 TokenType.HEX_STRING: lambda self, token: self.expression(exp.HexString, this=token.text), 804 TokenType.NUMBER: lambda self, token: self.expression( 805 exp.Literal, this=token.text, is_string=False 806 ), 807 } 808 809 PRIMARY_PARSERS = { 810 **STRING_PARSERS, 811 **NUMERIC_PARSERS, 812 TokenType.INTRODUCER: lambda self, token: self._parse_introducer(token), 813 TokenType.NULL: lambda self, _: self.expression(exp.Null), 814 TokenType.TRUE: lambda self, _: self.expression(exp.Boolean, this=True), 815 TokenType.FALSE: lambda self, _: self.expression(exp.Boolean, this=False), 816 TokenType.SESSION_PARAMETER: lambda self, _: self._parse_session_parameter(), 817 TokenType.STAR: lambda self, _: self.expression( 818 exp.Star, 819 **{ 820 "except": self._parse_star_op("EXCEPT", "EXCLUDE"), 821 "replace": self._parse_star_op("REPLACE"), 822 "rename": self._parse_star_op("RENAME"), 823 }, 824 ), 825 } 826 827 PLACEHOLDER_PARSERS = { 828 TokenType.PLACEHOLDER: lambda self: self.expression(exp.Placeholder), 829 TokenType.PARAMETER: lambda self: self._parse_parameter(), 830 TokenType.COLON: lambda self: ( 831 self.expression(exp.Placeholder, this=self._prev.text) 832 if self._match_set(self.ID_VAR_TOKENS) 833 else None 834 ), 835 } 836 837 RANGE_PARSERS = { 838 TokenType.BETWEEN: lambda self, this: self._parse_between(this), 839 TokenType.GLOB: binary_range_parser(exp.Glob), 840 TokenType.ILIKE: binary_range_parser(exp.ILike), 841 TokenType.IN: lambda self, this: self._parse_in(this), 842 TokenType.IRLIKE: binary_range_parser(exp.RegexpILike), 843 TokenType.IS: lambda self, this: self._parse_is(this), 844 TokenType.LIKE: binary_range_parser(exp.Like), 845 TokenType.OVERLAPS: binary_range_parser(exp.Overlaps), 846 TokenType.RLIKE: binary_range_parser(exp.RegexpLike), 847 TokenType.SIMILAR_TO: binary_range_parser(exp.SimilarTo), 848 TokenType.FOR: lambda self, this: self._parse_comprehension(this), 849 } 850 851 PROPERTY_PARSERS: t.Dict[str, t.Callable] = { 852 "ALLOWED_VALUES": lambda self: self.expression( 853 exp.AllowedValuesProperty, expressions=self._parse_csv(self._parse_primary) 854 ), 855 "ALGORITHM": lambda self: self._parse_property_assignment(exp.AlgorithmProperty), 856 "AUTO": lambda self: self._parse_auto_property(), 857 "AUTO_INCREMENT": lambda self: self._parse_property_assignment(exp.AutoIncrementProperty), 858 "BACKUP": lambda self: self.expression( 859 exp.BackupProperty, this=self._parse_var(any_token=True) 860 ), 861 "BLOCKCOMPRESSION": lambda self: self._parse_blockcompression(), 862 "CHARSET": lambda self, **kwargs: self._parse_character_set(**kwargs), 863 "CHARACTER 
SET": lambda self, **kwargs: self._parse_character_set(**kwargs), 864 "CHECKSUM": lambda self: self._parse_checksum(), 865 "CLUSTER BY": lambda self: self._parse_cluster(), 866 "CLUSTERED": lambda self: self._parse_clustered_by(), 867 "COLLATE": lambda self, **kwargs: self._parse_property_assignment( 868 exp.CollateProperty, **kwargs 869 ), 870 "COMMENT": lambda self: self._parse_property_assignment(exp.SchemaCommentProperty), 871 "CONTAINS": lambda self: self._parse_contains_property(), 872 "COPY": lambda self: self._parse_copy_property(), 873 "DATABLOCKSIZE": lambda self, **kwargs: self._parse_datablocksize(**kwargs), 874 "DATA_DELETION": lambda self: self._parse_data_deletion_property(), 875 "DEFINER": lambda self: self._parse_definer(), 876 "DETERMINISTIC": lambda self: self.expression( 877 exp.StabilityProperty, this=exp.Literal.string("IMMUTABLE") 878 ), 879 "DYNAMIC": lambda self: self.expression(exp.DynamicProperty), 880 "DISTKEY": lambda self: self._parse_distkey(), 881 "DISTSTYLE": lambda self: self._parse_property_assignment(exp.DistStyleProperty), 882 "EMPTY": lambda self: self.expression(exp.EmptyProperty), 883 "ENGINE": lambda self: self._parse_property_assignment(exp.EngineProperty), 884 "EXECUTE": lambda self: self._parse_property_assignment(exp.ExecuteAsProperty), 885 "EXTERNAL": lambda self: self.expression(exp.ExternalProperty), 886 "FALLBACK": lambda self, **kwargs: self._parse_fallback(**kwargs), 887 "FORMAT": lambda self: self._parse_property_assignment(exp.FileFormatProperty), 888 "FREESPACE": lambda self: self._parse_freespace(), 889 "GLOBAL": lambda self: self.expression(exp.GlobalProperty), 890 "HEAP": lambda self: self.expression(exp.HeapProperty), 891 "ICEBERG": lambda self: self.expression(exp.IcebergProperty), 892 "IMMUTABLE": lambda self: self.expression( 893 exp.StabilityProperty, this=exp.Literal.string("IMMUTABLE") 894 ), 895 "INHERITS": lambda self: self.expression( 896 exp.InheritsProperty, expressions=self._parse_wrapped_csv(self._parse_table) 897 ), 898 "INPUT": lambda self: self.expression(exp.InputModelProperty, this=self._parse_schema()), 899 "JOURNAL": lambda self, **kwargs: self._parse_journal(**kwargs), 900 "LANGUAGE": lambda self: self._parse_property_assignment(exp.LanguageProperty), 901 "LAYOUT": lambda self: self._parse_dict_property(this="LAYOUT"), 902 "LIFETIME": lambda self: self._parse_dict_range(this="LIFETIME"), 903 "LIKE": lambda self: self._parse_create_like(), 904 "LOCATION": lambda self: self._parse_property_assignment(exp.LocationProperty), 905 "LOCK": lambda self: self._parse_locking(), 906 "LOCKING": lambda self: self._parse_locking(), 907 "LOG": lambda self, **kwargs: self._parse_log(**kwargs), 908 "MATERIALIZED": lambda self: self.expression(exp.MaterializedProperty), 909 "MERGEBLOCKRATIO": lambda self, **kwargs: self._parse_mergeblockratio(**kwargs), 910 "MODIFIES": lambda self: self._parse_modifies_property(), 911 "MULTISET": lambda self: self.expression(exp.SetProperty, multi=True), 912 "NO": lambda self: self._parse_no_property(), 913 "ON": lambda self: self._parse_on_property(), 914 "ORDER BY": lambda self: self._parse_order(skip_order_token=True), 915 "OUTPUT": lambda self: self.expression(exp.OutputModelProperty, this=self._parse_schema()), 916 "PARTITION": lambda self: self._parse_partitioned_of(), 917 "PARTITION BY": lambda self: self._parse_partitioned_by(), 918 "PARTITIONED BY": lambda self: self._parse_partitioned_by(), 919 "PARTITIONED_BY": lambda self: self._parse_partitioned_by(), 920 "PRIMARY KEY": lambda self: 
self._parse_primary_key(in_props=True), 921 "RANGE": lambda self: self._parse_dict_range(this="RANGE"), 922 "READS": lambda self: self._parse_reads_property(), 923 "REMOTE": lambda self: self._parse_remote_with_connection(), 924 "RETURNS": lambda self: self._parse_returns(), 925 "STRICT": lambda self: self.expression(exp.StrictProperty), 926 "STREAMING": lambda self: self.expression(exp.StreamingTableProperty), 927 "ROW": lambda self: self._parse_row(), 928 "ROW_FORMAT": lambda self: self._parse_property_assignment(exp.RowFormatProperty), 929 "SAMPLE": lambda self: self.expression( 930 exp.SampleProperty, this=self._match_text_seq("BY") and self._parse_bitwise() 931 ), 932 "SECURE": lambda self: self.expression(exp.SecureProperty), 933 "SET": lambda self: self.expression(exp.SetProperty, multi=False), 934 "SETTINGS": lambda self: self._parse_settings_property(), 935 "SHARING": lambda self: self._parse_property_assignment(exp.SharingProperty), 936 "SORTKEY": lambda self: self._parse_sortkey(), 937 "SOURCE": lambda self: self._parse_dict_property(this="SOURCE"), 938 "STABLE": lambda self: self.expression( 939 exp.StabilityProperty, this=exp.Literal.string("STABLE") 940 ), 941 "STORED": lambda self: self._parse_stored(), 942 "SYSTEM_VERSIONING": lambda self: self._parse_system_versioning_property(), 943 "TBLPROPERTIES": lambda self: self._parse_wrapped_properties(), 944 "TEMP": lambda self: self.expression(exp.TemporaryProperty), 945 "TEMPORARY": lambda self: self.expression(exp.TemporaryProperty), 946 "TO": lambda self: self._parse_to_table(), 947 "TRANSIENT": lambda self: self.expression(exp.TransientProperty), 948 "TRANSFORM": lambda self: self.expression( 949 exp.TransformModelProperty, expressions=self._parse_wrapped_csv(self._parse_expression) 950 ), 951 "TTL": lambda self: self._parse_ttl(), 952 "USING": lambda self: self._parse_property_assignment(exp.FileFormatProperty), 953 "UNLOGGED": lambda self: self.expression(exp.UnloggedProperty), 954 "VOLATILE": lambda self: self._parse_volatile_property(), 955 "WITH": lambda self: self._parse_with_property(), 956 } 957 958 CONSTRAINT_PARSERS = { 959 "AUTOINCREMENT": lambda self: self._parse_auto_increment(), 960 "AUTO_INCREMENT": lambda self: self._parse_auto_increment(), 961 "CASESPECIFIC": lambda self: self.expression(exp.CaseSpecificColumnConstraint, not_=False), 962 "CHARACTER SET": lambda self: self.expression( 963 exp.CharacterSetColumnConstraint, this=self._parse_var_or_string() 964 ), 965 "CHECK": lambda self: self.expression( 966 exp.CheckColumnConstraint, 967 this=self._parse_wrapped(self._parse_assignment), 968 enforced=self._match_text_seq("ENFORCED"), 969 ), 970 "COLLATE": lambda self: self.expression( 971 exp.CollateColumnConstraint, 972 this=self._parse_identifier() or self._parse_column(), 973 ), 974 "COMMENT": lambda self: self.expression( 975 exp.CommentColumnConstraint, this=self._parse_string() 976 ), 977 "COMPRESS": lambda self: self._parse_compress(), 978 "CLUSTERED": lambda self: self.expression( 979 exp.ClusteredColumnConstraint, this=self._parse_wrapped_csv(self._parse_ordered) 980 ), 981 "NONCLUSTERED": lambda self: self.expression( 982 exp.NonClusteredColumnConstraint, this=self._parse_wrapped_csv(self._parse_ordered) 983 ), 984 "DEFAULT": lambda self: self.expression( 985 exp.DefaultColumnConstraint, this=self._parse_bitwise() 986 ), 987 "ENCODE": lambda self: self.expression(exp.EncodeColumnConstraint, this=self._parse_var()), 988 "EPHEMERAL": lambda self: self.expression( 989 exp.EphemeralColumnConstraint, 
this=self._parse_bitwise() 990 ), 991 "EXCLUDE": lambda self: self.expression( 992 exp.ExcludeColumnConstraint, this=self._parse_index_params() 993 ), 994 "FOREIGN KEY": lambda self: self._parse_foreign_key(), 995 "FORMAT": lambda self: self.expression( 996 exp.DateFormatColumnConstraint, this=self._parse_var_or_string() 997 ), 998 "GENERATED": lambda self: self._parse_generated_as_identity(), 999 "IDENTITY": lambda self: self._parse_auto_increment(), 1000 "INLINE": lambda self: self._parse_inline(), 1001 "LIKE": lambda self: self._parse_create_like(), 1002 "NOT": lambda self: self._parse_not_constraint(), 1003 "NULL": lambda self: self.expression(exp.NotNullColumnConstraint, allow_null=True), 1004 "ON": lambda self: ( 1005 self._match(TokenType.UPDATE) 1006 and self.expression(exp.OnUpdateColumnConstraint, this=self._parse_function()) 1007 ) 1008 or self.expression(exp.OnProperty, this=self._parse_id_var()), 1009 "PATH": lambda self: self.expression(exp.PathColumnConstraint, this=self._parse_string()), 1010 "PERIOD": lambda self: self._parse_period_for_system_time(), 1011 "PRIMARY KEY": lambda self: self._parse_primary_key(), 1012 "REFERENCES": lambda self: self._parse_references(match=False), 1013 "TITLE": lambda self: self.expression( 1014 exp.TitleColumnConstraint, this=self._parse_var_or_string() 1015 ), 1016 "TTL": lambda self: self.expression(exp.MergeTreeTTL, expressions=[self._parse_bitwise()]), 1017 "UNIQUE": lambda self: self._parse_unique(), 1018 "UPPERCASE": lambda self: self.expression(exp.UppercaseColumnConstraint), 1019 "WITH": lambda self: self.expression( 1020 exp.Properties, expressions=self._parse_wrapped_properties() 1021 ), 1022 } 1023 1024 ALTER_PARSERS = { 1025 "ADD": lambda self: self._parse_alter_table_add(), 1026 "ALTER": lambda self: self._parse_alter_table_alter(), 1027 "CLUSTER BY": lambda self: self._parse_cluster(wrapped=True), 1028 "DELETE": lambda self: self.expression(exp.Delete, where=self._parse_where()), 1029 "DROP": lambda self: self._parse_alter_table_drop(), 1030 "RENAME": lambda self: self._parse_alter_table_rename(), 1031 "SET": lambda self: self._parse_alter_table_set(), 1032 "AS": lambda self: self._parse_select(), 1033 } 1034 1035 ALTER_ALTER_PARSERS = { 1036 "DISTKEY": lambda self: self._parse_alter_diststyle(), 1037 "DISTSTYLE": lambda self: self._parse_alter_diststyle(), 1038 "SORTKEY": lambda self: self._parse_alter_sortkey(), 1039 "COMPOUND": lambda self: self._parse_alter_sortkey(compound=True), 1040 } 1041 1042 SCHEMA_UNNAMED_CONSTRAINTS = { 1043 "CHECK", 1044 "EXCLUDE", 1045 "FOREIGN KEY", 1046 "LIKE", 1047 "PERIOD", 1048 "PRIMARY KEY", 1049 "UNIQUE", 1050 } 1051 1052 NO_PAREN_FUNCTION_PARSERS = { 1053 "ANY": lambda self: self.expression(exp.Any, this=self._parse_bitwise()), 1054 "CASE": lambda self: self._parse_case(), 1055 "CONNECT_BY_ROOT": lambda self: self.expression( 1056 exp.ConnectByRoot, this=self._parse_column() 1057 ), 1058 "IF": lambda self: self._parse_if(), 1059 "NEXT": lambda self: self._parse_next_value_for(), 1060 } 1061 1062 INVALID_FUNC_NAME_TOKENS = { 1063 TokenType.IDENTIFIER, 1064 TokenType.STRING, 1065 } 1066 1067 FUNCTIONS_WITH_ALIASED_ARGS = {"STRUCT"} 1068 1069 KEY_VALUE_DEFINITIONS = (exp.Alias, exp.EQ, exp.PropertyEQ, exp.Slice) 1070 1071 FUNCTION_PARSERS = { 1072 "CAST": lambda self: self._parse_cast(self.STRICT_CAST), 1073 "CONVERT": lambda self: self._parse_convert(self.STRICT_CAST), 1074 "DECODE": lambda self: self._parse_decode(), 1075 "EXTRACT": lambda self: self._parse_extract(), 1076 "GAP_FILL": 
lambda self: self._parse_gap_fill(), 1077 "JSON_OBJECT": lambda self: self._parse_json_object(), 1078 "JSON_OBJECTAGG": lambda self: self._parse_json_object(agg=True), 1079 "JSON_TABLE": lambda self: self._parse_json_table(), 1080 "MATCH": lambda self: self._parse_match_against(), 1081 "OPENJSON": lambda self: self._parse_open_json(), 1082 "POSITION": lambda self: self._parse_position(), 1083 "PREDICT": lambda self: self._parse_predict(), 1084 "SAFE_CAST": lambda self: self._parse_cast(False, safe=True), 1085 "STRING_AGG": lambda self: self._parse_string_agg(), 1086 "SUBSTRING": lambda self: self._parse_substring(), 1087 "TRIM": lambda self: self._parse_trim(), 1088 "TRY_CAST": lambda self: self._parse_cast(False, safe=True), 1089 "TRY_CONVERT": lambda self: self._parse_convert(False, safe=True), 1090 } 1091 1092 QUERY_MODIFIER_PARSERS = { 1093 TokenType.MATCH_RECOGNIZE: lambda self: ("match", self._parse_match_recognize()), 1094 TokenType.PREWHERE: lambda self: ("prewhere", self._parse_prewhere()), 1095 TokenType.WHERE: lambda self: ("where", self._parse_where()), 1096 TokenType.GROUP_BY: lambda self: ("group", self._parse_group()), 1097 TokenType.HAVING: lambda self: ("having", self._parse_having()), 1098 TokenType.QUALIFY: lambda self: ("qualify", self._parse_qualify()), 1099 TokenType.WINDOW: lambda self: ("windows", self._parse_window_clause()), 1100 TokenType.ORDER_BY: lambda self: ("order", self._parse_order()), 1101 TokenType.LIMIT: lambda self: ("limit", self._parse_limit()), 1102 TokenType.FETCH: lambda self: ("limit", self._parse_limit()), 1103 TokenType.OFFSET: lambda self: ("offset", self._parse_offset()), 1104 TokenType.FOR: lambda self: ("locks", self._parse_locks()), 1105 TokenType.LOCK: lambda self: ("locks", self._parse_locks()), 1106 TokenType.TABLE_SAMPLE: lambda self: ("sample", self._parse_table_sample(as_modifier=True)), 1107 TokenType.USING: lambda self: ("sample", self._parse_table_sample(as_modifier=True)), 1108 TokenType.CLUSTER_BY: lambda self: ( 1109 "cluster", 1110 self._parse_sort(exp.Cluster, TokenType.CLUSTER_BY), 1111 ), 1112 TokenType.DISTRIBUTE_BY: lambda self: ( 1113 "distribute", 1114 self._parse_sort(exp.Distribute, TokenType.DISTRIBUTE_BY), 1115 ), 1116 TokenType.SORT_BY: lambda self: ("sort", self._parse_sort(exp.Sort, TokenType.SORT_BY)), 1117 TokenType.CONNECT_BY: lambda self: ("connect", self._parse_connect(skip_start_token=True)), 1118 TokenType.START_WITH: lambda self: ("connect", self._parse_connect()), 1119 } 1120 1121 SET_PARSERS = { 1122 "GLOBAL": lambda self: self._parse_set_item_assignment("GLOBAL"), 1123 "LOCAL": lambda self: self._parse_set_item_assignment("LOCAL"), 1124 "SESSION": lambda self: self._parse_set_item_assignment("SESSION"), 1125 "TRANSACTION": lambda self: self._parse_set_transaction(), 1126 } 1127 1128 SHOW_PARSERS: t.Dict[str, t.Callable] = {} 1129 1130 TYPE_LITERAL_PARSERS = { 1131 exp.DataType.Type.JSON: lambda self, this, _: self.expression(exp.ParseJSON, this=this), 1132 } 1133 1134 TYPE_CONVERTERS: t.Dict[exp.DataType.Type, t.Callable[[exp.DataType], exp.DataType]] = {} 1135 1136 DDL_SELECT_TOKENS = {TokenType.SELECT, TokenType.WITH, TokenType.L_PAREN} 1137 1138 PRE_VOLATILE_TOKENS = {TokenType.CREATE, TokenType.REPLACE, TokenType.UNIQUE} 1139 1140 TRANSACTION_KIND = {"DEFERRED", "IMMEDIATE", "EXCLUSIVE"} 1141 TRANSACTION_CHARACTERISTICS: OPTIONS_TYPE = { 1142 "ISOLATION": ( 1143 ("LEVEL", "REPEATABLE", "READ"), 1144 ("LEVEL", "READ", "COMMITTED"), 1145 ("LEVEL", "READ", "UNCOMMITTED"), 1146 ("LEVEL",
"SERIALIZABLE"), 1147 ), 1148 "READ": ("WRITE", "ONLY"), 1149 } 1150 1151 CONFLICT_ACTIONS: OPTIONS_TYPE = dict.fromkeys( 1152 ("ABORT", "FAIL", "IGNORE", "REPLACE", "ROLLBACK", "UPDATE"), tuple() 1153 ) 1154 CONFLICT_ACTIONS["DO"] = ("NOTHING", "UPDATE") 1155 1156 CREATE_SEQUENCE: OPTIONS_TYPE = { 1157 "SCALE": ("EXTEND", "NOEXTEND"), 1158 "SHARD": ("EXTEND", "NOEXTEND"), 1159 "NO": ("CYCLE", "CACHE", "MAXVALUE", "MINVALUE"), 1160 **dict.fromkeys( 1161 ( 1162 "SESSION", 1163 "GLOBAL", 1164 "KEEP", 1165 "NOKEEP", 1166 "ORDER", 1167 "NOORDER", 1168 "NOCACHE", 1169 "CYCLE", 1170 "NOCYCLE", 1171 "NOMINVALUE", 1172 "NOMAXVALUE", 1173 "NOSCALE", 1174 "NOSHARD", 1175 ), 1176 tuple(), 1177 ), 1178 } 1179 1180 ISOLATED_LOADING_OPTIONS: OPTIONS_TYPE = {"FOR": ("ALL", "INSERT", "NONE")} 1181 1182 USABLES: OPTIONS_TYPE = dict.fromkeys( 1183 ("ROLE", "WAREHOUSE", "DATABASE", "SCHEMA", "CATALOG"), tuple() 1184 ) 1185 1186 CAST_ACTIONS: OPTIONS_TYPE = dict.fromkeys(("RENAME", "ADD"), ("FIELDS",)) 1187 1188 SCHEMA_BINDING_OPTIONS: OPTIONS_TYPE = { 1189 "TYPE": ("EVOLUTION",), 1190 **dict.fromkeys(("BINDING", "COMPENSATION", "EVOLUTION"), tuple()), 1191 } 1192 1193 KEY_CONSTRAINT_OPTIONS: OPTIONS_TYPE = { 1194 "NOT": ("ENFORCED",), 1195 "MATCH": ( 1196 "FULL", 1197 "PARTIAL", 1198 "SIMPLE", 1199 ), 1200 "INITIALLY": ("DEFERRED", "IMMEDIATE"), 1201 **dict.fromkeys(("DEFERRABLE", "NORELY"), tuple()), 1202 } 1203 1204 INSERT_ALTERNATIVES = {"ABORT", "FAIL", "IGNORE", "REPLACE", "ROLLBACK"} 1205 1206 CLONE_KEYWORDS = {"CLONE", "COPY"} 1207 HISTORICAL_DATA_PREFIX = {"AT", "BEFORE", "END"} 1208 HISTORICAL_DATA_KIND = {"TIMESTAMP", "OFFSET", "STATEMENT", "STREAM"} 1209 1210 OPCLASS_FOLLOW_KEYWORDS = {"ASC", "DESC", "NULLS", "WITH"} 1211 1212 OPTYPE_FOLLOW_TOKENS = {TokenType.COMMA, TokenType.R_PAREN} 1213 1214 TABLE_INDEX_HINT_TOKENS = {TokenType.FORCE, TokenType.IGNORE, TokenType.USE} 1215 1216 VIEW_ATTRIBUTES = {"ENCRYPTION", "SCHEMABINDING", "VIEW_METADATA"} 1217 1218 WINDOW_ALIAS_TOKENS = ID_VAR_TOKENS - {TokenType.ROWS} 1219 WINDOW_BEFORE_PAREN_TOKENS = {TokenType.OVER} 1220 WINDOW_SIDES = {"FOLLOWING", "PRECEDING"} 1221 1222 JSON_KEY_VALUE_SEPARATOR_TOKENS = {TokenType.COLON, TokenType.COMMA, TokenType.IS} 1223 1224 FETCH_TOKENS = ID_VAR_TOKENS - {TokenType.ROW, TokenType.ROWS, TokenType.PERCENT} 1225 1226 ADD_CONSTRAINT_TOKENS = { 1227 TokenType.CONSTRAINT, 1228 TokenType.FOREIGN_KEY, 1229 TokenType.INDEX, 1230 TokenType.KEY, 1231 TokenType.PRIMARY_KEY, 1232 TokenType.UNIQUE, 1233 } 1234 1235 DISTINCT_TOKENS = {TokenType.DISTINCT} 1236 1237 NULL_TOKENS = {TokenType.NULL} 1238 1239 UNNEST_OFFSET_ALIAS_TOKENS = ID_VAR_TOKENS - SET_OPERATIONS 1240 1241 SELECT_START_TOKENS = {TokenType.L_PAREN, TokenType.WITH, TokenType.SELECT} 1242 1243 COPY_INTO_VARLEN_OPTIONS = {"FILE_FORMAT", "COPY_OPTIONS", "FORMAT_OPTIONS", "CREDENTIAL"} 1244 1245 STRICT_CAST = True 1246 1247 PREFIXED_PIVOT_COLUMNS = False 1248 IDENTIFY_PIVOT_STRINGS = False 1249 1250 LOG_DEFAULTS_TO_LN = False 1251 1252 # Whether ADD is present for each column added by ALTER TABLE 1253 ALTER_TABLE_ADD_REQUIRED_FOR_EACH_COLUMN = True 1254 1255 # Whether the table sample clause expects CSV syntax 1256 TABLESAMPLE_CSV = False 1257 1258 # The default method used for table sampling 1259 DEFAULT_SAMPLING_METHOD: t.Optional[str] = None 1260 1261 # Whether the SET command needs a delimiter (e.g. 
"=") for assignments 1262 SET_REQUIRES_ASSIGNMENT_DELIMITER = True 1263 1264 # Whether the TRIM function expects the characters to trim as its first argument 1265 TRIM_PATTERN_FIRST = False 1266 1267 # Whether string aliases are supported `SELECT COUNT(*) 'count'` 1268 STRING_ALIASES = False 1269 1270 # Whether query modifiers such as LIMIT are attached to the UNION node (vs its right operand) 1271 MODIFIERS_ATTACHED_TO_SET_OP = True 1272 SET_OP_MODIFIERS = {"order", "limit", "offset"} 1273 1274 # Whether to parse IF statements that aren't followed by a left parenthesis as commands 1275 NO_PAREN_IF_COMMANDS = True 1276 1277 # Whether the -> and ->> operators expect documents of type JSON (e.g. Postgres) 1278 JSON_ARROWS_REQUIRE_JSON_TYPE = False 1279 1280 # Whether the `:` operator is used to extract a value from a VARIANT column 1281 COLON_IS_VARIANT_EXTRACT = False 1282 1283 # Whether or not a VALUES keyword needs to be followed by '(' to form a VALUES clause. 1284 # If this is True and '(' is not found, the keyword will be treated as an identifier 1285 VALUES_FOLLOWED_BY_PAREN = True 1286 1287 # Whether implicit unnesting is supported, e.g. SELECT 1 FROM y.z AS z, z.a (Redshift) 1288 SUPPORTS_IMPLICIT_UNNEST = False 1289 1290 # Whether or not interval spans are supported, INTERVAL 1 YEAR TO MONTHS 1291 INTERVAL_SPANS = True 1292 1293 # Whether a PARTITION clause can follow a table reference 1294 SUPPORTS_PARTITION_SELECTION = False 1295 1296 __slots__ = ( 1297 "error_level", 1298 "error_message_context", 1299 "max_errors", 1300 "dialect", 1301 "sql", 1302 "errors", 1303 "_tokens", 1304 "_index", 1305 "_curr", 1306 "_next", 1307 "_prev", 1308 "_prev_comments", 1309 ) 1310 1311 # Autofilled 1312 SHOW_TRIE: t.Dict = {} 1313 SET_TRIE: t.Dict = {} 1314 1315 def __init__( 1316 self, 1317 error_level: t.Optional[ErrorLevel] = None, 1318 error_message_context: int = 100, 1319 max_errors: int = 3, 1320 dialect: DialectType = None, 1321 ): 1322 from sqlglot.dialects import Dialect 1323 1324 self.error_level = error_level or ErrorLevel.IMMEDIATE 1325 self.error_message_context = error_message_context 1326 self.max_errors = max_errors 1327 self.dialect = Dialect.get_or_raise(dialect) 1328 self.reset() 1329 1330 def reset(self): 1331 self.sql = "" 1332 self.errors = [] 1333 self._tokens = [] 1334 self._index = 0 1335 self._curr = None 1336 self._next = None 1337 self._prev = None 1338 self._prev_comments = None 1339 1340 def parse( 1341 self, raw_tokens: t.List[Token], sql: t.Optional[str] = None 1342 ) -> t.List[t.Optional[exp.Expression]]: 1343 """ 1344 Parses a list of tokens and returns a list of syntax trees, one tree 1345 per parsed SQL statement. 1346 1347 Args: 1348 raw_tokens: The list of tokens. 1349 sql: The original SQL string, used to produce helpful debug messages. 1350 1351 Returns: 1352 The list of the produced syntax trees. 1353 """ 1354 return self._parse( 1355 parse_method=self.__class__._parse_statement, raw_tokens=raw_tokens, sql=sql 1356 ) 1357 1358 def parse_into( 1359 self, 1360 expression_types: exp.IntoType, 1361 raw_tokens: t.List[Token], 1362 sql: t.Optional[str] = None, 1363 ) -> t.List[t.Optional[exp.Expression]]: 1364 """ 1365 Parses a list of tokens into a given Expression type. If a collection of Expression 1366 types is given instead, this method will try to parse the token list into each one 1367 of them, stopping at the first for which the parsing succeeds. 1368 1369 Args: 1370 expression_types: The expression type(s) to try and parse the token list into. 
1371 raw_tokens: The list of tokens. 1372 sql: The original SQL string, used to produce helpful debug messages. 1373 1374 Returns: 1375 The target Expression. 1376 """ 1377 errors = [] 1378 for expression_type in ensure_list(expression_types): 1379 parser = self.EXPRESSION_PARSERS.get(expression_type) 1380 if not parser: 1381 raise TypeError(f"No parser registered for {expression_type}") 1382 1383 try: 1384 return self._parse(parser, raw_tokens, sql) 1385 except ParseError as e: 1386 e.errors[0]["into_expression"] = expression_type 1387 errors.append(e) 1388 1389 raise ParseError( 1390 f"Failed to parse '{sql or raw_tokens}' into {expression_types}", 1391 errors=merge_errors(errors), 1392 ) from errors[-1] 1393 1394 def _parse( 1395 self, 1396 parse_method: t.Callable[[Parser], t.Optional[exp.Expression]], 1397 raw_tokens: t.List[Token], 1398 sql: t.Optional[str] = None, 1399 ) -> t.List[t.Optional[exp.Expression]]: 1400 self.reset() 1401 self.sql = sql or "" 1402 1403 total = len(raw_tokens) 1404 chunks: t.List[t.List[Token]] = [[]] 1405 1406 for i, token in enumerate(raw_tokens): 1407 if token.token_type == TokenType.SEMICOLON: 1408 if token.comments: 1409 chunks.append([token]) 1410 1411 if i < total - 1: 1412 chunks.append([]) 1413 else: 1414 chunks[-1].append(token) 1415 1416 expressions = [] 1417 1418 for tokens in chunks: 1419 self._index = -1 1420 self._tokens = tokens 1421 self._advance() 1422 1423 expressions.append(parse_method(self)) 1424 1425 if self._index < len(self._tokens): 1426 self.raise_error("Invalid expression / Unexpected token") 1427 1428 self.check_errors() 1429 1430 return expressions 1431 1432 def check_errors(self) -> None: 1433 """Logs or raises any found errors, depending on the chosen error level setting.""" 1434 if self.error_level == ErrorLevel.WARN: 1435 for error in self.errors: 1436 logger.error(str(error)) 1437 elif self.error_level == ErrorLevel.RAISE and self.errors: 1438 raise ParseError( 1439 concat_messages(self.errors, self.max_errors), 1440 errors=merge_errors(self.errors), 1441 ) 1442 1443 def raise_error(self, message: str, token: t.Optional[Token] = None) -> None: 1444 """ 1445 Appends an error in the list of recorded errors or raises it, depending on the chosen 1446 error level setting. 1447 """ 1448 token = token or self._curr or self._prev or Token.string("") 1449 start = token.start 1450 end = token.end + 1 1451 start_context = self.sql[max(start - self.error_message_context, 0) : start] 1452 highlight = self.sql[start:end] 1453 end_context = self.sql[end : end + self.error_message_context] 1454 1455 error = ParseError.new( 1456 f"{message}. Line {token.line}, Col: {token.col}.\n" 1457 f" {start_context}\033[4m{highlight}\033[0m{end_context}", 1458 description=message, 1459 line=token.line, 1460 col=token.col, 1461 start_context=start_context, 1462 highlight=highlight, 1463 end_context=end_context, 1464 ) 1465 1466 if self.error_level == ErrorLevel.IMMEDIATE: 1467 raise error 1468 1469 self.errors.append(error) 1470 1471 def expression( 1472 self, exp_class: t.Type[E], comments: t.Optional[t.List[str]] = None, **kwargs 1473 ) -> E: 1474 """ 1475 Creates a new, validated Expression. 1476 1477 Args: 1478 exp_class: The expression class to instantiate. 1479 comments: An optional list of comments to attach to the expression. 1480 kwargs: The arguments to set for the expression along with their respective values. 1481 1482 Returns: 1483 The target expression. 
1484 """ 1485 instance = exp_class(**kwargs) 1486 instance.add_comments(comments) if comments else self._add_comments(instance) 1487 return self.validate_expression(instance) 1488 1489 def _add_comments(self, expression: t.Optional[exp.Expression]) -> None: 1490 if expression and self._prev_comments: 1491 expression.add_comments(self._prev_comments) 1492 self._prev_comments = None 1493 1494 def validate_expression(self, expression: E, args: t.Optional[t.List] = None) -> E: 1495 """ 1496 Validates an Expression, making sure that all its mandatory arguments are set. 1497 1498 Args: 1499 expression: The expression to validate. 1500 args: An optional list of items that was used to instantiate the expression, if it's a Func. 1501 1502 Returns: 1503 The validated expression. 1504 """ 1505 if self.error_level != ErrorLevel.IGNORE: 1506 for error_message in expression.error_messages(args): 1507 self.raise_error(error_message) 1508 1509 return expression 1510 1511 def _find_sql(self, start: Token, end: Token) -> str: 1512 return self.sql[start.start : end.end + 1] 1513 1514 def _is_connected(self) -> bool: 1515 return self._prev and self._curr and self._prev.end + 1 == self._curr.start 1516 1517 def _advance(self, times: int = 1) -> None: 1518 self._index += times 1519 self._curr = seq_get(self._tokens, self._index) 1520 self._next = seq_get(self._tokens, self._index + 1) 1521 1522 if self._index > 0: 1523 self._prev = self._tokens[self._index - 1] 1524 self._prev_comments = self._prev.comments 1525 else: 1526 self._prev = None 1527 self._prev_comments = None 1528 1529 def _retreat(self, index: int) -> None: 1530 if index != self._index: 1531 self._advance(index - self._index) 1532 1533 def _warn_unsupported(self) -> None: 1534 if len(self._tokens) <= 1: 1535 return 1536 1537 # We use _find_sql because self.sql may comprise multiple chunks, and we're only 1538 # interested in emitting a warning for the one being currently processed. 1539 sql = self._find_sql(self._tokens[0], self._tokens[-1])[: self.error_message_context] 1540 1541 logger.warning( 1542 f"'{sql}' contains unsupported syntax. Falling back to parsing as a 'Command'." 1543 ) 1544 1545 def _parse_command(self) -> exp.Command: 1546 self._warn_unsupported() 1547 return self.expression( 1548 exp.Command, 1549 comments=self._prev_comments, 1550 this=self._prev.text.upper(), 1551 expression=self._parse_string(), 1552 ) 1553 1554 def _try_parse(self, parse_method: t.Callable[[], T], retreat: bool = False) -> t.Optional[T]: 1555 """ 1556 Attemps to backtrack if a parse function that contains a try/catch internally raises an error. 
1557 This behavior can be different depending on the user-set ErrorLevel, so _try_parse aims to 1558 solve this by setting & resetting the parser state accordingly. 1559 """ 1560 index = self._index 1561 error_level = self.error_level 1562 1563 self.error_level = ErrorLevel.IMMEDIATE 1564 try: 1565 this = parse_method() 1566 except ParseError: 1567 this = None 1568 finally: 1569 if not this or retreat: 1570 self._retreat(index) 1571 self.error_level = error_level 1572 1573 return this 1574 1575 def _parse_comment(self, allow_exists: bool = True) -> exp.Expression: 1576 start = self._prev 1577 exists = self._parse_exists() if allow_exists else None 1578 1579 self._match(TokenType.ON) 1580 1581 materialized = self._match_text_seq("MATERIALIZED") 1582 kind = self._match_set(self.CREATABLES) and self._prev 1583 if not kind: 1584 return self._parse_as_command(start) 1585 1586 if kind.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE): 1587 this = self._parse_user_defined_function(kind=kind.token_type) 1588 elif kind.token_type == TokenType.TABLE: 1589 this = self._parse_table(alias_tokens=self.COMMENT_TABLE_ALIAS_TOKENS) 1590 elif kind.token_type == TokenType.COLUMN: 1591 this = self._parse_column() 1592 else: 1593 this = self._parse_id_var() 1594 1595 self._match(TokenType.IS) 1596 1597 return self.expression( 1598 exp.Comment, 1599 this=this, 1600 kind=kind.text, 1601 expression=self._parse_string(), 1602 exists=exists, 1603 materialized=materialized, 1604 ) 1605 1606 def _parse_to_table( 1607 self, 1608 ) -> exp.ToTableProperty: 1609 table = self._parse_table_parts(schema=True) 1610 return self.expression(exp.ToTableProperty, this=table) 1611 1612 # https://clickhouse.com/docs/en/engines/table-engines/mergetree-family/mergetree#mergetree-table-ttl 1613 def _parse_ttl(self) -> exp.Expression: 1614 def _parse_ttl_action() -> t.Optional[exp.Expression]: 1615 this = self._parse_bitwise() 1616 1617 if self._match_text_seq("DELETE"): 1618 return self.expression(exp.MergeTreeTTLAction, this=this, delete=True) 1619 if self._match_text_seq("RECOMPRESS"): 1620 return self.expression( 1621 exp.MergeTreeTTLAction, this=this, recompress=self._parse_bitwise() 1622 ) 1623 if self._match_text_seq("TO", "DISK"): 1624 return self.expression( 1625 exp.MergeTreeTTLAction, this=this, to_disk=self._parse_string() 1626 ) 1627 if self._match_text_seq("TO", "VOLUME"): 1628 return self.expression( 1629 exp.MergeTreeTTLAction, this=this, to_volume=self._parse_string() 1630 ) 1631 1632 return this 1633 1634 expressions = self._parse_csv(_parse_ttl_action) 1635 where = self._parse_where() 1636 group = self._parse_group() 1637 1638 aggregates = None 1639 if group and self._match(TokenType.SET): 1640 aggregates = self._parse_csv(self._parse_set_item) 1641 1642 return self.expression( 1643 exp.MergeTreeTTL, 1644 expressions=expressions, 1645 where=where, 1646 group=group, 1647 aggregates=aggregates, 1648 ) 1649 1650 def _parse_statement(self) -> t.Optional[exp.Expression]: 1651 if self._curr is None: 1652 return None 1653 1654 if self._match_set(self.STATEMENT_PARSERS): 1655 return self.STATEMENT_PARSERS[self._prev.token_type](self) 1656 1657 if self._match_set(self.dialect.tokenizer.COMMANDS): 1658 return self._parse_command() 1659 1660 expression = self._parse_expression() 1661 expression = self._parse_set_operations(expression) if expression else self._parse_select() 1662 return self._parse_query_modifiers(expression) 1663 1664 def _parse_drop(self, exists: bool = False) -> exp.Drop | exp.Command: 1665 start = 
self._prev 1666 temporary = self._match(TokenType.TEMPORARY) 1667 materialized = self._match_text_seq("MATERIALIZED") 1668 1669 kind = self._match_set(self.CREATABLES) and self._prev.text 1670 if not kind: 1671 return self._parse_as_command(start) 1672 1673 if_exists = exists or self._parse_exists() 1674 table = self._parse_table_parts( 1675 schema=True, is_db_reference=self._prev.token_type == TokenType.SCHEMA 1676 ) 1677 1678 cluster = self._parse_on_property() if self._match(TokenType.ON) else None 1679 1680 if self._match(TokenType.L_PAREN, advance=False): 1681 expressions = self._parse_wrapped_csv(self._parse_types) 1682 else: 1683 expressions = None 1684 1685 return self.expression( 1686 exp.Drop, 1687 comments=start.comments, 1688 exists=if_exists, 1689 this=table, 1690 expressions=expressions, 1691 kind=kind.upper(), 1692 temporary=temporary, 1693 materialized=materialized, 1694 cascade=self._match_text_seq("CASCADE"), 1695 constraints=self._match_text_seq("CONSTRAINTS"), 1696 purge=self._match_text_seq("PURGE"), 1697 cluster=cluster, 1698 ) 1699 1700 def _parse_exists(self, not_: bool = False) -> t.Optional[bool]: 1701 return ( 1702 self._match_text_seq("IF") 1703 and (not not_ or self._match(TokenType.NOT)) 1704 and self._match(TokenType.EXISTS) 1705 ) 1706 1707 def _parse_create(self) -> exp.Create | exp.Command: 1708 # Note: this can't be None because we've matched a statement parser 1709 start = self._prev 1710 comments = self._prev_comments 1711 1712 replace = ( 1713 start.token_type == TokenType.REPLACE 1714 or self._match_pair(TokenType.OR, TokenType.REPLACE) 1715 or self._match_pair(TokenType.OR, TokenType.ALTER) 1716 ) 1717 refresh = self._match_pair(TokenType.OR, TokenType.REFRESH) 1718 1719 unique = self._match(TokenType.UNIQUE) 1720 1721 if self._match_text_seq("CLUSTERED", "COLUMNSTORE"): 1722 clustered = True 1723 elif self._match_text_seq("NONCLUSTERED", "COLUMNSTORE") or self._match_text_seq( 1724 "COLUMNSTORE" 1725 ): 1726 clustered = False 1727 else: 1728 clustered = None 1729 1730 if self._match_pair(TokenType.TABLE, TokenType.FUNCTION, advance=False): 1731 self._advance() 1732 1733 properties = None 1734 create_token = self._match_set(self.CREATABLES) and self._prev 1735 1736 if not create_token: 1737 # exp.Properties.Location.POST_CREATE 1738 properties = self._parse_properties() 1739 create_token = self._match_set(self.CREATABLES) and self._prev 1740 1741 if not properties or not create_token: 1742 return self._parse_as_command(start) 1743 1744 concurrently = self._match_text_seq("CONCURRENTLY") 1745 exists = self._parse_exists(not_=True) 1746 this = None 1747 expression: t.Optional[exp.Expression] = None 1748 indexes = None 1749 no_schema_binding = None 1750 begin = None 1751 end = None 1752 clone = None 1753 1754 def extend_props(temp_props: t.Optional[exp.Properties]) -> None: 1755 nonlocal properties 1756 if properties and temp_props: 1757 properties.expressions.extend(temp_props.expressions) 1758 elif temp_props: 1759 properties = temp_props 1760 1761 if create_token.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE): 1762 this = self._parse_user_defined_function(kind=create_token.token_type) 1763 1764 # exp.Properties.Location.POST_SCHEMA ("schema" here is the UDF's type signature) 1765 extend_props(self._parse_properties()) 1766 1767 expression = self._match(TokenType.ALIAS) and self._parse_heredoc() 1768 extend_props(self._parse_properties()) 1769 1770 if not expression: 1771 if self._match(TokenType.COMMAND): 1772 expression = 
self._parse_as_command(self._prev) 1773 else: 1774 begin = self._match(TokenType.BEGIN) 1775 return_ = self._match_text_seq("RETURN") 1776 1777 if self._match(TokenType.STRING, advance=False): 1778 # Takes care of BigQuery's JavaScript UDF definitions that end in an OPTIONS property 1779 # # https://cloud.google.com/bigquery/docs/reference/standard-sql/data-definition-language#create_function_statement 1780 expression = self._parse_string() 1781 extend_props(self._parse_properties()) 1782 else: 1783 expression = self._parse_statement() 1784 1785 end = self._match_text_seq("END") 1786 1787 if return_: 1788 expression = self.expression(exp.Return, this=expression) 1789 elif create_token.token_type == TokenType.INDEX: 1790 # Postgres allows anonymous indexes, eg. CREATE INDEX IF NOT EXISTS ON t(c) 1791 if not self._match(TokenType.ON): 1792 index = self._parse_id_var() 1793 anonymous = False 1794 else: 1795 index = None 1796 anonymous = True 1797 1798 this = self._parse_index(index=index, anonymous=anonymous) 1799 elif create_token.token_type in self.DB_CREATABLES: 1800 table_parts = self._parse_table_parts( 1801 schema=True, is_db_reference=create_token.token_type == TokenType.SCHEMA 1802 ) 1803 1804 # exp.Properties.Location.POST_NAME 1805 self._match(TokenType.COMMA) 1806 extend_props(self._parse_properties(before=True)) 1807 1808 this = self._parse_schema(this=table_parts) 1809 1810 # exp.Properties.Location.POST_SCHEMA and POST_WITH 1811 extend_props(self._parse_properties()) 1812 1813 self._match(TokenType.ALIAS) 1814 if not self._match_set(self.DDL_SELECT_TOKENS, advance=False): 1815 # exp.Properties.Location.POST_ALIAS 1816 extend_props(self._parse_properties()) 1817 1818 if create_token.token_type == TokenType.SEQUENCE: 1819 expression = self._parse_types() 1820 extend_props(self._parse_properties()) 1821 else: 1822 expression = self._parse_ddl_select() 1823 1824 if create_token.token_type == TokenType.TABLE: 1825 # exp.Properties.Location.POST_EXPRESSION 1826 extend_props(self._parse_properties()) 1827 1828 indexes = [] 1829 while True: 1830 index = self._parse_index() 1831 1832 # exp.Properties.Location.POST_INDEX 1833 extend_props(self._parse_properties()) 1834 if not index: 1835 break 1836 else: 1837 self._match(TokenType.COMMA) 1838 indexes.append(index) 1839 elif create_token.token_type == TokenType.VIEW: 1840 if self._match_text_seq("WITH", "NO", "SCHEMA", "BINDING"): 1841 no_schema_binding = True 1842 1843 shallow = self._match_text_seq("SHALLOW") 1844 1845 if self._match_texts(self.CLONE_KEYWORDS): 1846 copy = self._prev.text.lower() == "copy" 1847 clone = self.expression( 1848 exp.Clone, this=self._parse_table(schema=True), shallow=shallow, copy=copy 1849 ) 1850 1851 if self._curr and not self._match_set((TokenType.R_PAREN, TokenType.COMMA), advance=False): 1852 return self._parse_as_command(start) 1853 1854 return self.expression( 1855 exp.Create, 1856 comments=comments, 1857 this=this, 1858 kind=create_token.text.upper(), 1859 replace=replace, 1860 refresh=refresh, 1861 unique=unique, 1862 expression=expression, 1863 exists=exists, 1864 properties=properties, 1865 indexes=indexes, 1866 no_schema_binding=no_schema_binding, 1867 begin=begin, 1868 end=end, 1869 clone=clone, 1870 concurrently=concurrently, 1871 clustered=clustered, 1872 ) 1873 1874 def _parse_sequence_properties(self) -> t.Optional[exp.SequenceProperties]: 1875 seq = exp.SequenceProperties() 1876 1877 options = [] 1878 index = self._index 1879 1880 while self._curr: 1881 self._match(TokenType.COMMA) 1882 if 
self._match_text_seq("INCREMENT"): 1883 self._match_text_seq("BY") 1884 self._match_text_seq("=") 1885 seq.set("increment", self._parse_term()) 1886 elif self._match_text_seq("MINVALUE"): 1887 seq.set("minvalue", self._parse_term()) 1888 elif self._match_text_seq("MAXVALUE"): 1889 seq.set("maxvalue", self._parse_term()) 1890 elif self._match(TokenType.START_WITH) or self._match_text_seq("START"): 1891 self._match_text_seq("=") 1892 seq.set("start", self._parse_term()) 1893 elif self._match_text_seq("CACHE"): 1894 # T-SQL allows empty CACHE which is initialized dynamically 1895 seq.set("cache", self._parse_number() or True) 1896 elif self._match_text_seq("OWNED", "BY"): 1897 # "OWNED BY NONE" is the default 1898 seq.set("owned", None if self._match_text_seq("NONE") else self._parse_column()) 1899 else: 1900 opt = self._parse_var_from_options(self.CREATE_SEQUENCE, raise_unmatched=False) 1901 if opt: 1902 options.append(opt) 1903 else: 1904 break 1905 1906 seq.set("options", options if options else None) 1907 return None if self._index == index else seq 1908 1909 def _parse_property_before(self) -> t.Optional[exp.Expression]: 1910 # only used for teradata currently 1911 self._match(TokenType.COMMA) 1912 1913 kwargs = { 1914 "no": self._match_text_seq("NO"), 1915 "dual": self._match_text_seq("DUAL"), 1916 "before": self._match_text_seq("BEFORE"), 1917 "default": self._match_text_seq("DEFAULT"), 1918 "local": (self._match_text_seq("LOCAL") and "LOCAL") 1919 or (self._match_text_seq("NOT", "LOCAL") and "NOT LOCAL"), 1920 "after": self._match_text_seq("AFTER"), 1921 "minimum": self._match_texts(("MIN", "MINIMUM")), 1922 "maximum": self._match_texts(("MAX", "MAXIMUM")), 1923 } 1924 1925 if self._match_texts(self.PROPERTY_PARSERS): 1926 parser = self.PROPERTY_PARSERS[self._prev.text.upper()] 1927 try: 1928 return parser(self, **{k: v for k, v in kwargs.items() if v}) 1929 except TypeError: 1930 self.raise_error(f"Cannot parse property '{self._prev.text}'") 1931 1932 return None 1933 1934 def _parse_wrapped_properties(self) -> t.List[exp.Expression]: 1935 return self._parse_wrapped_csv(self._parse_property) 1936 1937 def _parse_property(self) -> t.Optional[exp.Expression]: 1938 if self._match_texts(self.PROPERTY_PARSERS): 1939 return self.PROPERTY_PARSERS[self._prev.text.upper()](self) 1940 1941 if self._match(TokenType.DEFAULT) and self._match_texts(self.PROPERTY_PARSERS): 1942 return self.PROPERTY_PARSERS[self._prev.text.upper()](self, default=True) 1943 1944 if self._match_text_seq("COMPOUND", "SORTKEY"): 1945 return self._parse_sortkey(compound=True) 1946 1947 if self._match_text_seq("SQL", "SECURITY"): 1948 return self.expression(exp.SqlSecurityProperty, definer=self._match_text_seq("DEFINER")) 1949 1950 index = self._index 1951 key = self._parse_column() 1952 1953 if not self._match(TokenType.EQ): 1954 self._retreat(index) 1955 return self._parse_sequence_properties() 1956 1957 # Transform the key to exp.Dot if it's dotted identifiers wrapped in exp.Column or to exp.Var otherwise 1958 if isinstance(key, exp.Column): 1959 key = key.to_dot() if len(key.parts) > 1 else exp.var(key.name) 1960 1961 value = self._parse_bitwise() or self._parse_var(any_token=True) 1962 1963 # Transform the value to exp.Var if it was parsed as exp.Column(exp.Identifier()) 1964 if isinstance(value, exp.Column): 1965 value = exp.var(value.name) 1966 1967 return self.expression(exp.Property, this=key, value=value) 1968 1969 def _parse_stored(self) -> exp.FileFormatProperty: 1970 self._match(TokenType.ALIAS) 1971 1972 
input_format = self._parse_string() if self._match_text_seq("INPUTFORMAT") else None 1973 output_format = self._parse_string() if self._match_text_seq("OUTPUTFORMAT") else None 1974 1975 return self.expression( 1976 exp.FileFormatProperty, 1977 this=( 1978 self.expression( 1979 exp.InputOutputFormat, input_format=input_format, output_format=output_format 1980 ) 1981 if input_format or output_format 1982 else self._parse_var_or_string() or self._parse_number() or self._parse_id_var() 1983 ), 1984 ) 1985 1986 def _parse_unquoted_field(self) -> t.Optional[exp.Expression]: 1987 field = self._parse_field() 1988 if isinstance(field, exp.Identifier) and not field.quoted: 1989 field = exp.var(field) 1990 1991 return field 1992 1993 def _parse_property_assignment(self, exp_class: t.Type[E], **kwargs: t.Any) -> E: 1994 self._match(TokenType.EQ) 1995 self._match(TokenType.ALIAS) 1996 1997 return self.expression(exp_class, this=self._parse_unquoted_field(), **kwargs) 1998 1999 def _parse_properties(self, before: t.Optional[bool] = None) -> t.Optional[exp.Properties]: 2000 properties = [] 2001 while True: 2002 if before: 2003 prop = self._parse_property_before() 2004 else: 2005 prop = self._parse_property() 2006 if not prop: 2007 break 2008 for p in ensure_list(prop): 2009 properties.append(p) 2010 2011 if properties: 2012 return self.expression(exp.Properties, expressions=properties) 2013 2014 return None 2015 2016 def _parse_fallback(self, no: bool = False) -> exp.FallbackProperty: 2017 return self.expression( 2018 exp.FallbackProperty, no=no, protection=self._match_text_seq("PROTECTION") 2019 ) 2020 2021 def _parse_settings_property(self) -> exp.SettingsProperty: 2022 return self.expression( 2023 exp.SettingsProperty, expressions=self._parse_csv(self._parse_assignment) 2024 ) 2025 2026 def _parse_volatile_property(self) -> exp.VolatileProperty | exp.StabilityProperty: 2027 if self._index >= 2: 2028 pre_volatile_token = self._tokens[self._index - 2] 2029 else: 2030 pre_volatile_token = None 2031 2032 if pre_volatile_token and pre_volatile_token.token_type in self.PRE_VOLATILE_TOKENS: 2033 return exp.VolatileProperty() 2034 2035 return self.expression(exp.StabilityProperty, this=exp.Literal.string("VOLATILE")) 2036 2037 def _parse_retention_period(self) -> exp.Var: 2038 # Parse TSQL's HISTORY_RETENTION_PERIOD: {INFINITE | <number> DAY | DAYS | MONTH ...} 2039 number = self._parse_number() 2040 number_str = f"{number} " if number else "" 2041 unit = self._parse_var(any_token=True) 2042 return exp.var(f"{number_str}{unit}") 2043 2044 def _parse_system_versioning_property( 2045 self, with_: bool = False 2046 ) -> exp.WithSystemVersioningProperty: 2047 self._match(TokenType.EQ) 2048 prop = self.expression( 2049 exp.WithSystemVersioningProperty, 2050 **{ # type: ignore 2051 "on": True, 2052 "with": with_, 2053 }, 2054 ) 2055 2056 if self._match_text_seq("OFF"): 2057 prop.set("on", False) 2058 return prop 2059 2060 self._match(TokenType.ON) 2061 if self._match(TokenType.L_PAREN): 2062 while self._curr and not self._match(TokenType.R_PAREN): 2063 if self._match_text_seq("HISTORY_TABLE", "="): 2064 prop.set("this", self._parse_table_parts()) 2065 elif self._match_text_seq("DATA_CONSISTENCY_CHECK", "="): 2066 prop.set("data_consistency", self._advance_any() and self._prev.text.upper()) 2067 elif self._match_text_seq("HISTORY_RETENTION_PERIOD", "="): 2068 prop.set("retention_period", self._parse_retention_period()) 2069 2070 self._match(TokenType.COMMA) 2071 2072 return prop 2073 2074 def 
_parse_data_deletion_property(self) -> exp.DataDeletionProperty: 2075 self._match(TokenType.EQ) 2076 on = self._match_text_seq("ON") or not self._match_text_seq("OFF") 2077 prop = self.expression(exp.DataDeletionProperty, on=on) 2078 2079 if self._match(TokenType.L_PAREN): 2080 while self._curr and not self._match(TokenType.R_PAREN): 2081 if self._match_text_seq("FILTER_COLUMN", "="): 2082 prop.set("filter_column", self._parse_column()) 2083 elif self._match_text_seq("RETENTION_PERIOD", "="): 2084 prop.set("retention_period", self._parse_retention_period()) 2085 2086 self._match(TokenType.COMMA) 2087 2088 return prop 2089 2090 def _parse_with_property(self) -> t.Optional[exp.Expression] | t.List[exp.Expression]: 2091 if self._match_text_seq("(", "SYSTEM_VERSIONING"): 2092 prop = self._parse_system_versioning_property(with_=True) 2093 self._match_r_paren() 2094 return prop 2095 2096 if self._match(TokenType.L_PAREN, advance=False): 2097 return self._parse_wrapped_properties() 2098 2099 if self._match_text_seq("JOURNAL"): 2100 return self._parse_withjournaltable() 2101 2102 if self._match_texts(self.VIEW_ATTRIBUTES): 2103 return self.expression(exp.ViewAttributeProperty, this=self._prev.text.upper()) 2104 2105 if self._match_text_seq("DATA"): 2106 return self._parse_withdata(no=False) 2107 elif self._match_text_seq("NO", "DATA"): 2108 return self._parse_withdata(no=True) 2109 2110 if self._match(TokenType.SERDE_PROPERTIES, advance=False): 2111 return self._parse_serde_properties(with_=True) 2112 2113 if self._match(TokenType.SCHEMA): 2114 return self.expression( 2115 exp.WithSchemaBindingProperty, 2116 this=self._parse_var_from_options(self.SCHEMA_BINDING_OPTIONS), 2117 ) 2118 2119 if not self._next: 2120 return None 2121 2122 return self._parse_withisolatedloading() 2123 2124 # https://dev.mysql.com/doc/refman/8.0/en/create-view.html 2125 def _parse_definer(self) -> t.Optional[exp.DefinerProperty]: 2126 self._match(TokenType.EQ) 2127 2128 user = self._parse_id_var() 2129 self._match(TokenType.PARAMETER) 2130 host = self._parse_id_var() or (self._match(TokenType.MOD) and self._prev.text) 2131 2132 if not user or not host: 2133 return None 2134 2135 return exp.DefinerProperty(this=f"{user}@{host}") 2136 2137 def _parse_withjournaltable(self) -> exp.WithJournalTableProperty: 2138 self._match(TokenType.TABLE) 2139 self._match(TokenType.EQ) 2140 return self.expression(exp.WithJournalTableProperty, this=self._parse_table_parts()) 2141 2142 def _parse_log(self, no: bool = False) -> exp.LogProperty: 2143 return self.expression(exp.LogProperty, no=no) 2144 2145 def _parse_journal(self, **kwargs) -> exp.JournalProperty: 2146 return self.expression(exp.JournalProperty, **kwargs) 2147 2148 def _parse_checksum(self) -> exp.ChecksumProperty: 2149 self._match(TokenType.EQ) 2150 2151 on = None 2152 if self._match(TokenType.ON): 2153 on = True 2154 elif self._match_text_seq("OFF"): 2155 on = False 2156 2157 return self.expression(exp.ChecksumProperty, on=on, default=self._match(TokenType.DEFAULT)) 2158 2159 def _parse_cluster(self, wrapped: bool = False) -> exp.Cluster: 2160 return self.expression( 2161 exp.Cluster, 2162 expressions=( 2163 self._parse_wrapped_csv(self._parse_ordered) 2164 if wrapped 2165 else self._parse_csv(self._parse_ordered) 2166 ), 2167 ) 2168 2169 def _parse_clustered_by(self) -> exp.ClusteredByProperty: 2170 self._match_text_seq("BY") 2171 2172 self._match_l_paren() 2173 expressions = self._parse_csv(self._parse_column) 2174 self._match_r_paren() 2175 2176 if 
self._match_text_seq("SORTED", "BY"): 2177 self._match_l_paren() 2178 sorted_by = self._parse_csv(self._parse_ordered) 2179 self._match_r_paren() 2180 else: 2181 sorted_by = None 2182 2183 self._match(TokenType.INTO) 2184 buckets = self._parse_number() 2185 self._match_text_seq("BUCKETS") 2186 2187 return self.expression( 2188 exp.ClusteredByProperty, 2189 expressions=expressions, 2190 sorted_by=sorted_by, 2191 buckets=buckets, 2192 ) 2193 2194 def _parse_copy_property(self) -> t.Optional[exp.CopyGrantsProperty]: 2195 if not self._match_text_seq("GRANTS"): 2196 self._retreat(self._index - 1) 2197 return None 2198 2199 return self.expression(exp.CopyGrantsProperty) 2200 2201 def _parse_freespace(self) -> exp.FreespaceProperty: 2202 self._match(TokenType.EQ) 2203 return self.expression( 2204 exp.FreespaceProperty, this=self._parse_number(), percent=self._match(TokenType.PERCENT) 2205 ) 2206 2207 def _parse_mergeblockratio( 2208 self, no: bool = False, default: bool = False 2209 ) -> exp.MergeBlockRatioProperty: 2210 if self._match(TokenType.EQ): 2211 return self.expression( 2212 exp.MergeBlockRatioProperty, 2213 this=self._parse_number(), 2214 percent=self._match(TokenType.PERCENT), 2215 ) 2216 2217 return self.expression(exp.MergeBlockRatioProperty, no=no, default=default) 2218 2219 def _parse_datablocksize( 2220 self, 2221 default: t.Optional[bool] = None, 2222 minimum: t.Optional[bool] = None, 2223 maximum: t.Optional[bool] = None, 2224 ) -> exp.DataBlocksizeProperty: 2225 self._match(TokenType.EQ) 2226 size = self._parse_number() 2227 2228 units = None 2229 if self._match_texts(("BYTES", "KBYTES", "KILOBYTES")): 2230 units = self._prev.text 2231 2232 return self.expression( 2233 exp.DataBlocksizeProperty, 2234 size=size, 2235 units=units, 2236 default=default, 2237 minimum=minimum, 2238 maximum=maximum, 2239 ) 2240 2241 def _parse_blockcompression(self) -> exp.BlockCompressionProperty: 2242 self._match(TokenType.EQ) 2243 always = self._match_text_seq("ALWAYS") 2244 manual = self._match_text_seq("MANUAL") 2245 never = self._match_text_seq("NEVER") 2246 default = self._match_text_seq("DEFAULT") 2247 2248 autotemp = None 2249 if self._match_text_seq("AUTOTEMP"): 2250 autotemp = self._parse_schema() 2251 2252 return self.expression( 2253 exp.BlockCompressionProperty, 2254 always=always, 2255 manual=manual, 2256 never=never, 2257 default=default, 2258 autotemp=autotemp, 2259 ) 2260 2261 def _parse_withisolatedloading(self) -> t.Optional[exp.IsolatedLoadingProperty]: 2262 index = self._index 2263 no = self._match_text_seq("NO") 2264 concurrent = self._match_text_seq("CONCURRENT") 2265 2266 if not self._match_text_seq("ISOLATED", "LOADING"): 2267 self._retreat(index) 2268 return None 2269 2270 target = self._parse_var_from_options(self.ISOLATED_LOADING_OPTIONS, raise_unmatched=False) 2271 return self.expression( 2272 exp.IsolatedLoadingProperty, no=no, concurrent=concurrent, target=target 2273 ) 2274 2275 def _parse_locking(self) -> exp.LockingProperty: 2276 if self._match(TokenType.TABLE): 2277 kind = "TABLE" 2278 elif self._match(TokenType.VIEW): 2279 kind = "VIEW" 2280 elif self._match(TokenType.ROW): 2281 kind = "ROW" 2282 elif self._match_text_seq("DATABASE"): 2283 kind = "DATABASE" 2284 else: 2285 kind = None 2286 2287 if kind in ("DATABASE", "TABLE", "VIEW"): 2288 this = self._parse_table_parts() 2289 else: 2290 this = None 2291 2292 if self._match(TokenType.FOR): 2293 for_or_in = "FOR" 2294 elif self._match(TokenType.IN): 2295 for_or_in = "IN" 2296 else: 2297 for_or_in = None 2298 2299 
if self._match_text_seq("ACCESS"): 2300 lock_type = "ACCESS" 2301 elif self._match_texts(("EXCL", "EXCLUSIVE")): 2302 lock_type = "EXCLUSIVE" 2303 elif self._match_text_seq("SHARE"): 2304 lock_type = "SHARE" 2305 elif self._match_text_seq("READ"): 2306 lock_type = "READ" 2307 elif self._match_text_seq("WRITE"): 2308 lock_type = "WRITE" 2309 elif self._match_text_seq("CHECKSUM"): 2310 lock_type = "CHECKSUM" 2311 else: 2312 lock_type = None 2313 2314 override = self._match_text_seq("OVERRIDE") 2315 2316 return self.expression( 2317 exp.LockingProperty, 2318 this=this, 2319 kind=kind, 2320 for_or_in=for_or_in, 2321 lock_type=lock_type, 2322 override=override, 2323 ) 2324 2325 def _parse_partition_by(self) -> t.List[exp.Expression]: 2326 if self._match(TokenType.PARTITION_BY): 2327 return self._parse_csv(self._parse_assignment) 2328 return [] 2329 2330 def _parse_partition_bound_spec(self) -> exp.PartitionBoundSpec: 2331 def _parse_partition_bound_expr() -> t.Optional[exp.Expression]: 2332 if self._match_text_seq("MINVALUE"): 2333 return exp.var("MINVALUE") 2334 if self._match_text_seq("MAXVALUE"): 2335 return exp.var("MAXVALUE") 2336 return self._parse_bitwise() 2337 2338 this: t.Optional[exp.Expression | t.List[exp.Expression]] = None 2339 expression = None 2340 from_expressions = None 2341 to_expressions = None 2342 2343 if self._match(TokenType.IN): 2344 this = self._parse_wrapped_csv(self._parse_bitwise) 2345 elif self._match(TokenType.FROM): 2346 from_expressions = self._parse_wrapped_csv(_parse_partition_bound_expr) 2347 self._match_text_seq("TO") 2348 to_expressions = self._parse_wrapped_csv(_parse_partition_bound_expr) 2349 elif self._match_text_seq("WITH", "(", "MODULUS"): 2350 this = self._parse_number() 2351 self._match_text_seq(",", "REMAINDER") 2352 expression = self._parse_number() 2353 self._match_r_paren() 2354 else: 2355 self.raise_error("Failed to parse partition bound spec.") 2356 2357 return self.expression( 2358 exp.PartitionBoundSpec, 2359 this=this, 2360 expression=expression, 2361 from_expressions=from_expressions, 2362 to_expressions=to_expressions, 2363 ) 2364 2365 # https://www.postgresql.org/docs/current/sql-createtable.html 2366 def _parse_partitioned_of(self) -> t.Optional[exp.PartitionedOfProperty]: 2367 if not self._match_text_seq("OF"): 2368 self._retreat(self._index - 1) 2369 return None 2370 2371 this = self._parse_table(schema=True) 2372 2373 if self._match(TokenType.DEFAULT): 2374 expression: exp.Var | exp.PartitionBoundSpec = exp.var("DEFAULT") 2375 elif self._match_text_seq("FOR", "VALUES"): 2376 expression = self._parse_partition_bound_spec() 2377 else: 2378 self.raise_error("Expecting either DEFAULT or FOR VALUES clause.") 2379 2380 return self.expression(exp.PartitionedOfProperty, this=this, expression=expression) 2381 2382 def _parse_partitioned_by(self) -> exp.PartitionedByProperty: 2383 self._match(TokenType.EQ) 2384 return self.expression( 2385 exp.PartitionedByProperty, 2386 this=self._parse_schema() or self._parse_bracket(self._parse_field()), 2387 ) 2388 2389 def _parse_withdata(self, no: bool = False) -> exp.WithDataProperty: 2390 if self._match_text_seq("AND", "STATISTICS"): 2391 statistics = True 2392 elif self._match_text_seq("AND", "NO", "STATISTICS"): 2393 statistics = False 2394 else: 2395 statistics = None 2396 2397 return self.expression(exp.WithDataProperty, no=no, statistics=statistics) 2398 2399 def _parse_contains_property(self) -> t.Optional[exp.SqlReadWriteProperty]: 2400 if self._match_text_seq("SQL"): 2401 return 
self.expression(exp.SqlReadWriteProperty, this="CONTAINS SQL") 2402 return None 2403 2404 def _parse_modifies_property(self) -> t.Optional[exp.SqlReadWriteProperty]: 2405 if self._match_text_seq("SQL", "DATA"): 2406 return self.expression(exp.SqlReadWriteProperty, this="MODIFIES SQL DATA") 2407 return None 2408 2409 def _parse_no_property(self) -> t.Optional[exp.Expression]: 2410 if self._match_text_seq("PRIMARY", "INDEX"): 2411 return exp.NoPrimaryIndexProperty() 2412 if self._match_text_seq("SQL"): 2413 return self.expression(exp.SqlReadWriteProperty, this="NO SQL") 2414 return None 2415 2416 def _parse_on_property(self) -> t.Optional[exp.Expression]: 2417 if self._match_text_seq("COMMIT", "PRESERVE", "ROWS"): 2418 return exp.OnCommitProperty() 2419 if self._match_text_seq("COMMIT", "DELETE", "ROWS"): 2420 return exp.OnCommitProperty(delete=True) 2421 return self.expression(exp.OnProperty, this=self._parse_schema(self._parse_id_var())) 2422 2423 def _parse_reads_property(self) -> t.Optional[exp.SqlReadWriteProperty]: 2424 if self._match_text_seq("SQL", "DATA"): 2425 return self.expression(exp.SqlReadWriteProperty, this="READS SQL DATA") 2426 return None 2427 2428 def _parse_distkey(self) -> exp.DistKeyProperty: 2429 return self.expression(exp.DistKeyProperty, this=self._parse_wrapped(self._parse_id_var)) 2430 2431 def _parse_create_like(self) -> t.Optional[exp.LikeProperty]: 2432 table = self._parse_table(schema=True) 2433 2434 options = [] 2435 while self._match_texts(("INCLUDING", "EXCLUDING")): 2436 this = self._prev.text.upper() 2437 2438 id_var = self._parse_id_var() 2439 if not id_var: 2440 return None 2441 2442 options.append( 2443 self.expression(exp.Property, this=this, value=exp.var(id_var.this.upper())) 2444 ) 2445 2446 return self.expression(exp.LikeProperty, this=table, expressions=options) 2447 2448 def _parse_sortkey(self, compound: bool = False) -> exp.SortKeyProperty: 2449 return self.expression( 2450 exp.SortKeyProperty, this=self._parse_wrapped_id_vars(), compound=compound 2451 ) 2452 2453 def _parse_character_set(self, default: bool = False) -> exp.CharacterSetProperty: 2454 self._match(TokenType.EQ) 2455 return self.expression( 2456 exp.CharacterSetProperty, this=self._parse_var_or_string(), default=default 2457 ) 2458 2459 def _parse_remote_with_connection(self) -> exp.RemoteWithConnectionModelProperty: 2460 self._match_text_seq("WITH", "CONNECTION") 2461 return self.expression( 2462 exp.RemoteWithConnectionModelProperty, this=self._parse_table_parts() 2463 ) 2464 2465 def _parse_returns(self) -> exp.ReturnsProperty: 2466 value: t.Optional[exp.Expression] 2467 null = None 2468 is_table = self._match(TokenType.TABLE) 2469 2470 if is_table: 2471 if self._match(TokenType.LT): 2472 value = self.expression( 2473 exp.Schema, 2474 this="TABLE", 2475 expressions=self._parse_csv(self._parse_struct_types), 2476 ) 2477 if not self._match(TokenType.GT): 2478 self.raise_error("Expecting >") 2479 else: 2480 value = self._parse_schema(exp.var("TABLE")) 2481 elif self._match_text_seq("NULL", "ON", "NULL", "INPUT"): 2482 null = True 2483 value = None 2484 else: 2485 value = self._parse_types() 2486 2487 return self.expression(exp.ReturnsProperty, this=value, is_table=is_table, null=null) 2488 2489 def _parse_describe(self) -> exp.Describe: 2490 kind = self._match_set(self.CREATABLES) and self._prev.text 2491 style = self._match_texts(("EXTENDED", "FORMATTED", "HISTORY")) and self._prev.text.upper() 2492 if self._match(TokenType.DOT): 2493 style = None 2494 self._retreat(self._index 
- 2) 2495 this = self._parse_table(schema=True) 2496 properties = self._parse_properties() 2497 expressions = properties.expressions if properties else None 2498 return self.expression( 2499 exp.Describe, this=this, style=style, kind=kind, expressions=expressions 2500 ) 2501 2502 def _parse_insert(self) -> exp.Insert: 2503 comments = ensure_list(self._prev_comments) 2504 hint = self._parse_hint() 2505 overwrite = self._match(TokenType.OVERWRITE) 2506 ignore = self._match(TokenType.IGNORE) 2507 local = self._match_text_seq("LOCAL") 2508 alternative = None 2509 is_function = None 2510 2511 if self._match_text_seq("DIRECTORY"): 2512 this: t.Optional[exp.Expression] = self.expression( 2513 exp.Directory, 2514 this=self._parse_var_or_string(), 2515 local=local, 2516 row_format=self._parse_row_format(match_row=True), 2517 ) 2518 else: 2519 if self._match(TokenType.OR): 2520 alternative = self._match_texts(self.INSERT_ALTERNATIVES) and self._prev.text 2521 2522 self._match(TokenType.INTO) 2523 comments += ensure_list(self._prev_comments) 2524 self._match(TokenType.TABLE) 2525 is_function = self._match(TokenType.FUNCTION) 2526 2527 this = ( 2528 self._parse_table(schema=True, parse_partition=True) 2529 if not is_function 2530 else self._parse_function() 2531 ) 2532 2533 returning = self._parse_returning() 2534 2535 return self.expression( 2536 exp.Insert, 2537 comments=comments, 2538 hint=hint, 2539 is_function=is_function, 2540 this=this, 2541 stored=self._match_text_seq("STORED") and self._parse_stored(), 2542 by_name=self._match_text_seq("BY", "NAME"), 2543 exists=self._parse_exists(), 2544 where=self._match_pair(TokenType.REPLACE, TokenType.WHERE) and self._parse_assignment(), 2545 partition=self._match(TokenType.PARTITION_BY) and self._parse_partitioned_by(), 2546 settings=self._match_text_seq("SETTINGS") and self._parse_settings_property(), 2547 expression=self._parse_derived_table_values() or self._parse_ddl_select(), 2548 conflict=self._parse_on_conflict(), 2549 returning=returning or self._parse_returning(), 2550 overwrite=overwrite, 2551 alternative=alternative, 2552 ignore=ignore, 2553 ) 2554 2555 def _parse_kill(self) -> exp.Kill: 2556 kind = exp.var(self._prev.text) if self._match_texts(("CONNECTION", "QUERY")) else None 2557 2558 return self.expression( 2559 exp.Kill, 2560 this=self._parse_primary(), 2561 kind=kind, 2562 ) 2563 2564 def _parse_on_conflict(self) -> t.Optional[exp.OnConflict]: 2565 conflict = self._match_text_seq("ON", "CONFLICT") 2566 duplicate = self._match_text_seq("ON", "DUPLICATE", "KEY") 2567 2568 if not conflict and not duplicate: 2569 return None 2570 2571 conflict_keys = None 2572 constraint = None 2573 2574 if conflict: 2575 if self._match_text_seq("ON", "CONSTRAINT"): 2576 constraint = self._parse_id_var() 2577 elif self._match(TokenType.L_PAREN): 2578 conflict_keys = self._parse_csv(self._parse_id_var) 2579 self._match_r_paren() 2580 2581 action = self._parse_var_from_options(self.CONFLICT_ACTIONS) 2582 if self._prev.token_type == TokenType.UPDATE: 2583 self._match(TokenType.SET) 2584 expressions = self._parse_csv(self._parse_equality) 2585 else: 2586 expressions = None 2587 2588 return self.expression( 2589 exp.OnConflict, 2590 duplicate=duplicate, 2591 expressions=expressions, 2592 action=action, 2593 conflict_keys=conflict_keys, 2594 constraint=constraint, 2595 ) 2596 2597 def _parse_returning(self) -> t.Optional[exp.Returning]: 2598 if not self._match(TokenType.RETURNING): 2599 return None 2600 return self.expression( 2601 exp.Returning, 2602 
expressions=self._parse_csv(self._parse_expression), 2603 into=self._match(TokenType.INTO) and self._parse_table_part(), 2604 ) 2605 2606 def _parse_row(self) -> t.Optional[exp.RowFormatSerdeProperty | exp.RowFormatDelimitedProperty]: 2607 if not self._match(TokenType.FORMAT): 2608 return None 2609 return self._parse_row_format() 2610 2611 def _parse_serde_properties(self, with_: bool = False) -> t.Optional[exp.SerdeProperties]: 2612 index = self._index 2613 with_ = with_ or self._match_text_seq("WITH") 2614 2615 if not self._match(TokenType.SERDE_PROPERTIES): 2616 self._retreat(index) 2617 return None 2618 return self.expression( 2619 exp.SerdeProperties, 2620 **{ # type: ignore 2621 "expressions": self._parse_wrapped_properties(), 2622 "with": with_, 2623 }, 2624 ) 2625 2626 def _parse_row_format( 2627 self, match_row: bool = False 2628 ) -> t.Optional[exp.RowFormatSerdeProperty | exp.RowFormatDelimitedProperty]: 2629 if match_row and not self._match_pair(TokenType.ROW, TokenType.FORMAT): 2630 return None 2631 2632 if self._match_text_seq("SERDE"): 2633 this = self._parse_string() 2634 2635 serde_properties = self._parse_serde_properties() 2636 2637 return self.expression( 2638 exp.RowFormatSerdeProperty, this=this, serde_properties=serde_properties 2639 ) 2640 2641 self._match_text_seq("DELIMITED") 2642 2643 kwargs = {} 2644 2645 if self._match_text_seq("FIELDS", "TERMINATED", "BY"): 2646 kwargs["fields"] = self._parse_string() 2647 if self._match_text_seq("ESCAPED", "BY"): 2648 kwargs["escaped"] = self._parse_string() 2649 if self._match_text_seq("COLLECTION", "ITEMS", "TERMINATED", "BY"): 2650 kwargs["collection_items"] = self._parse_string() 2651 if self._match_text_seq("MAP", "KEYS", "TERMINATED", "BY"): 2652 kwargs["map_keys"] = self._parse_string() 2653 if self._match_text_seq("LINES", "TERMINATED", "BY"): 2654 kwargs["lines"] = self._parse_string() 2655 if self._match_text_seq("NULL", "DEFINED", "AS"): 2656 kwargs["null"] = self._parse_string() 2657 2658 return self.expression(exp.RowFormatDelimitedProperty, **kwargs) # type: ignore 2659 2660 def _parse_load(self) -> exp.LoadData | exp.Command: 2661 if self._match_text_seq("DATA"): 2662 local = self._match_text_seq("LOCAL") 2663 self._match_text_seq("INPATH") 2664 inpath = self._parse_string() 2665 overwrite = self._match(TokenType.OVERWRITE) 2666 self._match_pair(TokenType.INTO, TokenType.TABLE) 2667 2668 return self.expression( 2669 exp.LoadData, 2670 this=self._parse_table(schema=True), 2671 local=local, 2672 overwrite=overwrite, 2673 inpath=inpath, 2674 partition=self._parse_partition(), 2675 input_format=self._match_text_seq("INPUTFORMAT") and self._parse_string(), 2676 serde=self._match_text_seq("SERDE") and self._parse_string(), 2677 ) 2678 return self._parse_as_command(self._prev) 2679 2680 def _parse_delete(self) -> exp.Delete: 2681 # This handles MySQL's "Multiple-Table Syntax" 2682 # https://dev.mysql.com/doc/refman/8.0/en/delete.html 2683 tables = None 2684 comments = self._prev_comments 2685 if not self._match(TokenType.FROM, advance=False): 2686 tables = self._parse_csv(self._parse_table) or None 2687 2688 returning = self._parse_returning() 2689 2690 return self.expression( 2691 exp.Delete, 2692 comments=comments, 2693 tables=tables, 2694 this=self._match(TokenType.FROM) and self._parse_table(joins=True), 2695 using=self._match(TokenType.USING) and self._parse_table(joins=True), 2696 where=self._parse_where(), 2697 returning=returning or self._parse_returning(), 2698 limit=self._parse_limit(), 2699 ) 2700 2701 def 
_parse_update(self) -> exp.Update: 2702 comments = self._prev_comments 2703 this = self._parse_table(joins=True, alias_tokens=self.UPDATE_ALIAS_TOKENS) 2704 expressions = self._match(TokenType.SET) and self._parse_csv(self._parse_equality) 2705 returning = self._parse_returning() 2706 return self.expression( 2707 exp.Update, 2708 comments=comments, 2709 **{ # type: ignore 2710 "this": this, 2711 "expressions": expressions, 2712 "from": self._parse_from(joins=True), 2713 "where": self._parse_where(), 2714 "returning": returning or self._parse_returning(), 2715 "order": self._parse_order(), 2716 "limit": self._parse_limit(), 2717 }, 2718 ) 2719 2720 def _parse_uncache(self) -> exp.Uncache: 2721 if not self._match(TokenType.TABLE): 2722 self.raise_error("Expecting TABLE after UNCACHE") 2723 2724 return self.expression( 2725 exp.Uncache, exists=self._parse_exists(), this=self._parse_table(schema=True) 2726 ) 2727 2728 def _parse_cache(self) -> exp.Cache: 2729 lazy = self._match_text_seq("LAZY") 2730 self._match(TokenType.TABLE) 2731 table = self._parse_table(schema=True) 2732 2733 options = [] 2734 if self._match_text_seq("OPTIONS"): 2735 self._match_l_paren() 2736 k = self._parse_string() 2737 self._match(TokenType.EQ) 2738 v = self._parse_string() 2739 options = [k, v] 2740 self._match_r_paren() 2741 2742 self._match(TokenType.ALIAS) 2743 return self.expression( 2744 exp.Cache, 2745 this=table, 2746 lazy=lazy, 2747 options=options, 2748 expression=self._parse_select(nested=True), 2749 ) 2750 2751 def _parse_partition(self) -> t.Optional[exp.Partition]: 2752 if not self._match(TokenType.PARTITION): 2753 return None 2754 2755 return self.expression( 2756 exp.Partition, expressions=self._parse_wrapped_csv(self._parse_assignment) 2757 ) 2758 2759 def _parse_value(self) -> t.Optional[exp.Tuple]: 2760 if self._match(TokenType.L_PAREN): 2761 expressions = self._parse_csv(self._parse_expression) 2762 self._match_r_paren() 2763 return self.expression(exp.Tuple, expressions=expressions) 2764 2765 # In some dialects we can have VALUES 1, 2 which results in 1 column & 2 rows. 
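# (Editor's illustrative note, not upstream commentary; assumes the public sqlglot
# API and a dialect such as Spark that accepts unparenthesized VALUES rows.) Each
# scalar parsed below is wrapped in a one-element Tuple, so single-column and
# multi-column VALUES clauses expose the same row shape downstream:
#
#     >>> import sqlglot
#     >>> values = sqlglot.parse_one(
#     ...     "SELECT * FROM VALUES 1, 2", read="spark"
#     ... ).find(sqlglot.exp.Values)
#     >>> [row.sql() for row in values.expressions]
#     ['(1)', '(2)']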
2766 expression = self._parse_expression() 2767 if expression: 2768 return self.expression(exp.Tuple, expressions=[expression]) 2769 return None 2770 2771 def _parse_projections(self) -> t.List[exp.Expression]: 2772 return self._parse_expressions() 2773 2774 def _parse_select( 2775 self, 2776 nested: bool = False, 2777 table: bool = False, 2778 parse_subquery_alias: bool = True, 2779 parse_set_operation: bool = True, 2780 ) -> t.Optional[exp.Expression]: 2781 cte = self._parse_with() 2782 2783 if cte: 2784 this = self._parse_statement() 2785 2786 if not this: 2787 self.raise_error("Failed to parse any statement following CTE") 2788 return cte 2789 2790 if "with" in this.arg_types: 2791 this.set("with", cte) 2792 else: 2793 self.raise_error(f"{this.key} does not support CTE") 2794 this = cte 2795 2796 return this 2797 2798 # duckdb supports leading with FROM x 2799 from_ = self._parse_from() if self._match(TokenType.FROM, advance=False) else None 2800 2801 if self._match(TokenType.SELECT): 2802 comments = self._prev_comments 2803 2804 hint = self._parse_hint() 2805 2806 if self._next and not self._next.token_type == TokenType.DOT: 2807 all_ = self._match(TokenType.ALL) 2808 distinct = self._match_set(self.DISTINCT_TOKENS) 2809 else: 2810 all_, distinct = None, None 2811 2812 kind = ( 2813 self._match(TokenType.ALIAS) 2814 and self._match_texts(("STRUCT", "VALUE")) 2815 and self._prev.text.upper() 2816 ) 2817 2818 if distinct: 2819 distinct = self.expression( 2820 exp.Distinct, 2821 on=self._parse_value() if self._match(TokenType.ON) else None, 2822 ) 2823 2824 if all_ and distinct: 2825 self.raise_error("Cannot specify both ALL and DISTINCT after SELECT") 2826 2827 limit = self._parse_limit(top=True) 2828 projections = self._parse_projections() 2829 2830 this = self.expression( 2831 exp.Select, 2832 kind=kind, 2833 hint=hint, 2834 distinct=distinct, 2835 expressions=projections, 2836 limit=limit, 2837 ) 2838 this.comments = comments 2839 2840 into = self._parse_into() 2841 if into: 2842 this.set("into", into) 2843 2844 if not from_: 2845 from_ = self._parse_from() 2846 2847 if from_: 2848 this.set("from", from_) 2849 2850 this = self._parse_query_modifiers(this) 2851 elif (table or nested) and self._match(TokenType.L_PAREN): 2852 if self._match(TokenType.PIVOT): 2853 this = self._parse_simplified_pivot() 2854 elif self._match(TokenType.FROM): 2855 this = exp.select("*").from_( 2856 t.cast(exp.From, self._parse_from(skip_from_token=True)) 2857 ) 2858 else: 2859 this = ( 2860 self._parse_table() 2861 if table 2862 else self._parse_select(nested=True, parse_set_operation=False) 2863 ) 2864 this = self._parse_query_modifiers(self._parse_set_operations(this)) 2865 2866 self._match_r_paren() 2867 2868 # We return early here so that the UNION isn't attached to the subquery by the 2869 # following call to _parse_set_operations, but instead becomes the parent node 2870 return self._parse_subquery(this, parse_alias=parse_subquery_alias) 2871 elif self._match(TokenType.VALUES, advance=False): 2872 this = self._parse_derived_table_values() 2873 elif from_: 2874 this = exp.select("*").from_(from_.this, copy=False) 2875 elif self._match(TokenType.SUMMARIZE): 2876 table = self._match(TokenType.TABLE) 2877 this = self._parse_select() or self._parse_string() or self._parse_table() 2878 return self.expression(exp.Summarize, this=this, table=table) 2879 elif self._match(TokenType.DESCRIBE): 2880 this = self._parse_describe() 2881 elif self._match_text_seq("STREAM"): 2882 this = self.expression(exp.Stream, 
this=self._parse_function()) 2883 else: 2884 this = None 2885 2886 return self._parse_set_operations(this) if parse_set_operation else this 2887 2888 def _parse_with(self, skip_with_token: bool = False) -> t.Optional[exp.With]: 2889 if not skip_with_token and not self._match(TokenType.WITH): 2890 return None 2891 2892 comments = self._prev_comments 2893 recursive = self._match(TokenType.RECURSIVE) 2894 2895 expressions = [] 2896 while True: 2897 expressions.append(self._parse_cte()) 2898 2899 if not self._match(TokenType.COMMA) and not self._match(TokenType.WITH): 2900 break 2901 else: 2902 self._match(TokenType.WITH) 2903 2904 return self.expression( 2905 exp.With, comments=comments, expressions=expressions, recursive=recursive 2906 ) 2907 2908 def _parse_cte(self) -> exp.CTE: 2909 alias = self._parse_table_alias(self.ID_VAR_TOKENS) 2910 if not alias or not alias.this: 2911 self.raise_error("Expected CTE to have alias") 2912 2913 self._match(TokenType.ALIAS) 2914 comments = self._prev_comments 2915 2916 if self._match_text_seq("NOT", "MATERIALIZED"): 2917 materialized = False 2918 elif self._match_text_seq("MATERIALIZED"): 2919 materialized = True 2920 else: 2921 materialized = None 2922 2923 return self.expression( 2924 exp.CTE, 2925 this=self._parse_wrapped(self._parse_statement), 2926 alias=alias, 2927 materialized=materialized, 2928 comments=comments, 2929 ) 2930 2931 def _parse_table_alias( 2932 self, alias_tokens: t.Optional[t.Collection[TokenType]] = None 2933 ) -> t.Optional[exp.TableAlias]: 2934 any_token = self._match(TokenType.ALIAS) 2935 alias = ( 2936 self._parse_id_var(any_token=any_token, tokens=alias_tokens or self.TABLE_ALIAS_TOKENS) 2937 or self._parse_string_as_identifier() 2938 ) 2939 2940 index = self._index 2941 if self._match(TokenType.L_PAREN): 2942 columns = self._parse_csv(self._parse_function_parameter) 2943 self._match_r_paren() if columns else self._retreat(index) 2944 else: 2945 columns = None 2946 2947 if not alias and not columns: 2948 return None 2949 2950 table_alias = self.expression(exp.TableAlias, this=alias, columns=columns) 2951 2952 # We bubble up comments from the Identifier to the TableAlias 2953 if isinstance(alias, exp.Identifier): 2954 table_alias.add_comments(alias.pop_comments()) 2955 2956 return table_alias 2957 2958 def _parse_subquery( 2959 self, this: t.Optional[exp.Expression], parse_alias: bool = True 2960 ) -> t.Optional[exp.Subquery]: 2961 if not this: 2962 return None 2963 2964 return self.expression( 2965 exp.Subquery, 2966 this=this, 2967 pivots=self._parse_pivots(), 2968 alias=self._parse_table_alias() if parse_alias else None, 2969 ) 2970 2971 def _implicit_unnests_to_explicit(self, this: E) -> E: 2972 from sqlglot.optimizer.normalize_identifiers import normalize_identifiers as _norm 2973 2974 refs = {_norm(this.args["from"].this.copy(), dialect=self.dialect).alias_or_name} 2975 for i, join in enumerate(this.args.get("joins") or []): 2976 table = join.this 2977 normalized_table = table.copy() 2978 normalized_table.meta["maybe_column"] = True 2979 normalized_table = _norm(normalized_table, dialect=self.dialect) 2980 2981 if isinstance(table, exp.Table) and not join.args.get("on"): 2982 if normalized_table.parts[0].name in refs: 2983 table_as_column = table.to_column() 2984 unnest = exp.Unnest(expressions=[table_as_column]) 2985 2986 # Table.to_column creates a parent Alias node that we want to convert to 2987 # a TableAlias and attach to the Unnest, so it matches the parser's output 2988 if isinstance(table.args.get("alias"), 
exp.TableAlias): 2989 table_as_column.replace(table_as_column.this) 2990 exp.alias_(unnest, None, table=[table.args["alias"].this], copy=False) 2991 2992 table.replace(unnest) 2993 2994 refs.add(normalized_table.alias_or_name) 2995 2996 return this 2997 2998 def _parse_query_modifiers( 2999 self, this: t.Optional[exp.Expression] 3000 ) -> t.Optional[exp.Expression]: 3001 if isinstance(this, (exp.Query, exp.Table)): 3002 for join in self._parse_joins(): 3003 this.append("joins", join) 3004 for lateral in iter(self._parse_lateral, None): 3005 this.append("laterals", lateral) 3006 3007 while True: 3008 if self._match_set(self.QUERY_MODIFIER_PARSERS, advance=False): 3009 parser = self.QUERY_MODIFIER_PARSERS[self._curr.token_type] 3010 key, expression = parser(self) 3011 3012 if expression: 3013 this.set(key, expression) 3014 if key == "limit": 3015 offset = expression.args.pop("offset", None) 3016 3017 if offset: 3018 offset = exp.Offset(expression=offset) 3019 this.set("offset", offset) 3020 3021 limit_by_expressions = expression.expressions 3022 expression.set("expressions", None) 3023 offset.set("expressions", limit_by_expressions) 3024 continue 3025 break 3026 3027 if self.SUPPORTS_IMPLICIT_UNNEST and this and this.args.get("from"): 3028 this = self._implicit_unnests_to_explicit(this) 3029 3030 return this 3031 3032 def _parse_hint(self) -> t.Optional[exp.Hint]: 3033 if self._match(TokenType.HINT): 3034 hints = [] 3035 for hint in iter( 3036 lambda: self._parse_csv( 3037 lambda: self._parse_function() or self._parse_var(upper=True) 3038 ), 3039 [], 3040 ): 3041 hints.extend(hint) 3042 3043 if not self._match_pair(TokenType.STAR, TokenType.SLASH): 3044 self.raise_error("Expected */ after HINT") 3045 3046 return self.expression(exp.Hint, expressions=hints) 3047 3048 return None 3049 3050 def _parse_into(self) -> t.Optional[exp.Into]: 3051 if not self._match(TokenType.INTO): 3052 return None 3053 3054 temp = self._match(TokenType.TEMPORARY) 3055 unlogged = self._match_text_seq("UNLOGGED") 3056 self._match(TokenType.TABLE) 3057 3058 return self.expression( 3059 exp.Into, this=self._parse_table(schema=True), temporary=temp, unlogged=unlogged 3060 ) 3061 3062 def _parse_from( 3063 self, joins: bool = False, skip_from_token: bool = False 3064 ) -> t.Optional[exp.From]: 3065 if not skip_from_token and not self._match(TokenType.FROM): 3066 return None 3067 3068 return self.expression( 3069 exp.From, comments=self._prev_comments, this=self._parse_table(joins=joins) 3070 ) 3071 3072 def _parse_match_recognize_measure(self) -> exp.MatchRecognizeMeasure: 3073 return self.expression( 3074 exp.MatchRecognizeMeasure, 3075 window_frame=self._match_texts(("FINAL", "RUNNING")) and self._prev.text.upper(), 3076 this=self._parse_expression(), 3077 ) 3078 3079 def _parse_match_recognize(self) -> t.Optional[exp.MatchRecognize]: 3080 if not self._match(TokenType.MATCH_RECOGNIZE): 3081 return None 3082 3083 self._match_l_paren() 3084 3085 partition = self._parse_partition_by() 3086 order = self._parse_order() 3087 3088 measures = ( 3089 self._parse_csv(self._parse_match_recognize_measure) 3090 if self._match_text_seq("MEASURES") 3091 else None 3092 ) 3093 3094 if self._match_text_seq("ONE", "ROW", "PER", "MATCH"): 3095 rows = exp.var("ONE ROW PER MATCH") 3096 elif self._match_text_seq("ALL", "ROWS", "PER", "MATCH"): 3097 text = "ALL ROWS PER MATCH" 3098 if self._match_text_seq("SHOW", "EMPTY", "MATCHES"): 3099 text += " SHOW EMPTY MATCHES" 3100 elif self._match_text_seq("OMIT", "EMPTY", "MATCHES"): 3101 text += " 
OMIT EMPTY MATCHES" 3102 elif self._match_text_seq("WITH", "UNMATCHED", "ROWS"): 3103 text += " WITH UNMATCHED ROWS" 3104 rows = exp.var(text) 3105 else: 3106 rows = None 3107 3108 if self._match_text_seq("AFTER", "MATCH", "SKIP"): 3109 text = "AFTER MATCH SKIP" 3110 if self._match_text_seq("PAST", "LAST", "ROW"): 3111 text += " PAST LAST ROW" 3112 elif self._match_text_seq("TO", "NEXT", "ROW"): 3113 text += " TO NEXT ROW" 3114 elif self._match_text_seq("TO", "FIRST"): 3115 text += f" TO FIRST {self._advance_any().text}" # type: ignore 3116 elif self._match_text_seq("TO", "LAST"): 3117 text += f" TO LAST {self._advance_any().text}" # type: ignore 3118 after = exp.var(text) 3119 else: 3120 after = None 3121 3122 if self._match_text_seq("PATTERN"): 3123 self._match_l_paren() 3124 3125 if not self._curr: 3126 self.raise_error("Expecting )", self._curr) 3127 3128 paren = 1 3129 start = self._curr 3130 3131 while self._curr and paren > 0: 3132 if self._curr.token_type == TokenType.L_PAREN: 3133 paren += 1 3134 if self._curr.token_type == TokenType.R_PAREN: 3135 paren -= 1 3136 3137 end = self._prev 3138 self._advance() 3139 3140 if paren > 0: 3141 self.raise_error("Expecting )", self._curr) 3142 3143 pattern = exp.var(self._find_sql(start, end)) 3144 else: 3145 pattern = None 3146 3147 define = ( 3148 self._parse_csv(self._parse_name_as_expression) 3149 if self._match_text_seq("DEFINE") 3150 else None 3151 ) 3152 3153 self._match_r_paren() 3154 3155 return self.expression( 3156 exp.MatchRecognize, 3157 partition_by=partition, 3158 order=order, 3159 measures=measures, 3160 rows=rows, 3161 after=after, 3162 pattern=pattern, 3163 define=define, 3164 alias=self._parse_table_alias(), 3165 ) 3166 3167 def _parse_lateral(self) -> t.Optional[exp.Lateral]: 3168 cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY) 3169 if not cross_apply and self._match_pair(TokenType.OUTER, TokenType.APPLY): 3170 cross_apply = False 3171 3172 if cross_apply is not None: 3173 this = self._parse_select(table=True) 3174 view = None 3175 outer = None 3176 elif self._match(TokenType.LATERAL): 3177 this = self._parse_select(table=True) 3178 view = self._match(TokenType.VIEW) 3179 outer = self._match(TokenType.OUTER) 3180 else: 3181 return None 3182 3183 if not this: 3184 this = ( 3185 self._parse_unnest() 3186 or self._parse_function() 3187 or self._parse_id_var(any_token=False) 3188 ) 3189 3190 while self._match(TokenType.DOT): 3191 this = exp.Dot( 3192 this=this, 3193 expression=self._parse_function() or self._parse_id_var(any_token=False), 3194 ) 3195 3196 if view: 3197 table = self._parse_id_var(any_token=False) 3198 columns = self._parse_csv(self._parse_id_var) if self._match(TokenType.ALIAS) else [] 3199 table_alias: t.Optional[exp.TableAlias] = self.expression( 3200 exp.TableAlias, this=table, columns=columns 3201 ) 3202 elif isinstance(this, (exp.Subquery, exp.Unnest)) and this.alias: 3203 # We move the alias from the lateral's child node to the lateral itself 3204 table_alias = this.args["alias"].pop() 3205 else: 3206 table_alias = self._parse_table_alias() 3207 3208 return self.expression( 3209 exp.Lateral, 3210 this=this, 3211 view=view, 3212 outer=outer, 3213 alias=table_alias, 3214 cross_apply=cross_apply, 3215 ) 3216 3217 def _parse_join_parts( 3218 self, 3219 ) -> t.Tuple[t.Optional[Token], t.Optional[Token], t.Optional[Token]]: 3220 return ( 3221 self._match_set(self.JOIN_METHODS) and self._prev, 3222 self._match_set(self.JOIN_SIDES) and self._prev, 3223 self._match_set(self.JOIN_KINDS) and 
self._prev, 3224 ) 3225 3226 def _parse_using_identifiers(self) -> t.List[exp.Expression]: 3227 def _parse_column_as_identifier() -> t.Optional[exp.Expression]: 3228 this = self._parse_column() 3229 if isinstance(this, exp.Column): 3230 return this.this 3231 return this 3232 3233 return self._parse_wrapped_csv(_parse_column_as_identifier, optional=True) 3234 3235 def _parse_join( 3236 self, skip_join_token: bool = False, parse_bracket: bool = False 3237 ) -> t.Optional[exp.Join]: 3238 if self._match(TokenType.COMMA): 3239 return self.expression(exp.Join, this=self._parse_table()) 3240 3241 index = self._index 3242 method, side, kind = self._parse_join_parts() 3243 hint = self._prev.text if self._match_texts(self.JOIN_HINTS) else None 3244 join = self._match(TokenType.JOIN) or (kind and kind.token_type == TokenType.STRAIGHT_JOIN) 3245 3246 if not skip_join_token and not join: 3247 self._retreat(index) 3248 kind = None 3249 method = None 3250 side = None 3251 3252 outer_apply = self._match_pair(TokenType.OUTER, TokenType.APPLY, False) 3253 cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY, False) 3254 3255 if not skip_join_token and not join and not outer_apply and not cross_apply: 3256 return None 3257 3258 kwargs: t.Dict[str, t.Any] = {"this": self._parse_table(parse_bracket=parse_bracket)} 3259 3260 if method: 3261 kwargs["method"] = method.text 3262 if side: 3263 kwargs["side"] = side.text 3264 if kind: 3265 kwargs["kind"] = kind.text 3266 if hint: 3267 kwargs["hint"] = hint 3268 3269 if self._match(TokenType.MATCH_CONDITION): 3270 kwargs["match_condition"] = self._parse_wrapped(self._parse_comparison) 3271 3272 if self._match(TokenType.ON): 3273 kwargs["on"] = self._parse_assignment() 3274 elif self._match(TokenType.USING): 3275 kwargs["using"] = self._parse_using_identifiers() 3276 elif ( 3277 not (outer_apply or cross_apply) 3278 and not isinstance(kwargs["this"], exp.Unnest) 3279 and not (kind and kind.token_type == TokenType.CROSS) 3280 ): 3281 index = self._index 3282 joins: t.Optional[list] = list(self._parse_joins()) 3283 3284 if joins and self._match(TokenType.ON): 3285 kwargs["on"] = self._parse_assignment() 3286 elif joins and self._match(TokenType.USING): 3287 kwargs["using"] = self._parse_using_identifiers() 3288 else: 3289 joins = None 3290 self._retreat(index) 3291 3292 kwargs["this"].set("joins", joins if joins else None) 3293 3294 comments = [c for token in (method, side, kind) if token for c in token.comments] 3295 return self.expression(exp.Join, comments=comments, **kwargs) 3296 3297 def _parse_opclass(self) -> t.Optional[exp.Expression]: 3298 this = self._parse_assignment() 3299 3300 if self._match_texts(self.OPCLASS_FOLLOW_KEYWORDS, advance=False): 3301 return this 3302 3303 if not self._match_set(self.OPTYPE_FOLLOW_TOKENS, advance=False): 3304 return self.expression(exp.Opclass, this=this, expression=self._parse_table_parts()) 3305 3306 return this 3307 3308 def _parse_index_params(self) -> exp.IndexParameters: 3309 using = self._parse_var(any_token=True) if self._match(TokenType.USING) else None 3310 3311 if self._match(TokenType.L_PAREN, advance=False): 3312 columns = self._parse_wrapped_csv(self._parse_with_operator) 3313 else: 3314 columns = None 3315 3316 include = self._parse_wrapped_id_vars() if self._match_text_seq("INCLUDE") else None 3317 partition_by = self._parse_partition_by() 3318 with_storage = self._match(TokenType.WITH) and self._parse_wrapped_properties() 3319 tablespace = ( 3320 self._parse_var(any_token=True) 3321 if 
self._match_text_seq("USING", "INDEX", "TABLESPACE") 3322 else None 3323 ) 3324 where = self._parse_where() 3325 3326 on = self._parse_field() if self._match(TokenType.ON) else None 3327 3328 return self.expression( 3329 exp.IndexParameters, 3330 using=using, 3331 columns=columns, 3332 include=include, 3333 partition_by=partition_by, 3334 where=where, 3335 with_storage=with_storage, 3336 tablespace=tablespace, 3337 on=on, 3338 ) 3339 3340 def _parse_index( 3341 self, index: t.Optional[exp.Expression] = None, anonymous: bool = False 3342 ) -> t.Optional[exp.Index]: 3343 if index or anonymous: 3344 unique = None 3345 primary = None 3346 amp = None 3347 3348 self._match(TokenType.ON) 3349 self._match(TokenType.TABLE) # hive 3350 table = self._parse_table_parts(schema=True) 3351 else: 3352 unique = self._match(TokenType.UNIQUE) 3353 primary = self._match_text_seq("PRIMARY") 3354 amp = self._match_text_seq("AMP") 3355 3356 if not self._match(TokenType.INDEX): 3357 return None 3358 3359 index = self._parse_id_var() 3360 table = None 3361 3362 params = self._parse_index_params() 3363 3364 return self.expression( 3365 exp.Index, 3366 this=index, 3367 table=table, 3368 unique=unique, 3369 primary=primary, 3370 amp=amp, 3371 params=params, 3372 ) 3373 3374 def _parse_table_hints(self) -> t.Optional[t.List[exp.Expression]]: 3375 hints: t.List[exp.Expression] = [] 3376 if self._match_pair(TokenType.WITH, TokenType.L_PAREN): 3377 # https://learn.microsoft.com/en-us/sql/t-sql/queries/hints-transact-sql-table?view=sql-server-ver16 3378 hints.append( 3379 self.expression( 3380 exp.WithTableHint, 3381 expressions=self._parse_csv( 3382 lambda: self._parse_function() or self._parse_var(any_token=True) 3383 ), 3384 ) 3385 ) 3386 self._match_r_paren() 3387 else: 3388 # https://dev.mysql.com/doc/refman/8.0/en/index-hints.html 3389 while self._match_set(self.TABLE_INDEX_HINT_TOKENS): 3390 hint = exp.IndexTableHint(this=self._prev.text.upper()) 3391 3392 self._match_set((TokenType.INDEX, TokenType.KEY)) 3393 if self._match(TokenType.FOR): 3394 hint.set("target", self._advance_any() and self._prev.text.upper()) 3395 3396 hint.set("expressions", self._parse_wrapped_id_vars()) 3397 hints.append(hint) 3398 3399 return hints or None 3400 3401 def _parse_table_part(self, schema: bool = False) -> t.Optional[exp.Expression]: 3402 return ( 3403 (not schema and self._parse_function(optional_parens=False)) 3404 or self._parse_id_var(any_token=False) 3405 or self._parse_string_as_identifier() 3406 or self._parse_placeholder() 3407 ) 3408 3409 def _parse_table_parts( 3410 self, schema: bool = False, is_db_reference: bool = False, wildcard: bool = False 3411 ) -> exp.Table: 3412 catalog = None 3413 db = None 3414 table: t.Optional[exp.Expression | str] = self._parse_table_part(schema=schema) 3415 3416 while self._match(TokenType.DOT): 3417 if catalog: 3418 # This allows nesting the table in arbitrarily many dot expressions if needed 3419 table = self.expression( 3420 exp.Dot, this=table, expression=self._parse_table_part(schema=schema) 3421 ) 3422 else: 3423 catalog = db 3424 db = table 3425 # "" used for tsql FROM a..b case 3426 table = self._parse_table_part(schema=schema) or "" 3427 3428 if ( 3429 wildcard 3430 and self._is_connected() 3431 and (isinstance(table, exp.Identifier) or not table) 3432 and self._match(TokenType.STAR) 3433 ): 3434 if isinstance(table, exp.Identifier): 3435 table.args["this"] += "*" 3436 else: 3437 table = exp.Identifier(this="*") 3438 3439 # We bubble up comments from the Identifier to the Table 
3440 comments = table.pop_comments() if isinstance(table, exp.Expression) else None 3441 3442 if is_db_reference: 3443 catalog = db 3444 db = table 3445 table = None 3446 3447 if not table and not is_db_reference: 3448 self.raise_error(f"Expected table name but got {self._curr}") 3449 if not db and is_db_reference: 3450 self.raise_error(f"Expected database name but got {self._curr}") 3451 3452 table = self.expression( 3453 exp.Table, 3454 comments=comments, 3455 this=table, 3456 db=db, 3457 catalog=catalog, 3458 ) 3459 3460 changes = self._parse_changes() 3461 if changes: 3462 table.set("changes", changes) 3463 3464 at_before = self._parse_historical_data() 3465 if at_before: 3466 table.set("when", at_before) 3467 3468 pivots = self._parse_pivots() 3469 if pivots: 3470 table.set("pivots", pivots) 3471 3472 return table 3473 3474 def _parse_table( 3475 self, 3476 schema: bool = False, 3477 joins: bool = False, 3478 alias_tokens: t.Optional[t.Collection[TokenType]] = None, 3479 parse_bracket: bool = False, 3480 is_db_reference: bool = False, 3481 parse_partition: bool = False, 3482 ) -> t.Optional[exp.Expression]: 3483 lateral = self._parse_lateral() 3484 if lateral: 3485 return lateral 3486 3487 unnest = self._parse_unnest() 3488 if unnest: 3489 return unnest 3490 3491 values = self._parse_derived_table_values() 3492 if values: 3493 return values 3494 3495 subquery = self._parse_select(table=True) 3496 if subquery: 3497 if not subquery.args.get("pivots"): 3498 subquery.set("pivots", self._parse_pivots()) 3499 return subquery 3500 3501 bracket = parse_bracket and self._parse_bracket(None) 3502 bracket = self.expression(exp.Table, this=bracket) if bracket else None 3503 3504 rows_from = self._match_text_seq("ROWS", "FROM") and self._parse_wrapped_csv( 3505 self._parse_table 3506 ) 3507 rows_from = self.expression(exp.Table, rows_from=rows_from) if rows_from else None 3508 3509 only = self._match(TokenType.ONLY) 3510 3511 this = t.cast( 3512 exp.Expression, 3513 bracket 3514 or rows_from 3515 or self._parse_bracket( 3516 self._parse_table_parts(schema=schema, is_db_reference=is_db_reference) 3517 ), 3518 ) 3519 3520 if only: 3521 this.set("only", only) 3522 3523 # Postgres supports a wildcard (table) suffix operator, which is a no-op in this context 3524 self._match_text_seq("*") 3525 3526 parse_partition = parse_partition or self.SUPPORTS_PARTITION_SELECTION 3527 if parse_partition and self._match(TokenType.PARTITION, advance=False): 3528 this.set("partition", self._parse_partition()) 3529 3530 if schema: 3531 return self._parse_schema(this=this) 3532 3533 version = self._parse_version() 3534 3535 if version: 3536 this.set("version", version) 3537 3538 if self.dialect.ALIAS_POST_TABLESAMPLE: 3539 table_sample = self._parse_table_sample() 3540 3541 alias = self._parse_table_alias(alias_tokens=alias_tokens or self.TABLE_ALIAS_TOKENS) 3542 if alias: 3543 this.set("alias", alias) 3544 3545 if isinstance(this, exp.Table) and self._match_text_seq("AT"): 3546 return self.expression( 3547 exp.AtIndex, this=this.to_column(copy=False), expression=self._parse_id_var() 3548 ) 3549 3550 this.set("hints", self._parse_table_hints()) 3551 3552 if not this.args.get("pivots"): 3553 this.set("pivots", self._parse_pivots()) 3554 3555 if not self.dialect.ALIAS_POST_TABLESAMPLE: 3556 table_sample = self._parse_table_sample() 3557 3558 if table_sample: 3559 table_sample.set("this", this) 3560 this = table_sample 3561 3562 if joins: 3563 for join in self._parse_joins(): 3564 this.append("joins", join) 3565 3566 if 
self._match_pair(TokenType.WITH, TokenType.ORDINALITY): 3567 this.set("ordinality", True) 3568 this.set("alias", self._parse_table_alias()) 3569 3570 return this 3571 3572 def _parse_version(self) -> t.Optional[exp.Version]: 3573 if self._match(TokenType.TIMESTAMP_SNAPSHOT): 3574 this = "TIMESTAMP" 3575 elif self._match(TokenType.VERSION_SNAPSHOT): 3576 this = "VERSION" 3577 else: 3578 return None 3579 3580 if self._match_set((TokenType.FROM, TokenType.BETWEEN)): 3581 kind = self._prev.text.upper() 3582 start = self._parse_bitwise() 3583 self._match_texts(("TO", "AND")) 3584 end = self._parse_bitwise() 3585 expression: t.Optional[exp.Expression] = self.expression( 3586 exp.Tuple, expressions=[start, end] 3587 ) 3588 elif self._match_text_seq("CONTAINED", "IN"): 3589 kind = "CONTAINED IN" 3590 expression = self.expression( 3591 exp.Tuple, expressions=self._parse_wrapped_csv(self._parse_bitwise) 3592 ) 3593 elif self._match(TokenType.ALL): 3594 kind = "ALL" 3595 expression = None 3596 else: 3597 self._match_text_seq("AS", "OF") 3598 kind = "AS OF" 3599 expression = self._parse_type() 3600 3601 return self.expression(exp.Version, this=this, expression=expression, kind=kind) 3602 3603 def _parse_historical_data(self) -> t.Optional[exp.HistoricalData]: 3604 # https://docs.snowflake.com/en/sql-reference/constructs/at-before 3605 index = self._index 3606 historical_data = None 3607 if self._match_texts(self.HISTORICAL_DATA_PREFIX): 3608 this = self._prev.text.upper() 3609 kind = ( 3610 self._match(TokenType.L_PAREN) 3611 and self._match_texts(self.HISTORICAL_DATA_KIND) 3612 and self._prev.text.upper() 3613 ) 3614 expression = self._match(TokenType.FARROW) and self._parse_bitwise() 3615 3616 if expression: 3617 self._match_r_paren() 3618 historical_data = self.expression( 3619 exp.HistoricalData, this=this, kind=kind, expression=expression 3620 ) 3621 else: 3622 self._retreat(index) 3623 3624 return historical_data 3625 3626 def _parse_changes(self) -> t.Optional[exp.Changes]: 3627 if not self._match_text_seq("CHANGES", "(", "INFORMATION", "=>"): 3628 return None 3629 3630 information = self._parse_var(any_token=True) 3631 self._match_r_paren() 3632 3633 return self.expression( 3634 exp.Changes, 3635 information=information, 3636 at_before=self._parse_historical_data(), 3637 end=self._parse_historical_data(), 3638 ) 3639 3640 def _parse_unnest(self, with_alias: bool = True) -> t.Optional[exp.Unnest]: 3641 if not self._match(TokenType.UNNEST): 3642 return None 3643 3644 expressions = self._parse_wrapped_csv(self._parse_equality) 3645 offset = self._match_pair(TokenType.WITH, TokenType.ORDINALITY) 3646 3647 alias = self._parse_table_alias() if with_alias else None 3648 3649 if alias: 3650 if self.dialect.UNNEST_COLUMN_ONLY: 3651 if alias.args.get("columns"): 3652 self.raise_error("Unexpected extra column alias in unnest.") 3653 3654 alias.set("columns", [alias.this]) 3655 alias.set("this", None) 3656 3657 columns = alias.args.get("columns") or [] 3658 if offset and len(expressions) < len(columns): 3659 offset = columns.pop() 3660 3661 if not offset and self._match_pair(TokenType.WITH, TokenType.OFFSET): 3662 self._match(TokenType.ALIAS) 3663 offset = self._parse_id_var( 3664 any_token=False, tokens=self.UNNEST_OFFSET_ALIAS_TOKENS 3665 ) or exp.to_identifier("offset") 3666 3667 return self.expression(exp.Unnest, expressions=expressions, alias=alias, offset=offset) 3668 3669 def _parse_derived_table_values(self) -> t.Optional[exp.Values]: 3670 is_derived = self._match_pair(TokenType.L_PAREN, 
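# Illustrative sketch of _parse_unnest with a BigQuery-style OFFSET alias
# (hedged: assumes only the public sqlglot.parse_one entrypoint):
#
#     >>> import sqlglot
#     >>> from sqlglot import exp
#     >>> u = sqlglot.parse_one(
#     ...     "SELECT * FROM UNNEST([1, 2]) AS x WITH OFFSET AS o", read="bigquery"
#     ... ).find(exp.Unnest)
#     >>> u.args["offset"]  # Identifier(o); the alias carries column x (UNNEST_COLUMN_ONLY)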
TokenType.VALUES) 3671 if not is_derived and not ( 3672 # ClickHouse's `FORMAT Values` is equivalent to `VALUES` 3673 self._match_text_seq("VALUES") or self._match_text_seq("FORMAT", "VALUES") 3674 ): 3675 return None 3676 3677 expressions = self._parse_csv(self._parse_value) 3678 alias = self._parse_table_alias() 3679 3680 if is_derived: 3681 self._match_r_paren() 3682 3683 return self.expression( 3684 exp.Values, expressions=expressions, alias=alias or self._parse_table_alias() 3685 ) 3686 3687 def _parse_table_sample(self, as_modifier: bool = False) -> t.Optional[exp.TableSample]: 3688 if not self._match(TokenType.TABLE_SAMPLE) and not ( 3689 as_modifier and self._match_text_seq("USING", "SAMPLE") 3690 ): 3691 return None 3692 3693 bucket_numerator = None 3694 bucket_denominator = None 3695 bucket_field = None 3696 percent = None 3697 size = None 3698 seed = None 3699 3700 method = self._parse_var(tokens=(TokenType.ROW,), upper=True) 3701 matched_l_paren = self._match(TokenType.L_PAREN) 3702 3703 if self.TABLESAMPLE_CSV: 3704 num = None 3705 expressions = self._parse_csv(self._parse_primary) 3706 else: 3707 expressions = None 3708 num = ( 3709 self._parse_factor() 3710 if self._match(TokenType.NUMBER, advance=False) 3711 else self._parse_primary() or self._parse_placeholder() 3712 ) 3713 3714 if self._match_text_seq("BUCKET"): 3715 bucket_numerator = self._parse_number() 3716 self._match_text_seq("OUT", "OF") 3717 bucket_denominator = self._parse_number() 3718 self._match(TokenType.ON) 3719 bucket_field = self._parse_field() 3720 elif self._match_set((TokenType.PERCENT, TokenType.MOD)): 3721 percent = num 3722 elif self._match(TokenType.ROWS) or not self.dialect.TABLESAMPLE_SIZE_IS_PERCENT: 3723 size = num 3724 else: 3725 percent = num 3726 3727 if matched_l_paren: 3728 self._match_r_paren() 3729 3730 if self._match(TokenType.L_PAREN): 3731 method = self._parse_var(upper=True) 3732 seed = self._match(TokenType.COMMA) and self._parse_number() 3733 self._match_r_paren() 3734 elif self._match_texts(("SEED", "REPEATABLE")): 3735 seed = self._parse_wrapped(self._parse_number) 3736 3737 if not method and self.DEFAULT_SAMPLING_METHOD: 3738 method = exp.var(self.DEFAULT_SAMPLING_METHOD) 3739 3740 return self.expression( 3741 exp.TableSample, 3742 expressions=expressions, 3743 method=method, 3744 bucket_numerator=bucket_numerator, 3745 bucket_denominator=bucket_denominator, 3746 bucket_field=bucket_field, 3747 percent=percent, 3748 size=size, 3749 seed=seed, 3750 ) 3751 3752 def _parse_pivots(self) -> t.Optional[t.List[exp.Pivot]]: 3753 return list(iter(self._parse_pivot, None)) or None 3754 3755 def _parse_joins(self) -> t.Iterator[exp.Join]: 3756 return iter(self._parse_join, None) 3757 3758 # https://duckdb.org/docs/sql/statements/pivot 3759 def _parse_simplified_pivot(self) -> exp.Pivot: 3760 def _parse_on() -> t.Optional[exp.Expression]: 3761 this = self._parse_bitwise() 3762 return self._parse_in(this) if self._match(TokenType.IN) else this 3763 3764 this = self._parse_table() 3765 expressions = self._match(TokenType.ON) and self._parse_csv(_parse_on) 3766 using = self._match(TokenType.USING) and self._parse_csv( 3767 lambda: self._parse_alias(self._parse_function()) 3768 ) 3769 group = self._parse_group() 3770 return self.expression( 3771 exp.Pivot, this=this, expressions=expressions, using=using, group=group 3772 ) 3773 3774 def _parse_pivot_in(self) -> exp.In | exp.PivotAny: 3775 def _parse_aliased_expression() -> t.Optional[exp.Expression]: 3776 this =
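# Illustrative sketch of _parse_table_sample's Hive bucket form (hedged: assumes
# only the public sqlglot.parse_one entrypoint):
#
#     >>> import sqlglot
#     >>> from sqlglot import exp
#     >>> ts = sqlglot.parse_one(
#     ...     "SELECT * FROM t TABLESAMPLE (BUCKET 1 OUT OF 4 ON id)", read="hive"
#     ... ).find(exp.TableSample)
#     >>> ts.args["bucket_numerator"], ts.args["bucket_denominator"]  # Literal 1, Literal 4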
self._parse_select_or_expression() 3777 3778 self._match(TokenType.ALIAS) 3779 alias = self._parse_field() 3780 if alias: 3781 return self.expression(exp.PivotAlias, this=this, alias=alias) 3782 3783 return this 3784 3785 value = self._parse_column() 3786 3787 if not self._match_pair(TokenType.IN, TokenType.L_PAREN): 3788 self.raise_error("Expecting IN (") 3789 3790 if self._match(TokenType.ANY): 3791 expr: exp.PivotAny | exp.In = self.expression(exp.PivotAny, this=self._parse_order()) 3792 else: 3793 aliased_expressions = self._parse_csv(_parse_aliased_expression) 3794 expr = self.expression(exp.In, this=value, expressions=aliased_expressions) 3795 3796 self._match_r_paren() 3797 return expr 3798 3799 def _parse_pivot(self) -> t.Optional[exp.Pivot]: 3800 index = self._index 3801 include_nulls = None 3802 3803 if self._match(TokenType.PIVOT): 3804 unpivot = False 3805 elif self._match(TokenType.UNPIVOT): 3806 unpivot = True 3807 3808 # https://docs.databricks.com/en/sql/language-manual/sql-ref-syntax-qry-select-unpivot.html#syntax 3809 if self._match_text_seq("INCLUDE", "NULLS"): 3810 include_nulls = True 3811 elif self._match_text_seq("EXCLUDE", "NULLS"): 3812 include_nulls = False 3813 else: 3814 return None 3815 3816 expressions = [] 3817 3818 if not self._match(TokenType.L_PAREN): 3819 self._retreat(index) 3820 return None 3821 3822 if unpivot: 3823 expressions = self._parse_csv(self._parse_column) 3824 else: 3825 expressions = self._parse_csv(lambda: self._parse_alias(self._parse_function())) 3826 3827 if not expressions: 3828 self.raise_error("Failed to parse PIVOT's aggregation list") 3829 3830 if not self._match(TokenType.FOR): 3831 self.raise_error("Expecting FOR") 3832 3833 field = self._parse_pivot_in() 3834 default_on_null = self._match_text_seq("DEFAULT", "ON", "NULL") and self._parse_wrapped( 3835 self._parse_bitwise 3836 ) 3837 3838 self._match_r_paren() 3839 3840 pivot = self.expression( 3841 exp.Pivot, 3842 expressions=expressions, 3843 field=field, 3844 unpivot=unpivot, 3845 include_nulls=include_nulls, 3846 default_on_null=default_on_null, 3847 ) 3848 3849 if not self._match_set((TokenType.PIVOT, TokenType.UNPIVOT), advance=False): 3850 pivot.set("alias", self._parse_table_alias()) 3851 3852 if not unpivot: 3853 names = self._pivot_column_names(t.cast(t.List[exp.Expression], expressions)) 3854 3855 columns: t.List[exp.Expression] = [] 3856 for fld in pivot.args["field"].expressions: 3857 field_name = fld.sql() if self.IDENTIFY_PIVOT_STRINGS else fld.alias_or_name 3858 for name in names: 3859 if self.PREFIXED_PIVOT_COLUMNS: 3860 name = f"{name}_{field_name}" if name else field_name 3861 else: 3862 name = f"{field_name}_{name}" if name else field_name 3863 3864 columns.append(exp.to_identifier(name)) 3865 3866 pivot.set("columns", columns) 3867 3868 return pivot 3869 3870 def _pivot_column_names(self, aggregations: t.List[exp.Expression]) -> t.List[str]: 3871 return [agg.alias for agg in aggregations] 3872 3873 def _parse_prewhere(self, skip_where_token: bool = False) -> t.Optional[exp.PreWhere]: 3874 if not skip_where_token and not self._match(TokenType.PREWHERE): 3875 return None 3876 3877 return self.expression( 3878 exp.PreWhere, comments=self._prev_comments, this=self._parse_assignment() 3879 ) 3880 3881 def _parse_where(self, skip_where_token: bool = False) -> t.Optional[exp.Where]: 3882 if not skip_where_token and not self._match(TokenType.WHERE): 3883 return None 3884 3885 return self.expression( 3886 exp.Where, comments=self._prev_comments, 
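# Illustrative sketch of _parse_pivot (hedged: assumes only the public
# sqlglot.parse_one entrypoint and Snowflake-style PIVOT syntax):
#
#     >>> import sqlglot
#     >>> from sqlglot import exp
#     >>> p = sqlglot.parse_one(
#     ...     "SELECT * FROM sales PIVOT (SUM(amount) FOR quarter IN ('Q1', 'Q2'))",
#     ...     read="snowflake",
#     ... ).find(exp.Pivot)
#     >>> p.args["field"]  # the exp.In built by _parse_pivot_in above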
this=self._parse_assignment() 3887 ) 3888 3889 def _parse_group(self, skip_group_by_token: bool = False) -> t.Optional[exp.Group]: 3890 if not skip_group_by_token and not self._match(TokenType.GROUP_BY): 3891 return None 3892 3893 elements: t.Dict[str, t.Any] = defaultdict(list) 3894 3895 if self._match(TokenType.ALL): 3896 elements["all"] = True 3897 elif self._match(TokenType.DISTINCT): 3898 elements["all"] = False 3899 3900 while True: 3901 expressions = self._parse_csv( 3902 lambda: None 3903 if self._match_set((TokenType.CUBE, TokenType.ROLLUP), advance=False) 3904 else self._parse_assignment() 3905 ) 3906 if expressions: 3907 elements["expressions"].extend(expressions) 3908 3909 grouping_sets = self._parse_grouping_sets() 3910 if grouping_sets: 3911 elements["grouping_sets"].extend(grouping_sets) 3912 3913 rollup = None 3914 cube = None 3915 totals = None 3916 3917 index = self._index 3918 with_ = self._match(TokenType.WITH) 3919 if self._match(TokenType.ROLLUP): 3920 rollup = with_ or self._parse_wrapped_csv(self._parse_column) 3921 elements["rollup"].extend(ensure_list(rollup)) 3922 3923 if self._match(TokenType.CUBE): 3924 cube = with_ or self._parse_wrapped_csv(self._parse_column) 3925 elements["cube"].extend(ensure_list(cube)) 3926 3927 if self._match_text_seq("TOTALS"): 3928 totals = True 3929 elements["totals"] = True # type: ignore 3930 3931 if not (grouping_sets or rollup or cube or totals): 3932 if with_: 3933 self._retreat(index) 3934 break 3935 3936 return self.expression(exp.Group, **elements) # type: ignore 3937 3938 def _parse_grouping_sets(self) -> t.Optional[t.List[exp.Expression]]: 3939 if not self._match(TokenType.GROUPING_SETS): 3940 return None 3941 3942 return self._parse_wrapped_csv(self._parse_grouping_set) 3943 3944 def _parse_grouping_set(self) -> t.Optional[exp.Expression]: 3945 if self._match(TokenType.L_PAREN): 3946 grouping_set = self._parse_csv(self._parse_column) 3947 self._match_r_paren() 3948 return self.expression(exp.Tuple, expressions=grouping_set) 3949 3950 return self._parse_column() 3951 3952 def _parse_having(self, skip_having_token: bool = False) -> t.Optional[exp.Having]: 3953 if not skip_having_token and not self._match(TokenType.HAVING): 3954 return None 3955 return self.expression(exp.Having, this=self._parse_assignment()) 3956 3957 def _parse_qualify(self) -> t.Optional[exp.Qualify]: 3958 if not self._match(TokenType.QUALIFY): 3959 return None 3960 return self.expression(exp.Qualify, this=self._parse_assignment()) 3961 3962 def _parse_connect(self, skip_start_token: bool = False) -> t.Optional[exp.Connect]: 3963 if skip_start_token: 3964 start = None 3965 elif self._match(TokenType.START_WITH): 3966 start = self._parse_assignment() 3967 else: 3968 return None 3969 3970 self._match(TokenType.CONNECT_BY) 3971 nocycle = self._match_text_seq("NOCYCLE") 3972 self.NO_PAREN_FUNCTION_PARSERS["PRIOR"] = lambda self: self.expression( 3973 exp.Prior, this=self._parse_bitwise() 3974 ) 3975 connect = self._parse_assignment() 3976 self.NO_PAREN_FUNCTION_PARSERS.pop("PRIOR") 3977 3978 if not start and self._match(TokenType.START_WITH): 3979 start = self._parse_assignment() 3980 3981 return self.expression(exp.Connect, start=start, connect=connect, nocycle=nocycle) 3982 3983 def _parse_name_as_expression(self) -> exp.Alias: 3984 return self.expression( 3985 exp.Alias, 3986 alias=self._parse_id_var(any_token=True), 3987 this=self._match(TokenType.ALIAS) and self._parse_assignment(), 3988 ) 3989 3990 def _parse_interpolate(self) -> 
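# Illustrative sketch of _parse_group collecting GROUPING SETS (hedged: assumes
# only the public sqlglot.parse_one entrypoint):
#
#     >>> import sqlglot
#     >>> g = sqlglot.parse_one(
#     ...     "SELECT a, b, SUM(c) FROM t GROUP BY GROUPING SETS ((a, b), (a))"
#     ... ).args["group"]
#     >>> g.args["grouping_sets"]  # [Tuple(a, b), Tuple(a)] per _parse_grouping_set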
t.Optional[t.List[exp.Expression]]: 3991 if self._match_text_seq("INTERPOLATE"): 3992 return self._parse_wrapped_csv(self._parse_name_as_expression) 3993 return None 3994 3995 def _parse_order( 3996 self, this: t.Optional[exp.Expression] = None, skip_order_token: bool = False 3997 ) -> t.Optional[exp.Expression]: 3998 siblings = None 3999 if not skip_order_token and not self._match(TokenType.ORDER_BY): 4000 if not self._match(TokenType.ORDER_SIBLINGS_BY): 4001 return this 4002 4003 siblings = True 4004 4005 return self.expression( 4006 exp.Order, 4007 this=this, 4008 expressions=self._parse_csv(self._parse_ordered), 4009 interpolate=self._parse_interpolate(), 4010 siblings=siblings, 4011 ) 4012 4013 def _parse_sort(self, exp_class: t.Type[E], token: TokenType) -> t.Optional[E]: 4014 if not self._match(token): 4015 return None 4016 return self.expression(exp_class, expressions=self._parse_csv(self._parse_ordered)) 4017 4018 def _parse_ordered( 4019 self, parse_method: t.Optional[t.Callable] = None 4020 ) -> t.Optional[exp.Ordered]: 4021 this = parse_method() if parse_method else self._parse_assignment() 4022 if not this: 4023 return None 4024 4025 if this.name.upper() == "ALL" and self.dialect.SUPPORTS_ORDER_BY_ALL: 4026 this = exp.var("ALL") 4027 4028 asc = self._match(TokenType.ASC) 4029 desc = self._match(TokenType.DESC) or (asc and False) 4030 4031 is_nulls_first = self._match_text_seq("NULLS", "FIRST") 4032 is_nulls_last = self._match_text_seq("NULLS", "LAST") 4033 4034 nulls_first = is_nulls_first or False 4035 explicitly_null_ordered = is_nulls_first or is_nulls_last 4036 4037 if ( 4038 not explicitly_null_ordered 4039 and ( 4040 (not desc and self.dialect.NULL_ORDERING == "nulls_are_small") 4041 or (desc and self.dialect.NULL_ORDERING != "nulls_are_small") 4042 ) 4043 and self.dialect.NULL_ORDERING != "nulls_are_last" 4044 ): 4045 nulls_first = True 4046 4047 if self._match_text_seq("WITH", "FILL"): 4048 with_fill = self.expression( 4049 exp.WithFill, 4050 **{ # type: ignore 4051 "from": self._match(TokenType.FROM) and self._parse_bitwise(), 4052 "to": self._match_text_seq("TO") and self._parse_bitwise(), 4053 "step": self._match_text_seq("STEP") and self._parse_bitwise(), 4054 }, 4055 ) 4056 else: 4057 with_fill = None 4058 4059 return self.expression( 4060 exp.Ordered, this=this, desc=desc, nulls_first=nulls_first, with_fill=with_fill 4061 ) 4062 4063 def _parse_limit( 4064 self, 4065 this: t.Optional[exp.Expression] = None, 4066 top: bool = False, 4067 skip_limit_token: bool = False, 4068 ) -> t.Optional[exp.Expression]: 4069 if skip_limit_token or self._match(TokenType.TOP if top else TokenType.LIMIT): 4070 comments = self._prev_comments 4071 if top: 4072 limit_paren = self._match(TokenType.L_PAREN) 4073 expression = self._parse_term() if limit_paren else self._parse_number() 4074 4075 if limit_paren: 4076 self._match_r_paren() 4077 else: 4078 expression = self._parse_term() 4079 4080 if self._match(TokenType.COMMA): 4081 offset = expression 4082 expression = self._parse_term() 4083 else: 4084 offset = None 4085 4086 limit_exp = self.expression( 4087 exp.Limit, 4088 this=this, 4089 expression=expression, 4090 offset=offset, 4091 comments=comments, 4092 expressions=self._parse_limit_by(), 4093 ) 4094 4095 return limit_exp 4096 4097 if self._match(TokenType.FETCH): 4098 direction = self._match_set((TokenType.FIRST, TokenType.NEXT)) 4099 direction = self._prev.text.upper() if direction else "FIRST" 4100 4101 count = self._parse_field(tokens=self.FETCH_TOKENS) 4102 percent = 
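# Illustrative sketch of _parse_ordered's null-ordering flags (hedged: assumes
# only the public sqlglot.parse_one entrypoint; with the default
# "nulls_are_small" NULL_ORDERING, ascending sorts also infer nulls_first=True):
#
#     >>> import sqlglot
#     >>> from sqlglot import exp
#     >>> o = sqlglot.parse_one("SELECT * FROM t ORDER BY x DESC NULLS FIRST").find(exp.Ordered)
#     >>> o.args["desc"], o.args["nulls_first"]
#     (True, True)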
self._match(TokenType.PERCENT) 4103 4104 self._match_set((TokenType.ROW, TokenType.ROWS)) 4105 4106 only = self._match_text_seq("ONLY") 4107 with_ties = self._match_text_seq("WITH", "TIES") 4108 4109 if only and with_ties: 4110 self.raise_error("Cannot specify both ONLY and WITH TIES in FETCH clause") 4111 4112 return self.expression( 4113 exp.Fetch, 4114 direction=direction, 4115 count=count, 4116 percent=percent, 4117 with_ties=with_ties, 4118 ) 4119 4120 return this 4121 4122 def _parse_offset(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 4123 if not self._match(TokenType.OFFSET): 4124 return this 4125 4126 count = self._parse_term() 4127 self._match_set((TokenType.ROW, TokenType.ROWS)) 4128 4129 return self.expression( 4130 exp.Offset, this=this, expression=count, expressions=self._parse_limit_by() 4131 ) 4132 4133 def _parse_limit_by(self) -> t.Optional[t.List[exp.Expression]]: 4134 return self._match_text_seq("BY") and self._parse_csv(self._parse_bitwise) 4135 4136 def _parse_locks(self) -> t.List[exp.Lock]: 4137 locks = [] 4138 while True: 4139 if self._match_text_seq("FOR", "UPDATE"): 4140 update = True 4141 elif self._match_text_seq("FOR", "SHARE") or self._match_text_seq( 4142 "LOCK", "IN", "SHARE", "MODE" 4143 ): 4144 update = False 4145 else: 4146 break 4147 4148 expressions = None 4149 if self._match_text_seq("OF"): 4150 expressions = self._parse_csv(lambda: self._parse_table(schema=True)) 4151 4152 wait: t.Optional[bool | exp.Expression] = None 4153 if self._match_text_seq("NOWAIT"): 4154 wait = True 4155 elif self._match_text_seq("WAIT"): 4156 wait = self._parse_primary() 4157 elif self._match_text_seq("SKIP", "LOCKED"): 4158 wait = False 4159 4160 locks.append( 4161 self.expression(exp.Lock, update=update, expressions=expressions, wait=wait) 4162 ) 4163 4164 return locks 4165 4166 def _parse_set_operations(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 4167 while this and self._match_set(self.SET_OPERATIONS): 4168 token_type = self._prev.token_type 4169 4170 if token_type == TokenType.UNION: 4171 operation: t.Type[exp.SetOperation] = exp.Union 4172 elif token_type == TokenType.EXCEPT: 4173 operation = exp.Except 4174 else: 4175 operation = exp.Intersect 4176 4177 comments = self._prev.comments 4178 distinct = self._match(TokenType.DISTINCT) or not self._match(TokenType.ALL) 4179 by_name = self._match_text_seq("BY", "NAME") 4180 expression = self._parse_select(nested=True, parse_set_operation=False) 4181 4182 this = self.expression( 4183 operation, 4184 comments=comments, 4185 this=this, 4186 distinct=distinct, 4187 by_name=by_name, 4188 expression=expression, 4189 ) 4190 4191 if isinstance(this, exp.SetOperation) and self.MODIFIERS_ATTACHED_TO_SET_OP: 4192 expression = this.expression 4193 4194 if expression: 4195 for arg in self.SET_OP_MODIFIERS: 4196 expr = expression.args.get(arg) 4197 if expr: 4198 this.set(arg, expr.pop()) 4199 4200 return this 4201 4202 def _parse_expression(self) -> t.Optional[exp.Expression]: 4203 return self._parse_alias(self._parse_assignment()) 4204 4205 def _parse_assignment(self) -> t.Optional[exp.Expression]: 4206 this = self._parse_disjunction() 4207 if not this and self._next and self._next.token_type in self.ASSIGNMENT: 4208 # This allows us to parse <non-identifier token> := <expr> 4209 this = exp.column( 4210 t.cast(str, self._advance_any(ignore_reserved=True) and self._prev.text) 4211 ) 4212 4213 while self._match_set(self.ASSIGNMENT): 4214 this = self.expression( 4215 
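# Illustrative sketch of _parse_set_operations attaching trailing modifiers to
# the set operation itself (hedged: assumes only the public sqlglot.parse_one
# entrypoint and that MODIFIERS_ATTACHED_TO_SET_OP holds for the dialect):
#
#     >>> import sqlglot
#     >>> u = sqlglot.parse_one("SELECT 1 UNION SELECT 2 ORDER BY 1 LIMIT 1")
#     >>> type(u).__name__  # "Union"; the trailing ORDER BY and LIMIT live on u.args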
self.ASSIGNMENT[self._prev.token_type], 4216 this=this, 4217 comments=self._prev_comments, 4218 expression=self._parse_assignment(), 4219 ) 4220 4221 return this 4222 4223 def _parse_disjunction(self) -> t.Optional[exp.Expression]: 4224 return self._parse_tokens(self._parse_conjunction, self.DISJUNCTION) 4225 4226 def _parse_conjunction(self) -> t.Optional[exp.Expression]: 4227 return self._parse_tokens(self._parse_equality, self.CONJUNCTION) 4228 4229 def _parse_equality(self) -> t.Optional[exp.Expression]: 4230 return self._parse_tokens(self._parse_comparison, self.EQUALITY) 4231 4232 def _parse_comparison(self) -> t.Optional[exp.Expression]: 4233 return self._parse_tokens(self._parse_range, self.COMPARISON) 4234 4235 def _parse_range(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 4236 this = this or self._parse_bitwise() 4237 negate = self._match(TokenType.NOT) 4238 4239 if self._match_set(self.RANGE_PARSERS): 4240 expression = self.RANGE_PARSERS[self._prev.token_type](self, this) 4241 if not expression: 4242 return this 4243 4244 this = expression 4245 elif self._match(TokenType.ISNULL): 4246 this = self.expression(exp.Is, this=this, expression=exp.Null()) 4247 4248 # Postgres supports ISNULL and NOTNULL for conditions. 4249 # https://blog.andreiavram.ro/postgresql-null-composite-type/ 4250 if self._match(TokenType.NOTNULL): 4251 this = self.expression(exp.Is, this=this, expression=exp.Null()) 4252 this = self.expression(exp.Not, this=this) 4253 4254 if negate: 4255 this = self._negate_range(this) 4256 4257 if self._match(TokenType.IS): 4258 this = self._parse_is(this) 4259 4260 return this 4261 4262 def _negate_range(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 4263 if not this: 4264 return this 4265 4266 return self.expression(exp.Not, this=this) 4267 4268 def _parse_is(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 4269 index = self._index - 1 4270 negate = self._match(TokenType.NOT) 4271 4272 if self._match_text_seq("DISTINCT", "FROM"): 4273 klass = exp.NullSafeEQ if negate else exp.NullSafeNEQ 4274 return self.expression(klass, this=this, expression=self._parse_bitwise()) 4275 4276 expression = self._parse_null() or self._parse_boolean() 4277 if not expression: 4278 self._retreat(index) 4279 return None 4280 4281 this = self.expression(exp.Is, this=this, expression=expression) 4282 return self.expression(exp.Not, this=this) if negate else this 4283 4284 def _parse_in(self, this: t.Optional[exp.Expression], alias: bool = False) -> exp.In: 4285 unnest = self._parse_unnest(with_alias=False) 4286 if unnest: 4287 this = self.expression(exp.In, this=this, unnest=unnest) 4288 elif self._match_set((TokenType.L_PAREN, TokenType.L_BRACKET)): 4289 matched_l_paren = self._prev.token_type == TokenType.L_PAREN 4290 expressions = self._parse_csv(lambda: self._parse_select_or_expression(alias=alias)) 4291 4292 if len(expressions) == 1 and isinstance(expressions[0], exp.Query): 4293 this = self.expression(exp.In, this=this, query=expressions[0].subquery(copy=False)) 4294 else: 4295 this = self.expression(exp.In, this=this, expressions=expressions) 4296 4297 if matched_l_paren: 4298 self._match_r_paren(this) 4299 elif not self._match(TokenType.R_BRACKET, expression=this): 4300 self.raise_error("Expecting ]") 4301 else: 4302 this = self.expression(exp.In, this=this, field=self._parse_field()) 4303 4304 return this 4305 4306 def _parse_between(self, this: t.Optional[exp.Expression]) -> exp.Between: 4307 low = 
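# Illustrative sketch of _parse_is mapping IS [NOT] DISTINCT FROM onto the
# null-safe comparison nodes (hedged: assumes only the public
# sqlglot.parse_one entrypoint):
#
#     >>> import sqlglot
#     >>> from sqlglot import exp
#     >>> sqlglot.parse_one("SELECT a IS NOT DISTINCT FROM b").find(exp.NullSafeEQ)
#     >>> sqlglot.parse_one("SELECT a IS DISTINCT FROM b").find(exp.NullSafeNEQ)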
self._parse_bitwise() 4308 self._match(TokenType.AND) 4309 high = self._parse_bitwise() 4310 return self.expression(exp.Between, this=this, low=low, high=high) 4311 4312 def _parse_escape(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 4313 if not self._match(TokenType.ESCAPE): 4314 return this 4315 return self.expression(exp.Escape, this=this, expression=self._parse_string()) 4316 4317 def _parse_interval(self, match_interval: bool = True) -> t.Optional[exp.Add | exp.Interval]: 4318 index = self._index 4319 4320 if not self._match(TokenType.INTERVAL) and match_interval: 4321 return None 4322 4323 if self._match(TokenType.STRING, advance=False): 4324 this = self._parse_primary() 4325 else: 4326 this = self._parse_term() 4327 4328 if not this or ( 4329 isinstance(this, exp.Column) 4330 and not this.table 4331 and not this.this.quoted 4332 and this.name.upper() == "IS" 4333 ): 4334 self._retreat(index) 4335 return None 4336 4337 unit = self._parse_function() or ( 4338 not self._match(TokenType.ALIAS, advance=False) 4339 and self._parse_var(any_token=True, upper=True) 4340 ) 4341 4342 # Most dialects support, e.g., the form INTERVAL '5' day, thus we try to parse 4343 # each INTERVAL expression into this canonical form so it's easy to transpile 4344 if this and this.is_number: 4345 this = exp.Literal.string(this.to_py()) 4346 elif this and this.is_string: 4347 parts = exp.INTERVAL_STRING_RE.findall(this.name) 4348 if len(parts) == 1: 4349 if unit: 4350 # Unconsume the eagerly-parsed unit, since the real unit was part of the string 4351 self._retreat(self._index - 1) 4352 4353 this = exp.Literal.string(parts[0][0]) 4354 unit = self.expression(exp.Var, this=parts[0][1].upper()) 4355 4356 if self.INTERVAL_SPANS and self._match_text_seq("TO"): 4357 unit = self.expression( 4358 exp.IntervalSpan, this=unit, expression=self._parse_var(any_token=True, upper=True) 4359 ) 4360 4361 interval = self.expression(exp.Interval, this=this, unit=unit) 4362 4363 index = self._index 4364 self._match(TokenType.PLUS) 4365 4366 # Convert INTERVAL 'val_1' unit_1 [+] ... 
[+] 'val_n' unit_n into a sum of intervals 4367 if self._match_set((TokenType.STRING, TokenType.NUMBER), advance=False): 4368 return self.expression( 4369 exp.Add, this=interval, expression=self._parse_interval(match_interval=False) 4370 ) 4371 4372 self._retreat(index) 4373 return interval 4374 4375 def _parse_bitwise(self) -> t.Optional[exp.Expression]: 4376 this = self._parse_term() 4377 4378 while True: 4379 if self._match_set(self.BITWISE): 4380 this = self.expression( 4381 self.BITWISE[self._prev.token_type], 4382 this=this, 4383 expression=self._parse_term(), 4384 ) 4385 elif self.dialect.DPIPE_IS_STRING_CONCAT and self._match(TokenType.DPIPE): 4386 this = self.expression( 4387 exp.DPipe, 4388 this=this, 4389 expression=self._parse_term(), 4390 safe=not self.dialect.STRICT_STRING_CONCAT, 4391 ) 4392 elif self._match(TokenType.DQMARK): 4393 this = self.expression(exp.Coalesce, this=this, expressions=self._parse_term()) 4394 elif self._match_pair(TokenType.LT, TokenType.LT): 4395 this = self.expression( 4396 exp.BitwiseLeftShift, this=this, expression=self._parse_term() 4397 ) 4398 elif self._match_pair(TokenType.GT, TokenType.GT): 4399 this = self.expression( 4400 exp.BitwiseRightShift, this=this, expression=self._parse_term() 4401 ) 4402 else: 4403 break 4404 4405 return this 4406 4407 def _parse_term(self) -> t.Optional[exp.Expression]: 4408 this = self._parse_factor() 4409 4410 while self._match_set(self.TERM): 4411 klass = self.TERM[self._prev.token_type] 4412 comments = self._prev_comments 4413 expression = self._parse_factor() 4414 4415 this = self.expression(klass, this=this, comments=comments, expression=expression) 4416 4417 if isinstance(this, exp.Collate): 4418 expr = this.expression 4419 4420 # Preserve collations such as pg_catalog."default" (Postgres) as columns, otherwise 4421 # fallback to Identifier / Var 4422 if isinstance(expr, exp.Column) and len(expr.parts) == 1: 4423 ident = expr.this 4424 if isinstance(ident, exp.Identifier): 4425 this.set("expression", ident if ident.quoted else exp.var(ident.name)) 4426 4427 return this 4428 4429 def _parse_factor(self) -> t.Optional[exp.Expression]: 4430 parse_method = self._parse_exponent if self.EXPONENT else self._parse_unary 4431 this = parse_method() 4432 4433 while self._match_set(self.FACTOR): 4434 klass = self.FACTOR[self._prev.token_type] 4435 comments = self._prev_comments 4436 expression = parse_method() 4437 4438 if not expression and klass is exp.IntDiv and self._prev.text.isalpha(): 4439 self._retreat(self._index - 1) 4440 return this 4441 4442 this = self.expression(klass, this=this, comments=comments, expression=expression) 4443 4444 if isinstance(this, exp.Div): 4445 this.args["typed"] = self.dialect.TYPED_DIVISION 4446 this.args["safe"] = self.dialect.SAFE_DIVISION 4447 4448 return this 4449 4450 def _parse_exponent(self) -> t.Optional[exp.Expression]: 4451 return self._parse_tokens(self._parse_unary, self.EXPONENT) 4452 4453 def _parse_unary(self) -> t.Optional[exp.Expression]: 4454 if self._match_set(self.UNARY_PARSERS): 4455 return self.UNARY_PARSERS[self._prev.token_type](self) 4456 return self._parse_at_time_zone(self._parse_type()) 4457 4458 def _parse_type( 4459 self, parse_interval: bool = True, fallback_to_identifier: bool = False 4460 ) -> t.Optional[exp.Expression]: 4461 interval = parse_interval and self._parse_interval() 4462 if interval: 4463 return interval 4464 4465 index = self._index 4466 data_type = self._parse_types(check_func=True, allow_identifiers=False) 4467 4468 # parse_types() 
returns a Cast if we parsed BQ's inline constructor <type>(<values>) e.g. 4469 # STRUCT<a INT, b STRING>(1, 'foo'), which is canonicalized to CAST(<values> AS <type>) 4470 if isinstance(data_type, exp.Cast): 4471 # This constructor can contain ops directly after it, for instance struct unnesting: 4472 # STRUCT<a INT, b STRING>(1, 'foo').* --> CAST(STRUCT(1, 'foo') AS STRUCT<a INT, b STRING>).* 4473 return self._parse_column_ops(data_type) 4474 4475 if data_type: 4476 index2 = self._index 4477 this = self._parse_primary() 4478 4479 if isinstance(this, exp.Literal): 4480 parser = self.TYPE_LITERAL_PARSERS.get(data_type.this) 4481 if parser: 4482 return parser(self, this, data_type) 4483 4484 return self.expression(exp.Cast, this=this, to=data_type) 4485 4486 # The expressions arg gets set by the parser when we have something like DECIMAL(38, 0) 4487 # in the input SQL. In that case, we'll produce these tokens: DECIMAL ( 38 , 0 ) 4488 # 4489 # If the index difference here is greater than 1, that means the parser itself must have 4490 # consumed additional tokens such as the DECIMAL scale and precision in the above example. 4491 # 4492 # If it's not greater than 1, then it must be 1, because we've consumed at least the type 4493 # keyword, meaning that the expressions arg of the DataType must have gotten set by a 4494 # callable in the TYPE_CONVERTERS mapping. For example, Snowflake converts DECIMAL to 4495 # DECIMAL(38, 0) in order to facilitate the data type's transpilation. 4496 # 4497 # In these cases, we don't really want to return the converted type, but instead retreat 4498 # and try to parse a Column or Identifier in the section below. 4499 if data_type.expressions and index2 - index > 1: 4500 self._retreat(index2) 4501 return self._parse_column_ops(data_type) 4502 4503 self._retreat(index) 4504 4505 if fallback_to_identifier: 4506 return self._parse_id_var() 4507 4508 this = self._parse_column() 4509 return this and self._parse_column_ops(this) 4510 4511 def _parse_type_size(self) -> t.Optional[exp.DataTypeParam]: 4512 this = self._parse_type() 4513 if not this: 4514 return None 4515 4516 if isinstance(this, exp.Column) and not this.table: 4517 this = exp.var(this.name.upper()) 4518 4519 return self.expression( 4520 exp.DataTypeParam, this=this, expression=self._parse_var(any_token=True) 4521 ) 4522 4523 def _parse_types( 4524 self, check_func: bool = False, schema: bool = False, allow_identifiers: bool = True 4525 ) -> t.Optional[exp.Expression]: 4526 index = self._index 4527 4528 this: t.Optional[exp.Expression] = None 4529 prefix = self._match_text_seq("SYSUDTLIB", ".") 4530 4531 if not self._match_set(self.TYPE_TOKENS): 4532 identifier = allow_identifiers and self._parse_id_var( 4533 any_token=False, tokens=(TokenType.VAR,) 4534 ) 4535 if isinstance(identifier, exp.Identifier): 4536 tokens = self.dialect.tokenize(identifier.sql(dialect=self.dialect)) 4537 4538 if len(tokens) != 1: 4539 self.raise_error("Unexpected identifier", self._prev) 4540 4541 if tokens[0].token_type in self.TYPE_TOKENS: 4542 self._prev = tokens[0] 4543 elif self.dialect.SUPPORTS_USER_DEFINED_TYPES: 4544 type_name = identifier.name 4545 4546 while self._match(TokenType.DOT): 4547 type_name = f"{type_name}.{self._advance_any() and self._prev.text}" 4548 4549 this = exp.DataType.build(type_name, udt=True) 4550 else: 4551 self._retreat(self._index - 1) 4552 return None 4553 else: 4554 return None 4555 4556 type_token = self._prev.token_type 4557 4558 if type_token == TokenType.PSEUDO_TYPE: 4559 return
self.expression(exp.PseudoType, this=self._prev.text.upper()) 4560 4561 if type_token == TokenType.OBJECT_IDENTIFIER: 4562 return self.expression(exp.ObjectIdentifier, this=self._prev.text.upper()) 4563 4564 # https://materialize.com/docs/sql/types/map/ 4565 if type_token == TokenType.MAP and self._match(TokenType.L_BRACKET): 4566 key_type = self._parse_types( 4567 check_func=check_func, schema=schema, allow_identifiers=allow_identifiers 4568 ) 4569 if not self._match(TokenType.FARROW): 4570 self._retreat(index) 4571 return None 4572 4573 value_type = self._parse_types( 4574 check_func=check_func, schema=schema, allow_identifiers=allow_identifiers 4575 ) 4576 if not self._match(TokenType.R_BRACKET): 4577 self._retreat(index) 4578 return None 4579 4580 return exp.DataType( 4581 this=exp.DataType.Type.MAP, 4582 expressions=[key_type, value_type], 4583 nested=True, 4584 prefix=prefix, 4585 ) 4586 4587 nested = type_token in self.NESTED_TYPE_TOKENS 4588 is_struct = type_token in self.STRUCT_TYPE_TOKENS 4589 is_aggregate = type_token in self.AGGREGATE_TYPE_TOKENS 4590 expressions = None 4591 maybe_func = False 4592 4593 if self._match(TokenType.L_PAREN): 4594 if is_struct: 4595 expressions = self._parse_csv(lambda: self._parse_struct_types(type_required=True)) 4596 elif nested: 4597 expressions = self._parse_csv( 4598 lambda: self._parse_types( 4599 check_func=check_func, schema=schema, allow_identifiers=allow_identifiers 4600 ) 4601 ) 4602 elif type_token in self.ENUM_TYPE_TOKENS: 4603 expressions = self._parse_csv(self._parse_equality) 4604 elif is_aggregate: 4605 func_or_ident = self._parse_function(anonymous=True) or self._parse_id_var( 4606 any_token=False, tokens=(TokenType.VAR,) 4607 ) 4608 if not func_or_ident or not self._match(TokenType.COMMA): 4609 return None 4610 expressions = self._parse_csv( 4611 lambda: self._parse_types( 4612 check_func=check_func, schema=schema, allow_identifiers=allow_identifiers 4613 ) 4614 ) 4615 expressions.insert(0, func_or_ident) 4616 else: 4617 expressions = self._parse_csv(self._parse_type_size) 4618 4619 # https://docs.snowflake.com/en/sql-reference/data-types-vector 4620 if type_token == TokenType.VECTOR and len(expressions) == 2: 4621 expressions[0] = exp.DataType.build(expressions[0].name, dialect=self.dialect) 4622 4623 if not expressions or not self._match(TokenType.R_PAREN): 4624 self._retreat(index) 4625 return None 4626 4627 maybe_func = True 4628 4629 values: t.Optional[t.List[exp.Expression]] = None 4630 4631 if nested and self._match(TokenType.LT): 4632 if is_struct: 4633 expressions = self._parse_csv(lambda: self._parse_struct_types(type_required=True)) 4634 else: 4635 expressions = self._parse_csv( 4636 lambda: self._parse_types( 4637 check_func=check_func, schema=schema, allow_identifiers=allow_identifiers 4638 ) 4639 ) 4640 4641 if not self._match(TokenType.GT): 4642 self.raise_error("Expecting >") 4643 4644 if self._match_set((TokenType.L_BRACKET, TokenType.L_PAREN)): 4645 values = self._parse_csv(self._parse_assignment) 4646 self._match_set((TokenType.R_BRACKET, TokenType.R_PAREN)) 4647 4648 if type_token in self.TIMESTAMPS: 4649 if self._match_text_seq("WITH", "TIME", "ZONE"): 4650 maybe_func = False 4651 tz_type = ( 4652 exp.DataType.Type.TIMETZ 4653 if type_token in self.TIMES 4654 else exp.DataType.Type.TIMESTAMPTZ 4655 ) 4656 this = exp.DataType(this=tz_type, expressions=expressions) 4657 elif self._match_text_seq("WITH", "LOCAL", "TIME", "ZONE"): 4658 maybe_func = False 4659 this = 
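# Illustrative sketch of _parse_types consuming type parameters such as
# precision and scale into DataTypeParam expressions (hedged: assumes only the
# public sqlglot.parse_one entrypoint and the default dialect):
#
#     >>> import sqlglot
#     >>> from sqlglot import exp
#     >>> dt = sqlglot.parse_one("SELECT CAST(x AS DECIMAL(38, 0))").find(exp.DataType)
#     >>> [p.name for p in dt.expressions]
#     ['38', '0']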
exp.DataType(this=exp.DataType.Type.TIMESTAMPLTZ, expressions=expressions) 4660 elif self._match_text_seq("WITHOUT", "TIME", "ZONE"): 4661 maybe_func = False 4662 elif type_token == TokenType.INTERVAL: 4663 unit = self._parse_var(upper=True) 4664 if unit: 4665 if self._match_text_seq("TO"): 4666 unit = exp.IntervalSpan(this=unit, expression=self._parse_var(upper=True)) 4667 4668 this = self.expression(exp.DataType, this=self.expression(exp.Interval, unit=unit)) 4669 else: 4670 this = self.expression(exp.DataType, this=exp.DataType.Type.INTERVAL) 4671 4672 if maybe_func and check_func: 4673 index2 = self._index 4674 peek = self._parse_string() 4675 4676 if not peek: 4677 self._retreat(index) 4678 return None 4679 4680 self._retreat(index2) 4681 4682 if not this: 4683 if self._match_text_seq("UNSIGNED"): 4684 unsigned_type_token = self.SIGNED_TO_UNSIGNED_TYPE_TOKEN.get(type_token) 4685 if not unsigned_type_token: 4686 self.raise_error(f"Cannot convert {type_token.value} to unsigned.") 4687 4688 type_token = unsigned_type_token or type_token 4689 4690 this = exp.DataType( 4691 this=exp.DataType.Type[type_token.value], 4692 expressions=expressions, 4693 nested=nested, 4694 prefix=prefix, 4695 ) 4696 4697 # Empty arrays/structs are allowed 4698 if values is not None: 4699 cls = exp.Struct if is_struct else exp.Array 4700 this = exp.cast(cls(expressions=values), this, copy=False) 4701 4702 elif expressions: 4703 this.set("expressions", expressions) 4704 4705 # https://materialize.com/docs/sql/types/list/#type-name 4706 while self._match(TokenType.LIST): 4707 this = exp.DataType(this=exp.DataType.Type.LIST, expressions=[this], nested=True) 4708 4709 index = self._index 4710 4711 # Postgres supports the INT ARRAY[3] syntax as a synonym for INT[3] 4712 matched_array = self._match(TokenType.ARRAY) 4713 4714 while self._curr: 4715 datatype_token = self._prev.token_type 4716 matched_l_bracket = self._match(TokenType.L_BRACKET) 4717 if not matched_l_bracket and not matched_array: 4718 break 4719 4720 matched_array = False 4721 values = self._parse_csv(self._parse_assignment) or None 4722 if ( 4723 values 4724 and not schema 4725 and ( 4726 not self.dialect.SUPPORTS_FIXED_SIZE_ARRAYS or datatype_token == TokenType.ARRAY 4727 ) 4728 ): 4729 # Retreating here means that we should not parse the following values as part of the data type, e.g. in DuckDB 4730 # ARRAY[1] should retreat and instead be parsed into exp.Array in contrast to INT[x][y] which denotes a fixed-size array data type 4731 self._retreat(index) 4732 break 4733 4734 this = exp.DataType( 4735 this=exp.DataType.Type.ARRAY, expressions=[this], values=values, nested=True 4736 ) 4737 self._match(TokenType.R_BRACKET) 4738 4739 if self.TYPE_CONVERTERS and isinstance(this.this, exp.DataType.Type): 4740 converter = self.TYPE_CONVERTERS.get(this.this) 4741 if converter: 4742 this = converter(t.cast(exp.DataType, this)) 4743 4744 return this 4745 4746 def _parse_struct_types(self, type_required: bool = False) -> t.Optional[exp.Expression]: 4747 index = self._index 4748 4749 if ( 4750 self._curr 4751 and self._next 4752 and self._curr.token_type in self.TYPE_TOKENS 4753 and self._next.token_type in self.TYPE_TOKENS 4754 ): 4755 # Takes care of special cases like `STRUCT<list ARRAY<...>>` where the identifier is also a 4756 # type token. 
Without this, the list will be parsed as a type and we'll eventually crash 4757 this = self._parse_id_var() 4758 else: 4759 this = ( 4760 self._parse_type(parse_interval=False, fallback_to_identifier=True) 4761 or self._parse_id_var() 4762 ) 4763 4764 self._match(TokenType.COLON) 4765 4766 if ( 4767 type_required 4768 and not isinstance(this, exp.DataType) 4769 and not self._match_set(self.TYPE_TOKENS, advance=False) 4770 ): 4771 self._retreat(index) 4772 return self._parse_types() 4773 4774 return self._parse_column_def(this) 4775 4776 def _parse_at_time_zone(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 4777 if not self._match_text_seq("AT", "TIME", "ZONE"): 4778 return this 4779 return self.expression(exp.AtTimeZone, this=this, zone=self._parse_unary()) 4780 4781 def _parse_column(self) -> t.Optional[exp.Expression]: 4782 this = self._parse_column_reference() 4783 column = self._parse_column_ops(this) if this else self._parse_bracket(this) 4784 4785 if self.dialect.SUPPORTS_COLUMN_JOIN_MARKS and column: 4786 column.set("join_mark", self._match(TokenType.JOIN_MARKER)) 4787 4788 return column 4789 4790 def _parse_column_reference(self) -> t.Optional[exp.Expression]: 4791 this = self._parse_field() 4792 if ( 4793 not this 4794 and self._match(TokenType.VALUES, advance=False) 4795 and self.VALUES_FOLLOWED_BY_PAREN 4796 and (not self._next or self._next.token_type != TokenType.L_PAREN) 4797 ): 4798 this = self._parse_id_var() 4799 4800 if isinstance(this, exp.Identifier): 4801 # We bubble up comments from the Identifier to the Column 4802 this = self.expression(exp.Column, comments=this.pop_comments(), this=this) 4803 4804 return this 4805 4806 def _parse_colon_as_variant_extract( 4807 self, this: t.Optional[exp.Expression] 4808 ) -> t.Optional[exp.Expression]: 4809 casts = [] 4810 json_path = [] 4811 4812 while self._match(TokenType.COLON): 4813 start_index = self._index 4814 4815 # Snowflake allows reserved keywords as json keys but advance_any() excludes TokenType.SELECT from any_tokens=True 4816 path = self._parse_column_ops( 4817 self._parse_field(any_token=True, tokens=(TokenType.SELECT,)) 4818 ) 4819 4820 # The cast :: operator has a lower precedence than the extraction operator :, so 4821 # we rearrange the AST appropriately to avoid casting the JSON path 4822 while isinstance(path, exp.Cast): 4823 casts.append(path.to) 4824 path = path.this 4825 4826 if casts: 4827 dcolon_offset = next( 4828 i 4829 for i, t in enumerate(self._tokens[start_index:]) 4830 if t.token_type == TokenType.DCOLON 4831 ) 4832 end_token = self._tokens[start_index + dcolon_offset - 1] 4833 else: 4834 end_token = self._prev 4835 4836 if path: 4837 json_path.append(self._find_sql(self._tokens[start_index], end_token)) 4838 4839 # The VARIANT extract in Snowflake/Databricks is parsed as a JSONExtract; Snowflake uses the json_path in GET_PATH() while 4840 # Databricks transforms it back to the colon/dot notation 4841 if json_path: 4842 this = self.expression( 4843 exp.JSONExtract, 4844 this=this, 4845 expression=self.dialect.to_json_path(exp.Literal.string(".".join(json_path))), 4846 variant_extract=True, 4847 ) 4848 4849 while casts: 4850 this = self.expression(exp.Cast, this=this, to=casts.pop()) 4851 4852 return this 4853 4854 def _parse_dcolon(self) -> t.Optional[exp.Expression]: 4855 return self._parse_types() 4856 4857 def _parse_column_ops(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 4858 this = self._parse_bracket(this) 4859 4860 while 
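# Illustrative sketch of _parse_colon_as_variant_extract rearranging a cast
# above the JSON path extraction (hedged: assumes only the public
# sqlglot.parse_one entrypoint and Snowflake's colon operator):
#
#     >>> import sqlglot
#     >>> from sqlglot import exp
#     >>> e = sqlglot.parse_one("SELECT col:a.b::INT FROM t", read="snowflake")
#     >>> cast = e.find(exp.Cast)  # the :: is applied after the extraction
#     >>> isinstance(cast.this, exp.JSONExtract), cast.this.args.get("variant_extract")
#     (True, True)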
self._match_set(self.COLUMN_OPERATORS): 4861 op_token = self._prev.token_type 4862 op = self.COLUMN_OPERATORS.get(op_token) 4863 4864 if op_token == TokenType.DCOLON: 4865 field = self._parse_dcolon() 4866 if not field: 4867 self.raise_error("Expected type") 4868 elif op and self._curr: 4869 field = self._parse_column_reference() 4870 else: 4871 field = self._parse_field(any_token=True, anonymous_func=True) 4872 4873 if isinstance(field, exp.Func) and this: 4874 # bigquery allows function calls like x.y.count(...) 4875 # SAFE.SUBSTR(...) 4876 # https://cloud.google.com/bigquery/docs/reference/standard-sql/functions-reference#function_call_rules 4877 this = exp.replace_tree( 4878 this, 4879 lambda n: ( 4880 self.expression(exp.Dot, this=n.args.get("table"), expression=n.this) 4881 if n.table 4882 else n.this 4883 ) 4884 if isinstance(n, exp.Column) 4885 else n, 4886 ) 4887 4888 if op: 4889 this = op(self, this, field) 4890 elif isinstance(this, exp.Column) and not this.args.get("catalog"): 4891 this = self.expression( 4892 exp.Column, 4893 this=field, 4894 table=this.this, 4895 db=this.args.get("table"), 4896 catalog=this.args.get("db"), 4897 ) 4898 else: 4899 this = self.expression(exp.Dot, this=this, expression=field) 4900 4901 this = self._parse_bracket(this) 4902 4903 return self._parse_colon_as_variant_extract(this) if self.COLON_IS_VARIANT_EXTRACT else this 4904 4905 def _parse_primary(self) -> t.Optional[exp.Expression]: 4906 if self._match_set(self.PRIMARY_PARSERS): 4907 token_type = self._prev.token_type 4908 primary = self.PRIMARY_PARSERS[token_type](self, self._prev) 4909 4910 if token_type == TokenType.STRING: 4911 expressions = [primary] 4912 while self._match(TokenType.STRING): 4913 expressions.append(exp.Literal.string(self._prev.text)) 4914 4915 if len(expressions) > 1: 4916 return self.expression(exp.Concat, expressions=expressions) 4917 4918 return primary 4919 4920 if self._match_pair(TokenType.DOT, TokenType.NUMBER): 4921 return exp.Literal.number(f"0.{self._prev.text}") 4922 4923 if self._match(TokenType.L_PAREN): 4924 comments = self._prev_comments 4925 query = self._parse_select() 4926 4927 if query: 4928 expressions = [query] 4929 else: 4930 expressions = self._parse_expressions() 4931 4932 this = self._parse_query_modifiers(seq_get(expressions, 0)) 4933 4934 if not this and self._match(TokenType.R_PAREN, advance=False): 4935 this = self.expression(exp.Tuple) 4936 elif isinstance(this, exp.UNWRAPPED_QUERIES): 4937 this = self._parse_subquery(this=this, parse_alias=False) 4938 elif isinstance(this, exp.Subquery): 4939 this = self._parse_subquery( 4940 this=self._parse_set_operations(this), parse_alias=False 4941 ) 4942 elif len(expressions) > 1 or self._prev.token_type == TokenType.COMMA: 4943 this = self.expression(exp.Tuple, expressions=expressions) 4944 else: 4945 this = self.expression(exp.Paren, this=this) 4946 4947 if this: 4948 this.add_comments(comments) 4949 4950 self._match_r_paren(expression=this) 4951 return this 4952 4953 return None 4954 4955 def _parse_field( 4956 self, 4957 any_token: bool = False, 4958 tokens: t.Optional[t.Collection[TokenType]] = None, 4959 anonymous_func: bool = False, 4960 ) -> t.Optional[exp.Expression]: 4961 if anonymous_func: 4962 field = ( 4963 self._parse_function(anonymous=anonymous_func, any_token=any_token) 4964 or self._parse_primary() 4965 ) 4966 else: 4967 field = self._parse_primary() or self._parse_function( 4968 anonymous=anonymous_func, any_token=any_token 4969 ) 4970 return field or 
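# Illustrative sketch of the dotted function-call rewrite referenced above,
# e.g. BigQuery's SAFE.SUBSTR (hedged: assumes only the public
# sqlglot.parse_one entrypoint):
#
#     >>> import sqlglot
#     >>> from sqlglot import exp
#     >>> e = sqlglot.parse_one("SELECT SAFE.SUBSTR('foo', 1)", read="bigquery")
#     >>> e.find(exp.Dot)  # Dot(this=SAFE, expression=<function>) per _parse_column_ops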
self._parse_id_var(any_token=any_token, tokens=tokens) 4971 4972 def _parse_function( 4973 self, 4974 functions: t.Optional[t.Dict[str, t.Callable]] = None, 4975 anonymous: bool = False, 4976 optional_parens: bool = True, 4977 any_token: bool = False, 4978 ) -> t.Optional[exp.Expression]: 4979 # This allows us to also parse {fn <function>} syntax (Snowflake, MySQL support this) 4980 # See: https://community.snowflake.com/s/article/SQL-Escape-Sequences 4981 fn_syntax = False 4982 if ( 4983 self._match(TokenType.L_BRACE, advance=False) 4984 and self._next 4985 and self._next.text.upper() == "FN" 4986 ): 4987 self._advance(2) 4988 fn_syntax = True 4989 4990 func = self._parse_function_call( 4991 functions=functions, 4992 anonymous=anonymous, 4993 optional_parens=optional_parens, 4994 any_token=any_token, 4995 ) 4996 4997 if fn_syntax: 4998 self._match(TokenType.R_BRACE) 4999 5000 return func 5001 5002 def _parse_function_call( 5003 self, 5004 functions: t.Optional[t.Dict[str, t.Callable]] = None, 5005 anonymous: bool = False, 5006 optional_parens: bool = True, 5007 any_token: bool = False, 5008 ) -> t.Optional[exp.Expression]: 5009 if not self._curr: 5010 return None 5011 5012 comments = self._curr.comments 5013 token_type = self._curr.token_type 5014 this = self._curr.text 5015 upper = this.upper() 5016 5017 parser = self.NO_PAREN_FUNCTION_PARSERS.get(upper) 5018 if optional_parens and parser and token_type not in self.INVALID_FUNC_NAME_TOKENS: 5019 self._advance() 5020 return self._parse_window(parser(self)) 5021 5022 if not self._next or self._next.token_type != TokenType.L_PAREN: 5023 if optional_parens and token_type in self.NO_PAREN_FUNCTIONS: 5024 self._advance() 5025 return self.expression(self.NO_PAREN_FUNCTIONS[token_type]) 5026 5027 return None 5028 5029 if any_token: 5030 if token_type in self.RESERVED_TOKENS: 5031 return None 5032 elif token_type not in self.FUNC_TOKENS: 5033 return None 5034 5035 self._advance(2) 5036 5037 parser = self.FUNCTION_PARSERS.get(upper) 5038 if parser and not anonymous: 5039 this = parser(self) 5040 else: 5041 subquery_predicate = self.SUBQUERY_PREDICATES.get(token_type) 5042 5043 if subquery_predicate and self._curr.token_type in (TokenType.SELECT, TokenType.WITH): 5044 this = self.expression(subquery_predicate, this=self._parse_select()) 5045 self._match_r_paren() 5046 return this 5047 5048 if functions is None: 5049 functions = self.FUNCTIONS 5050 5051 function = functions.get(upper) 5052 5053 alias = upper in self.FUNCTIONS_WITH_ALIASED_ARGS 5054 args = self._parse_csv(lambda: self._parse_lambda(alias=alias)) 5055 5056 if alias: 5057 args = self._kv_to_prop_eq(args) 5058 5059 if function and not anonymous: 5060 if "dialect" in function.__code__.co_varnames: 5061 func = function(args, dialect=self.dialect) 5062 else: 5063 func = function(args) 5064 5065 func = self.validate_expression(func, args) 5066 if not self.dialect.NORMALIZE_FUNCTIONS: 5067 func.meta["name"] = this 5068 5069 this = func 5070 else: 5071 if token_type == TokenType.IDENTIFIER: 5072 this = exp.Identifier(this=this, quoted=True) 5073 this = self.expression(exp.Anonymous, this=this, expressions=args) 5074 5075 if isinstance(this, exp.Expression): 5076 this.add_comments(comments) 5077 5078 self._match_r_paren(this) 5079 return self._parse_window(this) 5080 5081 def _kv_to_prop_eq(self, expressions: t.List[exp.Expression]) -> t.List[exp.Expression]: 5082 transformed = [] 5083 5084 for e in expressions: 5085 if isinstance(e, self.KEY_VALUE_DEFINITIONS): 5086 if isinstance(e, 
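# Illustrative sketch of the {fn <function>} escape handling above (hedged:
# assumes only the public sqlglot.parse_one entrypoint; the comment in
# _parse_function cites Snowflake and MySQL support):
#
#     >>> import sqlglot
#     >>> sqlglot.parse_one("SELECT {fn CONCAT('a', 'b')}", read="mysql").sql()
#     >>> # the {fn ...} wrapper is consumed, leaving the plain CONCAT('a', 'b') call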
    def _kv_to_prop_eq(self, expressions: t.List[exp.Expression]) -> t.List[exp.Expression]:
        transformed = []

        for e in expressions:
            if isinstance(e, self.KEY_VALUE_DEFINITIONS):
                if isinstance(e, exp.Alias):
                    e = self.expression(exp.PropertyEQ, this=e.args.get("alias"), expression=e.this)

                if not isinstance(e, exp.PropertyEQ):
                    e = self.expression(
                        exp.PropertyEQ, this=exp.to_identifier(e.this.name), expression=e.expression
                    )

                if isinstance(e.this, exp.Column):
                    e.this.replace(e.this.this)

            transformed.append(e)

        return transformed

    def _parse_function_parameter(self) -> t.Optional[exp.Expression]:
        return self._parse_column_def(self._parse_id_var())

    def _parse_user_defined_function(
        self, kind: t.Optional[TokenType] = None
    ) -> t.Optional[exp.Expression]:
        this = self._parse_id_var()

        while self._match(TokenType.DOT):
            this = self.expression(exp.Dot, this=this, expression=self._parse_id_var())

        if not self._match(TokenType.L_PAREN):
            return this

        expressions = self._parse_csv(self._parse_function_parameter)
        self._match_r_paren()
        return self.expression(
            exp.UserDefinedFunction, this=this, expressions=expressions, wrapped=True
        )

    def _parse_introducer(self, token: Token) -> exp.Introducer | exp.Identifier:
        literal = self._parse_primary()
        if literal:
            return self.expression(exp.Introducer, this=token.text, expression=literal)

        return self.expression(exp.Identifier, this=token.text)

    def _parse_session_parameter(self) -> exp.SessionParameter:
        kind = None
        this = self._parse_id_var() or self._parse_primary()

        if this and self._match(TokenType.DOT):
            kind = this.name
            this = self._parse_var() or self._parse_primary()

        return self.expression(exp.SessionParameter, this=this, kind=kind)

    def _parse_lambda_arg(self) -> t.Optional[exp.Expression]:
        return self._parse_id_var()

    def _parse_lambda(self, alias: bool = False) -> t.Optional[exp.Expression]:
        index = self._index

        if self._match(TokenType.L_PAREN):
            expressions = t.cast(
                t.List[t.Optional[exp.Expression]], self._parse_csv(self._parse_lambda_arg)
            )

            if not self._match(TokenType.R_PAREN):
                self._retreat(index)
        else:
            expressions = [self._parse_lambda_arg()]

        if self._match_set(self.LAMBDAS):
            return self.LAMBDAS[self._prev.token_type](self, expressions)

        self._retreat(index)

        this: t.Optional[exp.Expression]

        if self._match(TokenType.DISTINCT):
            this = self.expression(
                exp.Distinct, expressions=self._parse_csv(self._parse_assignment)
            )
        else:
            this = self._parse_select_or_expression(alias=alias)

        return self._parse_limit(
            self._parse_order(self._parse_having_max(self._parse_respect_or_ignore_nulls(this)))
        )
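    # Illustrative sketch (approximate): LAMBDAS is keyed on the arrow token, so both
    # bare and parenthesized argument lists are handled, e.g.
    #
    #     parse_one("SELECT FILTER(arr, x -> x > 0)", read="spark")
    #     # the `x -> x > 0` piece parses into an exp.Lambda via self.LAMBDAS;
    #     # a wrapped form like `(k, v) -> k + v` takes the L_PAREN branch above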
    def _parse_schema(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]:
        index = self._index
        if not self._match(TokenType.L_PAREN):
            return this

        # Disambiguate between schema and subquery/CTE, e.g. in INSERT INTO table (<expr>),
        # expr can be of both types
        if self._match_set(self.SELECT_START_TOKENS):
            self._retreat(index)
            return this
        args = self._parse_csv(lambda: self._parse_constraint() or self._parse_field_def())
        self._match_r_paren()
        return self.expression(exp.Schema, this=this, expressions=args)

    def _parse_field_def(self) -> t.Optional[exp.Expression]:
        return self._parse_column_def(self._parse_field(any_token=True))

    def _parse_column_def(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        # column defs are not really columns, they're identifiers
        if isinstance(this, exp.Column):
            this = this.this

        kind = self._parse_types(schema=True)

        if self._match_text_seq("FOR", "ORDINALITY"):
            return self.expression(exp.ColumnDef, this=this, ordinality=True)

        constraints: t.List[exp.Expression] = []

        if (not kind and self._match(TokenType.ALIAS)) or self._match_texts(
            ("ALIAS", "MATERIALIZED")
        ):
            persisted = self._prev.text.upper() == "MATERIALIZED"
            constraints.append(
                self.expression(
                    exp.ComputedColumnConstraint,
                    this=self._parse_assignment(),
                    persisted=persisted or self._match_text_seq("PERSISTED"),
                    not_null=self._match_pair(TokenType.NOT, TokenType.NULL),
                )
            )
        elif kind and self._match_pair(TokenType.ALIAS, TokenType.L_PAREN, advance=False):
            self._match(TokenType.ALIAS)
            constraints.append(
                self.expression(exp.TransformColumnConstraint, this=self._parse_field())
            )

        while True:
            constraint = self._parse_column_constraint()
            if not constraint:
                break
            constraints.append(constraint)

        if not kind and not constraints:
            return this

        return self.expression(exp.ColumnDef, this=this, kind=kind, constraints=constraints)
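    # Illustrative sketch (approximate): a column definition such as
    #
    #     id INT NOT NULL DEFAULT 0
    #
    # parses into exp.ColumnDef(this=Identifier(id), kind=DataType(INT), constraints=[...]),
    # where each NOT NULL / DEFAULT piece is an exp.ColumnConstraint produced by
    # _parse_column_constraint in the loop above.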
    def _parse_auto_increment(
        self,
    ) -> exp.GeneratedAsIdentityColumnConstraint | exp.AutoIncrementColumnConstraint:
        start = None
        increment = None

        if self._match(TokenType.L_PAREN, advance=False):
            args = self._parse_wrapped_csv(self._parse_bitwise)
            start = seq_get(args, 0)
            increment = seq_get(args, 1)
        elif self._match_text_seq("START"):
            start = self._parse_bitwise()
            self._match_text_seq("INCREMENT")
            increment = self._parse_bitwise()

        if start and increment:
            return exp.GeneratedAsIdentityColumnConstraint(start=start, increment=increment)

        return exp.AutoIncrementColumnConstraint()

    def _parse_auto_property(self) -> t.Optional[exp.AutoRefreshProperty]:
        if not self._match_text_seq("REFRESH"):
            self._retreat(self._index - 1)
            return None
        return self.expression(exp.AutoRefreshProperty, this=self._parse_var(upper=True))

    def _parse_compress(self) -> exp.CompressColumnConstraint:
        if self._match(TokenType.L_PAREN, advance=False):
            return self.expression(
                exp.CompressColumnConstraint, this=self._parse_wrapped_csv(self._parse_bitwise)
            )

        return self.expression(exp.CompressColumnConstraint, this=self._parse_bitwise())

    def _parse_generated_as_identity(
        self,
    ) -> (
        exp.GeneratedAsIdentityColumnConstraint
        | exp.ComputedColumnConstraint
        | exp.GeneratedAsRowColumnConstraint
    ):
        if self._match_text_seq("BY", "DEFAULT"):
            on_null = self._match_pair(TokenType.ON, TokenType.NULL)
            this = self.expression(
                exp.GeneratedAsIdentityColumnConstraint, this=False, on_null=on_null
            )
        else:
            self._match_text_seq("ALWAYS")
            this = self.expression(exp.GeneratedAsIdentityColumnConstraint, this=True)

        self._match(TokenType.ALIAS)

        if self._match_text_seq("ROW"):
            start = self._match_text_seq("START")
            if not start:
                self._match(TokenType.END)
            hidden = self._match_text_seq("HIDDEN")
            return self.expression(exp.GeneratedAsRowColumnConstraint, start=start, hidden=hidden)

        identity = self._match_text_seq("IDENTITY")

        if self._match(TokenType.L_PAREN):
            if self._match(TokenType.START_WITH):
                this.set("start", self._parse_bitwise())
            if self._match_text_seq("INCREMENT", "BY"):
                this.set("increment", self._parse_bitwise())
            if self._match_text_seq("MINVALUE"):
                this.set("minvalue", self._parse_bitwise())
            if self._match_text_seq("MAXVALUE"):
                this.set("maxvalue", self._parse_bitwise())

            if self._match_text_seq("CYCLE"):
                this.set("cycle", True)
            elif self._match_text_seq("NO", "CYCLE"):
                this.set("cycle", False)

            if not identity:
                this.set("expression", self._parse_range())
            elif not this.args.get("start") and self._match(TokenType.NUMBER, advance=False):
                args = self._parse_csv(self._parse_bitwise)
                this.set("start", seq_get(args, 0))
                this.set("increment", seq_get(args, 1))

            self._match_r_paren()

        return this
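    # Illustrative sketch (approximate): a DDL fragment such as
    #
    #     id INT GENERATED BY DEFAULT AS IDENTITY (START WITH 1 INCREMENT BY 10)
    #
    # yields exp.GeneratedAsIdentityColumnConstraint(this=False, start=1, increment=10);
    # GENERATED ALWAYS sets this=True, and MINVALUE / MAXVALUE / CYCLE fill the
    # corresponding args when present.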
self.raise_error(f"No parser found for schema constraint {constraint}.") 5376 5377 return self.CONSTRAINT_PARSERS[constraint](self) 5378 5379 def _parse_unique_key(self) -> t.Optional[exp.Expression]: 5380 return self._parse_id_var(any_token=False) 5381 5382 def _parse_unique(self) -> exp.UniqueColumnConstraint: 5383 self._match_text_seq("KEY") 5384 return self.expression( 5385 exp.UniqueColumnConstraint, 5386 nulls=self._match_text_seq("NULLS", "NOT", "DISTINCT"), 5387 this=self._parse_schema(self._parse_unique_key()), 5388 index_type=self._match(TokenType.USING) and self._advance_any() and self._prev.text, 5389 on_conflict=self._parse_on_conflict(), 5390 ) 5391 5392 def _parse_key_constraint_options(self) -> t.List[str]: 5393 options = [] 5394 while True: 5395 if not self._curr: 5396 break 5397 5398 if self._match(TokenType.ON): 5399 action = None 5400 on = self._advance_any() and self._prev.text 5401 5402 if self._match_text_seq("NO", "ACTION"): 5403 action = "NO ACTION" 5404 elif self._match_text_seq("CASCADE"): 5405 action = "CASCADE" 5406 elif self._match_text_seq("RESTRICT"): 5407 action = "RESTRICT" 5408 elif self._match_pair(TokenType.SET, TokenType.NULL): 5409 action = "SET NULL" 5410 elif self._match_pair(TokenType.SET, TokenType.DEFAULT): 5411 action = "SET DEFAULT" 5412 else: 5413 self.raise_error("Invalid key constraint") 5414 5415 options.append(f"ON {on} {action}") 5416 else: 5417 var = self._parse_var_from_options( 5418 self.KEY_CONSTRAINT_OPTIONS, raise_unmatched=False 5419 ) 5420 if not var: 5421 break 5422 options.append(var.name) 5423 5424 return options 5425 5426 def _parse_references(self, match: bool = True) -> t.Optional[exp.Reference]: 5427 if match and not self._match(TokenType.REFERENCES): 5428 return None 5429 5430 expressions = None 5431 this = self._parse_table(schema=True) 5432 options = self._parse_key_constraint_options() 5433 return self.expression(exp.Reference, this=this, expressions=expressions, options=options) 5434 5435 def _parse_foreign_key(self) -> exp.ForeignKey: 5436 expressions = self._parse_wrapped_id_vars() 5437 reference = self._parse_references() 5438 options = {} 5439 5440 while self._match(TokenType.ON): 5441 if not self._match_set((TokenType.DELETE, TokenType.UPDATE)): 5442 self.raise_error("Expected DELETE or UPDATE") 5443 5444 kind = self._prev.text.lower() 5445 5446 if self._match_text_seq("NO", "ACTION"): 5447 action = "NO ACTION" 5448 elif self._match(TokenType.SET): 5449 self._match_set((TokenType.NULL, TokenType.DEFAULT)) 5450 action = "SET " + self._prev.text.upper() 5451 else: 5452 self._advance() 5453 action = self._prev.text.upper() 5454 5455 options[kind] = action 5456 5457 return self.expression( 5458 exp.ForeignKey, 5459 expressions=expressions, 5460 reference=reference, 5461 **options, # type: ignore 5462 ) 5463 5464 def _parse_primary_key_part(self) -> t.Optional[exp.Expression]: 5465 return self._parse_field() 5466 5467 def _parse_period_for_system_time(self) -> t.Optional[exp.PeriodForSystemTimeConstraint]: 5468 if not self._match(TokenType.TIMESTAMP_SNAPSHOT): 5469 self._retreat(self._index - 1) 5470 return None 5471 5472 id_vars = self._parse_wrapped_id_vars() 5473 return self.expression( 5474 exp.PeriodForSystemTimeConstraint, 5475 this=seq_get(id_vars, 0), 5476 expression=seq_get(id_vars, 1), 5477 ) 5478 5479 def _parse_primary_key( 5480 self, wrapped_optional: bool = False, in_props: bool = False 5481 ) -> exp.PrimaryKeyColumnConstraint | exp.PrimaryKey: 5482 desc = ( 5483 self._match_set((TokenType.ASC, 
    def _parse_primary_key_part(self) -> t.Optional[exp.Expression]:
        return self._parse_field()

    def _parse_period_for_system_time(self) -> t.Optional[exp.PeriodForSystemTimeConstraint]:
        if not self._match(TokenType.TIMESTAMP_SNAPSHOT):
            self._retreat(self._index - 1)
            return None

        id_vars = self._parse_wrapped_id_vars()
        return self.expression(
            exp.PeriodForSystemTimeConstraint,
            this=seq_get(id_vars, 0),
            expression=seq_get(id_vars, 1),
        )

    def _parse_primary_key(
        self, wrapped_optional: bool = False, in_props: bool = False
    ) -> exp.PrimaryKeyColumnConstraint | exp.PrimaryKey:
        desc = (
            self._match_set((TokenType.ASC, TokenType.DESC))
            and self._prev.token_type == TokenType.DESC
        )

        if not in_props and not self._match(TokenType.L_PAREN, advance=False):
            return self.expression(exp.PrimaryKeyColumnConstraint, desc=desc)

        expressions = self._parse_wrapped_csv(
            self._parse_primary_key_part, optional=wrapped_optional
        )
        options = self._parse_key_constraint_options()
        return self.expression(exp.PrimaryKey, expressions=expressions, options=options)

    def _parse_bracket_key_value(self, is_map: bool = False) -> t.Optional[exp.Expression]:
        return self._parse_slice(self._parse_alias(self._parse_assignment(), explicit=True))

    def _parse_bracket(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]:
        if not self._match_set((TokenType.L_BRACKET, TokenType.L_BRACE)):
            return this

        bracket_kind = self._prev.token_type
        expressions = self._parse_csv(
            lambda: self._parse_bracket_key_value(is_map=bracket_kind == TokenType.L_BRACE)
        )

        if bracket_kind == TokenType.L_BRACKET and not self._match(TokenType.R_BRACKET):
            self.raise_error("Expected ]")
        elif bracket_kind == TokenType.L_BRACE and not self._match(TokenType.R_BRACE):
            self.raise_error("Expected }")

        # https://duckdb.org/docs/sql/data_types/struct.html#creating-structs
        if bracket_kind == TokenType.L_BRACE:
            this = self.expression(exp.Struct, expressions=self._kv_to_prop_eq(expressions))
        elif not this:
            this = build_array_constructor(
                exp.Array, args=expressions, bracket_kind=bracket_kind, dialect=self.dialect
            )
        else:
            constructor_type = self.ARRAY_CONSTRUCTORS.get(this.name.upper())
            if constructor_type:
                return build_array_constructor(
                    constructor_type,
                    args=expressions,
                    bracket_kind=bracket_kind,
                    dialect=self.dialect,
                )

            expressions = apply_index_offset(this, expressions, -self.dialect.INDEX_OFFSET)
            this = self.expression(exp.Bracket, this=this, expressions=expressions)

        self._add_comments(this)
        return self._parse_bracket(this)

    def _parse_slice(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        if self._match(TokenType.COLON):
            return self.expression(exp.Slice, this=this, expression=self._parse_assignment())
        return this
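    # Illustrative sketch (approximate): bracket parsing covers both subscripts and
    # literal constructors, e.g.
    #
    #     parse_one("SELECT {'a': 1}", read="duckdb")   # L_BRACE -> exp.Struct
    #     parse_one("SELECT [1, 2, 3]", read="duckdb")  # bare L_BRACKET -> exp.Array
    #     parse_one("SELECT arr[1]", read="duckdb")     # subscript -> exp.Bracket, with
    #     # the index shifted by the dialect's INDEX_OFFSET via apply_index_offset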
    def _parse_case(self) -> t.Optional[exp.Expression]:
        ifs = []
        default = None

        comments = self._prev_comments
        expression = self._parse_assignment()

        while self._match(TokenType.WHEN):
            this = self._parse_assignment()
            self._match(TokenType.THEN)
            then = self._parse_assignment()
            ifs.append(self.expression(exp.If, this=this, true=then))

        if self._match(TokenType.ELSE):
            default = self._parse_assignment()

        if not self._match(TokenType.END):
            if isinstance(default, exp.Interval) and default.this.sql().upper() == "END":
                default = exp.column("interval")
            else:
                self.raise_error("Expected END after CASE", self._prev)

        return self.expression(
            exp.Case, comments=comments, this=expression, ifs=ifs, default=default
        )

    def _parse_if(self) -> t.Optional[exp.Expression]:
        if self._match(TokenType.L_PAREN):
            args = self._parse_csv(self._parse_assignment)
            this = self.validate_expression(exp.If.from_arg_list(args), args)
            self._match_r_paren()
        else:
            index = self._index - 1

            if self.NO_PAREN_IF_COMMANDS and index == 0:
                return self._parse_as_command(self._prev)

            condition = self._parse_assignment()

            if not condition:
                self._retreat(index)
                return None

            self._match(TokenType.THEN)
            true = self._parse_assignment()
            false = self._parse_assignment() if self._match(TokenType.ELSE) else None
            self._match(TokenType.END)
            this = self.expression(exp.If, this=condition, true=true, false=false)

        return this

    def _parse_next_value_for(self) -> t.Optional[exp.Expression]:
        if not self._match_text_seq("VALUE", "FOR"):
            self._retreat(self._index - 1)
            return None

        return self.expression(
            exp.NextValueFor,
            this=self._parse_column(),
            order=self._match(TokenType.OVER) and self._parse_wrapped(self._parse_order),
        )

    def _parse_extract(self) -> exp.Extract | exp.Anonymous:
        this = self._parse_function() or self._parse_var_or_string(upper=True)

        if self._match(TokenType.FROM):
            return self.expression(exp.Extract, this=this, expression=self._parse_bitwise())

        if not self._match(TokenType.COMMA):
            self.raise_error("Expected FROM or comma after EXTRACT", self._prev)

        return self.expression(exp.Extract, this=this, expression=self._parse_bitwise())

    def _parse_gap_fill(self) -> exp.GapFill:
        self._match(TokenType.TABLE)
        this = self._parse_table()

        self._match(TokenType.COMMA)
        args = [this, *self._parse_csv(self._parse_lambda)]

        gap_fill = exp.GapFill.from_arg_list(args)
        return self.validate_expression(gap_fill, args)

    def _parse_cast(self, strict: bool, safe: t.Optional[bool] = None) -> exp.Expression:
        this = self._parse_assignment()

        if not self._match(TokenType.ALIAS):
            if self._match(TokenType.COMMA):
                return self.expression(exp.CastToStrType, this=this, to=self._parse_string())

            self.raise_error("Expected AS after CAST")

        fmt = None
        to = self._parse_types()

        if self._match(TokenType.FORMAT):
            fmt_string = self._parse_string()
            fmt = self._parse_at_time_zone(fmt_string)

            if not to:
                to = exp.DataType.build(exp.DataType.Type.UNKNOWN)
            if to.this in exp.DataType.TEMPORAL_TYPES:
                this = self.expression(
                    exp.StrToDate if to.this == exp.DataType.Type.DATE else exp.StrToTime,
                    this=this,
                    format=exp.Literal.string(
                        format_time(
                            fmt_string.this if fmt_string else "",
                            self.dialect.FORMAT_MAPPING or self.dialect.TIME_MAPPING,
                            self.dialect.FORMAT_TRIE or self.dialect.TIME_TRIE,
                        )
                    ),
                    safe=safe,
                )

                if isinstance(fmt, exp.AtTimeZone) and isinstance(this, exp.StrToTime):
                    this.set("zone", fmt.args["zone"])

                return this
        elif not to:
            self.raise_error("Expected TYPE after CAST")
        elif isinstance(to, exp.Identifier):
            to = exp.DataType.build(to.name, udt=True)
        elif to.this == exp.DataType.Type.CHAR:
            if self._match(TokenType.CHARACTER_SET):
                to = self.expression(exp.CharacterSet, this=self._parse_var_or_string())

        return self.expression(
            exp.Cast if strict else exp.TryCast,
            this=this,
            to=to,
            format=fmt,
            safe=safe,
            action=self._parse_var_from_options(self.CAST_ACTIONS, raise_unmatched=False),
        )
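    # Illustrative sketch (approximate): a Teradata-style format cast such as
    #
    #     CAST('2024-01-02' AS DATE FORMAT 'YYYY-MM-DD')
    #
    # is rewritten while parsing into exp.StrToDate(this='2024-01-02', format=...),
    # with the FORMAT string translated through the dialect's FORMAT/TIME mappings.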
    def _parse_string_agg(self) -> exp.Expression:
        if self._match(TokenType.DISTINCT):
            args: t.List[t.Optional[exp.Expression]] = [
                self.expression(exp.Distinct, expressions=[self._parse_assignment()])
            ]
            if self._match(TokenType.COMMA):
                args.extend(self._parse_csv(self._parse_assignment))
        else:
            args = self._parse_csv(self._parse_assignment)  # type: ignore

        index = self._index
        if not self._match(TokenType.R_PAREN) and args:
            # postgres: STRING_AGG([DISTINCT] expression, separator [ORDER BY expression1 {ASC | DESC} [, ...]])
            # bigquery: STRING_AGG([DISTINCT] expression [, separator] [ORDER BY key [{ASC | DESC}] [, ... ]] [LIMIT n])
            args[-1] = self._parse_limit(this=self._parse_order(this=args[-1]))
            return self.expression(exp.GroupConcat, this=args[0], separator=seq_get(args, 1))

        # Checks if we can parse an order clause: WITHIN GROUP (ORDER BY <order_by_expression_list> [ASC | DESC]).
        # This is done "manually", instead of letting _parse_window parse it into an exp.WithinGroup node, so that
        # the STRING_AGG call is parsed like in MySQL / SQLite and can thus be transpiled more easily to them.
        if not self._match_text_seq("WITHIN", "GROUP"):
            self._retreat(index)
            return self.validate_expression(exp.GroupConcat.from_arg_list(args), args)

        self._match_l_paren()  # The corresponding match_r_paren will be called in parse_function (caller)
        order = self._parse_order(this=seq_get(args, 0))
        return self.expression(exp.GroupConcat, this=order, separator=seq_get(args, 1))

    def _parse_convert(
        self, strict: bool, safe: t.Optional[bool] = None
    ) -> t.Optional[exp.Expression]:
        this = self._parse_bitwise()

        if self._match(TokenType.USING):
            to: t.Optional[exp.Expression] = self.expression(
                exp.CharacterSet, this=self._parse_var()
            )
        elif self._match(TokenType.COMMA):
            to = self._parse_types()
        else:
            to = None

        return self.expression(exp.Cast if strict else exp.TryCast, this=this, to=to, safe=safe)
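    # Illustrative sketch (approximate) for the two parsers above: _parse_string_agg
    # folds both argument styles into exp.GroupConcat, which keeps transpilation to
    # MySQL's GROUP_CONCAT straightforward:
    #
    #     STRING_AGG(x, ',' ORDER BY x)                   -- postgres / bigquery form
    #     STRING_AGG(x, ',') WITHIN GROUP (ORDER BY x)    -- within-group form
    #
    # and _parse_convert models the MySQL-style CONVERT variants:
    #
    #     CONVERT(s USING utf8mb4)   -- character-set conversion target
    #     CONVERT(s, CHAR(10))       -- type conversion target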
5730 """ 5731 args = self._parse_csv(self._parse_assignment) 5732 5733 if len(args) < 3: 5734 return self.expression(exp.Decode, this=seq_get(args, 0), charset=seq_get(args, 1)) 5735 5736 expression, *expressions = args 5737 if not expression: 5738 return None 5739 5740 ifs = [] 5741 for search, result in zip(expressions[::2], expressions[1::2]): 5742 if not search or not result: 5743 return None 5744 5745 if isinstance(search, exp.Literal): 5746 ifs.append( 5747 exp.If(this=exp.EQ(this=expression.copy(), expression=search), true=result) 5748 ) 5749 elif isinstance(search, exp.Null): 5750 ifs.append( 5751 exp.If(this=exp.Is(this=expression.copy(), expression=exp.Null()), true=result) 5752 ) 5753 else: 5754 cond = exp.or_( 5755 exp.EQ(this=expression.copy(), expression=search), 5756 exp.and_( 5757 exp.Is(this=expression.copy(), expression=exp.Null()), 5758 exp.Is(this=search.copy(), expression=exp.Null()), 5759 copy=False, 5760 ), 5761 copy=False, 5762 ) 5763 ifs.append(exp.If(this=cond, true=result)) 5764 5765 return exp.Case(ifs=ifs, default=expressions[-1] if len(expressions) % 2 == 1 else None) 5766 5767 def _parse_json_key_value(self) -> t.Optional[exp.JSONKeyValue]: 5768 self._match_text_seq("KEY") 5769 key = self._parse_column() 5770 self._match_set(self.JSON_KEY_VALUE_SEPARATOR_TOKENS) 5771 self._match_text_seq("VALUE") 5772 value = self._parse_bitwise() 5773 5774 if not key and not value: 5775 return None 5776 return self.expression(exp.JSONKeyValue, this=key, expression=value) 5777 5778 def _parse_format_json(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 5779 if not this or not self._match_text_seq("FORMAT", "JSON"): 5780 return this 5781 5782 return self.expression(exp.FormatJson, this=this) 5783 5784 def _parse_on_handling(self, on: str, *values: str) -> t.Optional[str]: 5785 # Parses the "X ON Y" syntax, i.e. NULL ON NULL (Oracle, T-SQL) 5786 for value in values: 5787 if self._match_text_seq(value, "ON", on): 5788 return f"{value} ON {on}" 5789 5790 return None 5791 5792 @t.overload 5793 def _parse_json_object(self, agg: Lit[False]) -> exp.JSONObject: ... 5794 5795 @t.overload 5796 def _parse_json_object(self, agg: Lit[True]) -> exp.JSONObjectAgg: ... 
    def _parse_json_key_value(self) -> t.Optional[exp.JSONKeyValue]:
        self._match_text_seq("KEY")
        key = self._parse_column()
        self._match_set(self.JSON_KEY_VALUE_SEPARATOR_TOKENS)
        self._match_text_seq("VALUE")
        value = self._parse_bitwise()

        if not key and not value:
            return None
        return self.expression(exp.JSONKeyValue, this=key, expression=value)

    def _parse_format_json(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        if not this or not self._match_text_seq("FORMAT", "JSON"):
            return this

        return self.expression(exp.FormatJson, this=this)

    def _parse_on_handling(self, on: str, *values: str) -> t.Optional[str]:
        # Parses the "X ON Y" syntax, i.e. NULL ON NULL (Oracle, T-SQL)
        for value in values:
            if self._match_text_seq(value, "ON", on):
                return f"{value} ON {on}"

        return None

    @t.overload
    def _parse_json_object(self, agg: Lit[False]) -> exp.JSONObject: ...

    @t.overload
    def _parse_json_object(self, agg: Lit[True]) -> exp.JSONObjectAgg: ...

    def _parse_json_object(self, agg=False):
        star = self._parse_star()
        expressions = (
            [star]
            if star
            else self._parse_csv(lambda: self._parse_format_json(self._parse_json_key_value()))
        )
        null_handling = self._parse_on_handling("NULL", "NULL", "ABSENT")

        unique_keys = None
        if self._match_text_seq("WITH", "UNIQUE"):
            unique_keys = True
        elif self._match_text_seq("WITHOUT", "UNIQUE"):
            unique_keys = False

        self._match_text_seq("KEYS")

        return_type = self._match_text_seq("RETURNING") and self._parse_format_json(
            self._parse_type()
        )
        encoding = self._match_text_seq("ENCODING") and self._parse_var()

        return self.expression(
            exp.JSONObjectAgg if agg else exp.JSONObject,
            expressions=expressions,
            null_handling=null_handling,
            unique_keys=unique_keys,
            return_type=return_type,
            encoding=encoding,
        )

    # Note: this is currently incomplete; it only implements the "JSON_value_column" part
    def _parse_json_column_def(self) -> exp.JSONColumnDef:
        if not self._match_text_seq("NESTED"):
            this = self._parse_id_var()
            kind = self._parse_types(allow_identifiers=False)
            nested = None
        else:
            this = None
            kind = None
            nested = True

        path = self._match_text_seq("PATH") and self._parse_string()
        nested_schema = nested and self._parse_json_schema()

        return self.expression(
            exp.JSONColumnDef,
            this=this,
            kind=kind,
            path=path,
            nested_schema=nested_schema,
        )

    def _parse_json_schema(self) -> exp.JSONSchema:
        self._match_text_seq("COLUMNS")
        return self.expression(
            exp.JSONSchema,
            expressions=self._parse_wrapped_csv(self._parse_json_column_def, optional=True),
        )

    def _parse_json_table(self) -> exp.JSONTable:
        this = self._parse_format_json(self._parse_bitwise())
        path = self._match(TokenType.COMMA) and self._parse_string()
        error_handling = self._parse_on_handling("ERROR", "ERROR", "NULL")
        empty_handling = self._parse_on_handling("EMPTY", "ERROR", "NULL")
        schema = self._parse_json_schema()

        return exp.JSONTable(
            this=this,
            schema=schema,
            path=path,
            error_handling=error_handling,
            empty_handling=empty_handling,
        )

    def _parse_match_against(self) -> exp.MatchAgainst:
        expressions = self._parse_csv(self._parse_column)

        self._match_text_seq(")", "AGAINST", "(")

        this = self._parse_string()

        if self._match_text_seq("IN", "NATURAL", "LANGUAGE", "MODE"):
            modifier = "IN NATURAL LANGUAGE MODE"
            if self._match_text_seq("WITH", "QUERY", "EXPANSION"):
                modifier = f"{modifier} WITH QUERY EXPANSION"
        elif self._match_text_seq("IN", "BOOLEAN", "MODE"):
            modifier = "IN BOOLEAN MODE"
        elif self._match_text_seq("WITH", "QUERY", "EXPANSION"):
            modifier = "WITH QUERY EXPANSION"
        else:
            modifier = None

        return self.expression(
            exp.MatchAgainst, this=this, expressions=expressions, modifier=modifier
        )
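    # Illustrative sketch (approximate): MySQL full-text search parses into
    # exp.MatchAgainst with the mode captured as a plain string modifier, e.g.
    #
    #     MATCH(title, body) AGAINST('database' IN BOOLEAN MODE)
    #     # -> exp.MatchAgainst(this='database', expressions=[title, body],
    #     #                     modifier="IN BOOLEAN MODE")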
    # https://learn.microsoft.com/en-us/sql/t-sql/functions/openjson-transact-sql?view=sql-server-ver16
    def _parse_open_json(self) -> exp.OpenJSON:
        this = self._parse_bitwise()
        path = self._match(TokenType.COMMA) and self._parse_string()

        def _parse_open_json_column_def() -> exp.OpenJSONColumnDef:
            this = self._parse_field(any_token=True)
            kind = self._parse_types()
            path = self._parse_string()
            as_json = self._match_pair(TokenType.ALIAS, TokenType.JSON)

            return self.expression(
                exp.OpenJSONColumnDef, this=this, kind=kind, path=path, as_json=as_json
            )

        expressions = None
        if self._match_pair(TokenType.R_PAREN, TokenType.WITH):
            self._match_l_paren()
            expressions = self._parse_csv(_parse_open_json_column_def)

        return self.expression(exp.OpenJSON, this=this, path=path, expressions=expressions)

    def _parse_position(self, haystack_first: bool = False) -> exp.StrPosition:
        args = self._parse_csv(self._parse_bitwise)

        if self._match(TokenType.IN):
            return self.expression(
                exp.StrPosition, this=self._parse_bitwise(), substr=seq_get(args, 0)
            )

        if haystack_first:
            haystack = seq_get(args, 0)
            needle = seq_get(args, 1)
        else:
            needle = seq_get(args, 0)
            haystack = seq_get(args, 1)

        return self.expression(
            exp.StrPosition, this=haystack, substr=needle, position=seq_get(args, 2)
        )

    def _parse_predict(self) -> exp.Predict:
        self._match_text_seq("MODEL")
        this = self._parse_table()

        self._match(TokenType.COMMA)
        self._match_text_seq("TABLE")

        return self.expression(
            exp.Predict,
            this=this,
            expression=self._parse_table(),
            params_struct=self._match(TokenType.COMMA) and self._parse_bitwise(),
        )

    def _parse_join_hint(self, func_name: str) -> exp.JoinHint:
        args = self._parse_csv(self._parse_table)
        return exp.JoinHint(this=func_name.upper(), expressions=args)

    def _parse_substring(self) -> exp.Substring:
        # Postgres supports the form: substring(string [from int] [for int])
        # https://www.postgresql.org/docs/9.1/functions-string.html @ Table 9-6

        args = t.cast(t.List[t.Optional[exp.Expression]], self._parse_csv(self._parse_bitwise))

        if self._match(TokenType.FROM):
            args.append(self._parse_bitwise())
        if self._match(TokenType.FOR):
            if len(args) == 1:
                args.append(exp.Literal.number(1))
            args.append(self._parse_bitwise())

        return self.validate_expression(exp.Substring.from_arg_list(args), args)

    def _parse_trim(self) -> exp.Trim:
        # https://www.w3resource.com/sql/character-functions/trim.php
        # https://docs.oracle.com/javadb/10.8.3.0/ref/rreftrimfunc.html

        position = None
        collation = None
        expression = None

        if self._match_texts(self.TRIM_TYPES):
            position = self._prev.text.upper()

        this = self._parse_bitwise()
        if self._match_set((TokenType.FROM, TokenType.COMMA)):
            invert_order = self._prev.token_type == TokenType.FROM or self.TRIM_PATTERN_FIRST
            expression = self._parse_bitwise()

            if invert_order:
                this, expression = expression, this

        if self._match(TokenType.COLLATE):
            collation = self._parse_bitwise()

        return self.expression(
            exp.Trim, this=this, position=position, expression=expression, collation=collation
        )
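    # Illustrative sketch (approximate): both TRIM shapes normalize into exp.Trim;
    # in the FROM form the operands are swapped so `this` is always the target string:
    #
    #     TRIM(LEADING 'x' FROM s)  # -> exp.Trim(this=s, expression='x', position='LEADING')
    #     TRIM(s)                   # -> exp.Trim(this=s)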
    def _parse_window_clause(self) -> t.Optional[t.List[exp.Expression]]:
        return self._match(TokenType.WINDOW) and self._parse_csv(self._parse_named_window)

    def _parse_named_window(self) -> t.Optional[exp.Expression]:
        return self._parse_window(self._parse_id_var(), alias=True)

    def _parse_respect_or_ignore_nulls(
        self, this: t.Optional[exp.Expression]
    ) -> t.Optional[exp.Expression]:
        if self._match_text_seq("IGNORE", "NULLS"):
            return self.expression(exp.IgnoreNulls, this=this)
        if self._match_text_seq("RESPECT", "NULLS"):
            return self.expression(exp.RespectNulls, this=this)
        return this

    def _parse_having_max(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        if self._match(TokenType.HAVING):
            self._match_texts(("MAX", "MIN"))
            max = self._prev.text.upper() != "MIN"
            return self.expression(
                exp.HavingMax, this=this, expression=self._parse_column(), max=max
            )

        return this
    def _parse_window(
        self, this: t.Optional[exp.Expression], alias: bool = False
    ) -> t.Optional[exp.Expression]:
        func = this
        comments = func.comments if isinstance(func, exp.Expression) else None

        if self._match_pair(TokenType.FILTER, TokenType.L_PAREN):
            self._match(TokenType.WHERE)
            this = self.expression(
                exp.Filter, this=this, expression=self._parse_where(skip_where_token=True)
            )
            self._match_r_paren()

        # T-SQL allows the OVER (...) syntax after WITHIN GROUP.
        # https://learn.microsoft.com/en-us/sql/t-sql/functions/percentile-disc-transact-sql?view=sql-server-ver16
        if self._match_text_seq("WITHIN", "GROUP"):
            order = self._parse_wrapped(self._parse_order)
            this = self.expression(exp.WithinGroup, this=this, expression=order)

        # SQL spec defines an optional [ { IGNORE | RESPECT } NULLS ] OVER
        # Some dialects choose to implement and some do not.
        # https://dev.mysql.com/doc/refman/8.0/en/window-function-descriptions.html

        # There is some code above in _parse_lambda that handles
        #   SELECT FIRST_VALUE(TABLE.COLUMN IGNORE|RESPECT NULLS) OVER ...

        # The below changes handle
        #   SELECT FIRST_VALUE(TABLE.COLUMN) IGNORE|RESPECT NULLS OVER ...

        # Oracle allows both formats
        #   (https://docs.oracle.com/en/database/oracle/oracle-database/19/sqlrf/img_text/first_value.html)
        # and Snowflake chose to do the same for familiarity
        #   https://docs.snowflake.com/en/sql-reference/functions/first_value.html#usage-notes
        if isinstance(this, exp.AggFunc):
            ignore_respect = this.find(exp.IgnoreNulls, exp.RespectNulls)

            if ignore_respect and ignore_respect is not this:
                ignore_respect.replace(ignore_respect.this)
                this = self.expression(ignore_respect.__class__, this=this)

        this = self._parse_respect_or_ignore_nulls(this)

        # bigquery select from window x AS (partition by ...)
        if alias:
            over = None
            self._match(TokenType.ALIAS)
        elif not self._match_set(self.WINDOW_BEFORE_PAREN_TOKENS):
            return this
        else:
            over = self._prev.text.upper()

        if comments and isinstance(func, exp.Expression):
            func.pop_comments()

        if not self._match(TokenType.L_PAREN):
            return self.expression(
                exp.Window,
                comments=comments,
                this=this,
                alias=self._parse_id_var(False),
                over=over,
            )

        window_alias = self._parse_id_var(any_token=False, tokens=self.WINDOW_ALIAS_TOKENS)

        first = self._match(TokenType.FIRST)
        if self._match_text_seq("LAST"):
            first = False

        partition, order = self._parse_partition_and_order()
        kind = self._match_set((TokenType.ROWS, TokenType.RANGE)) and self._prev.text

        if kind:
            self._match(TokenType.BETWEEN)
            start = self._parse_window_spec()
            self._match(TokenType.AND)
            end = self._parse_window_spec()

            spec = self.expression(
                exp.WindowSpec,
                kind=kind,
                start=start["value"],
                start_side=start["side"],
                end=end["value"],
                end_side=end["side"],
            )
        else:
            spec = None

        self._match_r_paren()

        window = self.expression(
            exp.Window,
            comments=comments,
            this=this,
            partition_by=partition,
            order=order,
            spec=spec,
            alias=window_alias,
            over=over,
            first=first,
        )

        # This covers Oracle's FIRST/LAST syntax: aggregate KEEP (...) OVER (...)
        if self._match_set(self.WINDOW_BEFORE_PAREN_TOKENS, advance=False):
            return self._parse_window(window, alias=alias)

        return window
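    # Illustrative sketch (approximate): _parse_window normalizes the two placements
    # of IGNORE NULLS that dialects allow, so both of
    #
    #     FIRST_VALUE(x IGNORE NULLS) OVER (PARTITION BY y)
    #     FIRST_VALUE(x) IGNORE NULLS OVER (PARTITION BY y)
    #
    # end up as exp.Window(this=exp.IgnoreNulls(this=FIRST_VALUE(x)), ...); FILTER
    # (WHERE ...) and WITHIN GROUP (ORDER BY ...) wrap the function node the same way.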
    def _parse_partition_and_order(
        self,
    ) -> t.Tuple[t.List[exp.Expression], t.Optional[exp.Expression]]:
        return self._parse_partition_by(), self._parse_order()

    def _parse_window_spec(self) -> t.Dict[str, t.Optional[str | exp.Expression]]:
        self._match(TokenType.BETWEEN)

        return {
            "value": (
                (self._match_text_seq("UNBOUNDED") and "UNBOUNDED")
                or (self._match_text_seq("CURRENT", "ROW") and "CURRENT ROW")
                or self._parse_bitwise()
            ),
            "side": self._match_texts(self.WINDOW_SIDES) and self._prev.text,
        }

    def _parse_alias(
        self, this: t.Optional[exp.Expression], explicit: bool = False
    ) -> t.Optional[exp.Expression]:
        any_token = self._match(TokenType.ALIAS)
        comments = self._prev_comments or []

        if explicit and not any_token:
            return this

        if self._match(TokenType.L_PAREN):
            aliases = self.expression(
                exp.Aliases,
                comments=comments,
                this=this,
                expressions=self._parse_csv(lambda: self._parse_id_var(any_token)),
            )
            self._match_r_paren(aliases)
            return aliases

        alias = self._parse_id_var(any_token, tokens=self.ALIAS_TOKENS) or (
            self.STRING_ALIASES and self._parse_string_as_identifier()
        )

        if alias:
            comments.extend(alias.pop_comments())
            this = self.expression(exp.Alias, comments=comments, this=this, alias=alias)
            column = this.this

            # Moves the comment next to the alias in `expr /* comment */ AS alias`
            if not this.comments and column and column.comments:
                this.comments = column.pop_comments()

        return this

    def _parse_id_var(
        self,
        any_token: bool = True,
        tokens: t.Optional[t.Collection[TokenType]] = None,
    ) -> t.Optional[exp.Expression]:
        expression = self._parse_identifier()
        if not expression and (
            (any_token and self._advance_any()) or self._match_set(tokens or self.ID_VAR_TOKENS)
        ):
            quoted = self._prev.token_type == TokenType.STRING
            expression = self.expression(exp.Identifier, this=self._prev.text, quoted=quoted)

        return expression

    def _parse_string(self) -> t.Optional[exp.Expression]:
        if self._match_set(self.STRING_PARSERS):
            return self.STRING_PARSERS[self._prev.token_type](self, self._prev)
        return self._parse_placeholder()

    def _parse_string_as_identifier(self) -> t.Optional[exp.Identifier]:
        return exp.to_identifier(self._match(TokenType.STRING) and self._prev.text, quoted=True)

    def _parse_number(self) -> t.Optional[exp.Expression]:
        if self._match_set(self.NUMERIC_PARSERS):
            return self.NUMERIC_PARSERS[self._prev.token_type](self, self._prev)
        return self._parse_placeholder()

    def _parse_identifier(self) -> t.Optional[exp.Expression]:
        if self._match(TokenType.IDENTIFIER):
            return self.expression(exp.Identifier, this=self._prev.text, quoted=True)
        return self._parse_placeholder()

    def _parse_var(
        self,
        any_token: bool = False,
        tokens: t.Optional[t.Collection[TokenType]] = None,
        upper: bool = False,
    ) -> t.Optional[exp.Expression]:
        if (
            (any_token and self._advance_any())
            or self._match(TokenType.VAR)
            or (self._match_set(tokens) if tokens else False)
        ):
            return self.expression(
                exp.Var, this=self._prev.text.upper() if upper else self._prev.text
            )
        return self._parse_placeholder()

    def _advance_any(self, ignore_reserved: bool = False) -> t.Optional[Token]:
        if self._curr and (ignore_reserved or self._curr.token_type not in self.RESERVED_TOKENS):
            self._advance()
            return self._prev
        return None

    def _parse_var_or_string(self, upper: bool = False) -> t.Optional[exp.Expression]:
        return self._parse_string() or self._parse_var(any_token=True, upper=upper)

    def _parse_primary_or_var(self) -> t.Optional[exp.Expression]:
        return self._parse_primary() or self._parse_var(any_token=True)

    def _parse_null(self) -> t.Optional[exp.Expression]:
        if self._match_set(self.NULL_TOKENS):
            return self.PRIMARY_PARSERS[TokenType.NULL](self, self._prev)
        return self._parse_placeholder()

    def _parse_boolean(self) -> t.Optional[exp.Expression]:
        if self._match(TokenType.TRUE):
            return self.PRIMARY_PARSERS[TokenType.TRUE](self, self._prev)
        if self._match(TokenType.FALSE):
            return self.PRIMARY_PARSERS[TokenType.FALSE](self, self._prev)
        return self._parse_placeholder()

    def _parse_star(self) -> t.Optional[exp.Expression]:
        if self._match(TokenType.STAR):
            return self.PRIMARY_PARSERS[TokenType.STAR](self, self._prev)
        return self._parse_placeholder()

    def _parse_parameter(self) -> exp.Parameter:
        this = self._parse_identifier() or self._parse_primary_or_var()
        return self.expression(exp.Parameter, this=this)

    def _parse_placeholder(self) -> t.Optional[exp.Expression]:
        if self._match_set(self.PLACEHOLDER_PARSERS):
            placeholder = self.PLACEHOLDER_PARSERS[self._prev.token_type](self)
            if placeholder:
                return placeholder
            self._advance(-1)
        return None
    def _parse_star_op(self, *keywords: str) -> t.Optional[t.List[exp.Expression]]:
        if not self._match_texts(keywords):
            return None
        if self._match(TokenType.L_PAREN, advance=False):
            return self._parse_wrapped_csv(self._parse_expression)

        expression = self._parse_expression()
        return [expression] if expression else None

    def _parse_csv(
        self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA
    ) -> t.List[exp.Expression]:
        parse_result = parse_method()
        items = [parse_result] if parse_result is not None else []

        while self._match(sep):
            self._add_comments(parse_result)
            parse_result = parse_method()
            if parse_result is not None:
                items.append(parse_result)

        return items

    def _parse_tokens(
        self, parse_method: t.Callable, expressions: t.Dict
    ) -> t.Optional[exp.Expression]:
        this = parse_method()

        while self._match_set(expressions):
            this = self.expression(
                expressions[self._prev.token_type],
                this=this,
                comments=self._prev_comments,
                expression=parse_method(),
            )

        return this

    def _parse_wrapped_id_vars(self, optional: bool = False) -> t.List[exp.Expression]:
        return self._parse_wrapped_csv(self._parse_id_var, optional=optional)

    def _parse_wrapped_csv(
        self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA, optional: bool = False
    ) -> t.List[exp.Expression]:
        return self._parse_wrapped(
            lambda: self._parse_csv(parse_method, sep=sep), optional=optional
        )

    def _parse_wrapped(self, parse_method: t.Callable, optional: bool = False) -> t.Any:
        wrapped = self._match(TokenType.L_PAREN)
        if not wrapped and not optional:
            self.raise_error("Expecting (")
        parse_result = parse_method()
        if wrapped:
            self._match_r_paren()
        return parse_result

    def _parse_expressions(self) -> t.List[exp.Expression]:
        return self._parse_csv(self._parse_expression)

    def _parse_select_or_expression(self, alias: bool = False) -> t.Optional[exp.Expression]:
        return self._parse_select() or self._parse_set_operations(
            self._parse_expression() if alias else self._parse_assignment()
        )

    def _parse_ddl_select(self) -> t.Optional[exp.Expression]:
        return self._parse_query_modifiers(
            self._parse_set_operations(self._parse_select(nested=True, parse_subquery_alias=False))
        )

    def _parse_transaction(self) -> exp.Transaction | exp.Command:
        this = None
        if self._match_texts(self.TRANSACTION_KIND):
            this = self._prev.text

        self._match_texts(("TRANSACTION", "WORK"))

        modes = []
        while True:
            mode = []
            while self._match(TokenType.VAR):
                mode.append(self._prev.text)

            if mode:
                modes.append(" ".join(mode))
            if not self._match(TokenType.COMMA):
                break

        return self.expression(exp.Transaction, this=this, modes=modes)

    def _parse_commit_or_rollback(self) -> exp.Commit | exp.Rollback:
        chain = None
        savepoint = None
        is_rollback = self._prev.token_type == TokenType.ROLLBACK

        self._match_texts(("TRANSACTION", "WORK"))

        if self._match_text_seq("TO"):
            self._match_text_seq("SAVEPOINT")
            savepoint = self._parse_id_var()

        if self._match(TokenType.AND):
            chain = not self._match_text_seq("NO")
            self._match_text_seq("CHAIN")

        if is_rollback:
            return self.expression(exp.Rollback, savepoint=savepoint)

        return self.expression(exp.Commit, chain=chain)
    def _parse_refresh(self) -> exp.Refresh:
        self._match(TokenType.TABLE)
        return self.expression(exp.Refresh, this=self._parse_string() or self._parse_table())

    def _parse_add_column(self) -> t.Optional[exp.Expression]:
        if not self._match_text_seq("ADD"):
            return None

        self._match(TokenType.COLUMN)
        exists_column = self._parse_exists(not_=True)
        expression = self._parse_field_def()

        if expression:
            expression.set("exists", exists_column)

            # https://docs.databricks.com/delta/update-schema.html#explicitly-update-schema-to-add-columns
            if self._match_texts(("FIRST", "AFTER")):
                position = self._prev.text
                column_position = self.expression(
                    exp.ColumnPosition, this=self._parse_column(), position=position
                )
                expression.set("position", column_position)

        return expression
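    # Illustrative sketch (approximate): a Databricks/MySQL-style clause such as
    #
    #     ALTER TABLE t ADD COLUMN IF NOT EXISTS c INT AFTER b
    #
    # parses the new column through _parse_field_def and records the placement as
    # exp.ColumnPosition(this=b, position="AFTER") on the resulting ColumnDef.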
    def _parse_drop_column(self) -> t.Optional[exp.Drop | exp.Command]:
        drop = self._match(TokenType.DROP) and self._parse_drop()
        if drop and not isinstance(drop, exp.Command):
            drop.set("kind", drop.args.get("kind", "COLUMN"))
        return drop

    # https://docs.aws.amazon.com/athena/latest/ug/alter-table-drop-partition.html
    def _parse_drop_partition(self, exists: t.Optional[bool] = None) -> exp.DropPartition:
        return self.expression(
            exp.DropPartition, expressions=self._parse_csv(self._parse_partition), exists=exists
        )

    def _parse_alter_table_add(self) -> t.List[exp.Expression]:
        index = self._index - 1

        if self._match_set(self.ADD_CONSTRAINT_TOKENS, advance=False):
            return self._parse_csv(
                lambda: self.expression(
                    exp.AddConstraint, expressions=self._parse_csv(self._parse_constraint)
                )
            )

        self._retreat(index)
        if not self.ALTER_TABLE_ADD_REQUIRED_FOR_EACH_COLUMN and self._match_text_seq("ADD"):
            return self._parse_wrapped_csv(self._parse_field_def, optional=True)

        if self._match_text_seq("ADD", "COLUMNS"):
            schema = self._parse_schema()
            if schema:
                return [schema]
            return []

        return self._parse_wrapped_csv(self._parse_add_column, optional=True)

    def _parse_alter_table_alter(self) -> t.Optional[exp.Expression]:
        if self._match_texts(self.ALTER_ALTER_PARSERS):
            return self.ALTER_ALTER_PARSERS[self._prev.text.upper()](self)

        # Many dialects support the ALTER [COLUMN] syntax, so if there is no
        # keyword after ALTER we default to parsing this statement
        self._match(TokenType.COLUMN)
        column = self._parse_field(any_token=True)

        if self._match_pair(TokenType.DROP, TokenType.DEFAULT):
            return self.expression(exp.AlterColumn, this=column, drop=True)
        if self._match_pair(TokenType.SET, TokenType.DEFAULT):
            return self.expression(exp.AlterColumn, this=column, default=self._parse_assignment())
        if self._match(TokenType.COMMENT):
            return self.expression(exp.AlterColumn, this=column, comment=self._parse_string())
        if self._match_text_seq("DROP", "NOT", "NULL"):
            return self.expression(
                exp.AlterColumn,
                this=column,
                drop=True,
                allow_null=True,
            )
        if self._match_text_seq("SET", "NOT", "NULL"):
            return self.expression(
                exp.AlterColumn,
                this=column,
                allow_null=False,
            )
        self._match_text_seq("SET", "DATA")
        self._match_text_seq("TYPE")
        return self.expression(
            exp.AlterColumn,
            this=column,
            dtype=self._parse_types(),
            collate=self._match(TokenType.COLLATE) and self._parse_term(),
            using=self._match(TokenType.USING) and self._parse_assignment(),
        )

    def _parse_alter_diststyle(self) -> exp.AlterDistStyle:
        if self._match_texts(("ALL", "EVEN", "AUTO")):
            return self.expression(exp.AlterDistStyle, this=exp.var(self._prev.text.upper()))

        self._match_text_seq("KEY", "DISTKEY")
        return self.expression(exp.AlterDistStyle, this=self._parse_column())

    def _parse_alter_sortkey(self, compound: t.Optional[bool] = None) -> exp.AlterSortKey:
        if compound:
            self._match_text_seq("SORTKEY")

        if self._match(TokenType.L_PAREN, advance=False):
            return self.expression(
                exp.AlterSortKey, expressions=self._parse_wrapped_id_vars(), compound=compound
            )

        self._match_texts(("AUTO", "NONE"))
        return self.expression(
            exp.AlterSortKey, this=exp.var(self._prev.text.upper()), compound=compound
        )

    def _parse_alter_table_drop(self) -> t.List[exp.Expression]:
        index = self._index - 1

        partition_exists = self._parse_exists()
        if self._match(TokenType.PARTITION, advance=False):
            return self._parse_csv(lambda: self._parse_drop_partition(exists=partition_exists))

        self._retreat(index)
        return self._parse_csv(self._parse_drop_column)

    def _parse_alter_table_rename(self) -> t.Optional[exp.RenameTable | exp.RenameColumn]:
        if self._match(TokenType.COLUMN):
            exists = self._parse_exists()
            old_column = self._parse_column()
            to = self._match_text_seq("TO")
            new_column = self._parse_column()

            if old_column is None or to is None or new_column is None:
                return None

            return self.expression(exp.RenameColumn, this=old_column, to=new_column, exists=exists)

        self._match_text_seq("TO")
        return self.expression(exp.RenameTable, this=self._parse_table(schema=True))

    def _parse_alter_table_set(self) -> exp.AlterSet:
        alter_set = self.expression(exp.AlterSet)

        if self._match(TokenType.L_PAREN, advance=False) or self._match_text_seq(
            "TABLE", "PROPERTIES"
        ):
            alter_set.set("expressions", self._parse_wrapped_csv(self._parse_assignment))
        elif self._match_text_seq("FILESTREAM_ON", advance=False):
            alter_set.set("expressions", [self._parse_assignment()])
        elif self._match_texts(("LOGGED", "UNLOGGED")):
            alter_set.set("option", exp.var(self._prev.text.upper()))
        elif self._match_text_seq("WITHOUT") and self._match_texts(("CLUSTER", "OIDS")):
            alter_set.set("option", exp.var(f"WITHOUT {self._prev.text.upper()}"))
        elif self._match_text_seq("LOCATION"):
            alter_set.set("location", self._parse_field())
        elif self._match_text_seq("ACCESS", "METHOD"):
            alter_set.set("access_method", self._parse_field())
        elif self._match_text_seq("TABLESPACE"):
            alter_set.set("tablespace", self._parse_field())
        elif self._match_text_seq("FILE", "FORMAT") or self._match_text_seq("FILEFORMAT"):
            alter_set.set("file_format", [self._parse_field()])
        elif self._match_text_seq("STAGE_FILE_FORMAT"):
            alter_set.set("file_format", self._parse_wrapped_options())
        elif self._match_text_seq("STAGE_COPY_OPTIONS"):
            alter_set.set("copy_options", self._parse_wrapped_options())
        elif self._match_text_seq("TAG") or self._match_text_seq("TAGS"):
            alter_set.set("tag", self._parse_csv(self._parse_assignment))
        else:
            if self._match_text_seq("SERDE"):
                alter_set.set("serde", self._parse_field())

            alter_set.set("expressions", [self._parse_properties()])

        return alter_set
    def _parse_alter(self) -> exp.Alter | exp.Command:
        start = self._prev

        alter_token = self._match_set(self.ALTERABLES) and self._prev
        if not alter_token:
            return self._parse_as_command(start)

        exists = self._parse_exists()
        only = self._match_text_seq("ONLY")
        this = self._parse_table(schema=True)
        cluster = self._parse_on_property() if self._match(TokenType.ON) else None

        if self._next:
            self._advance()

        parser = self.ALTER_PARSERS.get(self._prev.text.upper()) if self._prev else None
        if parser:
            actions = ensure_list(parser(self))
            options = self._parse_csv(self._parse_property)

            if not self._curr and actions:
                return self.expression(
                    exp.Alter,
                    this=this,
                    kind=alter_token.text.upper(),
                    exists=exists,
                    actions=actions,
                    only=only,
                    options=options,
                    cluster=cluster,
                )

        return self._parse_as_command(start)

    def _parse_merge(self) -> exp.Merge:
        self._match(TokenType.INTO)
        target = self._parse_table()

        if target and self._match(TokenType.ALIAS, advance=False):
            target.set("alias", self._parse_table_alias())

        self._match(TokenType.USING)
        using = self._parse_table()

        self._match(TokenType.ON)
        on = self._parse_assignment()

        return self.expression(
            exp.Merge,
            this=target,
            using=using,
            on=on,
            expressions=self._parse_when_matched(),
        )

    def _parse_when_matched(self) -> t.List[exp.When]:
        whens = []

        while self._match(TokenType.WHEN):
            matched = not self._match(TokenType.NOT)
            self._match_text_seq("MATCHED")
            source = (
                False
                if self._match_text_seq("BY", "TARGET")
                else self._match_text_seq("BY", "SOURCE")
            )
            condition = self._parse_assignment() if self._match(TokenType.AND) else None

            self._match(TokenType.THEN)

            if self._match(TokenType.INSERT):
                _this = self._parse_star()
                if _this:
                    then: t.Optional[exp.Expression] = self.expression(exp.Insert, this=_this)
                else:
                    then = self.expression(
                        exp.Insert,
                        this=self._parse_value(),
                        expression=self._match_text_seq("VALUES") and self._parse_value(),
                    )
            elif self._match(TokenType.UPDATE):
                expressions = self._parse_star()
                if expressions:
                    then = self.expression(exp.Update, expressions=expressions)
                else:
                    then = self.expression(
                        exp.Update,
                        expressions=self._match(TokenType.SET)
                        and self._parse_csv(self._parse_equality),
                    )
            elif self._match(TokenType.DELETE):
                then = self.expression(exp.Var, this=self._prev.text)
            else:
                then = None

            whens.append(
                self.expression(
                    exp.When,
                    matched=matched,
                    source=source,
                    condition=condition,
                    then=then,
                )
            )
        return whens
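    # Illustrative sketch (approximate): a statement such as
    #
    #     MERGE INTO t USING s ON t.id = s.id
    #     WHEN MATCHED THEN UPDATE SET t.v = s.v
    #     WHEN NOT MATCHED THEN INSERT VALUES (s.id, s.v)
    #
    # becomes exp.Merge(this=t, using=s, on=..., expressions=[exp.When(matched=True,
    # then=exp.Update(...)), exp.When(matched=False, then=exp.Insert(...))]).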
"TO")) 6679 6680 if not left or (self.SET_REQUIRES_ASSIGNMENT_DELIMITER and not assignment_delimiter): 6681 self._retreat(index) 6682 return None 6683 6684 right = self._parse_statement() or self._parse_id_var() 6685 if isinstance(right, (exp.Column, exp.Identifier)): 6686 right = exp.var(right.name) 6687 6688 this = self.expression(exp.EQ, this=left, expression=right) 6689 return self.expression(exp.SetItem, this=this, kind=kind) 6690 6691 def _parse_set_transaction(self, global_: bool = False) -> exp.Expression: 6692 self._match_text_seq("TRANSACTION") 6693 characteristics = self._parse_csv( 6694 lambda: self._parse_var_from_options(self.TRANSACTION_CHARACTERISTICS) 6695 ) 6696 return self.expression( 6697 exp.SetItem, 6698 expressions=characteristics, 6699 kind="TRANSACTION", 6700 **{"global": global_}, # type: ignore 6701 ) 6702 6703 def _parse_set_item(self) -> t.Optional[exp.Expression]: 6704 parser = self._find_parser(self.SET_PARSERS, self.SET_TRIE) 6705 return parser(self) if parser else self._parse_set_item_assignment(kind=None) 6706 6707 def _parse_set(self, unset: bool = False, tag: bool = False) -> exp.Set | exp.Command: 6708 index = self._index 6709 set_ = self.expression( 6710 exp.Set, expressions=self._parse_csv(self._parse_set_item), unset=unset, tag=tag 6711 ) 6712 6713 if self._curr: 6714 self._retreat(index) 6715 return self._parse_as_command(self._prev) 6716 6717 return set_ 6718 6719 def _parse_var_from_options( 6720 self, options: OPTIONS_TYPE, raise_unmatched: bool = True 6721 ) -> t.Optional[exp.Var]: 6722 start = self._curr 6723 if not start: 6724 return None 6725 6726 option = start.text.upper() 6727 continuations = options.get(option) 6728 6729 index = self._index 6730 self._advance() 6731 for keywords in continuations or []: 6732 if isinstance(keywords, str): 6733 keywords = (keywords,) 6734 6735 if self._match_text_seq(*keywords): 6736 option = f"{option} {' '.join(keywords)}" 6737 break 6738 else: 6739 if continuations or continuations is None: 6740 if raise_unmatched: 6741 self.raise_error(f"Unknown option {option}") 6742 6743 self._retreat(index) 6744 return None 6745 6746 return exp.var(option) 6747 6748 def _parse_as_command(self, start: Token) -> exp.Command: 6749 while self._curr: 6750 self._advance() 6751 text = self._find_sql(start, self._prev) 6752 size = len(start.text) 6753 self._warn_unsupported() 6754 return exp.Command(this=text[:size], expression=text[size:]) 6755 6756 def _parse_dict_property(self, this: str) -> exp.DictProperty: 6757 settings = [] 6758 6759 self._match_l_paren() 6760 kind = self._parse_id_var() 6761 6762 if self._match(TokenType.L_PAREN): 6763 while True: 6764 key = self._parse_id_var() 6765 value = self._parse_primary() 6766 6767 if not key and value is None: 6768 break 6769 settings.append(self.expression(exp.DictSubProperty, this=key, value=value)) 6770 self._match(TokenType.R_PAREN) 6771 6772 self._match_r_paren() 6773 6774 return self.expression( 6775 exp.DictProperty, 6776 this=this, 6777 kind=kind.this if kind else None, 6778 settings=settings, 6779 ) 6780 6781 def _parse_dict_range(self, this: str) -> exp.DictRange: 6782 self._match_l_paren() 6783 has_min = self._match_text_seq("MIN") 6784 if has_min: 6785 min = self._parse_var() or self._parse_primary() 6786 self._match_text_seq("MAX") 6787 max = self._parse_var() or self._parse_primary() 6788 else: 6789 max = self._parse_var() or self._parse_primary() 6790 min = exp.Literal.number(0) 6791 self._match_r_paren() 6792 return self.expression(exp.DictRange, this=this, 
min=min, max=max) 6793 6794 def _parse_comprehension( 6795 self, this: t.Optional[exp.Expression] 6796 ) -> t.Optional[exp.Comprehension]: 6797 index = self._index 6798 expression = self._parse_column() 6799 if not self._match(TokenType.IN): 6800 self._retreat(index - 1) 6801 return None 6802 iterator = self._parse_column() 6803 condition = self._parse_assignment() if self._match_text_seq("IF") else None 6804 return self.expression( 6805 exp.Comprehension, 6806 this=this, 6807 expression=expression, 6808 iterator=iterator, 6809 condition=condition, 6810 ) 6811 6812 def _parse_heredoc(self) -> t.Optional[exp.Heredoc]: 6813 if self._match(TokenType.HEREDOC_STRING): 6814 return self.expression(exp.Heredoc, this=self._prev.text) 6815 6816 if not self._match_text_seq("$"): 6817 return None 6818 6819 tags = ["$"] 6820 tag_text = None 6821 6822 if self._is_connected(): 6823 self._advance() 6824 tags.append(self._prev.text.upper()) 6825 else: 6826 self.raise_error("No closing $ found") 6827 6828 if tags[-1] != "$": 6829 if self._is_connected() and self._match_text_seq("$"): 6830 tag_text = tags[-1] 6831 tags.append("$") 6832 else: 6833 self.raise_error("No closing $ found") 6834 6835 heredoc_start = self._curr 6836 6837 while self._curr: 6838 if self._match_text_seq(*tags, advance=False): 6839 this = self._find_sql(heredoc_start, self._prev) 6840 self._advance(len(tags)) 6841 return self.expression(exp.Heredoc, this=this, tag=tag_text) 6842 6843 self._advance() 6844 6845 self.raise_error(f"No closing {''.join(tags)} found") 6846 return None 6847 6848 def _find_parser( 6849 self, parsers: t.Dict[str, t.Callable], trie: t.Dict 6850 ) -> t.Optional[t.Callable]: 6851 if not self._curr: 6852 return None 6853 6854 index = self._index 6855 this = [] 6856 while True: 6857 # The current token might be multiple words 6858 curr = self._curr.text.upper() 6859 key = curr.split(" ") 6860 this.append(curr) 6861 6862 self._advance() 6863 result, trie = in_trie(trie, key) 6864 if result == TrieResult.FAILED: 6865 break 6866 6867 if result == TrieResult.EXISTS: 6868 subparser = parsers[" ".join(this)] 6869 return subparser 6870 6871 self._retreat(index) 6872 return None 6873 6874 def _match(self, token_type, advance=True, expression=None): 6875 if not self._curr: 6876 return None 6877 6878 if self._curr.token_type == token_type: 6879 if advance: 6880 self._advance() 6881 self._add_comments(expression) 6882 return True 6883 6884 return None 6885 6886 def _match_set(self, types, advance=True): 6887 if not self._curr: 6888 return None 6889 6890 if self._curr.token_type in types: 6891 if advance: 6892 self._advance() 6893 return True 6894 6895 return None 6896 6897 def _match_pair(self, token_type_a, token_type_b, advance=True): 6898 if not self._curr or not self._next: 6899 return None 6900 6901 if self._curr.token_type == token_type_a and self._next.token_type == token_type_b: 6902 if advance: 6903 self._advance(2) 6904 return True 6905 6906 return None 6907 6908 def _match_l_paren(self, expression: t.Optional[exp.Expression] = None) -> None: 6909 if not self._match(TokenType.L_PAREN, expression=expression): 6910 self.raise_error("Expecting (") 6911 6912 def _match_r_paren(self, expression: t.Optional[exp.Expression] = None) -> None: 6913 if not self._match(TokenType.R_PAREN, expression=expression): 6914 self.raise_error("Expecting )") 6915 6916 def _match_texts(self, texts, advance=True): 6917 if self._curr and self._curr.text.upper() in texts: 6918 if advance: 6919 self._advance() 6920 return True 6921 return None 
6922 6923 def _match_text_seq(self, *texts, advance=True): 6924 index = self._index 6925 for text in texts: 6926 if self._curr and self._curr.text.upper() == text: 6927 self._advance() 6928 else: 6929 self._retreat(index) 6930 return None 6931 6932 if not advance: 6933 self._retreat(index) 6934 6935 return True 6936 6937 def _replace_lambda( 6938 self, node: t.Optional[exp.Expression], expressions: t.List[exp.Expression] 6939 ) -> t.Optional[exp.Expression]: 6940 if not node: 6941 return node 6942 6943 lambda_types = {e.name: e.args.get("to") or False for e in expressions} 6944 6945 for column in node.find_all(exp.Column): 6946 typ = lambda_types.get(column.parts[0].name) 6947 if typ is not None: 6948 dot_or_id = column.to_dot() if column.table else column.this 6949 6950 if typ: 6951 dot_or_id = self.expression( 6952 exp.Cast, 6953 this=dot_or_id, 6954 to=typ, 6955 ) 6956 6957 parent = column.parent 6958 6959 while isinstance(parent, exp.Dot): 6960 if not isinstance(parent.parent, exp.Dot): 6961 parent.replace(dot_or_id) 6962 break 6963 parent = parent.parent 6964 else: 6965 if column is node: 6966 node = dot_or_id 6967 else: 6968 column.replace(dot_or_id) 6969 return node 6970 6971 def _parse_truncate_table(self) -> t.Optional[exp.TruncateTable] | exp.Expression: 6972 start = self._prev 6973 6974 # Not to be confused with TRUNCATE(number, decimals) function call 6975 if self._match(TokenType.L_PAREN): 6976 self._retreat(self._index - 2) 6977 return self._parse_function() 6978 6979 # Clickhouse supports TRUNCATE DATABASE as well 6980 is_database = self._match(TokenType.DATABASE) 6981 6982 self._match(TokenType.TABLE) 6983 6984 exists = self._parse_exists(not_=False) 6985 6986 expressions = self._parse_csv( 6987 lambda: self._parse_table(schema=True, is_db_reference=is_database) 6988 ) 6989 6990 cluster = self._parse_on_property() if self._match(TokenType.ON) else None 6991 6992 if self._match_text_seq("RESTART", "IDENTITY"): 6993 identity = "RESTART" 6994 elif self._match_text_seq("CONTINUE", "IDENTITY"): 6995 identity = "CONTINUE" 6996 else: 6997 identity = None 6998 6999 if self._match_text_seq("CASCADE") or self._match_text_seq("RESTRICT"): 7000 option = self._prev.text 7001 else: 7002 option = None 7003 7004 partition = self._parse_partition() 7005 7006 # Fallback case 7007 if self._curr: 7008 return self._parse_as_command(start) 7009 7010 return self.expression( 7011 exp.TruncateTable, 7012 expressions=expressions, 7013 is_database=is_database, 7014 exists=exists, 7015 cluster=cluster, 7016 identity=identity, 7017 option=option, 7018 partition=partition, 7019 ) 7020 7021 def _parse_with_operator(self) -> t.Optional[exp.Expression]: 7022 this = self._parse_ordered(self._parse_opclass) 7023 7024 if not self._match(TokenType.WITH): 7025 return this 7026 7027 op = self._parse_var(any_token=True) 7028 7029 return self.expression(exp.WithOperator, this=this, op=op) 7030 7031 def _parse_wrapped_options(self) -> t.List[t.Optional[exp.Expression]]: 7032 self._match(TokenType.EQ) 7033 self._match(TokenType.L_PAREN) 7034 7035 opts: t.List[t.Optional[exp.Expression]] = [] 7036 while self._curr and not self._match(TokenType.R_PAREN): 7037 if self._match_text_seq("FORMAT_NAME", "="): 7038 # The FORMAT_NAME can be set to an identifier for Snowflake and T-SQL, 7039 # so we parse it separately to use _parse_field() 7040 prop = self.expression( 7041 exp.Property, this=exp.var("FORMAT_NAME"), value=self._parse_field() 7042 ) 7043 opts.append(prop) 7044 else: 7045 opts.append(self._parse_property()) 7046 
7047 self._match(TokenType.COMMA) 7048 7049 return opts 7050 7051 def _parse_copy_parameters(self) -> t.List[exp.CopyParameter]: 7052 sep = TokenType.COMMA if self.dialect.COPY_PARAMS_ARE_CSV else None 7053 7054 options = [] 7055 while self._curr and not self._match(TokenType.R_PAREN, advance=False): 7056 option = self._parse_var(any_token=True) 7057 prev = self._prev.text.upper() 7058 7059 # Different dialects might separate options and values by white space, "=" and "AS" 7060 self._match(TokenType.EQ) 7061 self._match(TokenType.ALIAS) 7062 7063 param = self.expression(exp.CopyParameter, this=option) 7064 7065 if prev in self.COPY_INTO_VARLEN_OPTIONS and self._match( 7066 TokenType.L_PAREN, advance=False 7067 ): 7068 # Snowflake FILE_FORMAT case, Databricks COPY & FORMAT options 7069 param.set("expressions", self._parse_wrapped_options()) 7070 elif prev == "FILE_FORMAT": 7071 # T-SQL's external file format case 7072 param.set("expression", self._parse_field()) 7073 else: 7074 param.set("expression", self._parse_unquoted_field()) 7075 7076 options.append(param) 7077 self._match(sep) 7078 7079 return options 7080 7081 def _parse_credentials(self) -> t.Optional[exp.Credentials]: 7082 expr = self.expression(exp.Credentials) 7083 7084 if self._match_text_seq("STORAGE_INTEGRATION", "="): 7085 expr.set("storage", self._parse_field()) 7086 if self._match_text_seq("CREDENTIALS"): 7087 # Snowflake case: CREDENTIALS = (...), Redshift case: CREDENTIALS <string> 7088 creds = ( 7089 self._parse_wrapped_options() if self._match(TokenType.EQ) else self._parse_field() 7090 ) 7091 expr.set("credentials", creds) 7092 if self._match_text_seq("ENCRYPTION"): 7093 expr.set("encryption", self._parse_wrapped_options()) 7094 if self._match_text_seq("IAM_ROLE"): 7095 expr.set("iam_role", self._parse_field()) 7096 if self._match_text_seq("REGION"): 7097 expr.set("region", self._parse_field()) 7098 7099 return expr 7100 7101 def _parse_file_location(self) -> t.Optional[exp.Expression]: 7102 return self._parse_field() 7103 7104 def _parse_copy(self) -> exp.Copy | exp.Command: 7105 start = self._prev 7106 7107 self._match(TokenType.INTO) 7108 7109 this = ( 7110 self._parse_select(nested=True, parse_subquery_alias=False) 7111 if self._match(TokenType.L_PAREN, advance=False) 7112 else self._parse_table(schema=True) 7113 ) 7114 7115 kind = self._match(TokenType.FROM) or not self._match_text_seq("TO") 7116 7117 files = self._parse_csv(self._parse_file_location) 7118 credentials = self._parse_credentials() 7119 7120 self._match_text_seq("WITH") 7121 7122 params = self._parse_wrapped(self._parse_copy_parameters, optional=True) 7123 7124 # Fallback case 7125 if self._curr: 7126 return self._parse_as_command(start) 7127 7128 return self.expression( 7129 exp.Copy, 7130 this=this, 7131 kind=kind, 7132 credentials=credentials, 7133 files=files, 7134 params=params, 7135 )
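To make the shape of these parses concrete, here is a small sketch using the public parse_one helper: in the version of the code shown above, a MERGE statement becomes an exp.Merge whose expressions hold one exp.When per WHEN clause (newer releases may nest the When nodes differently).

from sqlglot import exp, parse_one

merge = parse_one(
    "MERGE INTO t USING s ON t.id = s.id "
    "WHEN MATCHED THEN UPDATE SET t.v = s.v "
    "WHEN NOT MATCHED THEN INSERT (id, v) VALUES (s.id, s.v)"
)

assert isinstance(merge, exp.Merge)
for when in merge.expressions:
    # each exp.When records whether it applies to matched rows and its action
    print(when.args.get("matched"), when.args.get("then"))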
Parser consumes a list of tokens produced by the Tokenizer and produces a parsed syntax tree.
Arguments:
- error_level: The desired error level. Default: ErrorLevel.IMMEDIATE
- error_message_context: The amount of context to capture from a query string when displaying the error message (in number of characters). Default: 100
- max_errors: Maximum number of error messages to include in a raised ParseError. This is only relevant if error_level is ErrorLevel.RAISE. Default: 3
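As a usage sketch, the constructor arguments map directly onto attributes (the dialect string below is only an example; any value accepted by Dialect.get_or_raise works):

from sqlglot.errors import ErrorLevel
from sqlglot.parser import Parser

parser = Parser(
    error_level=ErrorLevel.RAISE,  # accumulate errors, raise a single ParseError
    error_message_context=50,      # characters of context shown around an error
    max_errors=5,                  # cap on messages included in the raised error
    dialect="duckdb",              # resolved via Dialect.get_or_raise
)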
    def __init__(
        self,
        error_level: t.Optional[ErrorLevel] = None,
        error_message_context: int = 100,
        max_errors: int = 3,
        dialect: DialectType = None,
    ):
        from sqlglot.dialects import Dialect

        self.error_level = error_level or ErrorLevel.IMMEDIATE
        self.error_message_context = error_message_context
        self.max_errors = max_errors
        self.dialect = Dialect.get_or_raise(dialect)
        self.reset()
    def parse(
        self, raw_tokens: t.List[Token], sql: t.Optional[str] = None
    ) -> t.List[t.Optional[exp.Expression]]:
        """
        Parses a list of tokens and returns a list of syntax trees, one tree
        per parsed SQL statement.

        Args:
            raw_tokens: The list of tokens.
            sql: The original SQL string, used to produce helpful debug messages.

        Returns:
            The list of the produced syntax trees.
        """
        return self._parse(
            parse_method=self.__class__._parse_statement, raw_tokens=raw_tokens, sql=sql
        )
Parses a list of tokens and returns a list of syntax trees, one tree per parsed SQL statement.
Arguments:
- raw_tokens: The list of tokens.
- sql: The original SQL string, used to produce helpful debug messages.
Returns:
The list of the produced syntax trees.
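A minimal end-to-end sketch, using the base Tokenizer for simplicity (a dialect-specific tokenizer would normally produce the tokens):

from sqlglot.parser import Parser
from sqlglot.tokens import Tokenizer

sql = "SELECT a FROM t; SELECT b FROM u"
tokens = Tokenizer().tokenize(sql)

trees = Parser().parse(tokens, sql)  # one tree per statement
print([tree.sql() for tree in trees if tree])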
    def parse_into(
        self,
        expression_types: exp.IntoType,
        raw_tokens: t.List[Token],
        sql: t.Optional[str] = None,
    ) -> t.List[t.Optional[exp.Expression]]:
        """
        Parses a list of tokens into a given Expression type. If a collection of Expression
        types is given instead, this method will try to parse the token list into each one
        of them, stopping at the first for which the parsing succeeds.

        Args:
            expression_types: The expression type(s) to try and parse the token list into.
            raw_tokens: The list of tokens.
            sql: The original SQL string, used to produce helpful debug messages.

        Returns:
            The target Expression.
        """
        errors = []
        for expression_type in ensure_list(expression_types):
            parser = self.EXPRESSION_PARSERS.get(expression_type)
            if not parser:
                raise TypeError(f"No parser registered for {expression_type}")

            try:
                return self._parse(parser, raw_tokens, sql)
            except ParseError as e:
                e.errors[0]["into_expression"] = expression_type
                errors.append(e)

        raise ParseError(
            f"Failed to parse '{sql or raw_tokens}' into {expression_types}",
            errors=merge_errors(errors),
        ) from errors[-1]
Parses a list of tokens into a given Expression type. If a collection of Expression types is given instead, this method will try to parse the token list into each one of them, stopping at the first for which the parsing succeeds.
Arguments:
- expression_types: The expression type(s) to try and parse the token list into.
- raw_tokens: The list of tokens.
- sql: The original SQL string, used to produce helpful debug messages.
Returns:
The target Expression.
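For example, a fragment can be forced to parse as a condition, assuming exp.Condition is registered in EXPRESSION_PARSERS as it is in mainline sqlglot; an unregistered type raises TypeError instead:

from sqlglot import exp
from sqlglot.parser import Parser
from sqlglot.tokens import Tokenizer

sql = "x > 1 AND y < 2"
tokens = Tokenizer().tokenize(sql)

condition = Parser().parse_into(exp.Condition, tokens, sql)[0]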
    def check_errors(self) -> None:
        """Logs or raises any found errors, depending on the chosen error level setting."""
        if self.error_level == ErrorLevel.WARN:
            for error in self.errors:
                logger.error(str(error))
        elif self.error_level == ErrorLevel.RAISE and self.errors:
            raise ParseError(
                concat_messages(self.errors, self.max_errors),
                errors=merge_errors(self.errors),
            )
Logs or raises any found errors, depending on the chosen error level setting.
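A behavior sketch, assuming the invalid statement below actually trips the parser: WARN logs each recorded error, while RAISE bundles up to max_errors messages into a single ParseError.

from sqlglot.errors import ErrorLevel, ParseError
from sqlglot.parser import Parser
from sqlglot.tokens import Tokenizer

bad_sql = "SELECT * FROM"  # table name missing
tokens = Tokenizer().tokenize(bad_sql)

Parser(error_level=ErrorLevel.WARN).parse(tokens, bad_sql)  # errors are logged

try:
    Parser(error_level=ErrorLevel.RAISE).parse(tokens, bad_sql)
except ParseError as e:
    print(e)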
    def raise_error(self, message: str, token: t.Optional[Token] = None) -> None:
        """
        Appends an error in the list of recorded errors or raises it, depending on the chosen
        error level setting.
        """
        token = token or self._curr or self._prev or Token.string("")
        start = token.start
        end = token.end + 1
        start_context = self.sql[max(start - self.error_message_context, 0) : start]
        highlight = self.sql[start:end]
        end_context = self.sql[end : end + self.error_message_context]

        error = ParseError.new(
            f"{message}. Line {token.line}, Col: {token.col}.\n"
            f"  {start_context}\033[4m{highlight}\033[0m{end_context}",
            description=message,
            line=token.line,
            col=token.col,
            start_context=start_context,
            highlight=highlight,
            end_context=end_context,
        )

        if self.error_level == ErrorLevel.IMMEDIATE:
            raise error

        self.errors.append(error)
Appends an error in the list of recorded errors or raises it, depending on the chosen error level setting.
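Because the structured fields are forwarded to ParseError.new, callers can inspect them on the raised error; a sketch (the key names follow the call shown above):

from sqlglot.errors import ParseError
from sqlglot.parser import Parser
from sqlglot.tokens import Tokenizer

sql = "SELECT * FROM"
try:
    Parser().parse(Tokenizer().tokenize(sql), sql)  # IMMEDIATE raises on first error
except ParseError as e:
    err = e.errors[0]
    print(err["line"], err["col"], err["highlight"])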
    def expression(
        self, exp_class: t.Type[E], comments: t.Optional[t.List[str]] = None, **kwargs
    ) -> E:
        """
        Creates a new, validated Expression.

        Args:
            exp_class: The expression class to instantiate.
            comments: An optional list of comments to attach to the expression.
            kwargs: The arguments to set for the expression along with their respective values.

        Returns:
            The target expression.
        """
        instance = exp_class(**kwargs)
        instance.add_comments(comments) if comments else self._add_comments(instance)
        return self.validate_expression(instance)
Creates a new, validated Expression.
Arguments:
- exp_class: The expression class to instantiate.
- comments: An optional list of comments to attach to the expression.
- kwargs: The arguments to set for the expression along with their respective values.
Returns:
The target expression.
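This factory is what the _parse_* methods above call to build nodes; a hypothetical subclass illustrates the pattern (not part of sqlglot):

from sqlglot import exp
from sqlglot.parser import Parser

class MyParser(Parser):
    def _parse_example(self):
        # builds an exp.Var, attaches any pending comments, then validates it
        return self.expression(exp.Var, this="EXAMPLE")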
    def validate_expression(self, expression: E, args: t.Optional[t.List] = None) -> E:
        """
        Validates an Expression, making sure that all its mandatory arguments are set.

        Args:
            expression: The expression to validate.
            args: An optional list of items that was used to instantiate the expression, if it's a Func.

        Returns:
            The validated expression.
        """
        if self.error_level != ErrorLevel.IGNORE:
            for error_message in expression.error_messages(args):
                self.raise_error(error_message)

        return expression
Validates an Expression, making sure that all its mandatory arguments are set.
Arguments:
- expression: The expression to validate.
- args: An optional list of items that was used to instantiate the expression, if it's a Func.
Returns:
The validated expression.
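A validation sketch; exp.Not is just an example of a node with a mandatory `this` argument:

from sqlglot import exp
from sqlglot.errors import ErrorLevel, ParseError
from sqlglot.parser import Parser

try:
    Parser().validate_expression(exp.Not())  # required 'this' is missing
except ParseError:
    print("missing mandatory argument reported")

# with ErrorLevel.IGNORE, validation is skipped entirely
node = Parser(error_level=ErrorLevel.IGNORE).validate_expression(exp.Not())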