sqlglot.parser
from __future__ import annotations

import logging
import typing as t
from collections import defaultdict

from sqlglot import exp
from sqlglot.errors import ErrorLevel, ParseError, concat_messages, merge_errors
from sqlglot.helper import apply_index_offset, ensure_list, seq_get
from sqlglot.time import format_time
from sqlglot.tokens import Token, Tokenizer, TokenType
from sqlglot.trie import TrieResult, in_trie, new_trie

if t.TYPE_CHECKING:
    from sqlglot._typing import E, Lit
    from sqlglot.dialects.dialect import Dialect, DialectType

    T = t.TypeVar("T")

logger = logging.getLogger("sqlglot")

OPTIONS_TYPE = t.Dict[str, t.Sequence[t.Union[t.Sequence[str], str]]]


def build_var_map(args: t.List) -> exp.StarMap | exp.VarMap:
    if len(args) == 1 and args[0].is_star:
        return exp.StarMap(this=args[0])

    keys = []
    values = []
    for i in range(0, len(args), 2):
        keys.append(args[i])
        values.append(args[i + 1])

    return exp.VarMap(keys=exp.array(*keys, copy=False), values=exp.array(*values, copy=False))


def build_like(args: t.List) -> exp.Escape | exp.Like:
    like = exp.Like(this=seq_get(args, 1), expression=seq_get(args, 0))
    return exp.Escape(this=like, expression=seq_get(args, 2)) if len(args) > 2 else like


def binary_range_parser(
    expr_type: t.Type[exp.Expression], reverse_args: bool = False
) -> t.Callable[[Parser, t.Optional[exp.Expression]], t.Optional[exp.Expression]]:
    def _parse_binary_range(
        self: Parser, this: t.Optional[exp.Expression]
    ) -> t.Optional[exp.Expression]:
        expression = self._parse_bitwise()
        if reverse_args:
            this, expression = expression, this
        return self._parse_escape(self.expression(expr_type, this=this, expression=expression))

    return _parse_binary_range


def build_logarithm(args: t.List, dialect: Dialect) -> exp.Func:
    # Default argument order is base, expression
    this = seq_get(args, 0)
    expression = seq_get(args, 1)

    if expression:
        if not dialect.LOG_BASE_FIRST:
            this, expression = expression, this
        return exp.Log(this=this, expression=expression)

    return (exp.Ln if dialect.parser_class.LOG_DEFAULTS_TO_LN else exp.Log)(this=this)


def build_hex(args: t.List, dialect: Dialect) -> exp.Hex | exp.LowerHex:
    arg = seq_get(args, 0)
    return exp.LowerHex(this=arg) if dialect.HEX_LOWERCASE else exp.Hex(this=arg)


def build_lower(args: t.List) -> exp.Lower | exp.Hex:
    # LOWER(HEX(..)) can be simplified to LowerHex to simplify its transpilation
    arg = seq_get(args, 0)
    return exp.LowerHex(this=arg.this) if isinstance(arg, exp.Hex) else exp.Lower(this=arg)


def build_upper(args: t.List) -> exp.Upper | exp.Hex:
    # UPPER(HEX(..)) can be simplified to Hex to simplify its transpilation
    arg = seq_get(args, 0)
    return exp.Hex(this=arg.this) if isinstance(arg, exp.Hex) else exp.Upper(this=arg)


def build_extract_json_with_path(expr_type: t.Type[E]) -> t.Callable[[t.List, Dialect], E]:
    def _builder(args: t.List, dialect: Dialect) -> E:
        expression = expr_type(
            this=seq_get(args, 0), expression=dialect.to_json_path(seq_get(args, 1))
        )
        if len(args) > 2 and expr_type is exp.JSONExtract:
            expression.set("expressions", args[2:])

        return expression

    return _builder


def build_mod(args: t.List) -> exp.Mod:
    this = seq_get(args, 0)
    expression = seq_get(args, 1)

    # Wrap the operands if they are binary nodes, e.g. MOD(a + 1, 7) -> (a + 1) % 7
    this = exp.Paren(this=this) if isinstance(this, exp.Binary) else this
    expression = exp.Paren(this=expression) if isinstance(expression, exp.Binary) else expression

    return exp.Mod(this=this, expression=expression)
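
# Illustrative sketch (not part of the module): the builders above are looked up by
# function name at parse time through Parser.FUNCTIONS below, e.g.
#
#     import sqlglot
#     sqlglot.parse_one("MOD(a + 1, 7)").sql()  # '(a + 1) % 7', via build_mod
#     sqlglot.parse_one("LOG(2, x)")            # exp.Log; operand order depends on
#                                               # dialect.LOG_BASE_FIRST (build_logarithm)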


class _Parser(type):
    def __new__(cls, clsname, bases, attrs):
        klass = super().__new__(cls, clsname, bases, attrs)

        klass.SHOW_TRIE = new_trie(key.split(" ") for key in klass.SHOW_PARSERS)
        klass.SET_TRIE = new_trie(key.split(" ") for key in klass.SET_PARSERS)

        return klass


class Parser(metaclass=_Parser):
    """
    Parser consumes a list of tokens produced by the Tokenizer and produces a parsed syntax tree.

    Args:
        error_level: The desired error level.
            Default: ErrorLevel.IMMEDIATE
        error_message_context: The amount of context to capture from a query string when displaying
            the error message (in number of characters).
            Default: 100
        max_errors: Maximum number of error messages to include in a raised ParseError.
            This is only relevant if error_level is ErrorLevel.RAISE.
            Default: 3
    """

    FUNCTIONS: t.Dict[str, t.Callable] = {
        **{name: func.from_arg_list for name, func in exp.FUNCTION_BY_NAME.items()},
        "CONCAT": lambda args, dialect: exp.Concat(
            expressions=args,
            safe=not dialect.STRICT_STRING_CONCAT,
            coalesce=dialect.CONCAT_COALESCE,
        ),
        "CONCAT_WS": lambda args, dialect: exp.ConcatWs(
            expressions=args,
            safe=not dialect.STRICT_STRING_CONCAT,
            coalesce=dialect.CONCAT_COALESCE,
        ),
        "DATE_TO_DATE_STR": lambda args: exp.Cast(
            this=seq_get(args, 0),
            to=exp.DataType(this=exp.DataType.Type.TEXT),
        ),
        "GLOB": lambda args: exp.Glob(this=seq_get(args, 1), expression=seq_get(args, 0)),
        "JSON_EXTRACT": build_extract_json_with_path(exp.JSONExtract),
        "JSON_EXTRACT_SCALAR": build_extract_json_with_path(exp.JSONExtractScalar),
        "JSON_EXTRACT_PATH_TEXT": build_extract_json_with_path(exp.JSONExtractScalar),
        "LIKE": build_like,
        "LOG": build_logarithm,
        "LOG2": lambda args: exp.Log(this=exp.Literal.number(2), expression=seq_get(args, 0)),
        "LOG10": lambda args: exp.Log(this=exp.Literal.number(10), expression=seq_get(args, 0)),
        "MOD": build_mod,
        "TIME_TO_TIME_STR": lambda args: exp.Cast(
            this=seq_get(args, 0),
            to=exp.DataType(this=exp.DataType.Type.TEXT),
        ),
        "TS_OR_DS_TO_DATE_STR": lambda args: exp.Substring(
            this=exp.Cast(
                this=seq_get(args, 0),
                to=exp.DataType(this=exp.DataType.Type.TEXT),
            ),
            start=exp.Literal.number(1),
            length=exp.Literal.number(10),
        ),
        "VAR_MAP": build_var_map,
        "LOWER": build_lower,
        "UPPER": build_upper,
        "HEX": build_hex,
        "TO_HEX": build_hex,
    }

    NO_PAREN_FUNCTIONS = {
        TokenType.CURRENT_DATE: exp.CurrentDate,
        TokenType.CURRENT_DATETIME: exp.CurrentDate,
        TokenType.CURRENT_TIME: exp.CurrentTime,
        TokenType.CURRENT_TIMESTAMP: exp.CurrentTimestamp,
        TokenType.CURRENT_USER: exp.CurrentUser,
    }

    STRUCT_TYPE_TOKENS = {
        TokenType.NESTED,
        TokenType.OBJECT,
        TokenType.STRUCT,
    }

    NESTED_TYPE_TOKENS = {
        TokenType.ARRAY,
        TokenType.LOWCARDINALITY,
        TokenType.MAP,
        TokenType.NULLABLE,
        *STRUCT_TYPE_TOKENS,
    }

    ENUM_TYPE_TOKENS = {
        TokenType.ENUM,
        TokenType.ENUM8,
        TokenType.ENUM16,
    }

    AGGREGATE_TYPE_TOKENS = {
        TokenType.AGGREGATEFUNCTION,
        TokenType.SIMPLEAGGREGATEFUNCTION,
    }
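
    # Note (illustrative): NO_PAREN_FUNCTIONS above lets these keywords parse as
    # function calls even without parentheses, e.g.
    #
    #     import sqlglot
    #     sqlglot.parse_one("SELECT CURRENT_DATE")  # selects an exp.CurrentDate node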

    TYPE_TOKENS = {
        TokenType.BIT,
        TokenType.BOOLEAN,
        TokenType.TINYINT,
        TokenType.UTINYINT,
        TokenType.SMALLINT,
        TokenType.USMALLINT,
        TokenType.INT,
        TokenType.UINT,
        TokenType.BIGINT,
        TokenType.UBIGINT,
        TokenType.INT128,
        TokenType.UINT128,
        TokenType.INT256,
        TokenType.UINT256,
        TokenType.MEDIUMINT,
        TokenType.UMEDIUMINT,
        TokenType.FIXEDSTRING,
        TokenType.FLOAT,
        TokenType.DOUBLE,
        TokenType.CHAR,
        TokenType.NCHAR,
        TokenType.VARCHAR,
        TokenType.NVARCHAR,
        TokenType.BPCHAR,
        TokenType.TEXT,
        TokenType.MEDIUMTEXT,
        TokenType.LONGTEXT,
        TokenType.MEDIUMBLOB,
        TokenType.LONGBLOB,
        TokenType.BINARY,
        TokenType.VARBINARY,
        TokenType.JSON,
        TokenType.JSONB,
        TokenType.INTERVAL,
        TokenType.TINYBLOB,
        TokenType.TINYTEXT,
        TokenType.TIME,
        TokenType.TIMETZ,
        TokenType.TIMESTAMP,
        TokenType.TIMESTAMP_S,
        TokenType.TIMESTAMP_MS,
        TokenType.TIMESTAMP_NS,
        TokenType.TIMESTAMPTZ,
        TokenType.TIMESTAMPLTZ,
        TokenType.TIMESTAMPNTZ,
        TokenType.DATETIME,
        TokenType.DATETIME64,
        TokenType.DATE,
        TokenType.DATE32,
        TokenType.INT4RANGE,
        TokenType.INT4MULTIRANGE,
        TokenType.INT8RANGE,
        TokenType.INT8MULTIRANGE,
        TokenType.NUMRANGE,
        TokenType.NUMMULTIRANGE,
        TokenType.TSRANGE,
        TokenType.TSMULTIRANGE,
        TokenType.TSTZRANGE,
        TokenType.TSTZMULTIRANGE,
        TokenType.DATERANGE,
        TokenType.DATEMULTIRANGE,
        TokenType.DECIMAL,
        TokenType.UDECIMAL,
        TokenType.BIGDECIMAL,
        TokenType.UUID,
        TokenType.GEOGRAPHY,
        TokenType.GEOMETRY,
        TokenType.HLLSKETCH,
        TokenType.HSTORE,
        TokenType.PSEUDO_TYPE,
        TokenType.SUPER,
        TokenType.SERIAL,
        TokenType.SMALLSERIAL,
        TokenType.BIGSERIAL,
        TokenType.XML,
        TokenType.YEAR,
        TokenType.UNIQUEIDENTIFIER,
        TokenType.USERDEFINED,
        TokenType.MONEY,
        TokenType.SMALLMONEY,
        TokenType.ROWVERSION,
        TokenType.IMAGE,
        TokenType.VARIANT,
        TokenType.OBJECT,
        TokenType.OBJECT_IDENTIFIER,
        TokenType.INET,
        TokenType.IPADDRESS,
        TokenType.IPPREFIX,
        TokenType.IPV4,
        TokenType.IPV6,
        TokenType.UNKNOWN,
        TokenType.NULL,
        TokenType.NAME,
        TokenType.TDIGEST,
        *ENUM_TYPE_TOKENS,
        *NESTED_TYPE_TOKENS,
        *AGGREGATE_TYPE_TOKENS,
    }

    SIGNED_TO_UNSIGNED_TYPE_TOKEN = {
        TokenType.BIGINT: TokenType.UBIGINT,
        TokenType.INT: TokenType.UINT,
        TokenType.MEDIUMINT: TokenType.UMEDIUMINT,
        TokenType.SMALLINT: TokenType.USMALLINT,
        TokenType.TINYINT: TokenType.UTINYINT,
        TokenType.DECIMAL: TokenType.UDECIMAL,
    }

    SUBQUERY_PREDICATES = {
        TokenType.ANY: exp.Any,
        TokenType.ALL: exp.All,
        TokenType.EXISTS: exp.Exists,
        TokenType.SOME: exp.Any,
    }

    RESERVED_TOKENS = {
        *Tokenizer.SINGLE_TOKENS.values(),
        TokenType.SELECT,
    } - {TokenType.IDENTIFIER}

    DB_CREATABLES = {
        TokenType.DATABASE,
        TokenType.DICTIONARY,
        TokenType.MODEL,
        TokenType.SCHEMA,
        TokenType.SEQUENCE,
        TokenType.STORAGE_INTEGRATION,
        TokenType.TABLE,
        TokenType.TAG,
        TokenType.VIEW,
        TokenType.WAREHOUSE,
        TokenType.STREAMLIT,
    }

    CREATABLES = {
        TokenType.COLUMN,
        TokenType.CONSTRAINT,
        TokenType.FOREIGN_KEY,
        TokenType.FUNCTION,
        TokenType.INDEX,
        TokenType.PROCEDURE,
        *DB_CREATABLES,
    }

    # Tokens that can represent identifiers
    ID_VAR_TOKENS = {
        TokenType.VAR,
        TokenType.ANTI,
        TokenType.APPLY,
        TokenType.ASC,
        TokenType.ASOF,
        TokenType.AUTO_INCREMENT,
        TokenType.BEGIN,
        TokenType.BPCHAR,
        TokenType.CACHE,
        TokenType.CASE,
        TokenType.COLLATE,
        TokenType.COMMAND,
        TokenType.COMMENT,
        TokenType.COMMIT,
        TokenType.CONSTRAINT,
        TokenType.COPY,
        TokenType.DEFAULT,
        TokenType.DELETE,
        TokenType.DESC,
        TokenType.DESCRIBE,
        TokenType.DICTIONARY,
        TokenType.DIV,
        TokenType.END,
        TokenType.EXECUTE,
        TokenType.ESCAPE,
        TokenType.FALSE,
        TokenType.FIRST,
        TokenType.FILTER,
        TokenType.FINAL,
        TokenType.FORMAT,
        TokenType.FULL,
        TokenType.IDENTIFIER,
        TokenType.IS,
        TokenType.ISNULL,
        TokenType.INTERVAL,
        TokenType.KEEP,
        TokenType.KILL,
        TokenType.LEFT,
        TokenType.LOAD,
        TokenType.MERGE,
        TokenType.NATURAL,
        TokenType.NEXT,
        TokenType.OFFSET,
        TokenType.OPERATOR,
        TokenType.ORDINALITY,
        TokenType.OVERLAPS,
        TokenType.OVERWRITE,
        TokenType.PARTITION,
        TokenType.PERCENT,
        TokenType.PIVOT,
        TokenType.PRAGMA,
        TokenType.RANGE,
        TokenType.RECURSIVE,
        TokenType.REFERENCES,
        TokenType.REFRESH,
        TokenType.REPLACE,
        TokenType.RIGHT,
        TokenType.ROLLUP,
        TokenType.ROW,
        TokenType.ROWS,
        TokenType.SEMI,
        TokenType.SET,
        TokenType.SETTINGS,
        TokenType.SHOW,
        TokenType.TEMPORARY,
        TokenType.TOP,
        TokenType.TRUE,
        TokenType.TRUNCATE,
        TokenType.UNIQUE,
        TokenType.UNNEST,
        TokenType.UNPIVOT,
        TokenType.UPDATE,
        TokenType.USE,
        TokenType.VOLATILE,
        TokenType.WINDOW,
        *CREATABLES,
        *SUBQUERY_PREDICATES,
        *TYPE_TOKENS,
        *NO_PAREN_FUNCTIONS,
    }

    INTERVAL_VARS = ID_VAR_TOKENS - {TokenType.END}

    TABLE_ALIAS_TOKENS = ID_VAR_TOKENS - {
        TokenType.ANTI,
        TokenType.APPLY,
        TokenType.ASOF,
        TokenType.FULL,
        TokenType.LEFT,
        TokenType.LOCK,
        TokenType.NATURAL,
        TokenType.OFFSET,
        TokenType.RIGHT,
        TokenType.SEMI,
        TokenType.WINDOW,
    }

    ALIAS_TOKENS = ID_VAR_TOKENS

    COMMENT_TABLE_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - {TokenType.IS}

    UPDATE_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - {TokenType.SET}

    TRIM_TYPES = {"LEADING", "TRAILING", "BOTH"}

    FUNC_TOKENS = {
        TokenType.COLLATE,
        TokenType.COMMAND,
        TokenType.CURRENT_DATE,
        TokenType.CURRENT_DATETIME,
        TokenType.CURRENT_TIMESTAMP,
        TokenType.CURRENT_TIME,
        TokenType.CURRENT_USER,
        TokenType.FILTER,
        TokenType.FIRST,
        TokenType.FORMAT,
        TokenType.GLOB,
        TokenType.IDENTIFIER,
        TokenType.INDEX,
        TokenType.ISNULL,
        TokenType.ILIKE,
        TokenType.INSERT,
        TokenType.LIKE,
        TokenType.MERGE,
        TokenType.OFFSET,
        TokenType.PRIMARY_KEY,
        TokenType.RANGE,
        TokenType.REPLACE,
        TokenType.RLIKE,
        TokenType.ROW,
        TokenType.UNNEST,
        TokenType.VAR,
        TokenType.LEFT,
        TokenType.RIGHT,
        TokenType.SEQUENCE,
        TokenType.DATE,
        TokenType.DATETIME,
        TokenType.TABLE,
        TokenType.TIMESTAMP,
        TokenType.TIMESTAMPTZ,
        TokenType.TRUNCATE,
        TokenType.WINDOW,
        TokenType.XOR,
        *TYPE_TOKENS,
        *SUBQUERY_PREDICATES,
    }

    CONJUNCTION: t.Dict[TokenType, t.Type[exp.Expression]] = {
        TokenType.AND: exp.And,
    }

    ASSIGNMENT: t.Dict[TokenType, t.Type[exp.Expression]] = {
        TokenType.COLON_EQ: exp.PropertyEQ,
    }

    DISJUNCTION: t.Dict[TokenType, t.Type[exp.Expression]] = {
        TokenType.OR: exp.Or,
    }

    EQUALITY = {
        TokenType.EQ: exp.EQ,
        TokenType.NEQ: exp.NEQ,
        TokenType.NULLSAFE_EQ: exp.NullSafeEQ,
    }

    COMPARISON = {
        TokenType.GT: exp.GT,
        TokenType.GTE: exp.GTE,
        TokenType.LT: exp.LT,
        TokenType.LTE: exp.LTE,
    }

    BITWISE = {
        TokenType.AMP: exp.BitwiseAnd,
        TokenType.CARET: exp.BitwiseXor,
        TokenType.PIPE: exp.BitwiseOr,
    }

    TERM = {
        TokenType.DASH: exp.Sub,
        TokenType.PLUS: exp.Add,
        TokenType.MOD: exp.Mod,
        TokenType.COLLATE: exp.Collate,
    }

    FACTOR = {
        TokenType.DIV: exp.IntDiv,
        TokenType.LR_ARROW: exp.Distance,
        TokenType.SLASH: exp.Div,
        TokenType.STAR: exp.Mul,
    }

    EXPONENT: t.Dict[TokenType, t.Type[exp.Expression]] = {}

    TIMES = {
        TokenType.TIME,
        TokenType.TIMETZ,
    }

    TIMESTAMPS = {
        TokenType.TIMESTAMP,
        TokenType.TIMESTAMPTZ,
        TokenType.TIMESTAMPLTZ,
        *TIMES,
    }

    SET_OPERATIONS = {
        TokenType.UNION,
        TokenType.INTERSECT,
        TokenType.EXCEPT,
    }

    JOIN_METHODS = {
        TokenType.ASOF,
        TokenType.NATURAL,
        TokenType.POSITIONAL,
    }

    JOIN_SIDES = {
        TokenType.LEFT,
        TokenType.RIGHT,
        TokenType.FULL,
    }

    JOIN_KINDS = {
        TokenType.INNER,
        TokenType.OUTER,
        TokenType.CROSS,
        TokenType.SEMI,
        TokenType.ANTI,
    }

    JOIN_HINTS: t.Set[str] = set()

    LAMBDAS = {
        TokenType.ARROW: lambda self, expressions: self.expression(
            exp.Lambda,
            this=self._replace_lambda(
                self._parse_assignment(),
                expressions,
            ),
            expressions=expressions,
        ),
        TokenType.FARROW: lambda self, expressions: self.expression(
            exp.Kwarg,
            this=exp.var(expressions[0].name),
            expression=self._parse_assignment(),
        ),
    }

    COLUMN_OPERATORS = {
        TokenType.DOT: None,
        TokenType.DCOLON: lambda self, this, to: self.expression(
            exp.Cast if self.STRICT_CAST else exp.TryCast,
            this=this,
            to=to,
        ),
        TokenType.ARROW: lambda self, this, path: self.expression(
            exp.JSONExtract,
            this=this,
            expression=self.dialect.to_json_path(path),
            only_json_types=self.JSON_ARROWS_REQUIRE_JSON_TYPE,
        ),
        TokenType.DARROW: lambda self, this, path: self.expression(
            exp.JSONExtractScalar,
            this=this,
            expression=self.dialect.to_json_path(path),
            only_json_types=self.JSON_ARROWS_REQUIRE_JSON_TYPE,
        ),
        TokenType.HASH_ARROW: lambda self, this, path: self.expression(
            exp.JSONBExtract,
            this=this,
            expression=path,
        ),
        TokenType.DHASH_ARROW: lambda self, this, path: self.expression(
            exp.JSONBExtractScalar,
            this=this,
            expression=path,
        ),
        TokenType.PLACEHOLDER: lambda self, this, key: self.expression(
            exp.JSONBContains,
            this=this,
            expression=key,
        ),
    }
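
    # Sketch of the arrow operators above (Postgres-style JSON extraction):
    #
    #     import sqlglot
    #     sqlglot.parse_one("a -> 'x'", read="postgres")   # exp.JSONExtract
    #     sqlglot.parse_one("a ->> 'x'", read="postgres")  # exp.JSONExtractScalar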

    EXPRESSION_PARSERS = {
        exp.Cluster: lambda self: self._parse_sort(exp.Cluster, TokenType.CLUSTER_BY),
        exp.Column: lambda self: self._parse_column(),
        exp.Condition: lambda self: self._parse_assignment(),
        exp.DataType: lambda self: self._parse_types(allow_identifiers=False),
        exp.Expression: lambda self: self._parse_expression(),
        exp.From: lambda self: self._parse_from(joins=True),
        exp.Group: lambda self: self._parse_group(),
        exp.Having: lambda self: self._parse_having(),
        exp.Identifier: lambda self: self._parse_id_var(),
        exp.Join: lambda self: self._parse_join(),
        exp.Lambda: lambda self: self._parse_lambda(),
        exp.Lateral: lambda self: self._parse_lateral(),
        exp.Limit: lambda self: self._parse_limit(),
        exp.Offset: lambda self: self._parse_offset(),
        exp.Order: lambda self: self._parse_order(),
        exp.Ordered: lambda self: self._parse_ordered(),
        exp.Properties: lambda self: self._parse_properties(),
        exp.Qualify: lambda self: self._parse_qualify(),
        exp.Returning: lambda self: self._parse_returning(),
        exp.Sort: lambda self: self._parse_sort(exp.Sort, TokenType.SORT_BY),
        exp.Table: lambda self: self._parse_table_parts(),
        exp.TableAlias: lambda self: self._parse_table_alias(),
        exp.When: lambda self: seq_get(self._parse_when_matched(), 0),
        exp.Where: lambda self: self._parse_where(),
        exp.Window: lambda self: self._parse_named_window(),
        exp.With: lambda self: self._parse_with(),
        "JOIN_TYPE": lambda self: self._parse_join_parts(),
    }

    STATEMENT_PARSERS = {
        TokenType.ALTER: lambda self: self._parse_alter(),
        TokenType.BEGIN: lambda self: self._parse_transaction(),
        TokenType.CACHE: lambda self: self._parse_cache(),
        TokenType.COMMENT: lambda self: self._parse_comment(),
        TokenType.COMMIT: lambda self: self._parse_commit_or_rollback(),
        TokenType.COPY: lambda self: self._parse_copy(),
        TokenType.CREATE: lambda self: self._parse_create(),
        TokenType.DELETE: lambda self: self._parse_delete(),
        TokenType.DESC: lambda self: self._parse_describe(),
        TokenType.DESCRIBE: lambda self: self._parse_describe(),
        TokenType.DROP: lambda self: self._parse_drop(),
        TokenType.INSERT: lambda self: self._parse_insert(),
        TokenType.KILL: lambda self: self._parse_kill(),
        TokenType.LOAD: lambda self: self._parse_load(),
        TokenType.MERGE: lambda self: self._parse_merge(),
        TokenType.PIVOT: lambda self: self._parse_simplified_pivot(),
        TokenType.PRAGMA: lambda self: self.expression(exp.Pragma, this=self._parse_expression()),
        TokenType.REFRESH: lambda self: self._parse_refresh(),
        TokenType.ROLLBACK: lambda self: self._parse_commit_or_rollback(),
        TokenType.SET: lambda self: self._parse_set(),
        TokenType.TRUNCATE: lambda self: self._parse_truncate_table(),
        TokenType.UNCACHE: lambda self: self._parse_uncache(),
        TokenType.UPDATE: lambda self: self._parse_update(),
        TokenType.USE: lambda self: self.expression(
            exp.Use,
            kind=self._parse_var_from_options(self.USABLES, raise_unmatched=False),
            this=self._parse_table(schema=False),
        ),
        TokenType.SEMICOLON: lambda self: self.expression(exp.Semicolon),
    }
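
    # _parse_statement (defined further below) dispatches through STATEMENT_PARSERS
    # on the statement's leading token; a sketch:
    #
    #     import sqlglot
    #     sqlglot.parse_one("DROP TABLE t")        # TokenType.DROP   -> _parse_drop
    #     sqlglot.parse_one("UPDATE t SET a = 1")  # TokenType.UPDATE -> _parse_update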

    UNARY_PARSERS = {
        TokenType.PLUS: lambda self: self._parse_unary(),  # Unary + is handled as a no-op
        TokenType.NOT: lambda self: self.expression(exp.Not, this=self._parse_equality()),
        TokenType.TILDA: lambda self: self.expression(exp.BitwiseNot, this=self._parse_unary()),
        TokenType.DASH: lambda self: self.expression(exp.Neg, this=self._parse_unary()),
        TokenType.PIPE_SLASH: lambda self: self.expression(exp.Sqrt, this=self._parse_unary()),
        TokenType.DPIPE_SLASH: lambda self: self.expression(exp.Cbrt, this=self._parse_unary()),
    }

    STRING_PARSERS = {
        TokenType.HEREDOC_STRING: lambda self, token: self.expression(
            exp.RawString, this=token.text
        ),
        TokenType.NATIONAL_STRING: lambda self, token: self.expression(
            exp.National, this=token.text
        ),
        TokenType.RAW_STRING: lambda self, token: self.expression(exp.RawString, this=token.text),
        TokenType.STRING: lambda self, token: self.expression(
            exp.Literal, this=token.text, is_string=True
        ),
        TokenType.UNICODE_STRING: lambda self, token: self.expression(
            exp.UnicodeString,
            this=token.text,
            escape=self._match_text_seq("UESCAPE") and self._parse_string(),
        ),
    }

    NUMERIC_PARSERS = {
        TokenType.BIT_STRING: lambda self, token: self.expression(exp.BitString, this=token.text),
        TokenType.BYTE_STRING: lambda self, token: self.expression(exp.ByteString, this=token.text),
        TokenType.HEX_STRING: lambda self, token: self.expression(exp.HexString, this=token.text),
        TokenType.NUMBER: lambda self, token: self.expression(
            exp.Literal, this=token.text, is_string=False
        ),
    }

    PRIMARY_PARSERS = {
        **STRING_PARSERS,
        **NUMERIC_PARSERS,
        TokenType.INTRODUCER: lambda self, token: self._parse_introducer(token),
        TokenType.NULL: lambda self, _: self.expression(exp.Null),
        TokenType.TRUE: lambda self, _: self.expression(exp.Boolean, this=True),
        TokenType.FALSE: lambda self, _: self.expression(exp.Boolean, this=False),
        TokenType.SESSION_PARAMETER: lambda self, _: self._parse_session_parameter(),
        TokenType.STAR: lambda self, _: self.expression(
            exp.Star,
            **{
                "except": self._parse_star_op("EXCEPT", "EXCLUDE"),
                "replace": self._parse_star_op("REPLACE"),
                "rename": self._parse_star_op("RENAME"),
            },
        ),
    }

    PLACEHOLDER_PARSERS = {
        TokenType.PLACEHOLDER: lambda self: self.expression(exp.Placeholder),
        TokenType.PARAMETER: lambda self: self._parse_parameter(),
        TokenType.COLON: lambda self: (
            self.expression(exp.Placeholder, this=self._prev.text)
            if self._match(TokenType.NUMBER) or self._match_set(self.ID_VAR_TOKENS)
            else None
        ),
    }

    RANGE_PARSERS = {
        TokenType.BETWEEN: lambda self, this: self._parse_between(this),
        TokenType.GLOB: binary_range_parser(exp.Glob),
        TokenType.ILIKE: binary_range_parser(exp.ILike),
        TokenType.IN: lambda self, this: self._parse_in(this),
        TokenType.IRLIKE: binary_range_parser(exp.RegexpILike),
        TokenType.IS: lambda self, this: self._parse_is(this),
        TokenType.LIKE: binary_range_parser(exp.Like),
        TokenType.OVERLAPS: binary_range_parser(exp.Overlaps),
        TokenType.RLIKE: binary_range_parser(exp.RegexpLike),
        TokenType.SIMILAR_TO: binary_range_parser(exp.SimilarTo),
        TokenType.FOR: lambda self, this: self._parse_comprehension(this),
    }
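
    # Sketch: RANGE_PARSERS handles postfix predicates that follow an expression, e.g.
    #
    #     import sqlglot
    #     sqlglot.parse_one("x BETWEEN 1 AND 2")  # exp.Between via _parse_between
    #     sqlglot.parse_one("x LIKE 'a%'")        # exp.Like via binary_range_parser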

    PROPERTY_PARSERS: t.Dict[str, t.Callable] = {
        "ALLOWED_VALUES": lambda self: self.expression(
            exp.AllowedValuesProperty, expressions=self._parse_csv(self._parse_primary)
        ),
        "ALGORITHM": lambda self: self._parse_property_assignment(exp.AlgorithmProperty),
        "AUTO": lambda self: self._parse_auto_property(),
        "AUTO_INCREMENT": lambda self: self._parse_property_assignment(exp.AutoIncrementProperty),
        "BACKUP": lambda self: self.expression(
            exp.BackupProperty, this=self._parse_var(any_token=True)
        ),
        "BLOCKCOMPRESSION": lambda self: self._parse_blockcompression(),
        "CHARSET": lambda self, **kwargs: self._parse_character_set(**kwargs),
        "CHARACTER SET": lambda self, **kwargs: self._parse_character_set(**kwargs),
        "CHECKSUM": lambda self: self._parse_checksum(),
        "CLUSTER BY": lambda self: self._parse_cluster(),
        "CLUSTERED": lambda self: self._parse_clustered_by(),
        "COLLATE": lambda self, **kwargs: self._parse_property_assignment(
            exp.CollateProperty, **kwargs
        ),
        "COMMENT": lambda self: self._parse_property_assignment(exp.SchemaCommentProperty),
        "CONTAINS": lambda self: self._parse_contains_property(),
        "COPY": lambda self: self._parse_copy_property(),
        "DATABLOCKSIZE": lambda self, **kwargs: self._parse_datablocksize(**kwargs),
        "DATA_DELETION": lambda self: self._parse_data_deletion_property(),
        "DEFINER": lambda self: self._parse_definer(),
        "DETERMINISTIC": lambda self: self.expression(
            exp.StabilityProperty, this=exp.Literal.string("IMMUTABLE")
        ),
        "DISTKEY": lambda self: self._parse_distkey(),
        "DISTSTYLE": lambda self: self._parse_property_assignment(exp.DistStyleProperty),
        "ENGINE": lambda self: self._parse_property_assignment(exp.EngineProperty),
        "EXECUTE": lambda self: self._parse_property_assignment(exp.ExecuteAsProperty),
        "EXTERNAL": lambda self: self.expression(exp.ExternalProperty),
        "FALLBACK": lambda self, **kwargs: self._parse_fallback(**kwargs),
        "FORMAT": lambda self: self._parse_property_assignment(exp.FileFormatProperty),
        "FREESPACE": lambda self: self._parse_freespace(),
        "GLOBAL": lambda self: self.expression(exp.GlobalProperty),
        "HEAP": lambda self: self.expression(exp.HeapProperty),
        "ICEBERG": lambda self: self.expression(exp.IcebergProperty),
        "IMMUTABLE": lambda self: self.expression(
            exp.StabilityProperty, this=exp.Literal.string("IMMUTABLE")
        ),
        "INHERITS": lambda self: self.expression(
            exp.InheritsProperty, expressions=self._parse_wrapped_csv(self._parse_table)
        ),
        "INPUT": lambda self: self.expression(exp.InputModelProperty, this=self._parse_schema()),
        "JOURNAL": lambda self, **kwargs: self._parse_journal(**kwargs),
        "LANGUAGE": lambda self: self._parse_property_assignment(exp.LanguageProperty),
        "LAYOUT": lambda self: self._parse_dict_property(this="LAYOUT"),
        "LIFETIME": lambda self: self._parse_dict_range(this="LIFETIME"),
        "LIKE": lambda self: self._parse_create_like(),
        "LOCATION": lambda self: self._parse_property_assignment(exp.LocationProperty),
        "LOCK": lambda self: self._parse_locking(),
        "LOCKING": lambda self: self._parse_locking(),
        "LOG": lambda self, **kwargs: self._parse_log(**kwargs),
        "MATERIALIZED": lambda self: self.expression(exp.MaterializedProperty),
        "MERGEBLOCKRATIO": lambda self, **kwargs: self._parse_mergeblockratio(**kwargs),
        "MODIFIES": lambda self: self._parse_modifies_property(),
        "MULTISET": lambda self: self.expression(exp.SetProperty, multi=True),
        "NO": lambda self: self._parse_no_property(),
        "ON": lambda self: self._parse_on_property(),
        "ORDER BY": lambda self: self._parse_order(skip_order_token=True),
        "OUTPUT": lambda self: self.expression(exp.OutputModelProperty, this=self._parse_schema()),
        "PARTITION": lambda self: self._parse_partitioned_of(),
        "PARTITION BY": lambda self: self._parse_partitioned_by(),
        "PARTITIONED BY": lambda self: self._parse_partitioned_by(),
        "PARTITIONED_BY": lambda self: self._parse_partitioned_by(),
        "PRIMARY KEY": lambda self: self._parse_primary_key(in_props=True),
        "RANGE": lambda self: self._parse_dict_range(this="RANGE"),
        "READS": lambda self: self._parse_reads_property(),
        "REMOTE": lambda self: self._parse_remote_with_connection(),
        "RETURNS": lambda self: self._parse_returns(),
        "STRICT": lambda self: self.expression(exp.StrictProperty),
        "ROW": lambda self: self._parse_row(),
        "ROW_FORMAT": lambda self: self._parse_property_assignment(exp.RowFormatProperty),
        "SAMPLE": lambda self: self.expression(
            exp.SampleProperty, this=self._match_text_seq("BY") and self._parse_bitwise()
        ),
        "SET": lambda self: self.expression(exp.SetProperty, multi=False),
        "SETTINGS": lambda self: self.expression(
            exp.SettingsProperty, expressions=self._parse_csv(self._parse_set_item)
        ),
        "SHARING": lambda self: self._parse_property_assignment(exp.SharingProperty),
        "SORTKEY": lambda self: self._parse_sortkey(),
        "SOURCE": lambda self: self._parse_dict_property(this="SOURCE"),
        "STABLE": lambda self: self.expression(
            exp.StabilityProperty, this=exp.Literal.string("STABLE")
        ),
        "STORED": lambda self: self._parse_stored(),
        "SYSTEM_VERSIONING": lambda self: self._parse_system_versioning_property(),
        "TBLPROPERTIES": lambda self: self._parse_wrapped_properties(),
        "TEMP": lambda self: self.expression(exp.TemporaryProperty),
        "TEMPORARY": lambda self: self.expression(exp.TemporaryProperty),
        "TO": lambda self: self._parse_to_table(),
        "TRANSIENT": lambda self: self.expression(exp.TransientProperty),
        "TRANSFORM": lambda self: self.expression(
            exp.TransformModelProperty, expressions=self._parse_wrapped_csv(self._parse_expression)
        ),
        "TTL": lambda self: self._parse_ttl(),
        "USING": lambda self: self._parse_property_assignment(exp.FileFormatProperty),
        "UNLOGGED": lambda self: self.expression(exp.UnloggedProperty),
        "VOLATILE": lambda self: self._parse_volatile_property(),
        "WITH": lambda self: self._parse_with_property(),
    }

    CONSTRAINT_PARSERS = {
        "AUTOINCREMENT": lambda self: self._parse_auto_increment(),
        "AUTO_INCREMENT": lambda self: self._parse_auto_increment(),
        "CASESPECIFIC": lambda self: self.expression(exp.CaseSpecificColumnConstraint, not_=False),
        "CHARACTER SET": lambda self: self.expression(
            exp.CharacterSetColumnConstraint, this=self._parse_var_or_string()
        ),
        "CHECK": lambda self: self.expression(
            exp.CheckColumnConstraint,
            this=self._parse_wrapped(self._parse_assignment),
            enforced=self._match_text_seq("ENFORCED"),
        ),
        "COLLATE": lambda self: self.expression(
            exp.CollateColumnConstraint, this=self._parse_var(any_token=True)
        ),
        "COMMENT": lambda self: self.expression(
            exp.CommentColumnConstraint, this=self._parse_string()
        ),
        "COMPRESS": lambda self: self._parse_compress(),
        "CLUSTERED": lambda self: self.expression(
            exp.ClusteredColumnConstraint, this=self._parse_wrapped_csv(self._parse_ordered)
        ),
        "NONCLUSTERED": lambda self: self.expression(
            exp.NonClusteredColumnConstraint, this=self._parse_wrapped_csv(self._parse_ordered)
        ),
        "DEFAULT": lambda self: self.expression(
            exp.DefaultColumnConstraint, this=self._parse_bitwise()
        ),
        "ENCODE": lambda self: self.expression(exp.EncodeColumnConstraint, this=self._parse_var()),
        "EPHEMERAL": lambda self: self.expression(
            exp.EphemeralColumnConstraint, this=self._parse_bitwise()
        ),
        "EXCLUDE": lambda self: self.expression(
            exp.ExcludeColumnConstraint, this=self._parse_index_params()
        ),
        "FOREIGN KEY": lambda self: self._parse_foreign_key(),
        "FORMAT": lambda self: self.expression(
            exp.DateFormatColumnConstraint, this=self._parse_var_or_string()
        ),
        "GENERATED": lambda self: self._parse_generated_as_identity(),
        "IDENTITY": lambda self: self._parse_auto_increment(),
        "INLINE": lambda self: self._parse_inline(),
        "LIKE": lambda self: self._parse_create_like(),
        "NOT": lambda self: self._parse_not_constraint(),
        "NULL": lambda self: self.expression(exp.NotNullColumnConstraint, allow_null=True),
        "ON": lambda self: (
            self._match(TokenType.UPDATE)
            and self.expression(exp.OnUpdateColumnConstraint, this=self._parse_function())
        )
        or self.expression(exp.OnProperty, this=self._parse_id_var()),
        "PATH": lambda self: self.expression(exp.PathColumnConstraint, this=self._parse_string()),
        "PERIOD": lambda self: self._parse_period_for_system_time(),
        "PRIMARY KEY": lambda self: self._parse_primary_key(),
        "REFERENCES": lambda self: self._parse_references(match=False),
        "TITLE": lambda self: self.expression(
            exp.TitleColumnConstraint, this=self._parse_var_or_string()
        ),
        "TTL": lambda self: self.expression(exp.MergeTreeTTL, expressions=[self._parse_bitwise()]),
        "UNIQUE": lambda self: self._parse_unique(),
        "UPPERCASE": lambda self: self.expression(exp.UppercaseColumnConstraint),
        "WITH": lambda self: self.expression(
            exp.Properties, expressions=self._parse_wrapped_properties()
        ),
    }

    ALTER_PARSERS = {
        "ADD": lambda self: self._parse_alter_table_add(),
        "ALTER": lambda self: self._parse_alter_table_alter(),
        "CLUSTER BY": lambda self: self._parse_cluster(wrapped=True),
        "DELETE": lambda self: self.expression(exp.Delete, where=self._parse_where()),
        "DROP": lambda self: self._parse_alter_table_drop(),
        "RENAME": lambda self: self._parse_alter_table_rename(),
        "SET": lambda self: self._parse_alter_table_set(),
    }

    ALTER_ALTER_PARSERS = {
        "DISTKEY": lambda self: self._parse_alter_diststyle(),
        "DISTSTYLE": lambda self: self._parse_alter_diststyle(),
        "SORTKEY": lambda self: self._parse_alter_sortkey(),
        "COMPOUND": lambda self: self._parse_alter_sortkey(compound=True),
    }

    SCHEMA_UNNAMED_CONSTRAINTS = {
        "CHECK",
        "EXCLUDE",
        "FOREIGN KEY",
        "LIKE",
        "PERIOD",
        "PRIMARY KEY",
        "UNIQUE",
    }

    NO_PAREN_FUNCTION_PARSERS = {
        "ANY": lambda self: self.expression(exp.Any, this=self._parse_bitwise()),
        "CASE": lambda self: self._parse_case(),
        "IF": lambda self: self._parse_if(),
        "NEXT": lambda self: self._parse_next_value_for(),
    }

    INVALID_FUNC_NAME_TOKENS = {
        TokenType.IDENTIFIER,
        TokenType.STRING,
    }

    FUNCTIONS_WITH_ALIASED_ARGS = {"STRUCT"}

    KEY_VALUE_DEFINITIONS = (exp.Alias, exp.EQ, exp.PropertyEQ, exp.Slice)

    FUNCTION_PARSERS = {
        "CAST": lambda self: self._parse_cast(self.STRICT_CAST),
        "CONVERT": lambda self: self._parse_convert(self.STRICT_CAST),
        "DECODE": lambda self: self._parse_decode(),
        "EXTRACT": lambda self: self._parse_extract(),
        "GAP_FILL": lambda self: self._parse_gap_fill(),
        "JSON_OBJECT": lambda self: self._parse_json_object(),
        "JSON_OBJECTAGG": lambda self: self._parse_json_object(agg=True),
        "JSON_TABLE": lambda self: self._parse_json_table(),
        "MATCH": lambda self: self._parse_match_against(),
        "OPENJSON": lambda self: self._parse_open_json(),
        "POSITION": lambda self: self._parse_position(),
        "PREDICT": lambda self: self._parse_predict(),
        "SAFE_CAST": lambda self: self._parse_cast(False, safe=True),
        "STRING_AGG": lambda self: self._parse_string_agg(),
        "SUBSTRING": lambda self: self._parse_substring(),
        "TRIM": lambda self: self._parse_trim(),
        "TRY_CAST": lambda self: self._parse_cast(False, safe=True),
        "TRY_CONVERT": lambda self: self._parse_convert(False, safe=True),
    }
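
    # Sketch: FUNCTION_PARSERS covers functions whose argument syntax isn't a plain
    # comma-separated list, e.g.
    #
    #     import sqlglot
    #     sqlglot.parse_one("CAST(x AS INT)")        # exp.Cast via _parse_cast
    #     sqlglot.parse_one("EXTRACT(YEAR FROM d)")  # exp.Extract via _parse_extract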

    QUERY_MODIFIER_PARSERS = {
        TokenType.MATCH_RECOGNIZE: lambda self: ("match", self._parse_match_recognize()),
        TokenType.PREWHERE: lambda self: ("prewhere", self._parse_prewhere()),
        TokenType.WHERE: lambda self: ("where", self._parse_where()),
        TokenType.GROUP_BY: lambda self: ("group", self._parse_group()),
        TokenType.HAVING: lambda self: ("having", self._parse_having()),
        TokenType.QUALIFY: lambda self: ("qualify", self._parse_qualify()),
        TokenType.WINDOW: lambda self: ("windows", self._parse_window_clause()),
        TokenType.ORDER_BY: lambda self: ("order", self._parse_order()),
        TokenType.LIMIT: lambda self: ("limit", self._parse_limit()),
        TokenType.FETCH: lambda self: ("limit", self._parse_limit()),
        TokenType.OFFSET: lambda self: ("offset", self._parse_offset()),
        TokenType.FOR: lambda self: ("locks", self._parse_locks()),
        TokenType.LOCK: lambda self: ("locks", self._parse_locks()),
        TokenType.TABLE_SAMPLE: lambda self: ("sample", self._parse_table_sample(as_modifier=True)),
        TokenType.USING: lambda self: ("sample", self._parse_table_sample(as_modifier=True)),
        TokenType.CLUSTER_BY: lambda self: (
            "cluster",
            self._parse_sort(exp.Cluster, TokenType.CLUSTER_BY),
        ),
        TokenType.DISTRIBUTE_BY: lambda self: (
            "distribute",
            self._parse_sort(exp.Distribute, TokenType.DISTRIBUTE_BY),
        ),
        TokenType.SORT_BY: lambda self: ("sort", self._parse_sort(exp.Sort, TokenType.SORT_BY)),
        TokenType.CONNECT_BY: lambda self: ("connect", self._parse_connect(skip_start_token=True)),
        TokenType.START_WITH: lambda self: ("connect", self._parse_connect()),
    }

    SET_PARSERS = {
        "GLOBAL": lambda self: self._parse_set_item_assignment("GLOBAL"),
        "LOCAL": lambda self: self._parse_set_item_assignment("LOCAL"),
        "SESSION": lambda self: self._parse_set_item_assignment("SESSION"),
        "TRANSACTION": lambda self: self._parse_set_transaction(),
    }

    SHOW_PARSERS: t.Dict[str, t.Callable] = {}

    TYPE_LITERAL_PARSERS = {
        exp.DataType.Type.JSON: lambda self, this, _: self.expression(exp.ParseJSON, this=this),
    }

    TYPE_CONVERTER: t.Dict[exp.DataType.Type, t.Callable[[exp.DataType], exp.DataType]] = {}

    DDL_SELECT_TOKENS = {TokenType.SELECT, TokenType.WITH, TokenType.L_PAREN}

    PRE_VOLATILE_TOKENS = {TokenType.CREATE, TokenType.REPLACE, TokenType.UNIQUE}

    TRANSACTION_KIND = {"DEFERRED", "IMMEDIATE", "EXCLUSIVE"}
    TRANSACTION_CHARACTERISTICS: OPTIONS_TYPE = {
        "ISOLATION": (
            ("LEVEL", "REPEATABLE", "READ"),
            ("LEVEL", "READ", "COMMITTED"),
            ("LEVEL", "READ", "UNCOMMITTED"),
            ("LEVEL", "SERIALIZABLE"),
        ),
        "READ": ("WRITE", "ONLY"),
    }

    CONFLICT_ACTIONS: OPTIONS_TYPE = dict.fromkeys(
        ("ABORT", "FAIL", "IGNORE", "REPLACE", "ROLLBACK", "UPDATE"), tuple()
    )
    CONFLICT_ACTIONS["DO"] = ("NOTHING", "UPDATE")

    CREATE_SEQUENCE: OPTIONS_TYPE = {
        "SCALE": ("EXTEND", "NOEXTEND"),
        "SHARD": ("EXTEND", "NOEXTEND"),
        "NO": ("CYCLE", "CACHE", "MAXVALUE", "MINVALUE"),
        **dict.fromkeys(
            (
                "SESSION",
                "GLOBAL",
                "KEEP",
                "NOKEEP",
                "ORDER",
                "NOORDER",
                "NOCACHE",
                "CYCLE",
                "NOCYCLE",
                "NOMINVALUE",
                "NOMAXVALUE",
                "NOSCALE",
                "NOSHARD",
            ),
            tuple(),
        ),
    }

    ISOLATED_LOADING_OPTIONS: OPTIONS_TYPE = {"FOR": ("ALL", "INSERT", "NONE")}

    USABLES: OPTIONS_TYPE = dict.fromkeys(("ROLE", "WAREHOUSE", "DATABASE", "SCHEMA"), tuple())

    CAST_ACTIONS: OPTIONS_TYPE = dict.fromkeys(("RENAME", "ADD"), ("FIELDS",))
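
    # The OPTIONS_TYPE tables above map a leading keyword to the token sequences that
    # may follow it; they are consumed by _parse_var_from_options. A hedged,
    # hypothetical illustration (not used by any dialect):
    #
    #     EXAMPLE_OPTIONS: OPTIONS_TYPE = {
    #         "FOO": ("BAR", ("BAZ", "QUX")),  # matches FOO BAR and FOO BAZ QUX
    #         "LONE": tuple(),                 # matches the bare keyword LONE
    #     }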

    INSERT_ALTERNATIVES = {"ABORT", "FAIL", "IGNORE", "REPLACE", "ROLLBACK"}

    CLONE_KEYWORDS = {"CLONE", "COPY"}
    HISTORICAL_DATA_KIND = {"TIMESTAMP", "OFFSET", "STATEMENT", "STREAM"}

    OPCLASS_FOLLOW_KEYWORDS = {"ASC", "DESC", "NULLS", "WITH"}

    OPTYPE_FOLLOW_TOKENS = {TokenType.COMMA, TokenType.R_PAREN}

    TABLE_INDEX_HINT_TOKENS = {TokenType.FORCE, TokenType.IGNORE, TokenType.USE}

    VIEW_ATTRIBUTES = {"ENCRYPTION", "SCHEMABINDING", "VIEW_METADATA"}

    WINDOW_ALIAS_TOKENS = ID_VAR_TOKENS - {TokenType.ROWS}
    WINDOW_BEFORE_PAREN_TOKENS = {TokenType.OVER}
    WINDOW_SIDES = {"FOLLOWING", "PRECEDING"}

    JSON_KEY_VALUE_SEPARATOR_TOKENS = {TokenType.COLON, TokenType.COMMA, TokenType.IS}

    FETCH_TOKENS = ID_VAR_TOKENS - {TokenType.ROW, TokenType.ROWS, TokenType.PERCENT}

    ADD_CONSTRAINT_TOKENS = {TokenType.CONSTRAINT, TokenType.PRIMARY_KEY, TokenType.FOREIGN_KEY}

    DISTINCT_TOKENS = {TokenType.DISTINCT}

    NULL_TOKENS = {TokenType.NULL}

    UNNEST_OFFSET_ALIAS_TOKENS = ID_VAR_TOKENS - SET_OPERATIONS

    SELECT_START_TOKENS = {TokenType.L_PAREN, TokenType.WITH, TokenType.SELECT}

    COPY_INTO_VARLEN_OPTIONS = {"FILE_FORMAT", "COPY_OPTIONS", "FORMAT_OPTIONS", "CREDENTIAL"}

    STRICT_CAST = True

    PREFIXED_PIVOT_COLUMNS = False
    IDENTIFY_PIVOT_STRINGS = False

    LOG_DEFAULTS_TO_LN = False

    # Whether ADD is present for each column added by ALTER TABLE
    ALTER_TABLE_ADD_REQUIRED_FOR_EACH_COLUMN = True

    # Whether the table sample clause expects CSV syntax
    TABLESAMPLE_CSV = False

    # The default method used for table sampling
    DEFAULT_SAMPLING_METHOD: t.Optional[str] = None

    # Whether the SET command needs a delimiter (e.g. "=") for assignments
    SET_REQUIRES_ASSIGNMENT_DELIMITER = True

    # Whether the TRIM function expects the characters to trim as its first argument
    TRIM_PATTERN_FIRST = False

    # Whether string aliases are supported, e.g. SELECT COUNT(*) 'count'
    STRING_ALIASES = False

    # Whether query modifiers such as LIMIT are attached to the UNION node (vs its right operand)
    MODIFIERS_ATTACHED_TO_UNION = True
    UNION_MODIFIERS = {"order", "limit", "offset"}

    # Whether to parse IF statements that aren't followed by a left parenthesis as commands
    NO_PAREN_IF_COMMANDS = True

    # Whether the -> and ->> operators expect documents of type JSON (e.g. Postgres)
    JSON_ARROWS_REQUIRE_JSON_TYPE = False

    # Whether the `:` operator is used to extract a value from a JSON document
    COLON_IS_JSON_EXTRACT = False

    # Whether a VALUES keyword needs to be followed by '(' to form a VALUES clause.
    # If this is True and '(' is not found, the keyword will be treated as an identifier
    VALUES_FOLLOWED_BY_PAREN = True
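
    # Dialects tune the flags above by overriding them on their Parser subclass; a
    # minimal sketch (hypothetical dialect):
    #
    #     class MyParser(Parser):
    #         STRING_ALIASES = True       # allow SELECT COUNT(*) 'count'
    #         LOG_DEFAULTS_TO_LN = True   # single-arg LOG(x) parses as exp.Ln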

    # Whether implicit unnesting is supported, e.g. SELECT 1 FROM y.z AS z, z.a (Redshift)
    SUPPORTS_IMPLICIT_UNNEST = False

    # Whether interval spans are supported, e.g. INTERVAL 1 YEAR TO MONTHS
    INTERVAL_SPANS = True

    # Whether a PARTITION clause can follow a table reference
    SUPPORTS_PARTITION_SELECTION = False

    __slots__ = (
        "error_level",
        "error_message_context",
        "max_errors",
        "dialect",
        "sql",
        "errors",
        "_tokens",
        "_index",
        "_curr",
        "_next",
        "_prev",
        "_prev_comments",
    )

    # Autofilled
    SHOW_TRIE: t.Dict = {}
    SET_TRIE: t.Dict = {}

    def __init__(
        self,
        error_level: t.Optional[ErrorLevel] = None,
        error_message_context: int = 100,
        max_errors: int = 3,
        dialect: DialectType = None,
    ):
        from sqlglot.dialects import Dialect

        self.error_level = error_level or ErrorLevel.IMMEDIATE
        self.error_message_context = error_message_context
        self.max_errors = max_errors
        self.dialect = Dialect.get_or_raise(dialect)
        self.reset()

    def reset(self):
        self.sql = ""
        self.errors = []
        self._tokens = []
        self._index = 0
        self._curr = None
        self._next = None
        self._prev = None
        self._prev_comments = None

    def parse(
        self, raw_tokens: t.List[Token], sql: t.Optional[str] = None
    ) -> t.List[t.Optional[exp.Expression]]:
        """
        Parses a list of tokens and returns a list of syntax trees, one tree
        per parsed SQL statement.

        Args:
            raw_tokens: The list of tokens.
            sql: The original SQL string, used to produce helpful debug messages.

        Returns:
            The list of the produced syntax trees.
        """
        return self._parse(
            parse_method=self.__class__._parse_statement, raw_tokens=raw_tokens, sql=sql
        )
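
    # Usage sketch for parse(): tokens normally come from the dialect's tokenizer,
    # and sqlglot.parse / sqlglot.parse_one wrap all of this for you:
    #
    #     from sqlglot.dialects import Dialect
    #     dialect = Dialect.get_or_raise("duckdb")
    #     sql = "SELECT 1; SELECT 2"
    #     trees = dialect.parser().parse(dialect.tokenize(sql), sql)  # two trees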

    def parse_into(
        self,
        expression_types: exp.IntoType,
        raw_tokens: t.List[Token],
        sql: t.Optional[str] = None,
    ) -> t.List[t.Optional[exp.Expression]]:
        """
        Parses a list of tokens into a given Expression type. If a collection of Expression
        types is given instead, this method will try to parse the token list into each one
        of them, stopping at the first for which the parsing succeeds.

        Args:
            expression_types: The expression type(s) to try and parse the token list into.
            raw_tokens: The list of tokens.
            sql: The original SQL string, used to produce helpful debug messages.

        Returns:
            The target Expression.
        """
        errors = []
        for expression_type in ensure_list(expression_types):
            parser = self.EXPRESSION_PARSERS.get(expression_type)
            if not parser:
                raise TypeError(f"No parser registered for {expression_type}")

            try:
                return self._parse(parser, raw_tokens, sql)
            except ParseError as e:
                e.errors[0]["into_expression"] = expression_type
                errors.append(e)

        raise ParseError(
            f"Failed to parse '{sql or raw_tokens}' into {expression_types}",
            errors=merge_errors(errors),
        ) from errors[-1]

    def _parse(
        self,
        parse_method: t.Callable[[Parser], t.Optional[exp.Expression]],
        raw_tokens: t.List[Token],
        sql: t.Optional[str] = None,
    ) -> t.List[t.Optional[exp.Expression]]:
        self.reset()
        self.sql = sql or ""

        total = len(raw_tokens)
        chunks: t.List[t.List[Token]] = [[]]

        for i, token in enumerate(raw_tokens):
            if token.token_type == TokenType.SEMICOLON:
                if token.comments:
                    chunks.append([token])

                if i < total - 1:
                    chunks.append([])
            else:
                chunks[-1].append(token)

        expressions = []

        for tokens in chunks:
            self._index = -1
            self._tokens = tokens
            self._advance()

            expressions.append(parse_method(self))

            if self._index < len(self._tokens):
                self.raise_error("Invalid expression / Unexpected token")

            self.check_errors()

        return expressions

    def check_errors(self) -> None:
        """Logs or raises any found errors, depending on the chosen error level setting."""
        if self.error_level == ErrorLevel.WARN:
            for error in self.errors:
                logger.error(str(error))
        elif self.error_level == ErrorLevel.RAISE and self.errors:
            raise ParseError(
                concat_messages(self.errors, self.max_errors),
                errors=merge_errors(self.errors),
            )

    def raise_error(self, message: str, token: t.Optional[Token] = None) -> None:
        """
        Appends an error in the list of recorded errors or raises it, depending on the chosen
        error level setting.
        """
        token = token or self._curr or self._prev or Token.string("")
        start = token.start
        end = token.end + 1
        start_context = self.sql[max(start - self.error_message_context, 0) : start]
        highlight = self.sql[start:end]
        end_context = self.sql[end : end + self.error_message_context]

        error = ParseError.new(
            f"{message}. Line {token.line}, Col: {token.col}.\n"
            f"  {start_context}\033[4m{highlight}\033[0m{end_context}",
            description=message,
            line=token.line,
            col=token.col,
            start_context=start_context,
            highlight=highlight,
            end_context=end_context,
        )

        if self.error_level == ErrorLevel.IMMEDIATE:
            raise error

        self.errors.append(error)
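
    # Error-handling sketch: with the default ErrorLevel.IMMEDIATE, raise_error
    # raises at the offending token, while ErrorLevel.RAISE first collects up to
    # max_errors (see check_errors). For example:
    #
    #     import sqlglot
    #     from sqlglot.errors import ParseError
    #     try:
    #         sqlglot.parse_one("SELECT 1 +")
    #     except ParseError as e:
    #         e.errors[0]["line"], e.errors[0]["col"]  # structured error details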

    def expression(
        self, exp_class: t.Type[E], comments: t.Optional[t.List[str]] = None, **kwargs
    ) -> E:
        """
        Creates a new, validated Expression.

        Args:
            exp_class: The expression class to instantiate.
            comments: An optional list of comments to attach to the expression.
            kwargs: The arguments to set for the expression along with their respective values.

        Returns:
            The target expression.
        """
        instance = exp_class(**kwargs)
        instance.add_comments(comments) if comments else self._add_comments(instance)
        return self.validate_expression(instance)

    def _add_comments(self, expression: t.Optional[exp.Expression]) -> None:
        if expression and self._prev_comments:
            expression.add_comments(self._prev_comments)
            self._prev_comments = None

    def validate_expression(self, expression: E, args: t.Optional[t.List] = None) -> E:
        """
        Validates an Expression, making sure that all its mandatory arguments are set.

        Args:
            expression: The expression to validate.
            args: An optional list of items that was used to instantiate the expression, if it's a Func.

        Returns:
            The validated expression.
        """
        if self.error_level != ErrorLevel.IGNORE:
            for error_message in expression.error_messages(args):
                self.raise_error(error_message)

        return expression

    def _find_sql(self, start: Token, end: Token) -> str:
        return self.sql[start.start : end.end + 1]

    def _is_connected(self) -> bool:
        return self._prev and self._curr and self._prev.end + 1 == self._curr.start

    def _advance(self, times: int = 1) -> None:
        self._index += times
        self._curr = seq_get(self._tokens, self._index)
        self._next = seq_get(self._tokens, self._index + 1)

        if self._index > 0:
            self._prev = self._tokens[self._index - 1]
            self._prev_comments = self._prev.comments
        else:
            self._prev = None
            self._prev_comments = None

    def _retreat(self, index: int) -> None:
        if index != self._index:
            self._advance(index - self._index)

    def _warn_unsupported(self) -> None:
        if len(self._tokens) <= 1:
            return

        # We use _find_sql because self.sql may comprise multiple chunks, and we're only
        # interested in emitting a warning for the one being currently processed.
        sql = self._find_sql(self._tokens[0], self._tokens[-1])[: self.error_message_context]

        logger.warning(
            f"'{sql}' contains unsupported syntax. Falling back to parsing as a 'Command'."
        )

    def _parse_command(self) -> exp.Command:
        self._warn_unsupported()
        return self.expression(
            exp.Command, this=self._prev.text.upper(), expression=self._parse_string()
        )
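
    # Sketch: expression()/validate_expression() enforce mandatory arguments, so a
    # node built with a required argument missing is reported through raise_error,
    # e.g. (assuming exp.Cast requires "to"):
    #
    #     from sqlglot import exp, parser
    #     parser.Parser().validate_expression(exp.Cast(this=exp.column("x")))
    #     # -> raises ParseError about the missing required keyword "to"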

    def _try_parse(self, parse_method: t.Callable[[], T], retreat: bool = False) -> t.Optional[T]:
        """
        Attempts to backtrack if a parse function that contains a try/catch internally raises an
        error. This behavior can be different depending on the user-set ErrorLevel, so _try_parse
        aims to solve this by setting & resetting the parser state accordingly.
        """
        index = self._index
        error_level = self.error_level

        self.error_level = ErrorLevel.IMMEDIATE
        try:
            this = parse_method()
        except ParseError:
            this = None
        finally:
            if not this or retreat:
                self._retreat(index)
            self.error_level = error_level

        return this

    def _parse_comment(self, allow_exists: bool = True) -> exp.Expression:
        start = self._prev
        exists = self._parse_exists() if allow_exists else None

        self._match(TokenType.ON)

        materialized = self._match_text_seq("MATERIALIZED")
        kind = self._match_set(self.CREATABLES) and self._prev
        if not kind:
            return self._parse_as_command(start)

        if kind.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE):
            this = self._parse_user_defined_function(kind=kind.token_type)
        elif kind.token_type == TokenType.TABLE:
            this = self._parse_table(alias_tokens=self.COMMENT_TABLE_ALIAS_TOKENS)
        elif kind.token_type == TokenType.COLUMN:
            this = self._parse_column()
        else:
            this = self._parse_id_var()

        self._match(TokenType.IS)

        return self.expression(
            exp.Comment,
            this=this,
            kind=kind.text,
            expression=self._parse_string(),
            exists=exists,
            materialized=materialized,
        )

    def _parse_to_table(
        self,
    ) -> exp.ToTableProperty:
        table = self._parse_table_parts(schema=True)
        return self.expression(exp.ToTableProperty, this=table)

    # https://clickhouse.com/docs/en/engines/table-engines/mergetree-family/mergetree#mergetree-table-ttl
    def _parse_ttl(self) -> exp.Expression:
        def _parse_ttl_action() -> t.Optional[exp.Expression]:
            this = self._parse_bitwise()

            if self._match_text_seq("DELETE"):
                return self.expression(exp.MergeTreeTTLAction, this=this, delete=True)
            if self._match_text_seq("RECOMPRESS"):
                return self.expression(
                    exp.MergeTreeTTLAction, this=this, recompress=self._parse_bitwise()
                )
            if self._match_text_seq("TO", "DISK"):
                return self.expression(
                    exp.MergeTreeTTLAction, this=this, to_disk=self._parse_string()
                )
            if self._match_text_seq("TO", "VOLUME"):
                return self.expression(
                    exp.MergeTreeTTLAction, this=this, to_volume=self._parse_string()
                )

            return this

        expressions = self._parse_csv(_parse_ttl_action)
        where = self._parse_where()
        group = self._parse_group()

        aggregates = None
        if group and self._match(TokenType.SET):
            aggregates = self._parse_csv(self._parse_set_item)

        return self.expression(
            exp.MergeTreeTTL,
            expressions=expressions,
            where=where,
            group=group,
            aggregates=aggregates,
        )

    def _parse_statement(self) -> t.Optional[exp.Expression]:
        if self._curr is None:
            return None

        if self._match_set(self.STATEMENT_PARSERS):
            return self.STATEMENT_PARSERS[self._prev.token_type](self)

        if self._match_set(self.dialect.tokenizer.COMMANDS):
            return self._parse_command()

        expression = self._parse_expression()
        expression = self._parse_set_operations(expression) if expression else self._parse_select()
        return self._parse_query_modifiers(expression)
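
    # Fallback sketch: a statement whose leading token is in the tokenizer's
    # COMMANDS set, with no more specific parser registered, becomes a generic
    # exp.Command (after _warn_unsupported logs a warning), e.g. in the default
    # dialect:
    #
    #     import sqlglot
    #     sqlglot.parse_one("SHOW TABLES")  # exp.Command(this='SHOW', ...)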

    def _parse_drop(self, exists: bool = False) -> exp.Drop | exp.Command:
        start = self._prev
        temporary = self._match(TokenType.TEMPORARY)
        materialized = self._match_text_seq("MATERIALIZED")

        kind = self._match_set(self.CREATABLES) and self._prev.text
        if not kind:
            return self._parse_as_command(start)

        if_exists = exists or self._parse_exists()
        table = self._parse_table_parts(
            schema=True, is_db_reference=self._prev.token_type == TokenType.SCHEMA
        )

        cluster = self._parse_on_property() if self._match(TokenType.ON) else None

        if self._match(TokenType.L_PAREN, advance=False):
            expressions = self._parse_wrapped_csv(self._parse_types)
        else:
            expressions = None

        return self.expression(
            exp.Drop,
            comments=start.comments,
            exists=if_exists,
            this=table,
            expressions=expressions,
            kind=kind.upper(),
            temporary=temporary,
            materialized=materialized,
            cascade=self._match_text_seq("CASCADE"),
            constraints=self._match_text_seq("CONSTRAINTS"),
            purge=self._match_text_seq("PURGE"),
            cluster=cluster,
        )

    def _parse_exists(self, not_: bool = False) -> t.Optional[bool]:
        return (
            self._match_text_seq("IF")
            and (not not_ or self._match(TokenType.NOT))
            and self._match(TokenType.EXISTS)
        )

    def _parse_create(self) -> exp.Create | exp.Command:
        # Note: this can't be None because we've matched a statement parser
        start = self._prev
        comments = self._prev_comments

        replace = (
            start.token_type == TokenType.REPLACE
            or self._match_pair(TokenType.OR, TokenType.REPLACE)
            or self._match_pair(TokenType.OR, TokenType.ALTER)
        )

        unique = self._match(TokenType.UNIQUE)

        if self._match_pair(TokenType.TABLE, TokenType.FUNCTION, advance=False):
            self._advance()

        properties = None
        create_token = self._match_set(self.CREATABLES) and self._prev

        if not create_token:
            # exp.Properties.Location.POST_CREATE
            properties = self._parse_properties()
            create_token = self._match_set(self.CREATABLES) and self._prev

            if not properties or not create_token:
                return self._parse_as_command(start)

        exists = self._parse_exists(not_=True)
        this = None
        expression: t.Optional[exp.Expression] = None
        indexes = None
        no_schema_binding = None
        begin = None
        end = None
        clone = None

        def extend_props(temp_props: t.Optional[exp.Properties]) -> None:
            nonlocal properties
            if properties and temp_props:
                properties.expressions.extend(temp_props.expressions)
            elif temp_props:
                properties = temp_props

        if create_token.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE):
            this = self._parse_user_defined_function(kind=create_token.token_type)

            # exp.Properties.Location.POST_SCHEMA ("schema" here is the UDF's type signature)
            extend_props(self._parse_properties())

            expression = self._match(TokenType.ALIAS) and self._parse_heredoc()
            extend_props(self._parse_properties())

            if not expression:
                if self._match(TokenType.COMMAND):
                    expression = self._parse_as_command(self._prev)
                else:
                    begin = self._match(TokenType.BEGIN)
                    return_ = self._match_text_seq("RETURN")

                    if self._match(TokenType.STRING, advance=False):
                        # Takes care of BigQuery's JavaScript UDF definitions that end in an OPTIONS property
https://cloud.google.com/bigquery/docs/reference/standard-sql/data-definition-language#create_function_statement 1668 expression = self._parse_string() 1669 extend_props(self._parse_properties()) 1670 else: 1671 expression = self._parse_statement() 1672 1673 end = self._match_text_seq("END") 1674 1675 if return_: 1676 expression = self.expression(exp.Return, this=expression) 1677 elif create_token.token_type == TokenType.INDEX: 1678 # Postgres allows anonymous indexes, eg. CREATE INDEX IF NOT EXISTS ON t(c) 1679 if not self._match(TokenType.ON): 1680 index = self._parse_id_var() 1681 anonymous = False 1682 else: 1683 index = None 1684 anonymous = True 1685 1686 this = self._parse_index(index=index, anonymous=anonymous) 1687 elif create_token.token_type in self.DB_CREATABLES: 1688 table_parts = self._parse_table_parts( 1689 schema=True, is_db_reference=create_token.token_type == TokenType.SCHEMA 1690 ) 1691 1692 # exp.Properties.Location.POST_NAME 1693 self._match(TokenType.COMMA) 1694 extend_props(self._parse_properties(before=True)) 1695 1696 this = self._parse_schema(this=table_parts) 1697 1698 # exp.Properties.Location.POST_SCHEMA and POST_WITH 1699 extend_props(self._parse_properties()) 1700 1701 self._match(TokenType.ALIAS) 1702 if not self._match_set(self.DDL_SELECT_TOKENS, advance=False): 1703 # exp.Properties.Location.POST_ALIAS 1704 extend_props(self._parse_properties()) 1705 1706 if create_token.token_type == TokenType.SEQUENCE: 1707 expression = self._parse_types() 1708 extend_props(self._parse_properties()) 1709 else: 1710 expression = self._parse_ddl_select() 1711 1712 if create_token.token_type == TokenType.TABLE: 1713 # exp.Properties.Location.POST_EXPRESSION 1714 extend_props(self._parse_properties()) 1715 1716 indexes = [] 1717 while True: 1718 index = self._parse_index() 1719 1720 # exp.Properties.Location.POST_INDEX 1721 extend_props(self._parse_properties()) 1722 1723 if not index: 1724 break 1725 else: 1726 self._match(TokenType.COMMA) 1727 indexes.append(index) 1728 elif create_token.token_type == TokenType.VIEW: 1729 if self._match_text_seq("WITH", "NO", "SCHEMA", "BINDING"): 1730 no_schema_binding = True 1731 1732 shallow = self._match_text_seq("SHALLOW") 1733 1734 if self._match_texts(self.CLONE_KEYWORDS): 1735 copy = self._prev.text.lower() == "copy" 1736 clone = self.expression( 1737 exp.Clone, this=self._parse_table(schema=True), shallow=shallow, copy=copy 1738 ) 1739 1740 if self._curr and not self._match_set((TokenType.R_PAREN, TokenType.COMMA), advance=False): 1741 return self._parse_as_command(start) 1742 1743 return self.expression( 1744 exp.Create, 1745 comments=comments, 1746 this=this, 1747 kind=create_token.text.upper(), 1748 replace=replace, 1749 unique=unique, 1750 expression=expression, 1751 exists=exists, 1752 properties=properties, 1753 indexes=indexes, 1754 no_schema_binding=no_schema_binding, 1755 begin=begin, 1756 end=end, 1757 clone=clone, 1758 ) 1759 1760 def _parse_sequence_properties(self) -> t.Optional[exp.SequenceProperties]: 1761 seq = exp.SequenceProperties() 1762 1763 options = [] 1764 index = self._index 1765 1766 while self._curr: 1767 self._match(TokenType.COMMA) 1768 if self._match_text_seq("INCREMENT"): 1769 self._match_text_seq("BY") 1770 self._match_text_seq("=") 1771 seq.set("increment", self._parse_term()) 1772 elif self._match_text_seq("MINVALUE"): 1773 seq.set("minvalue", self._parse_term()) 1774 elif self._match_text_seq("MAXVALUE"): 1775 seq.set("maxvalue", self._parse_term()) 1776 elif self._match(TokenType.START_WITH) or 
self._match_text_seq("START"): 1777 self._match_text_seq("=") 1778 seq.set("start", self._parse_term()) 1779 elif self._match_text_seq("CACHE"): 1780 # T-SQL allows empty CACHE which is initialized dynamically 1781 seq.set("cache", self._parse_number() or True) 1782 elif self._match_text_seq("OWNED", "BY"): 1783 # "OWNED BY NONE" is the default 1784 seq.set("owned", None if self._match_text_seq("NONE") else self._parse_column()) 1785 else: 1786 opt = self._parse_var_from_options(self.CREATE_SEQUENCE, raise_unmatched=False) 1787 if opt: 1788 options.append(opt) 1789 else: 1790 break 1791 1792 seq.set("options", options if options else None) 1793 return None if self._index == index else seq 1794 1795 def _parse_property_before(self) -> t.Optional[exp.Expression]: 1796 # only used for teradata currently 1797 self._match(TokenType.COMMA) 1798 1799 kwargs = { 1800 "no": self._match_text_seq("NO"), 1801 "dual": self._match_text_seq("DUAL"), 1802 "before": self._match_text_seq("BEFORE"), 1803 "default": self._match_text_seq("DEFAULT"), 1804 "local": (self._match_text_seq("LOCAL") and "LOCAL") 1805 or (self._match_text_seq("NOT", "LOCAL") and "NOT LOCAL"), 1806 "after": self._match_text_seq("AFTER"), 1807 "minimum": self._match_texts(("MIN", "MINIMUM")), 1808 "maximum": self._match_texts(("MAX", "MAXIMUM")), 1809 } 1810 1811 if self._match_texts(self.PROPERTY_PARSERS): 1812 parser = self.PROPERTY_PARSERS[self._prev.text.upper()] 1813 try: 1814 return parser(self, **{k: v for k, v in kwargs.items() if v}) 1815 except TypeError: 1816 self.raise_error(f"Cannot parse property '{self._prev.text}'") 1817 1818 return None 1819 1820 def _parse_wrapped_properties(self) -> t.List[exp.Expression]: 1821 return self._parse_wrapped_csv(self._parse_property) 1822 1823 def _parse_property(self) -> t.Optional[exp.Expression]: 1824 if self._match_texts(self.PROPERTY_PARSERS): 1825 return self.PROPERTY_PARSERS[self._prev.text.upper()](self) 1826 1827 if self._match(TokenType.DEFAULT) and self._match_texts(self.PROPERTY_PARSERS): 1828 return self.PROPERTY_PARSERS[self._prev.text.upper()](self, default=True) 1829 1830 if self._match_text_seq("COMPOUND", "SORTKEY"): 1831 return self._parse_sortkey(compound=True) 1832 1833 if self._match_text_seq("SQL", "SECURITY"): 1834 return self.expression(exp.SqlSecurityProperty, definer=self._match_text_seq("DEFINER")) 1835 1836 index = self._index 1837 key = self._parse_column() 1838 1839 if not self._match(TokenType.EQ): 1840 self._retreat(index) 1841 return self._parse_sequence_properties() 1842 1843 # Transform the key to exp.Dot if it's dotted identifiers wrapped in exp.Column or to exp.Var otherwise 1844 if isinstance(key, exp.Column): 1845 key = key.to_dot() if len(key.parts) > 1 else exp.var(key.name) 1846 1847 value = self._parse_bitwise() or self._parse_var(any_token=True) 1848 1849 # Transform the value to exp.Var if it was parsed as exp.Column(exp.Identifier()) 1850 if isinstance(value, exp.Column): 1851 value = exp.var(value.name) 1852 1853 return self.expression(exp.Property, this=key, value=value) 1854 1855 def _parse_stored(self) -> exp.FileFormatProperty: 1856 self._match(TokenType.ALIAS) 1857 1858 input_format = self._parse_string() if self._match_text_seq("INPUTFORMAT") else None 1859 output_format = self._parse_string() if self._match_text_seq("OUTPUTFORMAT") else None 1860 1861 return self.expression( 1862 exp.FileFormatProperty, 1863 this=( 1864 self.expression( 1865 exp.InputOutputFormat, input_format=input_format, output_format=output_format 1866 ) 1867 if 
input_format or output_format 1868 else self._parse_var_or_string() or self._parse_number() or self._parse_id_var() 1869 ), 1870 ) 1871 1872 def _parse_unquoted_field(self) -> t.Optional[exp.Expression]: 1873 field = self._parse_field() 1874 if isinstance(field, exp.Identifier) and not field.quoted: 1875 field = exp.var(field) 1876 1877 return field 1878 1879 def _parse_property_assignment(self, exp_class: t.Type[E], **kwargs: t.Any) -> E: 1880 self._match(TokenType.EQ) 1881 self._match(TokenType.ALIAS) 1882 1883 return self.expression(exp_class, this=self._parse_unquoted_field(), **kwargs) 1884 1885 def _parse_properties(self, before: t.Optional[bool] = None) -> t.Optional[exp.Properties]: 1886 properties = [] 1887 while True: 1888 if before: 1889 prop = self._parse_property_before() 1890 else: 1891 prop = self._parse_property() 1892 if not prop: 1893 break 1894 for p in ensure_list(prop): 1895 properties.append(p) 1896 1897 if properties: 1898 return self.expression(exp.Properties, expressions=properties) 1899 1900 return None 1901 1902 def _parse_fallback(self, no: bool = False) -> exp.FallbackProperty: 1903 return self.expression( 1904 exp.FallbackProperty, no=no, protection=self._match_text_seq("PROTECTION") 1905 ) 1906 1907 def _parse_volatile_property(self) -> exp.VolatileProperty | exp.StabilityProperty: 1908 if self._index >= 2: 1909 pre_volatile_token = self._tokens[self._index - 2] 1910 else: 1911 pre_volatile_token = None 1912 1913 if pre_volatile_token and pre_volatile_token.token_type in self.PRE_VOLATILE_TOKENS: 1914 return exp.VolatileProperty() 1915 1916 return self.expression(exp.StabilityProperty, this=exp.Literal.string("VOLATILE")) 1917 1918 def _parse_retention_period(self) -> exp.Var: 1919 # Parse TSQL's HISTORY_RETENTION_PERIOD: {INFINITE | <number> DAY | DAYS | MONTH ...} 1920 number = self._parse_number() 1921 number_str = f"{number} " if number else "" 1922 unit = self._parse_var(any_token=True) 1923 return exp.var(f"{number_str}{unit}") 1924 1925 def _parse_system_versioning_property( 1926 self, with_: bool = False 1927 ) -> exp.WithSystemVersioningProperty: 1928 self._match(TokenType.EQ) 1929 prop = self.expression( 1930 exp.WithSystemVersioningProperty, 1931 **{ # type: ignore 1932 "on": True, 1933 "with": with_, 1934 }, 1935 ) 1936 1937 if self._match_text_seq("OFF"): 1938 prop.set("on", False) 1939 return prop 1940 1941 self._match(TokenType.ON) 1942 if self._match(TokenType.L_PAREN): 1943 while self._curr and not self._match(TokenType.R_PAREN): 1944 if self._match_text_seq("HISTORY_TABLE", "="): 1945 prop.set("this", self._parse_table_parts()) 1946 elif self._match_text_seq("DATA_CONSISTENCY_CHECK", "="): 1947 prop.set("data_consistency", self._advance_any() and self._prev.text.upper()) 1948 elif self._match_text_seq("HISTORY_RETENTION_PERIOD", "="): 1949 prop.set("retention_period", self._parse_retention_period()) 1950 1951 self._match(TokenType.COMMA) 1952 1953 return prop 1954 1955 def _parse_data_deletion_property(self) -> exp.DataDeletionProperty: 1956 self._match(TokenType.EQ) 1957 on = self._match_text_seq("ON") or not self._match_text_seq("OFF") 1958 prop = self.expression(exp.DataDeletionProperty, on=on) 1959 1960 if self._match(TokenType.L_PAREN): 1961 while self._curr and not self._match(TokenType.R_PAREN): 1962 if self._match_text_seq("FILTER_COLUMN", "="): 1963 prop.set("filter_column", self._parse_column()) 1964 elif self._match_text_seq("RETENTION_PERIOD", "="): 1965 prop.set("retention_period", self._parse_retention_period()) 1966 1967 
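            # Note (added for clarity): each loop iteration consumes one "KEY = value"
            # option; the comma match below also tolerates a missing separator before
            # the closing paren.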
self._match(TokenType.COMMA) 1968 1969 return prop 1970 1971 def _parse_with_property(self) -> t.Optional[exp.Expression] | t.List[exp.Expression]: 1972 if self._match_text_seq("(", "SYSTEM_VERSIONING"): 1973 prop = self._parse_system_versioning_property(with_=True) 1974 self._match_r_paren() 1975 return prop 1976 1977 if self._match(TokenType.L_PAREN, advance=False): 1978 return self._parse_wrapped_properties() 1979 1980 if self._match_text_seq("JOURNAL"): 1981 return self._parse_withjournaltable() 1982 1983 if self._match_texts(self.VIEW_ATTRIBUTES): 1984 return self.expression(exp.ViewAttributeProperty, this=self._prev.text.upper()) 1985 1986 if self._match_text_seq("DATA"): 1987 return self._parse_withdata(no=False) 1988 elif self._match_text_seq("NO", "DATA"): 1989 return self._parse_withdata(no=True) 1990 1991 if self._match(TokenType.SERDE_PROPERTIES, advance=False): 1992 return self._parse_serde_properties(with_=True) 1993 1994 if not self._next: 1995 return None 1996 1997 return self._parse_withisolatedloading() 1998 1999 # https://dev.mysql.com/doc/refman/8.0/en/create-view.html 2000 def _parse_definer(self) -> t.Optional[exp.DefinerProperty]: 2001 self._match(TokenType.EQ) 2002 2003 user = self._parse_id_var() 2004 self._match(TokenType.PARAMETER) 2005 host = self._parse_id_var() or (self._match(TokenType.MOD) and self._prev.text) 2006 2007 if not user or not host: 2008 return None 2009 2010 return exp.DefinerProperty(this=f"{user}@{host}") 2011 2012 def _parse_withjournaltable(self) -> exp.WithJournalTableProperty: 2013 self._match(TokenType.TABLE) 2014 self._match(TokenType.EQ) 2015 return self.expression(exp.WithJournalTableProperty, this=self._parse_table_parts()) 2016 2017 def _parse_log(self, no: bool = False) -> exp.LogProperty: 2018 return self.expression(exp.LogProperty, no=no) 2019 2020 def _parse_journal(self, **kwargs) -> exp.JournalProperty: 2021 return self.expression(exp.JournalProperty, **kwargs) 2022 2023 def _parse_checksum(self) -> exp.ChecksumProperty: 2024 self._match(TokenType.EQ) 2025 2026 on = None 2027 if self._match(TokenType.ON): 2028 on = True 2029 elif self._match_text_seq("OFF"): 2030 on = False 2031 2032 return self.expression(exp.ChecksumProperty, on=on, default=self._match(TokenType.DEFAULT)) 2033 2034 def _parse_cluster(self, wrapped: bool = False) -> exp.Cluster: 2035 return self.expression( 2036 exp.Cluster, 2037 expressions=( 2038 self._parse_wrapped_csv(self._parse_ordered) 2039 if wrapped 2040 else self._parse_csv(self._parse_ordered) 2041 ), 2042 ) 2043 2044 def _parse_clustered_by(self) -> exp.ClusteredByProperty: 2045 self._match_text_seq("BY") 2046 2047 self._match_l_paren() 2048 expressions = self._parse_csv(self._parse_column) 2049 self._match_r_paren() 2050 2051 if self._match_text_seq("SORTED", "BY"): 2052 self._match_l_paren() 2053 sorted_by = self._parse_csv(self._parse_ordered) 2054 self._match_r_paren() 2055 else: 2056 sorted_by = None 2057 2058 self._match(TokenType.INTO) 2059 buckets = self._parse_number() 2060 self._match_text_seq("BUCKETS") 2061 2062 return self.expression( 2063 exp.ClusteredByProperty, 2064 expressions=expressions, 2065 sorted_by=sorted_by, 2066 buckets=buckets, 2067 ) 2068 2069 def _parse_copy_property(self) -> t.Optional[exp.CopyGrantsProperty]: 2070 if not self._match_text_seq("GRANTS"): 2071 self._retreat(self._index - 1) 2072 return None 2073 2074 return self.expression(exp.CopyGrantsProperty) 2075 2076 def _parse_freespace(self) -> exp.FreespaceProperty: 2077 self._match(TokenType.EQ) 2078 return 
self.expression( 2079 exp.FreespaceProperty, this=self._parse_number(), percent=self._match(TokenType.PERCENT) 2080 ) 2081 2082 def _parse_mergeblockratio( 2083 self, no: bool = False, default: bool = False 2084 ) -> exp.MergeBlockRatioProperty: 2085 if self._match(TokenType.EQ): 2086 return self.expression( 2087 exp.MergeBlockRatioProperty, 2088 this=self._parse_number(), 2089 percent=self._match(TokenType.PERCENT), 2090 ) 2091 2092 return self.expression(exp.MergeBlockRatioProperty, no=no, default=default) 2093 2094 def _parse_datablocksize( 2095 self, 2096 default: t.Optional[bool] = None, 2097 minimum: t.Optional[bool] = None, 2098 maximum: t.Optional[bool] = None, 2099 ) -> exp.DataBlocksizeProperty: 2100 self._match(TokenType.EQ) 2101 size = self._parse_number() 2102 2103 units = None 2104 if self._match_texts(("BYTES", "KBYTES", "KILOBYTES")): 2105 units = self._prev.text 2106 2107 return self.expression( 2108 exp.DataBlocksizeProperty, 2109 size=size, 2110 units=units, 2111 default=default, 2112 minimum=minimum, 2113 maximum=maximum, 2114 ) 2115 2116 def _parse_blockcompression(self) -> exp.BlockCompressionProperty: 2117 self._match(TokenType.EQ) 2118 always = self._match_text_seq("ALWAYS") 2119 manual = self._match_text_seq("MANUAL") 2120 never = self._match_text_seq("NEVER") 2121 default = self._match_text_seq("DEFAULT") 2122 2123 autotemp = None 2124 if self._match_text_seq("AUTOTEMP"): 2125 autotemp = self._parse_schema() 2126 2127 return self.expression( 2128 exp.BlockCompressionProperty, 2129 always=always, 2130 manual=manual, 2131 never=never, 2132 default=default, 2133 autotemp=autotemp, 2134 ) 2135 2136 def _parse_withisolatedloading(self) -> t.Optional[exp.IsolatedLoadingProperty]: 2137 index = self._index 2138 no = self._match_text_seq("NO") 2139 concurrent = self._match_text_seq("CONCURRENT") 2140 2141 if not self._match_text_seq("ISOLATED", "LOADING"): 2142 self._retreat(index) 2143 return None 2144 2145 target = self._parse_var_from_options(self.ISOLATED_LOADING_OPTIONS, raise_unmatched=False) 2146 return self.expression( 2147 exp.IsolatedLoadingProperty, no=no, concurrent=concurrent, target=target 2148 ) 2149 2150 def _parse_locking(self) -> exp.LockingProperty: 2151 if self._match(TokenType.TABLE): 2152 kind = "TABLE" 2153 elif self._match(TokenType.VIEW): 2154 kind = "VIEW" 2155 elif self._match(TokenType.ROW): 2156 kind = "ROW" 2157 elif self._match_text_seq("DATABASE"): 2158 kind = "DATABASE" 2159 else: 2160 kind = None 2161 2162 if kind in ("DATABASE", "TABLE", "VIEW"): 2163 this = self._parse_table_parts() 2164 else: 2165 this = None 2166 2167 if self._match(TokenType.FOR): 2168 for_or_in = "FOR" 2169 elif self._match(TokenType.IN): 2170 for_or_in = "IN" 2171 else: 2172 for_or_in = None 2173 2174 if self._match_text_seq("ACCESS"): 2175 lock_type = "ACCESS" 2176 elif self._match_texts(("EXCL", "EXCLUSIVE")): 2177 lock_type = "EXCLUSIVE" 2178 elif self._match_text_seq("SHARE"): 2179 lock_type = "SHARE" 2180 elif self._match_text_seq("READ"): 2181 lock_type = "READ" 2182 elif self._match_text_seq("WRITE"): 2183 lock_type = "WRITE" 2184 elif self._match_text_seq("CHECKSUM"): 2185 lock_type = "CHECKSUM" 2186 else: 2187 lock_type = None 2188 2189 override = self._match_text_seq("OVERRIDE") 2190 2191 return self.expression( 2192 exp.LockingProperty, 2193 this=this, 2194 kind=kind, 2195 for_or_in=for_or_in, 2196 lock_type=lock_type, 2197 override=override, 2198 ) 2199 2200 def _parse_partition_by(self) -> t.List[exp.Expression]: 2201 if 
self._match(TokenType.PARTITION_BY): 2202 return self._parse_csv(self._parse_assignment) 2203 return [] 2204 2205 def _parse_partition_bound_spec(self) -> exp.PartitionBoundSpec: 2206 def _parse_partition_bound_expr() -> t.Optional[exp.Expression]: 2207 if self._match_text_seq("MINVALUE"): 2208 return exp.var("MINVALUE") 2209 if self._match_text_seq("MAXVALUE"): 2210 return exp.var("MAXVALUE") 2211 return self._parse_bitwise() 2212 2213 this: t.Optional[exp.Expression | t.List[exp.Expression]] = None 2214 expression = None 2215 from_expressions = None 2216 to_expressions = None 2217 2218 if self._match(TokenType.IN): 2219 this = self._parse_wrapped_csv(self._parse_bitwise) 2220 elif self._match(TokenType.FROM): 2221 from_expressions = self._parse_wrapped_csv(_parse_partition_bound_expr) 2222 self._match_text_seq("TO") 2223 to_expressions = self._parse_wrapped_csv(_parse_partition_bound_expr) 2224 elif self._match_text_seq("WITH", "(", "MODULUS"): 2225 this = self._parse_number() 2226 self._match_text_seq(",", "REMAINDER") 2227 expression = self._parse_number() 2228 self._match_r_paren() 2229 else: 2230 self.raise_error("Failed to parse partition bound spec.") 2231 2232 return self.expression( 2233 exp.PartitionBoundSpec, 2234 this=this, 2235 expression=expression, 2236 from_expressions=from_expressions, 2237 to_expressions=to_expressions, 2238 ) 2239 2240 # https://www.postgresql.org/docs/current/sql-createtable.html 2241 def _parse_partitioned_of(self) -> t.Optional[exp.PartitionedOfProperty]: 2242 if not self._match_text_seq("OF"): 2243 self._retreat(self._index - 1) 2244 return None 2245 2246 this = self._parse_table(schema=True) 2247 2248 if self._match(TokenType.DEFAULT): 2249 expression: exp.Var | exp.PartitionBoundSpec = exp.var("DEFAULT") 2250 elif self._match_text_seq("FOR", "VALUES"): 2251 expression = self._parse_partition_bound_spec() 2252 else: 2253 self.raise_error("Expecting either DEFAULT or FOR VALUES clause.") 2254 2255 return self.expression(exp.PartitionedOfProperty, this=this, expression=expression) 2256 2257 def _parse_partitioned_by(self) -> exp.PartitionedByProperty: 2258 self._match(TokenType.EQ) 2259 return self.expression( 2260 exp.PartitionedByProperty, 2261 this=self._parse_schema() or self._parse_bracket(self._parse_field()), 2262 ) 2263 2264 def _parse_withdata(self, no: bool = False) -> exp.WithDataProperty: 2265 if self._match_text_seq("AND", "STATISTICS"): 2266 statistics = True 2267 elif self._match_text_seq("AND", "NO", "STATISTICS"): 2268 statistics = False 2269 else: 2270 statistics = None 2271 2272 return self.expression(exp.WithDataProperty, no=no, statistics=statistics) 2273 2274 def _parse_contains_property(self) -> t.Optional[exp.SqlReadWriteProperty]: 2275 if self._match_text_seq("SQL"): 2276 return self.expression(exp.SqlReadWriteProperty, this="CONTAINS SQL") 2277 return None 2278 2279 def _parse_modifies_property(self) -> t.Optional[exp.SqlReadWriteProperty]: 2280 if self._match_text_seq("SQL", "DATA"): 2281 return self.expression(exp.SqlReadWriteProperty, this="MODIFIES SQL DATA") 2282 return None 2283 2284 def _parse_no_property(self) -> t.Optional[exp.Expression]: 2285 if self._match_text_seq("PRIMARY", "INDEX"): 2286 return exp.NoPrimaryIndexProperty() 2287 if self._match_text_seq("SQL"): 2288 return self.expression(exp.SqlReadWriteProperty, this="NO SQL") 2289 return None 2290 2291 def _parse_on_property(self) -> t.Optional[exp.Expression]: 2292 if self._match_text_seq("COMMIT", "PRESERVE", "ROWS"): 2293 return exp.OnCommitProperty() 2294 
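        # The DELETE variant below mirrors the PRESERVE branch above; any other
        # ON <target> (e.g. the cluster clause consumed by _parse_drop) falls
        # through to a generic OnProperty.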
if self._match_text_seq("COMMIT", "DELETE", "ROWS"): 2295 return exp.OnCommitProperty(delete=True) 2296 return self.expression(exp.OnProperty, this=self._parse_schema(self._parse_id_var())) 2297 2298 def _parse_reads_property(self) -> t.Optional[exp.SqlReadWriteProperty]: 2299 if self._match_text_seq("SQL", "DATA"): 2300 return self.expression(exp.SqlReadWriteProperty, this="READS SQL DATA") 2301 return None 2302 2303 def _parse_distkey(self) -> exp.DistKeyProperty: 2304 return self.expression(exp.DistKeyProperty, this=self._parse_wrapped(self._parse_id_var)) 2305 2306 def _parse_create_like(self) -> t.Optional[exp.LikeProperty]: 2307 table = self._parse_table(schema=True) 2308 2309 options = [] 2310 while self._match_texts(("INCLUDING", "EXCLUDING")): 2311 this = self._prev.text.upper() 2312 2313 id_var = self._parse_id_var() 2314 if not id_var: 2315 return None 2316 2317 options.append( 2318 self.expression(exp.Property, this=this, value=exp.var(id_var.this.upper())) 2319 ) 2320 2321 return self.expression(exp.LikeProperty, this=table, expressions=options) 2322 2323 def _parse_sortkey(self, compound: bool = False) -> exp.SortKeyProperty: 2324 return self.expression( 2325 exp.SortKeyProperty, this=self._parse_wrapped_id_vars(), compound=compound 2326 ) 2327 2328 def _parse_character_set(self, default: bool = False) -> exp.CharacterSetProperty: 2329 self._match(TokenType.EQ) 2330 return self.expression( 2331 exp.CharacterSetProperty, this=self._parse_var_or_string(), default=default 2332 ) 2333 2334 def _parse_remote_with_connection(self) -> exp.RemoteWithConnectionModelProperty: 2335 self._match_text_seq("WITH", "CONNECTION") 2336 return self.expression( 2337 exp.RemoteWithConnectionModelProperty, this=self._parse_table_parts() 2338 ) 2339 2340 def _parse_returns(self) -> exp.ReturnsProperty: 2341 value: t.Optional[exp.Expression] 2342 null = None 2343 is_table = self._match(TokenType.TABLE) 2344 2345 if is_table: 2346 if self._match(TokenType.LT): 2347 value = self.expression( 2348 exp.Schema, 2349 this="TABLE", 2350 expressions=self._parse_csv(self._parse_struct_types), 2351 ) 2352 if not self._match(TokenType.GT): 2353 self.raise_error("Expecting >") 2354 else: 2355 value = self._parse_schema(exp.var("TABLE")) 2356 elif self._match_text_seq("NULL", "ON", "NULL", "INPUT"): 2357 null = True 2358 value = None 2359 else: 2360 value = self._parse_types() 2361 2362 return self.expression(exp.ReturnsProperty, this=value, is_table=is_table, null=null) 2363 2364 def _parse_describe(self) -> exp.Describe: 2365 kind = self._match_set(self.CREATABLES) and self._prev.text 2366 style = self._match_texts(("EXTENDED", "FORMATTED", "HISTORY")) and self._prev.text.upper() 2367 if self._match(TokenType.DOT): 2368 style = None 2369 self._retreat(self._index - 2) 2370 this = self._parse_table(schema=True) 2371 properties = self._parse_properties() 2372 expressions = properties.expressions if properties else None 2373 return self.expression( 2374 exp.Describe, this=this, style=style, kind=kind, expressions=expressions 2375 ) 2376 2377 def _parse_insert(self) -> exp.Insert: 2378 comments = ensure_list(self._prev_comments) 2379 hint = self._parse_hint() 2380 overwrite = self._match(TokenType.OVERWRITE) 2381 ignore = self._match(TokenType.IGNORE) 2382 local = self._match_text_seq("LOCAL") 2383 alternative = None 2384 is_function = None 2385 2386 if self._match_text_seq("DIRECTORY"): 2387 this: t.Optional[exp.Expression] = self.expression( 2388 exp.Directory, 2389 this=self._parse_var_or_string(), 2390 
local=local, 2391 row_format=self._parse_row_format(match_row=True), 2392 ) 2393 else: 2394 if self._match(TokenType.OR): 2395 alternative = self._match_texts(self.INSERT_ALTERNATIVES) and self._prev.text 2396 2397 self._match(TokenType.INTO) 2398 comments += ensure_list(self._prev_comments) 2399 self._match(TokenType.TABLE) 2400 is_function = self._match(TokenType.FUNCTION) 2401 2402 this = ( 2403 self._parse_table(schema=True, parse_partition=True) 2404 if not is_function 2405 else self._parse_function() 2406 ) 2407 2408 returning = self._parse_returning() 2409 2410 return self.expression( 2411 exp.Insert, 2412 comments=comments, 2413 hint=hint, 2414 is_function=is_function, 2415 this=this, 2416 stored=self._match_text_seq("STORED") and self._parse_stored(), 2417 by_name=self._match_text_seq("BY", "NAME"), 2418 exists=self._parse_exists(), 2419 where=self._match_pair(TokenType.REPLACE, TokenType.WHERE) and self._parse_assignment(), 2420 expression=self._parse_derived_table_values() or self._parse_ddl_select(), 2421 conflict=self._parse_on_conflict(), 2422 returning=returning or self._parse_returning(), 2423 overwrite=overwrite, 2424 alternative=alternative, 2425 ignore=ignore, 2426 ) 2427 2428 def _parse_kill(self) -> exp.Kill: 2429 kind = exp.var(self._prev.text) if self._match_texts(("CONNECTION", "QUERY")) else None 2430 2431 return self.expression( 2432 exp.Kill, 2433 this=self._parse_primary(), 2434 kind=kind, 2435 ) 2436 2437 def _parse_on_conflict(self) -> t.Optional[exp.OnConflict]: 2438 conflict = self._match_text_seq("ON", "CONFLICT") 2439 duplicate = self._match_text_seq("ON", "DUPLICATE", "KEY") 2440 2441 if not conflict and not duplicate: 2442 return None 2443 2444 conflict_keys = None 2445 constraint = None 2446 2447 if conflict: 2448 if self._match_text_seq("ON", "CONSTRAINT"): 2449 constraint = self._parse_id_var() 2450 elif self._match(TokenType.L_PAREN): 2451 conflict_keys = self._parse_csv(self._parse_id_var) 2452 self._match_r_paren() 2453 2454 action = self._parse_var_from_options(self.CONFLICT_ACTIONS) 2455 if self._prev.token_type == TokenType.UPDATE: 2456 self._match(TokenType.SET) 2457 expressions = self._parse_csv(self._parse_equality) 2458 else: 2459 expressions = None 2460 2461 return self.expression( 2462 exp.OnConflict, 2463 duplicate=duplicate, 2464 expressions=expressions, 2465 action=action, 2466 conflict_keys=conflict_keys, 2467 constraint=constraint, 2468 ) 2469 2470 def _parse_returning(self) -> t.Optional[exp.Returning]: 2471 if not self._match(TokenType.RETURNING): 2472 return None 2473 return self.expression( 2474 exp.Returning, 2475 expressions=self._parse_csv(self._parse_expression), 2476 into=self._match(TokenType.INTO) and self._parse_table_part(), 2477 ) 2478 2479 def _parse_row(self) -> t.Optional[exp.RowFormatSerdeProperty | exp.RowFormatDelimitedProperty]: 2480 if not self._match(TokenType.FORMAT): 2481 return None 2482 return self._parse_row_format() 2483 2484 def _parse_serde_properties(self, with_: bool = False) -> t.Optional[exp.SerdeProperties]: 2485 index = self._index 2486 with_ = with_ or self._match_text_seq("WITH") 2487 2488 if not self._match(TokenType.SERDE_PROPERTIES): 2489 self._retreat(index) 2490 return None 2491 return self.expression( 2492 exp.SerdeProperties, 2493 **{ # type: ignore 2494 "expressions": self._parse_wrapped_properties(), 2495 "with": with_, 2496 }, 2497 ) 2498 2499 def _parse_row_format( 2500 self, match_row: bool = False 2501 ) -> t.Optional[exp.RowFormatSerdeProperty | exp.RowFormatDelimitedProperty]: 2502 
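        # Illustrative sketch (not part of the parser): both branches below cover
        # Hive-style row formats, e.g.
        #
        #   >>> import sqlglot
        #   >>> sql = "CREATE TABLE t (x INT) ROW FORMAT DELIMITED FIELDS TERMINATED BY ','"
        #   >>> sqlglot.parse_one(sql, read="hive").find(sqlglot.exp.RowFormatDelimitedProperty) is not None
        #   True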
if match_row and not self._match_pair(TokenType.ROW, TokenType.FORMAT): 2503 return None 2504 2505 if self._match_text_seq("SERDE"): 2506 this = self._parse_string() 2507 2508 serde_properties = self._parse_serde_properties() 2509 2510 return self.expression( 2511 exp.RowFormatSerdeProperty, this=this, serde_properties=serde_properties 2512 ) 2513 2514 self._match_text_seq("DELIMITED") 2515 2516 kwargs = {} 2517 2518 if self._match_text_seq("FIELDS", "TERMINATED", "BY"): 2519 kwargs["fields"] = self._parse_string() 2520 if self._match_text_seq("ESCAPED", "BY"): 2521 kwargs["escaped"] = self._parse_string() 2522 if self._match_text_seq("COLLECTION", "ITEMS", "TERMINATED", "BY"): 2523 kwargs["collection_items"] = self._parse_string() 2524 if self._match_text_seq("MAP", "KEYS", "TERMINATED", "BY"): 2525 kwargs["map_keys"] = self._parse_string() 2526 if self._match_text_seq("LINES", "TERMINATED", "BY"): 2527 kwargs["lines"] = self._parse_string() 2528 if self._match_text_seq("NULL", "DEFINED", "AS"): 2529 kwargs["null"] = self._parse_string() 2530 2531 return self.expression(exp.RowFormatDelimitedProperty, **kwargs) # type: ignore 2532 2533 def _parse_load(self) -> exp.LoadData | exp.Command: 2534 if self._match_text_seq("DATA"): 2535 local = self._match_text_seq("LOCAL") 2536 self._match_text_seq("INPATH") 2537 inpath = self._parse_string() 2538 overwrite = self._match(TokenType.OVERWRITE) 2539 self._match_pair(TokenType.INTO, TokenType.TABLE) 2540 2541 return self.expression( 2542 exp.LoadData, 2543 this=self._parse_table(schema=True), 2544 local=local, 2545 overwrite=overwrite, 2546 inpath=inpath, 2547 partition=self._parse_partition(), 2548 input_format=self._match_text_seq("INPUTFORMAT") and self._parse_string(), 2549 serde=self._match_text_seq("SERDE") and self._parse_string(), 2550 ) 2551 return self._parse_as_command(self._prev) 2552 2553 def _parse_delete(self) -> exp.Delete: 2554 # This handles MySQL's "Multiple-Table Syntax" 2555 # https://dev.mysql.com/doc/refman/8.0/en/delete.html 2556 tables = None 2557 comments = self._prev_comments 2558 if not self._match(TokenType.FROM, advance=False): 2559 tables = self._parse_csv(self._parse_table) or None 2560 2561 returning = self._parse_returning() 2562 2563 return self.expression( 2564 exp.Delete, 2565 comments=comments, 2566 tables=tables, 2567 this=self._match(TokenType.FROM) and self._parse_table(joins=True), 2568 using=self._match(TokenType.USING) and self._parse_table(joins=True), 2569 where=self._parse_where(), 2570 returning=returning or self._parse_returning(), 2571 limit=self._parse_limit(), 2572 ) 2573 2574 def _parse_update(self) -> exp.Update: 2575 comments = self._prev_comments 2576 this = self._parse_table(joins=True, alias_tokens=self.UPDATE_ALIAS_TOKENS) 2577 expressions = self._match(TokenType.SET) and self._parse_csv(self._parse_equality) 2578 returning = self._parse_returning() 2579 return self.expression( 2580 exp.Update, 2581 comments=comments, 2582 **{ # type: ignore 2583 "this": this, 2584 "expressions": expressions, 2585 "from": self._parse_from(joins=True), 2586 "where": self._parse_where(), 2587 "returning": returning or self._parse_returning(), 2588 "order": self._parse_order(), 2589 "limit": self._parse_limit(), 2590 }, 2591 ) 2592 2593 def _parse_uncache(self) -> exp.Uncache: 2594 if not self._match(TokenType.TABLE): 2595 self.raise_error("Expecting TABLE after UNCACHE") 2596 2597 return self.expression( 2598 exp.Uncache, exists=self._parse_exists(), this=self._parse_table(schema=True) 2599 ) 2600 2601 def 
_parse_cache(self) -> exp.Cache: 2602 lazy = self._match_text_seq("LAZY") 2603 self._match(TokenType.TABLE) 2604 table = self._parse_table(schema=True) 2605 2606 options = [] 2607 if self._match_text_seq("OPTIONS"): 2608 self._match_l_paren() 2609 k = self._parse_string() 2610 self._match(TokenType.EQ) 2611 v = self._parse_string() 2612 options = [k, v] 2613 self._match_r_paren() 2614 2615 self._match(TokenType.ALIAS) 2616 return self.expression( 2617 exp.Cache, 2618 this=table, 2619 lazy=lazy, 2620 options=options, 2621 expression=self._parse_select(nested=True), 2622 ) 2623 2624 def _parse_partition(self) -> t.Optional[exp.Partition]: 2625 if not self._match(TokenType.PARTITION): 2626 return None 2627 2628 return self.expression( 2629 exp.Partition, expressions=self._parse_wrapped_csv(self._parse_assignment) 2630 ) 2631 2632 def _parse_value(self) -> t.Optional[exp.Tuple]: 2633 if self._match(TokenType.L_PAREN): 2634 expressions = self._parse_csv(self._parse_expression) 2635 self._match_r_paren() 2636 return self.expression(exp.Tuple, expressions=expressions) 2637 2638 # In some dialects we can have VALUES 1, 2 which results in 1 column & 2 rows. 2639 expression = self._parse_expression() 2640 if expression: 2641 return self.expression(exp.Tuple, expressions=[expression]) 2642 return None 2643 2644 def _parse_projections(self) -> t.List[exp.Expression]: 2645 return self._parse_expressions() 2646 2647 def _parse_select( 2648 self, 2649 nested: bool = False, 2650 table: bool = False, 2651 parse_subquery_alias: bool = True, 2652 parse_set_operation: bool = True, 2653 ) -> t.Optional[exp.Expression]: 2654 cte = self._parse_with() 2655 2656 if cte: 2657 this = self._parse_statement() 2658 2659 if not this: 2660 self.raise_error("Failed to parse any statement following CTE") 2661 return cte 2662 2663 if "with" in this.arg_types: 2664 this.set("with", cte) 2665 else: 2666 self.raise_error(f"{this.key} does not support CTE") 2667 this = cte 2668 2669 return this 2670 2671 # duckdb supports leading with FROM x 2672 from_ = self._parse_from() if self._match(TokenType.FROM, advance=False) else None 2673 2674 if self._match(TokenType.SELECT): 2675 comments = self._prev_comments 2676 2677 hint = self._parse_hint() 2678 all_ = self._match(TokenType.ALL) 2679 distinct = self._match_set(self.DISTINCT_TOKENS) 2680 2681 kind = ( 2682 self._match(TokenType.ALIAS) 2683 and self._match_texts(("STRUCT", "VALUE")) 2684 and self._prev.text.upper() 2685 ) 2686 2687 if distinct: 2688 distinct = self.expression( 2689 exp.Distinct, 2690 on=self._parse_value() if self._match(TokenType.ON) else None, 2691 ) 2692 2693 if all_ and distinct: 2694 self.raise_error("Cannot specify both ALL and DISTINCT after SELECT") 2695 2696 limit = self._parse_limit(top=True) 2697 projections = self._parse_projections() 2698 2699 this = self.expression( 2700 exp.Select, 2701 kind=kind, 2702 hint=hint, 2703 distinct=distinct, 2704 expressions=projections, 2705 limit=limit, 2706 ) 2707 this.comments = comments 2708 2709 into = self._parse_into() 2710 if into: 2711 this.set("into", into) 2712 2713 if not from_: 2714 from_ = self._parse_from() 2715 2716 if from_: 2717 this.set("from", from_) 2718 2719 this = self._parse_query_modifiers(this) 2720 elif (table or nested) and self._match(TokenType.L_PAREN): 2721 if self._match(TokenType.PIVOT): 2722 this = self._parse_simplified_pivot() 2723 elif self._match(TokenType.FROM): 2724 this = exp.select("*").from_( 2725 t.cast(exp.From, self._parse_from(skip_from_token=True)) 2726 ) 2727 else: 2728 
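                # A parenthesized table or nested SELECT: parse it, attach any set
                # operations and query modifiers that appear inside the parens, then
                # wrap the result as a Subquery via the early return below.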
this = ( 2729 self._parse_table() 2730 if table 2731 else self._parse_select(nested=True, parse_set_operation=False) 2732 ) 2733 this = self._parse_query_modifiers(self._parse_set_operations(this)) 2734 2735 self._match_r_paren() 2736 2737 # We return early here so that the UNION isn't attached to the subquery by the 2738 # following call to _parse_set_operations, but instead becomes the parent node 2739 return self._parse_subquery(this, parse_alias=parse_subquery_alias) 2740 elif self._match(TokenType.VALUES, advance=False): 2741 this = self._parse_derived_table_values() 2742 elif from_: 2743 this = exp.select("*").from_(from_.this, copy=False) 2744 else: 2745 this = None 2746 2747 if parse_set_operation: 2748 return self._parse_set_operations(this) 2749 return this 2750 2751 def _parse_with(self, skip_with_token: bool = False) -> t.Optional[exp.With]: 2752 if not skip_with_token and not self._match(TokenType.WITH): 2753 return None 2754 2755 comments = self._prev_comments 2756 recursive = self._match(TokenType.RECURSIVE) 2757 2758 expressions = [] 2759 while True: 2760 expressions.append(self._parse_cte()) 2761 2762 if not self._match(TokenType.COMMA) and not self._match(TokenType.WITH): 2763 break 2764 else: 2765 self._match(TokenType.WITH) 2766 2767 return self.expression( 2768 exp.With, comments=comments, expressions=expressions, recursive=recursive 2769 ) 2770 2771 def _parse_cte(self) -> exp.CTE: 2772 alias = self._parse_table_alias(self.ID_VAR_TOKENS) 2773 if not alias or not alias.this: 2774 self.raise_error("Expected CTE to have alias") 2775 2776 self._match(TokenType.ALIAS) 2777 2778 if self._match_text_seq("NOT", "MATERIALIZED"): 2779 materialized = False 2780 elif self._match_text_seq("MATERIALIZED"): 2781 materialized = True 2782 else: 2783 materialized = None 2784 2785 return self.expression( 2786 exp.CTE, 2787 this=self._parse_wrapped(self._parse_statement), 2788 alias=alias, 2789 materialized=materialized, 2790 ) 2791 2792 def _parse_table_alias( 2793 self, alias_tokens: t.Optional[t.Collection[TokenType]] = None 2794 ) -> t.Optional[exp.TableAlias]: 2795 any_token = self._match(TokenType.ALIAS) 2796 alias = ( 2797 self._parse_id_var(any_token=any_token, tokens=alias_tokens or self.TABLE_ALIAS_TOKENS) 2798 or self._parse_string_as_identifier() 2799 ) 2800 2801 index = self._index 2802 if self._match(TokenType.L_PAREN): 2803 columns = self._parse_csv(self._parse_function_parameter) 2804 self._match_r_paren() if columns else self._retreat(index) 2805 else: 2806 columns = None 2807 2808 if not alias and not columns: 2809 return None 2810 2811 table_alias = self.expression(exp.TableAlias, this=alias, columns=columns) 2812 2813 # We bubble up comments from the Identifier to the TableAlias 2814 if isinstance(alias, exp.Identifier): 2815 table_alias.add_comments(alias.pop_comments()) 2816 2817 return table_alias 2818 2819 def _parse_subquery( 2820 self, this: t.Optional[exp.Expression], parse_alias: bool = True 2821 ) -> t.Optional[exp.Subquery]: 2822 if not this: 2823 return None 2824 2825 return self.expression( 2826 exp.Subquery, 2827 this=this, 2828 pivots=self._parse_pivots(), 2829 alias=self._parse_table_alias() if parse_alias else None, 2830 ) 2831 2832 def _implicit_unnests_to_explicit(self, this: E) -> E: 2833 from sqlglot.optimizer.normalize_identifiers import normalize_identifiers as _norm 2834 2835 refs = {_norm(this.args["from"].this.copy(), dialect=self.dialect).alias_or_name} 2836 for i, join in enumerate(this.args.get("joins") or []): 2837 table = join.this 2838 
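            # A copy is normalized below (rather than the node itself) so the
            # original identifier casing survives in the AST. Illustrative sketch,
            # assuming a dialect with SUPPORTS_IMPLICIT_UNNEST (e.g. BigQuery):
            #
            #   >>> import sqlglot
            #   >>> q = sqlglot.parse_one("SELECT * FROM t, t.arr", read="bigquery")
            #   >>> q.find(sqlglot.exp.Unnest) is not None
            #   True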
normalized_table = table.copy() 2839 normalized_table.meta["maybe_column"] = True 2840 normalized_table = _norm(normalized_table, dialect=self.dialect) 2841 2842 if isinstance(table, exp.Table) and not join.args.get("on"): 2843 if normalized_table.parts[0].name in refs: 2844 table_as_column = table.to_column() 2845 unnest = exp.Unnest(expressions=[table_as_column]) 2846 2847 # Table.to_column creates a parent Alias node that we want to convert to 2848 # a TableAlias and attach to the Unnest, so it matches the parser's output 2849 if isinstance(table.args.get("alias"), exp.TableAlias): 2850 table_as_column.replace(table_as_column.this) 2851 exp.alias_(unnest, None, table=[table.args["alias"].this], copy=False) 2852 2853 table.replace(unnest) 2854 2855 refs.add(normalized_table.alias_or_name) 2856 2857 return this 2858 2859 def _parse_query_modifiers( 2860 self, this: t.Optional[exp.Expression] 2861 ) -> t.Optional[exp.Expression]: 2862 if isinstance(this, (exp.Query, exp.Table)): 2863 for join in self._parse_joins(): 2864 this.append("joins", join) 2865 for lateral in iter(self._parse_lateral, None): 2866 this.append("laterals", lateral) 2867 2868 while True: 2869 if self._match_set(self.QUERY_MODIFIER_PARSERS, advance=False): 2870 parser = self.QUERY_MODIFIER_PARSERS[self._curr.token_type] 2871 key, expression = parser(self) 2872 2873 if expression: 2874 this.set(key, expression) 2875 if key == "limit": 2876 offset = expression.args.pop("offset", None) 2877 2878 if offset: 2879 offset = exp.Offset(expression=offset) 2880 this.set("offset", offset) 2881 2882 limit_by_expressions = expression.expressions 2883 expression.set("expressions", None) 2884 offset.set("expressions", limit_by_expressions) 2885 continue 2886 break 2887 2888 if self.SUPPORTS_IMPLICIT_UNNEST and this and "from" in this.args: 2889 this = self._implicit_unnests_to_explicit(this) 2890 2891 return this 2892 2893 def _parse_hint(self) -> t.Optional[exp.Hint]: 2894 if self._match(TokenType.HINT): 2895 hints = [] 2896 for hint in iter( 2897 lambda: self._parse_csv( 2898 lambda: self._parse_function() or self._parse_var(upper=True) 2899 ), 2900 [], 2901 ): 2902 hints.extend(hint) 2903 2904 if not self._match_pair(TokenType.STAR, TokenType.SLASH): 2905 self.raise_error("Expected */ after HINT") 2906 2907 return self.expression(exp.Hint, expressions=hints) 2908 2909 return None 2910 2911 def _parse_into(self) -> t.Optional[exp.Into]: 2912 if not self._match(TokenType.INTO): 2913 return None 2914 2915 temp = self._match(TokenType.TEMPORARY) 2916 unlogged = self._match_text_seq("UNLOGGED") 2917 self._match(TokenType.TABLE) 2918 2919 return self.expression( 2920 exp.Into, this=self._parse_table(schema=True), temporary=temp, unlogged=unlogged 2921 ) 2922 2923 def _parse_from( 2924 self, joins: bool = False, skip_from_token: bool = False 2925 ) -> t.Optional[exp.From]: 2926 if not skip_from_token and not self._match(TokenType.FROM): 2927 return None 2928 2929 return self.expression( 2930 exp.From, comments=self._prev_comments, this=self._parse_table(joins=joins) 2931 ) 2932 2933 def _parse_match_recognize_measure(self) -> exp.MatchRecognizeMeasure: 2934 return self.expression( 2935 exp.MatchRecognizeMeasure, 2936 window_frame=self._match_texts(("FINAL", "RUNNING")) and self._prev.text.upper(), 2937 this=self._parse_expression(), 2938 ) 2939 2940 def _parse_match_recognize(self) -> t.Optional[exp.MatchRecognize]: 2941 if not self._match(TokenType.MATCH_RECOGNIZE): 2942 return None 2943 2944 self._match_l_paren() 2945 2946 partition = 
self._parse_partition_by() 2947 order = self._parse_order() 2948 2949 measures = ( 2950 self._parse_csv(self._parse_match_recognize_measure) 2951 if self._match_text_seq("MEASURES") 2952 else None 2953 ) 2954 2955 if self._match_text_seq("ONE", "ROW", "PER", "MATCH"): 2956 rows = exp.var("ONE ROW PER MATCH") 2957 elif self._match_text_seq("ALL", "ROWS", "PER", "MATCH"): 2958 text = "ALL ROWS PER MATCH" 2959 if self._match_text_seq("SHOW", "EMPTY", "MATCHES"): 2960 text += " SHOW EMPTY MATCHES" 2961 elif self._match_text_seq("OMIT", "EMPTY", "MATCHES"): 2962 text += " OMIT EMPTY MATCHES" 2963 elif self._match_text_seq("WITH", "UNMATCHED", "ROWS"): 2964 text += " WITH UNMATCHED ROWS" 2965 rows = exp.var(text) 2966 else: 2967 rows = None 2968 2969 if self._match_text_seq("AFTER", "MATCH", "SKIP"): 2970 text = "AFTER MATCH SKIP" 2971 if self._match_text_seq("PAST", "LAST", "ROW"): 2972 text += " PAST LAST ROW" 2973 elif self._match_text_seq("TO", "NEXT", "ROW"): 2974 text += " TO NEXT ROW" 2975 elif self._match_text_seq("TO", "FIRST"): 2976 text += f" TO FIRST {self._advance_any().text}" # type: ignore 2977 elif self._match_text_seq("TO", "LAST"): 2978 text += f" TO LAST {self._advance_any().text}" # type: ignore 2979 after = exp.var(text) 2980 else: 2981 after = None 2982 2983 if self._match_text_seq("PATTERN"): 2984 self._match_l_paren() 2985 2986 if not self._curr: 2987 self.raise_error("Expecting )", self._curr) 2988 2989 paren = 1 2990 start = self._curr 2991 2992 while self._curr and paren > 0: 2993 if self._curr.token_type == TokenType.L_PAREN: 2994 paren += 1 2995 if self._curr.token_type == TokenType.R_PAREN: 2996 paren -= 1 2997 2998 end = self._prev 2999 self._advance() 3000 3001 if paren > 0: 3002 self.raise_error("Expecting )", self._curr) 3003 3004 pattern = exp.var(self._find_sql(start, end)) 3005 else: 3006 pattern = None 3007 3008 define = ( 3009 self._parse_csv(self._parse_name_as_expression) 3010 if self._match_text_seq("DEFINE") 3011 else None 3012 ) 3013 3014 self._match_r_paren() 3015 3016 return self.expression( 3017 exp.MatchRecognize, 3018 partition_by=partition, 3019 order=order, 3020 measures=measures, 3021 rows=rows, 3022 after=after, 3023 pattern=pattern, 3024 define=define, 3025 alias=self._parse_table_alias(), 3026 ) 3027 3028 def _parse_lateral(self) -> t.Optional[exp.Lateral]: 3029 cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY) 3030 if not cross_apply and self._match_pair(TokenType.OUTER, TokenType.APPLY): 3031 cross_apply = False 3032 3033 if cross_apply is not None: 3034 this = self._parse_select(table=True) 3035 view = None 3036 outer = None 3037 elif self._match(TokenType.LATERAL): 3038 this = self._parse_select(table=True) 3039 view = self._match(TokenType.VIEW) 3040 outer = self._match(TokenType.OUTER) 3041 else: 3042 return None 3043 3044 if not this: 3045 this = ( 3046 self._parse_unnest() 3047 or self._parse_function() 3048 or self._parse_id_var(any_token=False) 3049 ) 3050 3051 while self._match(TokenType.DOT): 3052 this = exp.Dot( 3053 this=this, 3054 expression=self._parse_function() or self._parse_id_var(any_token=False), 3055 ) 3056 3057 if view: 3058 table = self._parse_id_var(any_token=False) 3059 columns = self._parse_csv(self._parse_id_var) if self._match(TokenType.ALIAS) else [] 3060 table_alias: t.Optional[exp.TableAlias] = self.expression( 3061 exp.TableAlias, this=table, columns=columns 3062 ) 3063 elif isinstance(this, (exp.Subquery, exp.Unnest)) and this.alias: 3064 # We move the alias from the lateral's child node to 
the lateral itself 3065 table_alias = this.args["alias"].pop() 3066 else: 3067 table_alias = self._parse_table_alias() 3068 3069 return self.expression( 3070 exp.Lateral, 3071 this=this, 3072 view=view, 3073 outer=outer, 3074 alias=table_alias, 3075 cross_apply=cross_apply, 3076 ) 3077 3078 def _parse_join_parts( 3079 self, 3080 ) -> t.Tuple[t.Optional[Token], t.Optional[Token], t.Optional[Token]]: 3081 return ( 3082 self._match_set(self.JOIN_METHODS) and self._prev, 3083 self._match_set(self.JOIN_SIDES) and self._prev, 3084 self._match_set(self.JOIN_KINDS) and self._prev, 3085 ) 3086 3087 def _parse_join( 3088 self, skip_join_token: bool = False, parse_bracket: bool = False 3089 ) -> t.Optional[exp.Join]: 3090 if self._match(TokenType.COMMA): 3091 return self.expression(exp.Join, this=self._parse_table()) 3092 3093 index = self._index 3094 method, side, kind = self._parse_join_parts() 3095 hint = self._prev.text if self._match_texts(self.JOIN_HINTS) else None 3096 join = self._match(TokenType.JOIN) 3097 3098 if not skip_join_token and not join: 3099 self._retreat(index) 3100 kind = None 3101 method = None 3102 side = None 3103 3104 outer_apply = self._match_pair(TokenType.OUTER, TokenType.APPLY, False) 3105 cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY, False) 3106 3107 if not skip_join_token and not join and not outer_apply and not cross_apply: 3108 return None 3109 3110 kwargs: t.Dict[str, t.Any] = {"this": self._parse_table(parse_bracket=parse_bracket)} 3111 3112 if method: 3113 kwargs["method"] = method.text 3114 if side: 3115 kwargs["side"] = side.text 3116 if kind: 3117 kwargs["kind"] = kind.text 3118 if hint: 3119 kwargs["hint"] = hint 3120 3121 if self._match(TokenType.MATCH_CONDITION): 3122 kwargs["match_condition"] = self._parse_wrapped(self._parse_comparison) 3123 3124 if self._match(TokenType.ON): 3125 kwargs["on"] = self._parse_assignment() 3126 elif self._match(TokenType.USING): 3127 kwargs["using"] = self._parse_wrapped_id_vars() 3128 elif not isinstance(kwargs["this"], exp.Unnest) and not ( 3129 kind and kind.token_type == TokenType.CROSS 3130 ): 3131 index = self._index 3132 joins: t.Optional[list] = list(self._parse_joins()) 3133 3134 if joins and self._match(TokenType.ON): 3135 kwargs["on"] = self._parse_assignment() 3136 elif joins and self._match(TokenType.USING): 3137 kwargs["using"] = self._parse_wrapped_id_vars() 3138 else: 3139 joins = None 3140 self._retreat(index) 3141 3142 kwargs["this"].set("joins", joins if joins else None) 3143 3144 comments = [c for token in (method, side, kind) if token for c in token.comments] 3145 return self.expression(exp.Join, comments=comments, **kwargs) 3146 3147 def _parse_opclass(self) -> t.Optional[exp.Expression]: 3148 this = self._parse_assignment() 3149 3150 if self._match_texts(self.OPCLASS_FOLLOW_KEYWORDS, advance=False): 3151 return this 3152 3153 if not self._match_set(self.OPTYPE_FOLLOW_TOKENS, advance=False): 3154 return self.expression(exp.Opclass, this=this, expression=self._parse_table_parts()) 3155 3156 return this 3157 3158 def _parse_index_params(self) -> exp.IndexParameters: 3159 using = self._parse_var(any_token=True) if self._match(TokenType.USING) else None 3160 3161 if self._match(TokenType.L_PAREN, advance=False): 3162 columns = self._parse_wrapped_csv(self._parse_with_operator) 3163 else: 3164 columns = None 3165 3166 include = self._parse_wrapped_id_vars() if self._match_text_seq("INCLUDE") else None 3167 partition_by = self._parse_partition_by() 3168 with_storage = 
self._match(TokenType.WITH) and self._parse_wrapped_properties() 3169 tablespace = ( 3170 self._parse_var(any_token=True) 3171 if self._match_text_seq("USING", "INDEX", "TABLESPACE") 3172 else None 3173 ) 3174 where = self._parse_where() 3175 3176 return self.expression( 3177 exp.IndexParameters, 3178 using=using, 3179 columns=columns, 3180 include=include, 3181 partition_by=partition_by, 3182 where=where, 3183 with_storage=with_storage, 3184 tablespace=tablespace, 3185 ) 3186 3187 def _parse_index( 3188 self, index: t.Optional[exp.Expression] = None, anonymous: bool = False 3189 ) -> t.Optional[exp.Index]: 3190 if index or anonymous: 3191 unique = None 3192 primary = None 3193 amp = None 3194 3195 self._match(TokenType.ON) 3196 self._match(TokenType.TABLE) # hive 3197 table = self._parse_table_parts(schema=True) 3198 else: 3199 unique = self._match(TokenType.UNIQUE) 3200 primary = self._match_text_seq("PRIMARY") 3201 amp = self._match_text_seq("AMP") 3202 3203 if not self._match(TokenType.INDEX): 3204 return None 3205 3206 index = self._parse_id_var() 3207 table = None 3208 3209 params = self._parse_index_params() 3210 3211 return self.expression( 3212 exp.Index, 3213 this=index, 3214 table=table, 3215 unique=unique, 3216 primary=primary, 3217 amp=amp, 3218 params=params, 3219 ) 3220 3221 def _parse_table_hints(self) -> t.Optional[t.List[exp.Expression]]: 3222 hints: t.List[exp.Expression] = [] 3223 if self._match_pair(TokenType.WITH, TokenType.L_PAREN): 3224 # https://learn.microsoft.com/en-us/sql/t-sql/queries/hints-transact-sql-table?view=sql-server-ver16 3225 hints.append( 3226 self.expression( 3227 exp.WithTableHint, 3228 expressions=self._parse_csv( 3229 lambda: self._parse_function() or self._parse_var(any_token=True) 3230 ), 3231 ) 3232 ) 3233 self._match_r_paren() 3234 else: 3235 # https://dev.mysql.com/doc/refman/8.0/en/index-hints.html 3236 while self._match_set(self.TABLE_INDEX_HINT_TOKENS): 3237 hint = exp.IndexTableHint(this=self._prev.text.upper()) 3238 3239 self._match_texts(("INDEX", "KEY")) 3240 if self._match(TokenType.FOR): 3241 hint.set("target", self._advance_any() and self._prev.text.upper()) 3242 3243 hint.set("expressions", self._parse_wrapped_id_vars()) 3244 hints.append(hint) 3245 3246 return hints or None 3247 3248 def _parse_table_part(self, schema: bool = False) -> t.Optional[exp.Expression]: 3249 return ( 3250 (not schema and self._parse_function(optional_parens=False)) 3251 or self._parse_id_var(any_token=False) 3252 or self._parse_string_as_identifier() 3253 or self._parse_placeholder() 3254 ) 3255 3256 def _parse_table_parts( 3257 self, schema: bool = False, is_db_reference: bool = False, wildcard: bool = False 3258 ) -> exp.Table: 3259 catalog = None 3260 db = None 3261 table: t.Optional[exp.Expression | str] = self._parse_table_part(schema=schema) 3262 3263 while self._match(TokenType.DOT): 3264 if catalog: 3265 # This allows nesting the table in arbitrarily many dot expressions if needed 3266 table = self.expression( 3267 exp.Dot, this=table, expression=self._parse_table_part(schema=schema) 3268 ) 3269 else: 3270 catalog = db 3271 db = table 3272 # "" used for tsql FROM a..b case 3273 table = self._parse_table_part(schema=schema) or "" 3274 3275 if ( 3276 wildcard 3277 and self._is_connected() 3278 and (isinstance(table, exp.Identifier) or not table) 3279 and self._match(TokenType.STAR) 3280 ): 3281 if isinstance(table, exp.Identifier): 3282 table.args["this"] += "*" 3283 else: 3284 table = exp.Identifier(this="*") 3285 3286 # We bubble up comments 
from the Identifier to the Table 3287 comments = table.pop_comments() if isinstance(table, exp.Expression) else None 3288 3289 if is_db_reference: 3290 catalog = db 3291 db = table 3292 table = None 3293 3294 if not table and not is_db_reference: 3295 self.raise_error(f"Expected table name but got {self._curr}") 3296 if not db and is_db_reference: 3297 self.raise_error(f"Expected database name but got {self._curr}") 3298 3299 return self.expression( 3300 exp.Table, 3301 comments=comments, 3302 this=table, 3303 db=db, 3304 catalog=catalog, 3305 pivots=self._parse_pivots(), 3306 ) 3307 3308 def _parse_table( 3309 self, 3310 schema: bool = False, 3311 joins: bool = False, 3312 alias_tokens: t.Optional[t.Collection[TokenType]] = None, 3313 parse_bracket: bool = False, 3314 is_db_reference: bool = False, 3315 parse_partition: bool = False, 3316 ) -> t.Optional[exp.Expression]: 3317 lateral = self._parse_lateral() 3318 if lateral: 3319 return lateral 3320 3321 unnest = self._parse_unnest() 3322 if unnest: 3323 return unnest 3324 3325 values = self._parse_derived_table_values() 3326 if values: 3327 return values 3328 3329 subquery = self._parse_select(table=True) 3330 if subquery: 3331 if not subquery.args.get("pivots"): 3332 subquery.set("pivots", self._parse_pivots()) 3333 return subquery 3334 3335 bracket = parse_bracket and self._parse_bracket(None) 3336 bracket = self.expression(exp.Table, this=bracket) if bracket else None 3337 3338 only = self._match(TokenType.ONLY) 3339 3340 this = t.cast( 3341 exp.Expression, 3342 bracket 3343 or self._parse_bracket( 3344 self._parse_table_parts(schema=schema, is_db_reference=is_db_reference) 3345 ), 3346 ) 3347 3348 if only: 3349 this.set("only", only) 3350 3351 # Postgres supports a wildcard (table) suffix operator, which is a no-op in this context 3352 self._match_text_seq("*") 3353 3354 parse_partition = parse_partition or self.SUPPORTS_PARTITION_SELECTION 3355 if parse_partition and self._match(TokenType.PARTITION, advance=False): 3356 this.set("partition", self._parse_partition()) 3357 3358 if schema: 3359 return self._parse_schema(this=this) 3360 3361 version = self._parse_version() 3362 3363 if version: 3364 this.set("version", version) 3365 3366 if self.dialect.ALIAS_POST_TABLESAMPLE: 3367 table_sample = self._parse_table_sample() 3368 3369 alias = self._parse_table_alias(alias_tokens=alias_tokens or self.TABLE_ALIAS_TOKENS) 3370 if alias: 3371 this.set("alias", alias) 3372 3373 if isinstance(this, exp.Table) and self._match_text_seq("AT"): 3374 return self.expression( 3375 exp.AtIndex, this=this.to_column(copy=False), expression=self._parse_id_var() 3376 ) 3377 3378 this.set("hints", self._parse_table_hints()) 3379 3380 if not this.args.get("pivots"): 3381 this.set("pivots", self._parse_pivots()) 3382 3383 if not self.dialect.ALIAS_POST_TABLESAMPLE: 3384 table_sample = self._parse_table_sample() 3385 3386 if table_sample: 3387 table_sample.set("this", this) 3388 this = table_sample 3389 3390 if joins: 3391 for join in self._parse_joins(): 3392 this.append("joins", join) 3393 3394 if self._match_pair(TokenType.WITH, TokenType.ORDINALITY): 3395 this.set("ordinality", True) 3396 this.set("alias", self._parse_table_alias()) 3397 3398 return this 3399 3400 def _parse_version(self) -> t.Optional[exp.Version]: 3401 if self._match(TokenType.TIMESTAMP_SNAPSHOT): 3402 this = "TIMESTAMP" 3403 elif self._match(TokenType.VERSION_SNAPSHOT): 3404 this = "VERSION" 3405 else: 3406 return None 3407 3408 if self._match_set((TokenType.FROM, TokenType.BETWEEN)): 
            kind = self._prev.text.upper()
            start = self._parse_bitwise()
            self._match_texts(("TO", "AND"))
            end = self._parse_bitwise()
            expression: t.Optional[exp.Expression] = self.expression(
                exp.Tuple, expressions=[start, end]
            )
        elif self._match_text_seq("CONTAINED", "IN"):
            kind = "CONTAINED IN"
            expression = self.expression(
                exp.Tuple, expressions=self._parse_wrapped_csv(self._parse_bitwise)
            )
        elif self._match(TokenType.ALL):
            kind = "ALL"
            expression = None
        else:
            self._match_text_seq("AS", "OF")
            kind = "AS OF"
            expression = self._parse_type()

        return self.expression(exp.Version, this=this, expression=expression, kind=kind)

    def _parse_unnest(self, with_alias: bool = True) -> t.Optional[exp.Unnest]:
        if not self._match(TokenType.UNNEST):
            return None

        expressions = self._parse_wrapped_csv(self._parse_equality)
        offset = self._match_pair(TokenType.WITH, TokenType.ORDINALITY)

        alias = self._parse_table_alias() if with_alias else None

        if alias:
            if self.dialect.UNNEST_COLUMN_ONLY:
                if alias.args.get("columns"):
                    self.raise_error("Unexpected extra column alias in unnest.")

                alias.set("columns", [alias.this])
                alias.set("this", None)

            columns = alias.args.get("columns") or []
            if offset and len(expressions) < len(columns):
                offset = columns.pop()

        if not offset and self._match_pair(TokenType.WITH, TokenType.OFFSET):
            self._match(TokenType.ALIAS)
            offset = self._parse_id_var(
                any_token=False, tokens=self.UNNEST_OFFSET_ALIAS_TOKENS
            ) or exp.to_identifier("offset")

        return self.expression(exp.Unnest, expressions=expressions, alias=alias, offset=offset)

    def _parse_derived_table_values(self) -> t.Optional[exp.Values]:
        is_derived = self._match_pair(TokenType.L_PAREN, TokenType.VALUES)
        if not is_derived and not self._match_text_seq("VALUES"):
            return None

        expressions = self._parse_csv(self._parse_value)
        alias = self._parse_table_alias()

        if is_derived:
            self._match_r_paren()

        return self.expression(
            exp.Values, expressions=expressions, alias=alias or self._parse_table_alias()
        )

    def _parse_table_sample(self, as_modifier: bool = False) -> t.Optional[exp.TableSample]:
        if not self._match(TokenType.TABLE_SAMPLE) and not (
            as_modifier and self._match_text_seq("USING", "SAMPLE")
        ):
            return None

        bucket_numerator = None
        bucket_denominator = None
        bucket_field = None
        percent = None
        size = None
        seed = None

        method = self._parse_var(tokens=(TokenType.ROW,), upper=True)
        matched_l_paren = self._match(TokenType.L_PAREN)

        if self.TABLESAMPLE_CSV:
            num = None
            expressions = self._parse_csv(self._parse_primary)
        else:
            expressions = None
            num = (
                self._parse_factor()
                if self._match(TokenType.NUMBER, advance=False)
                else self._parse_primary() or self._parse_placeholder()
            )

        if self._match_text_seq("BUCKET"):
            bucket_numerator = self._parse_number()
            self._match_text_seq("OUT", "OF")
            bucket_denominator = self._parse_number()
            self._match(TokenType.ON)
            bucket_field = self._parse_field()
        elif self._match_set((TokenType.PERCENT, TokenType.MOD)):
            percent = num
        elif self._match(TokenType.ROWS) or not self.dialect.TABLESAMPLE_SIZE_IS_PERCENT:
            size =
num 3512 else: 3513 percent = num 3514 3515 if matched_l_paren: 3516 self._match_r_paren() 3517 3518 if self._match(TokenType.L_PAREN): 3519 method = self._parse_var(upper=True) 3520 seed = self._match(TokenType.COMMA) and self._parse_number() 3521 self._match_r_paren() 3522 elif self._match_texts(("SEED", "REPEATABLE")): 3523 seed = self._parse_wrapped(self._parse_number) 3524 3525 if not method and self.DEFAULT_SAMPLING_METHOD: 3526 method = exp.var(self.DEFAULT_SAMPLING_METHOD) 3527 3528 return self.expression( 3529 exp.TableSample, 3530 expressions=expressions, 3531 method=method, 3532 bucket_numerator=bucket_numerator, 3533 bucket_denominator=bucket_denominator, 3534 bucket_field=bucket_field, 3535 percent=percent, 3536 size=size, 3537 seed=seed, 3538 ) 3539 3540 def _parse_pivots(self) -> t.Optional[t.List[exp.Pivot]]: 3541 return list(iter(self._parse_pivot, None)) or None 3542 3543 def _parse_joins(self) -> t.Iterator[exp.Join]: 3544 return iter(self._parse_join, None) 3545 3546 # https://duckdb.org/docs/sql/statements/pivot 3547 def _parse_simplified_pivot(self) -> exp.Pivot: 3548 def _parse_on() -> t.Optional[exp.Expression]: 3549 this = self._parse_bitwise() 3550 return self._parse_in(this) if self._match(TokenType.IN) else this 3551 3552 this = self._parse_table() 3553 expressions = self._match(TokenType.ON) and self._parse_csv(_parse_on) 3554 using = self._match(TokenType.USING) and self._parse_csv( 3555 lambda: self._parse_alias(self._parse_function()) 3556 ) 3557 group = self._parse_group() 3558 return self.expression( 3559 exp.Pivot, this=this, expressions=expressions, using=using, group=group 3560 ) 3561 3562 def _parse_pivot_in(self) -> exp.In: 3563 def _parse_aliased_expression() -> t.Optional[exp.Expression]: 3564 this = self._parse_assignment() 3565 3566 self._match(TokenType.ALIAS) 3567 alias = self._parse_field() 3568 if alias: 3569 return self.expression(exp.PivotAlias, this=this, alias=alias) 3570 3571 return this 3572 3573 value = self._parse_column() 3574 3575 if not self._match_pair(TokenType.IN, TokenType.L_PAREN): 3576 self.raise_error("Expecting IN (") 3577 3578 aliased_expressions = self._parse_csv(_parse_aliased_expression) 3579 3580 self._match_r_paren() 3581 return self.expression(exp.In, this=value, expressions=aliased_expressions) 3582 3583 def _parse_pivot(self) -> t.Optional[exp.Pivot]: 3584 index = self._index 3585 include_nulls = None 3586 3587 if self._match(TokenType.PIVOT): 3588 unpivot = False 3589 elif self._match(TokenType.UNPIVOT): 3590 unpivot = True 3591 3592 # https://docs.databricks.com/en/sql/language-manual/sql-ref-syntax-qry-select-unpivot.html#syntax 3593 if self._match_text_seq("INCLUDE", "NULLS"): 3594 include_nulls = True 3595 elif self._match_text_seq("EXCLUDE", "NULLS"): 3596 include_nulls = False 3597 else: 3598 return None 3599 3600 expressions = [] 3601 3602 if not self._match(TokenType.L_PAREN): 3603 self._retreat(index) 3604 return None 3605 3606 if unpivot: 3607 expressions = self._parse_csv(self._parse_column) 3608 else: 3609 expressions = self._parse_csv(lambda: self._parse_alias(self._parse_function())) 3610 3611 if not expressions: 3612 self.raise_error("Failed to parse PIVOT's aggregation list") 3613 3614 if not self._match(TokenType.FOR): 3615 self.raise_error("Expecting FOR") 3616 3617 field = self._parse_pivot_in() 3618 3619 self._match_r_paren() 3620 3621 pivot = self.expression( 3622 exp.Pivot, 3623 expressions=expressions, 3624 field=field, 3625 unpivot=unpivot, 3626 include_nulls=include_nulls, 3627 ) 3628 3629 
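# A small sketch of the UNNEST handling above (Presto/Trino syntax; the query
# is an illustrative assumption): WITH ORDINALITY is captured as a boolean
# first, and when the alias lists more columns than there are unnested
# expressions, the trailing column is popped off to become the offset:
#
#   >>> import sqlglot
#   >>> from sqlglot import exp
#   >>> sql = "SELECT * FROM UNNEST(ARRAY[1, 2]) WITH ORDINALITY AS t(x, pos)"
#   >>> sqlglot.parse_one(sql, read="presto").find(exp.Unnest).args["offset"].name
#   'pos'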
if not self._match_set((TokenType.PIVOT, TokenType.UNPIVOT), advance=False): 3630 pivot.set("alias", self._parse_table_alias()) 3631 3632 if not unpivot: 3633 names = self._pivot_column_names(t.cast(t.List[exp.Expression], expressions)) 3634 3635 columns: t.List[exp.Expression] = [] 3636 for fld in pivot.args["field"].expressions: 3637 field_name = fld.sql() if self.IDENTIFY_PIVOT_STRINGS else fld.alias_or_name 3638 for name in names: 3639 if self.PREFIXED_PIVOT_COLUMNS: 3640 name = f"{name}_{field_name}" if name else field_name 3641 else: 3642 name = f"{field_name}_{name}" if name else field_name 3643 3644 columns.append(exp.to_identifier(name)) 3645 3646 pivot.set("columns", columns) 3647 3648 return pivot 3649 3650 def _pivot_column_names(self, aggregations: t.List[exp.Expression]) -> t.List[str]: 3651 return [agg.alias for agg in aggregations] 3652 3653 def _parse_prewhere(self, skip_where_token: bool = False) -> t.Optional[exp.PreWhere]: 3654 if not skip_where_token and not self._match(TokenType.PREWHERE): 3655 return None 3656 3657 return self.expression( 3658 exp.PreWhere, comments=self._prev_comments, this=self._parse_assignment() 3659 ) 3660 3661 def _parse_where(self, skip_where_token: bool = False) -> t.Optional[exp.Where]: 3662 if not skip_where_token and not self._match(TokenType.WHERE): 3663 return None 3664 3665 return self.expression( 3666 exp.Where, comments=self._prev_comments, this=self._parse_assignment() 3667 ) 3668 3669 def _parse_group(self, skip_group_by_token: bool = False) -> t.Optional[exp.Group]: 3670 if not skip_group_by_token and not self._match(TokenType.GROUP_BY): 3671 return None 3672 3673 elements: t.Dict[str, t.Any] = defaultdict(list) 3674 3675 if self._match(TokenType.ALL): 3676 elements["all"] = True 3677 elif self._match(TokenType.DISTINCT): 3678 elements["all"] = False 3679 3680 while True: 3681 expressions = self._parse_csv( 3682 lambda: None 3683 if self._match(TokenType.ROLLUP, advance=False) 3684 else self._parse_assignment() 3685 ) 3686 if expressions: 3687 elements["expressions"].extend(expressions) 3688 3689 grouping_sets = self._parse_grouping_sets() 3690 if grouping_sets: 3691 elements["grouping_sets"].extend(grouping_sets) 3692 3693 rollup = None 3694 cube = None 3695 totals = None 3696 3697 index = self._index 3698 with_ = self._match(TokenType.WITH) 3699 if self._match(TokenType.ROLLUP): 3700 rollup = with_ or self._parse_wrapped_csv(self._parse_column) 3701 elements["rollup"].extend(ensure_list(rollup)) 3702 3703 if self._match(TokenType.CUBE): 3704 cube = with_ or self._parse_wrapped_csv(self._parse_column) 3705 elements["cube"].extend(ensure_list(cube)) 3706 3707 if self._match_text_seq("TOTALS"): 3708 totals = True 3709 elements["totals"] = True # type: ignore 3710 3711 if not (grouping_sets or rollup or cube or totals): 3712 if with_: 3713 self._retreat(index) 3714 break 3715 3716 return self.expression(exp.Group, **elements) # type: ignore 3717 3718 def _parse_grouping_sets(self) -> t.Optional[t.List[exp.Expression]]: 3719 if not self._match(TokenType.GROUPING_SETS): 3720 return None 3721 3722 return self._parse_wrapped_csv(self._parse_grouping_set) 3723 3724 def _parse_grouping_set(self) -> t.Optional[exp.Expression]: 3725 if self._match(TokenType.L_PAREN): 3726 grouping_set = self._parse_csv(self._parse_column) 3727 self._match_r_paren() 3728 return self.expression(exp.Tuple, expressions=grouping_set) 3729 3730 return self._parse_column() 3731 3732 def _parse_having(self, skip_having_token: bool = False) -> t.Optional[exp.Having]: 
3733 if not skip_having_token and not self._match(TokenType.HAVING): 3734 return None 3735 return self.expression(exp.Having, this=self._parse_assignment()) 3736 3737 def _parse_qualify(self) -> t.Optional[exp.Qualify]: 3738 if not self._match(TokenType.QUALIFY): 3739 return None 3740 return self.expression(exp.Qualify, this=self._parse_assignment()) 3741 3742 def _parse_connect(self, skip_start_token: bool = False) -> t.Optional[exp.Connect]: 3743 if skip_start_token: 3744 start = None 3745 elif self._match(TokenType.START_WITH): 3746 start = self._parse_assignment() 3747 else: 3748 return None 3749 3750 self._match(TokenType.CONNECT_BY) 3751 nocycle = self._match_text_seq("NOCYCLE") 3752 self.NO_PAREN_FUNCTION_PARSERS["PRIOR"] = lambda self: self.expression( 3753 exp.Prior, this=self._parse_bitwise() 3754 ) 3755 connect = self._parse_assignment() 3756 self.NO_PAREN_FUNCTION_PARSERS.pop("PRIOR") 3757 3758 if not start and self._match(TokenType.START_WITH): 3759 start = self._parse_assignment() 3760 3761 return self.expression(exp.Connect, start=start, connect=connect, nocycle=nocycle) 3762 3763 def _parse_name_as_expression(self) -> exp.Alias: 3764 return self.expression( 3765 exp.Alias, 3766 alias=self._parse_id_var(any_token=True), 3767 this=self._match(TokenType.ALIAS) and self._parse_assignment(), 3768 ) 3769 3770 def _parse_interpolate(self) -> t.Optional[t.List[exp.Expression]]: 3771 if self._match_text_seq("INTERPOLATE"): 3772 return self._parse_wrapped_csv(self._parse_name_as_expression) 3773 return None 3774 3775 def _parse_order( 3776 self, this: t.Optional[exp.Expression] = None, skip_order_token: bool = False 3777 ) -> t.Optional[exp.Expression]: 3778 siblings = None 3779 if not skip_order_token and not self._match(TokenType.ORDER_BY): 3780 if not self._match(TokenType.ORDER_SIBLINGS_BY): 3781 return this 3782 3783 siblings = True 3784 3785 return self.expression( 3786 exp.Order, 3787 this=this, 3788 expressions=self._parse_csv(self._parse_ordered), 3789 interpolate=self._parse_interpolate(), 3790 siblings=siblings, 3791 ) 3792 3793 def _parse_sort(self, exp_class: t.Type[E], token: TokenType) -> t.Optional[E]: 3794 if not self._match(token): 3795 return None 3796 return self.expression(exp_class, expressions=self._parse_csv(self._parse_ordered)) 3797 3798 def _parse_ordered( 3799 self, parse_method: t.Optional[t.Callable] = None 3800 ) -> t.Optional[exp.Ordered]: 3801 this = parse_method() if parse_method else self._parse_assignment() 3802 if not this: 3803 return None 3804 3805 asc = self._match(TokenType.ASC) 3806 desc = self._match(TokenType.DESC) or (asc and False) 3807 3808 is_nulls_first = self._match_text_seq("NULLS", "FIRST") 3809 is_nulls_last = self._match_text_seq("NULLS", "LAST") 3810 3811 nulls_first = is_nulls_first or False 3812 explicitly_null_ordered = is_nulls_first or is_nulls_last 3813 3814 if ( 3815 not explicitly_null_ordered 3816 and ( 3817 (not desc and self.dialect.NULL_ORDERING == "nulls_are_small") 3818 or (desc and self.dialect.NULL_ORDERING != "nulls_are_small") 3819 ) 3820 and self.dialect.NULL_ORDERING != "nulls_are_last" 3821 ): 3822 nulls_first = True 3823 3824 if self._match_text_seq("WITH", "FILL"): 3825 with_fill = self.expression( 3826 exp.WithFill, 3827 **{ # type: ignore 3828 "from": self._match(TokenType.FROM) and self._parse_bitwise(), 3829 "to": self._match_text_seq("TO") and self._parse_bitwise(), 3830 "step": self._match_text_seq("STEP") and self._parse_bitwise(), 3831 }, 3832 ) 3833 else: 3834 with_fill = None 3835 3836 return 
self.expression( 3837 exp.Ordered, this=this, desc=desc, nulls_first=nulls_first, with_fill=with_fill 3838 ) 3839 3840 def _parse_limit( 3841 self, 3842 this: t.Optional[exp.Expression] = None, 3843 top: bool = False, 3844 skip_limit_token: bool = False, 3845 ) -> t.Optional[exp.Expression]: 3846 if skip_limit_token or self._match(TokenType.TOP if top else TokenType.LIMIT): 3847 comments = self._prev_comments 3848 if top: 3849 limit_paren = self._match(TokenType.L_PAREN) 3850 expression = self._parse_term() if limit_paren else self._parse_number() 3851 3852 if limit_paren: 3853 self._match_r_paren() 3854 else: 3855 expression = self._parse_term() 3856 3857 if self._match(TokenType.COMMA): 3858 offset = expression 3859 expression = self._parse_term() 3860 else: 3861 offset = None 3862 3863 limit_exp = self.expression( 3864 exp.Limit, 3865 this=this, 3866 expression=expression, 3867 offset=offset, 3868 comments=comments, 3869 expressions=self._parse_limit_by(), 3870 ) 3871 3872 return limit_exp 3873 3874 if self._match(TokenType.FETCH): 3875 direction = self._match_set((TokenType.FIRST, TokenType.NEXT)) 3876 direction = self._prev.text.upper() if direction else "FIRST" 3877 3878 count = self._parse_field(tokens=self.FETCH_TOKENS) 3879 percent = self._match(TokenType.PERCENT) 3880 3881 self._match_set((TokenType.ROW, TokenType.ROWS)) 3882 3883 only = self._match_text_seq("ONLY") 3884 with_ties = self._match_text_seq("WITH", "TIES") 3885 3886 if only and with_ties: 3887 self.raise_error("Cannot specify both ONLY and WITH TIES in FETCH clause") 3888 3889 return self.expression( 3890 exp.Fetch, 3891 direction=direction, 3892 count=count, 3893 percent=percent, 3894 with_ties=with_ties, 3895 ) 3896 3897 return this 3898 3899 def _parse_offset(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 3900 if not self._match(TokenType.OFFSET): 3901 return this 3902 3903 count = self._parse_term() 3904 self._match_set((TokenType.ROW, TokenType.ROWS)) 3905 3906 return self.expression( 3907 exp.Offset, this=this, expression=count, expressions=self._parse_limit_by() 3908 ) 3909 3910 def _parse_limit_by(self) -> t.Optional[t.List[exp.Expression]]: 3911 return self._match_text_seq("BY") and self._parse_csv(self._parse_bitwise) 3912 3913 def _parse_locks(self) -> t.List[exp.Lock]: 3914 locks = [] 3915 while True: 3916 if self._match_text_seq("FOR", "UPDATE"): 3917 update = True 3918 elif self._match_text_seq("FOR", "SHARE") or self._match_text_seq( 3919 "LOCK", "IN", "SHARE", "MODE" 3920 ): 3921 update = False 3922 else: 3923 break 3924 3925 expressions = None 3926 if self._match_text_seq("OF"): 3927 expressions = self._parse_csv(lambda: self._parse_table(schema=True)) 3928 3929 wait: t.Optional[bool | exp.Expression] = None 3930 if self._match_text_seq("NOWAIT"): 3931 wait = True 3932 elif self._match_text_seq("WAIT"): 3933 wait = self._parse_primary() 3934 elif self._match_text_seq("SKIP", "LOCKED"): 3935 wait = False 3936 3937 locks.append( 3938 self.expression(exp.Lock, update=update, expressions=expressions, wait=wait) 3939 ) 3940 3941 return locks 3942 3943 def _parse_set_operations(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 3944 while this and self._match_set(self.SET_OPERATIONS): 3945 token_type = self._prev.token_type 3946 3947 if token_type == TokenType.UNION: 3948 operation = exp.Union 3949 elif token_type == TokenType.EXCEPT: 3950 operation = exp.Except 3951 else: 3952 operation = exp.Intersect 3953 3954 comments = self._prev.comments 3955 
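# A quick doctest-style check (illustrative query, default dialect) of
# _parse_ordered above: explicit NULLS FIRST/LAST is recorded directly, and
# otherwise nulls_first is inferred from the dialect's NULL_ORDERING setting:
#
#   >>> import sqlglot
#   >>> from sqlglot import exp
#   >>> o = sqlglot.parse_one("SELECT a FROM t ORDER BY a DESC NULLS FIRST").find(exp.Ordered)
#   >>> o.args["desc"], o.args["nulls_first"]
#   (True, True)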
distinct = self._match(TokenType.DISTINCT) or not self._match(TokenType.ALL) 3956 by_name = self._match_text_seq("BY", "NAME") 3957 expression = self._parse_select(nested=True, parse_set_operation=False) 3958 3959 this = self.expression( 3960 operation, 3961 comments=comments, 3962 this=this, 3963 distinct=distinct, 3964 by_name=by_name, 3965 expression=expression, 3966 ) 3967 3968 if isinstance(this, exp.Union) and self.MODIFIERS_ATTACHED_TO_UNION: 3969 expression = this.expression 3970 3971 if expression: 3972 for arg in self.UNION_MODIFIERS: 3973 expr = expression.args.get(arg) 3974 if expr: 3975 this.set(arg, expr.pop()) 3976 3977 return this 3978 3979 def _parse_expression(self) -> t.Optional[exp.Expression]: 3980 return self._parse_alias(self._parse_assignment()) 3981 3982 def _parse_assignment(self) -> t.Optional[exp.Expression]: 3983 this = self._parse_disjunction() 3984 3985 while self._match_set(self.ASSIGNMENT): 3986 this = self.expression( 3987 self.ASSIGNMENT[self._prev.token_type], 3988 this=this, 3989 comments=self._prev_comments, 3990 expression=self._parse_assignment(), 3991 ) 3992 3993 return this 3994 3995 def _parse_disjunction(self) -> t.Optional[exp.Expression]: 3996 return self._parse_tokens(self._parse_conjunction, self.DISJUNCTION) 3997 3998 def _parse_conjunction(self) -> t.Optional[exp.Expression]: 3999 return self._parse_tokens(self._parse_equality, self.CONJUNCTION) 4000 4001 def _parse_equality(self) -> t.Optional[exp.Expression]: 4002 return self._parse_tokens(self._parse_comparison, self.EQUALITY) 4003 4004 def _parse_comparison(self) -> t.Optional[exp.Expression]: 4005 return self._parse_tokens(self._parse_range, self.COMPARISON) 4006 4007 def _parse_range(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 4008 this = this or self._parse_bitwise() 4009 negate = self._match(TokenType.NOT) 4010 4011 if self._match_set(self.RANGE_PARSERS): 4012 expression = self.RANGE_PARSERS[self._prev.token_type](self, this) 4013 if not expression: 4014 return this 4015 4016 this = expression 4017 elif self._match(TokenType.ISNULL): 4018 this = self.expression(exp.Is, this=this, expression=exp.Null()) 4019 4020 # Postgres supports ISNULL and NOTNULL for conditions. 
4021 # https://blog.andreiavram.ro/postgresql-null-composite-type/ 4022 if self._match(TokenType.NOTNULL): 4023 this = self.expression(exp.Is, this=this, expression=exp.Null()) 4024 this = self.expression(exp.Not, this=this) 4025 4026 if negate: 4027 this = self.expression(exp.Not, this=this) 4028 4029 if self._match(TokenType.IS): 4030 this = self._parse_is(this) 4031 4032 return this 4033 4034 def _parse_is(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 4035 index = self._index - 1 4036 negate = self._match(TokenType.NOT) 4037 4038 if self._match_text_seq("DISTINCT", "FROM"): 4039 klass = exp.NullSafeEQ if negate else exp.NullSafeNEQ 4040 return self.expression(klass, this=this, expression=self._parse_bitwise()) 4041 4042 expression = self._parse_null() or self._parse_boolean() 4043 if not expression: 4044 self._retreat(index) 4045 return None 4046 4047 this = self.expression(exp.Is, this=this, expression=expression) 4048 return self.expression(exp.Not, this=this) if negate else this 4049 4050 def _parse_in(self, this: t.Optional[exp.Expression], alias: bool = False) -> exp.In: 4051 unnest = self._parse_unnest(with_alias=False) 4052 if unnest: 4053 this = self.expression(exp.In, this=this, unnest=unnest) 4054 elif self._match_set((TokenType.L_PAREN, TokenType.L_BRACKET)): 4055 matched_l_paren = self._prev.token_type == TokenType.L_PAREN 4056 expressions = self._parse_csv(lambda: self._parse_select_or_expression(alias=alias)) 4057 4058 if len(expressions) == 1 and isinstance(expressions[0], exp.Query): 4059 this = self.expression(exp.In, this=this, query=expressions[0].subquery(copy=False)) 4060 else: 4061 this = self.expression(exp.In, this=this, expressions=expressions) 4062 4063 if matched_l_paren: 4064 self._match_r_paren(this) 4065 elif not self._match(TokenType.R_BRACKET, expression=this): 4066 self.raise_error("Expecting ]") 4067 else: 4068 this = self.expression(exp.In, this=this, field=self._parse_field()) 4069 4070 return this 4071 4072 def _parse_between(self, this: t.Optional[exp.Expression]) -> exp.Between: 4073 low = self._parse_bitwise() 4074 self._match(TokenType.AND) 4075 high = self._parse_bitwise() 4076 return self.expression(exp.Between, this=this, low=low, high=high) 4077 4078 def _parse_escape(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 4079 if not self._match(TokenType.ESCAPE): 4080 return this 4081 return self.expression(exp.Escape, this=this, expression=self._parse_string()) 4082 4083 def _parse_interval(self, match_interval: bool = True) -> t.Optional[exp.Add | exp.Interval]: 4084 index = self._index 4085 4086 if not self._match(TokenType.INTERVAL) and match_interval: 4087 return None 4088 4089 if self._match(TokenType.STRING, advance=False): 4090 this = self._parse_primary() 4091 else: 4092 this = self._parse_term() 4093 4094 if not this or ( 4095 isinstance(this, exp.Column) 4096 and not this.table 4097 and not this.this.quoted 4098 and this.name.upper() == "IS" 4099 ): 4100 self._retreat(index) 4101 return None 4102 4103 unit = self._parse_function() or ( 4104 not self._match(TokenType.ALIAS, advance=False) 4105 and self._parse_var(any_token=True, upper=True) 4106 ) 4107 4108 # Most dialects support, e.g., the form INTERVAL '5' day, thus we try to parse 4109 # each INTERVAL expression into this canonical form so it's easy to transpile 4110 if this and this.is_number: 4111 this = exp.Literal.string(this.name) 4112 elif this and this.is_string: 4113 parts = exp.INTERVAL_STRING_RE.findall(this.name) 4114 if 
len(parts) == 1: 4115 if unit: 4116 # Unconsume the eagerly-parsed unit, since the real unit was part of the string 4117 self._retreat(self._index - 1) 4118 4119 this = exp.Literal.string(parts[0][0]) 4120 unit = self.expression(exp.Var, this=parts[0][1].upper()) 4121 4122 if self.INTERVAL_SPANS and self._match_text_seq("TO"): 4123 unit = self.expression( 4124 exp.IntervalSpan, this=unit, expression=self._parse_var(any_token=True, upper=True) 4125 ) 4126 4127 interval = self.expression(exp.Interval, this=this, unit=unit) 4128 4129 index = self._index 4130 self._match(TokenType.PLUS) 4131 4132 # Convert INTERVAL 'val_1' unit_1 [+] ... [+] 'val_n' unit_n into a sum of intervals 4133 if self._match_set((TokenType.STRING, TokenType.NUMBER), advance=False): 4134 return self.expression( 4135 exp.Add, this=interval, expression=self._parse_interval(match_interval=False) 4136 ) 4137 4138 self._retreat(index) 4139 return interval 4140 4141 def _parse_bitwise(self) -> t.Optional[exp.Expression]: 4142 this = self._parse_term() 4143 4144 while True: 4145 if self._match_set(self.BITWISE): 4146 this = self.expression( 4147 self.BITWISE[self._prev.token_type], 4148 this=this, 4149 expression=self._parse_term(), 4150 ) 4151 elif self.dialect.DPIPE_IS_STRING_CONCAT and self._match(TokenType.DPIPE): 4152 this = self.expression( 4153 exp.DPipe, 4154 this=this, 4155 expression=self._parse_term(), 4156 safe=not self.dialect.STRICT_STRING_CONCAT, 4157 ) 4158 elif self._match(TokenType.DQMARK): 4159 this = self.expression(exp.Coalesce, this=this, expressions=self._parse_term()) 4160 elif self._match_pair(TokenType.LT, TokenType.LT): 4161 this = self.expression( 4162 exp.BitwiseLeftShift, this=this, expression=self._parse_term() 4163 ) 4164 elif self._match_pair(TokenType.GT, TokenType.GT): 4165 this = self.expression( 4166 exp.BitwiseRightShift, this=this, expression=self._parse_term() 4167 ) 4168 else: 4169 break 4170 4171 return this 4172 4173 def _parse_term(self) -> t.Optional[exp.Expression]: 4174 return self._parse_tokens(self._parse_factor, self.TERM) 4175 4176 def _parse_factor(self) -> t.Optional[exp.Expression]: 4177 parse_method = self._parse_exponent if self.EXPONENT else self._parse_unary 4178 this = parse_method() 4179 4180 while self._match_set(self.FACTOR): 4181 klass = self.FACTOR[self._prev.token_type] 4182 comments = self._prev_comments 4183 expression = parse_method() 4184 4185 if not expression and klass is exp.IntDiv and self._prev.text.isalpha(): 4186 self._retreat(self._index - 1) 4187 return this 4188 4189 this = self.expression(klass, this=this, comments=comments, expression=expression) 4190 4191 if isinstance(this, exp.Div): 4192 this.args["typed"] = self.dialect.TYPED_DIVISION 4193 this.args["safe"] = self.dialect.SAFE_DIVISION 4194 4195 return this 4196 4197 def _parse_exponent(self) -> t.Optional[exp.Expression]: 4198 return self._parse_tokens(self._parse_unary, self.EXPONENT) 4199 4200 def _parse_unary(self) -> t.Optional[exp.Expression]: 4201 if self._match_set(self.UNARY_PARSERS): 4202 return self.UNARY_PARSERS[self._prev.token_type](self) 4203 return self._parse_at_time_zone(self._parse_type()) 4204 4205 def _parse_type( 4206 self, parse_interval: bool = True, fallback_to_identifier: bool = False 4207 ) -> t.Optional[exp.Expression]: 4208 interval = parse_interval and self._parse_interval() 4209 if interval: 4210 return interval 4211 4212 index = self._index 4213 data_type = self._parse_types(check_func=True, allow_identifiers=False) 4214 4215 if data_type: 4216 index2 = 
self._index 4217 this = self._parse_primary() 4218 4219 if isinstance(this, exp.Literal): 4220 parser = self.TYPE_LITERAL_PARSERS.get(data_type.this) 4221 if parser: 4222 return parser(self, this, data_type) 4223 4224 return self.expression(exp.Cast, this=this, to=data_type) 4225 4226 # The expressions arg gets set by the parser when we have something like DECIMAL(38, 0) 4227 # in the input SQL. In that case, we'll produce these tokens: DECIMAL ( 38 , 0 ) 4228 # 4229 # If the index difference here is greater than 1, that means the parser itself must have 4230 # consumed additional tokens such as the DECIMAL scale and precision in the above example. 4231 # 4232 # If it's not greater than 1, then it must be 1, because we've consumed at least the type 4233 # keyword, meaning that the expressions arg of the DataType must have gotten set by a 4234 # callable in the TYPE_CONVERTERS mapping. For example, Snowflake converts DECIMAL to 4235 # DECIMAL(38, 0) in order to facilitate the data type's transpilation. 4236 # 4237 # In these cases, we don't really want to return the converted type, but instead retreat 4238 # and try to parse a Column or Identifier in the section below. 4239 if data_type.expressions and index2 - index > 1: 4240 self._retreat(index2) 4241 return self._parse_column_ops(data_type) 4242 4243 self._retreat(index) 4244 4245 if fallback_to_identifier: 4246 return self._parse_id_var() 4247 4248 this = self._parse_column() 4249 return this and self._parse_column_ops(this) 4250 4251 def _parse_type_size(self) -> t.Optional[exp.DataTypeParam]: 4252 this = self._parse_type() 4253 if not this: 4254 return None 4255 4256 if isinstance(this, exp.Column) and not this.table: 4257 this = exp.var(this.name.upper()) 4258 4259 return self.expression( 4260 exp.DataTypeParam, this=this, expression=self._parse_var(any_token=True) 4261 ) 4262 4263 def _parse_types( 4264 self, check_func: bool = False, schema: bool = False, allow_identifiers: bool = True 4265 ) -> t.Optional[exp.Expression]: 4266 index = self._index 4267 4268 this: t.Optional[exp.Expression] = None 4269 prefix = self._match_text_seq("SYSUDTLIB", ".") 4270 4271 if not self._match_set(self.TYPE_TOKENS): 4272 identifier = allow_identifiers and self._parse_id_var( 4273 any_token=False, tokens=(TokenType.VAR,) 4274 ) 4275 if identifier: 4276 tokens = self.dialect.tokenize(identifier.name) 4277 4278 if len(tokens) != 1: 4279 self.raise_error("Unexpected identifier", self._prev) 4280 4281 if tokens[0].token_type in self.TYPE_TOKENS: 4282 self._prev = tokens[0] 4283 elif self.dialect.SUPPORTS_USER_DEFINED_TYPES: 4284 type_name = identifier.name 4285 4286 while self._match(TokenType.DOT): 4287 type_name = f"{type_name}.{self._advance_any() and self._prev.text}" 4288 4289 this = exp.DataType.build(type_name, udt=True) 4290 else: 4291 self._retreat(self._index - 1) 4292 return None 4293 else: 4294 return None 4295 4296 type_token = self._prev.token_type 4297 4298 if type_token == TokenType.PSEUDO_TYPE: 4299 return self.expression(exp.PseudoType, this=self._prev.text.upper()) 4300 4301 if type_token == TokenType.OBJECT_IDENTIFIER: 4302 return self.expression(exp.ObjectIdentifier, this=self._prev.text.upper()) 4303 4304 nested = type_token in self.NESTED_TYPE_TOKENS 4305 is_struct = type_token in self.STRUCT_TYPE_TOKENS 4306 is_aggregate = type_token in self.AGGREGATE_TYPE_TOKENS 4307 expressions = None 4308 maybe_func = False 4309 4310 if self._match(TokenType.L_PAREN): 4311 if is_struct: 4312 expressions = self._parse_csv(lambda: 
self._parse_struct_types(type_required=True)) 4313 elif nested: 4314 expressions = self._parse_csv( 4315 lambda: self._parse_types( 4316 check_func=check_func, schema=schema, allow_identifiers=allow_identifiers 4317 ) 4318 ) 4319 elif type_token in self.ENUM_TYPE_TOKENS: 4320 expressions = self._parse_csv(self._parse_equality) 4321 elif is_aggregate: 4322 func_or_ident = self._parse_function(anonymous=True) or self._parse_id_var( 4323 any_token=False, tokens=(TokenType.VAR,) 4324 ) 4325 if not func_or_ident or not self._match(TokenType.COMMA): 4326 return None 4327 expressions = self._parse_csv( 4328 lambda: self._parse_types( 4329 check_func=check_func, schema=schema, allow_identifiers=allow_identifiers 4330 ) 4331 ) 4332 expressions.insert(0, func_or_ident) 4333 else: 4334 expressions = self._parse_csv(self._parse_type_size) 4335 4336 if not expressions or not self._match(TokenType.R_PAREN): 4337 self._retreat(index) 4338 return None 4339 4340 maybe_func = True 4341 4342 values: t.Optional[t.List[exp.Expression]] = None 4343 4344 if nested and self._match(TokenType.LT): 4345 if is_struct: 4346 expressions = self._parse_csv(lambda: self._parse_struct_types(type_required=True)) 4347 else: 4348 expressions = self._parse_csv( 4349 lambda: self._parse_types( 4350 check_func=check_func, schema=schema, allow_identifiers=allow_identifiers 4351 ) 4352 ) 4353 4354 if not self._match(TokenType.GT): 4355 self.raise_error("Expecting >") 4356 4357 if self._match_set((TokenType.L_BRACKET, TokenType.L_PAREN)): 4358 values = self._parse_csv(self._parse_assignment) 4359 self._match_set((TokenType.R_BRACKET, TokenType.R_PAREN)) 4360 4361 if type_token in self.TIMESTAMPS: 4362 if self._match_text_seq("WITH", "TIME", "ZONE"): 4363 maybe_func = False 4364 tz_type = ( 4365 exp.DataType.Type.TIMETZ 4366 if type_token in self.TIMES 4367 else exp.DataType.Type.TIMESTAMPTZ 4368 ) 4369 this = exp.DataType(this=tz_type, expressions=expressions) 4370 elif self._match_text_seq("WITH", "LOCAL", "TIME", "ZONE"): 4371 maybe_func = False 4372 this = exp.DataType(this=exp.DataType.Type.TIMESTAMPLTZ, expressions=expressions) 4373 elif self._match_text_seq("WITHOUT", "TIME", "ZONE"): 4374 maybe_func = False 4375 elif type_token == TokenType.INTERVAL: 4376 unit = self._parse_var(upper=True) 4377 if unit: 4378 if self._match_text_seq("TO"): 4379 unit = exp.IntervalSpan(this=unit, expression=self._parse_var(upper=True)) 4380 4381 this = self.expression(exp.DataType, this=self.expression(exp.Interval, unit=unit)) 4382 else: 4383 this = self.expression(exp.DataType, this=exp.DataType.Type.INTERVAL) 4384 4385 if maybe_func and check_func: 4386 index2 = self._index 4387 peek = self._parse_string() 4388 4389 if not peek: 4390 self._retreat(index) 4391 return None 4392 4393 self._retreat(index2) 4394 4395 if not this: 4396 if self._match_text_seq("UNSIGNED"): 4397 unsigned_type_token = self.SIGNED_TO_UNSIGNED_TYPE_TOKEN.get(type_token) 4398 if not unsigned_type_token: 4399 self.raise_error(f"Cannot convert {type_token.value} to unsigned.") 4400 4401 type_token = unsigned_type_token or type_token 4402 4403 this = exp.DataType( 4404 this=exp.DataType.Type[type_token.value], 4405 expressions=expressions, 4406 nested=nested, 4407 values=values, 4408 prefix=prefix, 4409 ) 4410 elif expressions: 4411 this.set("expressions", expressions) 4412 4413 index = self._index 4414 4415 # Postgres supports the INT ARRAY[3] syntax as a synonym for INT[3] 4416 matched_array = self._match(TokenType.ARRAY) 4417 4418 while self._curr: 4419 
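# Two small sketches (illustrative queries, default dialect) of the behavior
# above: _parse_interval canonicalizes numeric values into the INTERVAL '5' DAY
# form, and _parse_types records type parameters as the DataType's expressions:
#
#   >>> import sqlglot
#   >>> from sqlglot import exp
#   >>> sqlglot.parse_one("SELECT INTERVAL 5 day").sql()
#   "SELECT INTERVAL '5' DAY"
#   >>> sqlglot.parse_one("SELECT CAST(x AS DECIMAL(38, 0))").find(exp.Cast).to.sql()
#   'DECIMAL(38, 0)'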
matched_l_bracket = self._match(TokenType.L_BRACKET) 4420 if not matched_l_bracket and not matched_array: 4421 break 4422 4423 matched_array = False 4424 values = self._parse_csv(self._parse_assignment) or None 4425 if values and not schema: 4426 self._retreat(index) 4427 break 4428 4429 this = exp.DataType( 4430 this=exp.DataType.Type.ARRAY, expressions=[this], values=values, nested=True 4431 ) 4432 self._match(TokenType.R_BRACKET) 4433 4434 if self.TYPE_CONVERTER and isinstance(this.this, exp.DataType.Type): 4435 converter = self.TYPE_CONVERTER.get(this.this) 4436 if converter: 4437 this = converter(t.cast(exp.DataType, this)) 4438 4439 return this 4440 4441 def _parse_struct_types(self, type_required: bool = False) -> t.Optional[exp.Expression]: 4442 index = self._index 4443 this = ( 4444 self._parse_type(parse_interval=False, fallback_to_identifier=True) 4445 or self._parse_id_var() 4446 ) 4447 self._match(TokenType.COLON) 4448 4449 if ( 4450 type_required 4451 and not isinstance(this, exp.DataType) 4452 and not self._match_set(self.TYPE_TOKENS, advance=False) 4453 ): 4454 self._retreat(index) 4455 return self._parse_types() 4456 4457 return self._parse_column_def(this) 4458 4459 def _parse_at_time_zone(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 4460 if not self._match_text_seq("AT", "TIME", "ZONE"): 4461 return this 4462 return self.expression(exp.AtTimeZone, this=this, zone=self._parse_unary()) 4463 4464 def _parse_column(self) -> t.Optional[exp.Expression]: 4465 this = self._parse_column_reference() 4466 return self._parse_column_ops(this) if this else self._parse_bracket(this) 4467 4468 def _parse_column_reference(self) -> t.Optional[exp.Expression]: 4469 this = self._parse_field() 4470 if ( 4471 not this 4472 and self._match(TokenType.VALUES, advance=False) 4473 and self.VALUES_FOLLOWED_BY_PAREN 4474 and (not self._next or self._next.token_type != TokenType.L_PAREN) 4475 ): 4476 this = self._parse_id_var() 4477 4478 if isinstance(this, exp.Identifier): 4479 # We bubble up comments from the Identifier to the Column 4480 this = self.expression(exp.Column, comments=this.pop_comments(), this=this) 4481 4482 return this 4483 4484 def _parse_colon_as_json_extract( 4485 self, this: t.Optional[exp.Expression] 4486 ) -> t.Optional[exp.Expression]: 4487 casts = [] 4488 json_path = [] 4489 4490 while self._match(TokenType.COLON): 4491 start_index = self._index 4492 path = self._parse_column_ops(self._parse_field(any_token=True)) 4493 4494 # The cast :: operator has a lower precedence than the extraction operator :, so 4495 # we rearrange the AST appropriately to avoid casting the JSON path 4496 while isinstance(path, exp.Cast): 4497 casts.append(path.to) 4498 path = path.this 4499 4500 if casts: 4501 dcolon_offset = next( 4502 i 4503 for i, t in enumerate(self._tokens[start_index:]) 4504 if t.token_type == TokenType.DCOLON 4505 ) 4506 end_token = self._tokens[start_index + dcolon_offset - 1] 4507 else: 4508 end_token = self._prev 4509 4510 if path: 4511 json_path.append(self._find_sql(self._tokens[start_index], end_token)) 4512 4513 if json_path: 4514 this = self.expression( 4515 exp.JSONExtract, 4516 this=this, 4517 expression=self.dialect.to_json_path(exp.Literal.string(".".join(json_path))), 4518 ) 4519 4520 while casts: 4521 this = self.expression(exp.Cast, this=this, to=casts.pop()) 4522 4523 return this 4524 4525 def _parse_column_ops(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 4526 this = self._parse_bracket(this) 4527 4528 while 
self._match_set(self.COLUMN_OPERATORS): 4529 op_token = self._prev.token_type 4530 op = self.COLUMN_OPERATORS.get(op_token) 4531 4532 if op_token == TokenType.DCOLON: 4533 field = self._parse_types() 4534 if not field: 4535 self.raise_error("Expected type") 4536 elif op and self._curr: 4537 field = self._parse_column_reference() 4538 else: 4539 field = self._parse_field(any_token=True, anonymous_func=True) 4540 4541 if isinstance(field, exp.Func) and this: 4542 # bigquery allows function calls like x.y.count(...) 4543 # SAFE.SUBSTR(...) 4544 # https://cloud.google.com/bigquery/docs/reference/standard-sql/functions-reference#function_call_rules 4545 this = exp.replace_tree( 4546 this, 4547 lambda n: ( 4548 self.expression(exp.Dot, this=n.args.get("table"), expression=n.this) 4549 if n.table 4550 else n.this 4551 ) 4552 if isinstance(n, exp.Column) 4553 else n, 4554 ) 4555 4556 if op: 4557 this = op(self, this, field) 4558 elif isinstance(this, exp.Column) and not this.args.get("catalog"): 4559 this = self.expression( 4560 exp.Column, 4561 this=field, 4562 table=this.this, 4563 db=this.args.get("table"), 4564 catalog=this.args.get("db"), 4565 ) 4566 else: 4567 this = self.expression(exp.Dot, this=this, expression=field) 4568 4569 this = self._parse_bracket(this) 4570 4571 return self._parse_colon_as_json_extract(this) if self.COLON_IS_JSON_EXTRACT else this 4572 4573 def _parse_primary(self) -> t.Optional[exp.Expression]: 4574 if self._match_set(self.PRIMARY_PARSERS): 4575 token_type = self._prev.token_type 4576 primary = self.PRIMARY_PARSERS[token_type](self, self._prev) 4577 4578 if token_type == TokenType.STRING: 4579 expressions = [primary] 4580 while self._match(TokenType.STRING): 4581 expressions.append(exp.Literal.string(self._prev.text)) 4582 4583 if len(expressions) > 1: 4584 return self.expression(exp.Concat, expressions=expressions) 4585 4586 return primary 4587 4588 if self._match_pair(TokenType.DOT, TokenType.NUMBER): 4589 return exp.Literal.number(f"0.{self._prev.text}") 4590 4591 if self._match(TokenType.L_PAREN): 4592 comments = self._prev_comments 4593 query = self._parse_select() 4594 4595 if query: 4596 expressions = [query] 4597 else: 4598 expressions = self._parse_expressions() 4599 4600 this = self._parse_query_modifiers(seq_get(expressions, 0)) 4601 4602 if not this and self._match(TokenType.R_PAREN, advance=False): 4603 this = self.expression(exp.Tuple) 4604 elif isinstance(this, exp.UNWRAPPED_QUERIES): 4605 this = self._parse_subquery(this=this, parse_alias=False) 4606 elif isinstance(this, exp.Subquery): 4607 this = self._parse_subquery( 4608 this=self._parse_set_operations(this), parse_alias=False 4609 ) 4610 elif len(expressions) > 1 or self._prev.token_type == TokenType.COMMA: 4611 this = self.expression(exp.Tuple, expressions=expressions) 4612 else: 4613 this = self.expression(exp.Paren, this=this) 4614 4615 if this: 4616 this.add_comments(comments) 4617 4618 self._match_r_paren(expression=this) 4619 return this 4620 4621 return None 4622 4623 def _parse_field( 4624 self, 4625 any_token: bool = False, 4626 tokens: t.Optional[t.Collection[TokenType]] = None, 4627 anonymous_func: bool = False, 4628 ) -> t.Optional[exp.Expression]: 4629 if anonymous_func: 4630 field = ( 4631 self._parse_function(anonymous=anonymous_func, any_token=any_token) 4632 or self._parse_primary() 4633 ) 4634 else: 4635 field = self._parse_primary() or self._parse_function( 4636 anonymous=anonymous_func, any_token=any_token 4637 ) 4638 return field or self._parse_id_var(any_token=any_token, 
tokens=tokens) 4639 4640 def _parse_function( 4641 self, 4642 functions: t.Optional[t.Dict[str, t.Callable]] = None, 4643 anonymous: bool = False, 4644 optional_parens: bool = True, 4645 any_token: bool = False, 4646 ) -> t.Optional[exp.Expression]: 4647 # This allows us to also parse {fn <function>} syntax (Snowflake, MySQL support this) 4648 # See: https://community.snowflake.com/s/article/SQL-Escape-Sequences 4649 fn_syntax = False 4650 if ( 4651 self._match(TokenType.L_BRACE, advance=False) 4652 and self._next 4653 and self._next.text.upper() == "FN" 4654 ): 4655 self._advance(2) 4656 fn_syntax = True 4657 4658 func = self._parse_function_call( 4659 functions=functions, 4660 anonymous=anonymous, 4661 optional_parens=optional_parens, 4662 any_token=any_token, 4663 ) 4664 4665 if fn_syntax: 4666 self._match(TokenType.R_BRACE) 4667 4668 return func 4669 4670 def _parse_function_call( 4671 self, 4672 functions: t.Optional[t.Dict[str, t.Callable]] = None, 4673 anonymous: bool = False, 4674 optional_parens: bool = True, 4675 any_token: bool = False, 4676 ) -> t.Optional[exp.Expression]: 4677 if not self._curr: 4678 return None 4679 4680 comments = self._curr.comments 4681 token_type = self._curr.token_type 4682 this = self._curr.text 4683 upper = this.upper() 4684 4685 parser = self.NO_PAREN_FUNCTION_PARSERS.get(upper) 4686 if optional_parens and parser and token_type not in self.INVALID_FUNC_NAME_TOKENS: 4687 self._advance() 4688 return self._parse_window(parser(self)) 4689 4690 if not self._next or self._next.token_type != TokenType.L_PAREN: 4691 if optional_parens and token_type in self.NO_PAREN_FUNCTIONS: 4692 self._advance() 4693 return self.expression(self.NO_PAREN_FUNCTIONS[token_type]) 4694 4695 return None 4696 4697 if any_token: 4698 if token_type in self.RESERVED_TOKENS: 4699 return None 4700 elif token_type not in self.FUNC_TOKENS: 4701 return None 4702 4703 self._advance(2) 4704 4705 parser = self.FUNCTION_PARSERS.get(upper) 4706 if parser and not anonymous: 4707 this = parser(self) 4708 else: 4709 subquery_predicate = self.SUBQUERY_PREDICATES.get(token_type) 4710 4711 if subquery_predicate and self._curr.token_type in (TokenType.SELECT, TokenType.WITH): 4712 this = self.expression(subquery_predicate, this=self._parse_select()) 4713 self._match_r_paren() 4714 return this 4715 4716 if functions is None: 4717 functions = self.FUNCTIONS 4718 4719 function = functions.get(upper) 4720 4721 alias = upper in self.FUNCTIONS_WITH_ALIASED_ARGS 4722 args = self._parse_csv(lambda: self._parse_lambda(alias=alias)) 4723 4724 if alias: 4725 args = self._kv_to_prop_eq(args) 4726 4727 if function and not anonymous: 4728 if "dialect" in function.__code__.co_varnames: 4729 func = function(args, dialect=self.dialect) 4730 else: 4731 func = function(args) 4732 4733 func = self.validate_expression(func, args) 4734 if not self.dialect.NORMALIZE_FUNCTIONS: 4735 func.meta["name"] = this 4736 4737 this = func 4738 else: 4739 if token_type == TokenType.IDENTIFIER: 4740 this = exp.Identifier(this=this, quoted=True) 4741 this = self.expression(exp.Anonymous, this=this, expressions=args) 4742 4743 if isinstance(this, exp.Expression): 4744 this.add_comments(comments) 4745 4746 self._match_r_paren(this) 4747 return self._parse_window(this) 4748 4749 def _kv_to_prop_eq(self, expressions: t.List[exp.Expression]) -> t.List[exp.Expression]: 4750 transformed = [] 4751 4752 for e in expressions: 4753 if isinstance(e, self.KEY_VALUE_DEFINITIONS): 4754 if isinstance(e, exp.Alias): 4755 e = 
self.expression(exp.PropertyEQ, this=e.args.get("alias"), expression=e.this) 4756 4757 if not isinstance(e, exp.PropertyEQ): 4758 e = self.expression( 4759 exp.PropertyEQ, this=exp.to_identifier(e.this.name), expression=e.expression 4760 ) 4761 4762 if isinstance(e.this, exp.Column): 4763 e.this.replace(e.this.this) 4764 4765 transformed.append(e) 4766 4767 return transformed 4768 4769 def _parse_function_parameter(self) -> t.Optional[exp.Expression]: 4770 return self._parse_column_def(self._parse_id_var()) 4771 4772 def _parse_user_defined_function( 4773 self, kind: t.Optional[TokenType] = None 4774 ) -> t.Optional[exp.Expression]: 4775 this = self._parse_id_var() 4776 4777 while self._match(TokenType.DOT): 4778 this = self.expression(exp.Dot, this=this, expression=self._parse_id_var()) 4779 4780 if not self._match(TokenType.L_PAREN): 4781 return this 4782 4783 expressions = self._parse_csv(self._parse_function_parameter) 4784 self._match_r_paren() 4785 return self.expression( 4786 exp.UserDefinedFunction, this=this, expressions=expressions, wrapped=True 4787 ) 4788 4789 def _parse_introducer(self, token: Token) -> exp.Introducer | exp.Identifier: 4790 literal = self._parse_primary() 4791 if literal: 4792 return self.expression(exp.Introducer, this=token.text, expression=literal) 4793 4794 return self.expression(exp.Identifier, this=token.text) 4795 4796 def _parse_session_parameter(self) -> exp.SessionParameter: 4797 kind = None 4798 this = self._parse_id_var() or self._parse_primary() 4799 4800 if this and self._match(TokenType.DOT): 4801 kind = this.name 4802 this = self._parse_var() or self._parse_primary() 4803 4804 return self.expression(exp.SessionParameter, this=this, kind=kind) 4805 4806 def _parse_lambda_arg(self) -> t.Optional[exp.Expression]: 4807 return self._parse_id_var() 4808 4809 def _parse_lambda(self, alias: bool = False) -> t.Optional[exp.Expression]: 4810 index = self._index 4811 4812 if self._match(TokenType.L_PAREN): 4813 expressions = t.cast( 4814 t.List[t.Optional[exp.Expression]], self._parse_csv(self._parse_lambda_arg) 4815 ) 4816 4817 if not self._match(TokenType.R_PAREN): 4818 self._retreat(index) 4819 else: 4820 expressions = [self._parse_lambda_arg()] 4821 4822 if self._match_set(self.LAMBDAS): 4823 return self.LAMBDAS[self._prev.token_type](self, expressions) 4824 4825 self._retreat(index) 4826 4827 this: t.Optional[exp.Expression] 4828 4829 if self._match(TokenType.DISTINCT): 4830 this = self.expression( 4831 exp.Distinct, expressions=self._parse_csv(self._parse_assignment) 4832 ) 4833 else: 4834 this = self._parse_select_or_expression(alias=alias) 4835 4836 return self._parse_limit( 4837 self._parse_order(self._parse_having_max(self._parse_respect_or_ignore_nulls(this))) 4838 ) 4839 4840 def _parse_schema(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 4841 index = self._index 4842 if not self._match(TokenType.L_PAREN): 4843 return this 4844 4845 # Disambiguate between schema and subquery/CTE, e.g. 
in INSERT INTO table (<expr>), 4846 # expr can be of both types 4847 if self._match_set(self.SELECT_START_TOKENS): 4848 self._retreat(index) 4849 return this 4850 args = self._parse_csv(lambda: self._parse_constraint() or self._parse_field_def()) 4851 self._match_r_paren() 4852 return self.expression(exp.Schema, this=this, expressions=args) 4853 4854 def _parse_field_def(self) -> t.Optional[exp.Expression]: 4855 return self._parse_column_def(self._parse_field(any_token=True)) 4856 4857 def _parse_column_def(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 4858 # column defs are not really columns, they're identifiers 4859 if isinstance(this, exp.Column): 4860 this = this.this 4861 4862 kind = self._parse_types(schema=True) 4863 4864 if self._match_text_seq("FOR", "ORDINALITY"): 4865 return self.expression(exp.ColumnDef, this=this, ordinality=True) 4866 4867 constraints: t.List[exp.Expression] = [] 4868 4869 if (not kind and self._match(TokenType.ALIAS)) or self._match_texts( 4870 ("ALIAS", "MATERIALIZED") 4871 ): 4872 persisted = self._prev.text.upper() == "MATERIALIZED" 4873 constraints.append( 4874 self.expression( 4875 exp.ComputedColumnConstraint, 4876 this=self._parse_assignment(), 4877 persisted=persisted or self._match_text_seq("PERSISTED"), 4878 not_null=self._match_pair(TokenType.NOT, TokenType.NULL), 4879 ) 4880 ) 4881 elif kind and self._match_pair(TokenType.ALIAS, TokenType.L_PAREN, advance=False): 4882 self._match(TokenType.ALIAS) 4883 constraints.append( 4884 self.expression(exp.TransformColumnConstraint, this=self._parse_field()) 4885 ) 4886 4887 while True: 4888 constraint = self._parse_column_constraint() 4889 if not constraint: 4890 break 4891 constraints.append(constraint) 4892 4893 if not kind and not constraints: 4894 return this 4895 4896 return self.expression(exp.ColumnDef, this=this, kind=kind, constraints=constraints) 4897 4898 def _parse_auto_increment( 4899 self, 4900 ) -> exp.GeneratedAsIdentityColumnConstraint | exp.AutoIncrementColumnConstraint: 4901 start = None 4902 increment = None 4903 4904 if self._match(TokenType.L_PAREN, advance=False): 4905 args = self._parse_wrapped_csv(self._parse_bitwise) 4906 start = seq_get(args, 0) 4907 increment = seq_get(args, 1) 4908 elif self._match_text_seq("START"): 4909 start = self._parse_bitwise() 4910 self._match_text_seq("INCREMENT") 4911 increment = self._parse_bitwise() 4912 4913 if start and increment: 4914 return exp.GeneratedAsIdentityColumnConstraint(start=start, increment=increment) 4915 4916 return exp.AutoIncrementColumnConstraint() 4917 4918 def _parse_auto_property(self) -> t.Optional[exp.AutoRefreshProperty]: 4919 if not self._match_text_seq("REFRESH"): 4920 self._retreat(self._index - 1) 4921 return None 4922 return self.expression(exp.AutoRefreshProperty, this=self._parse_var(upper=True)) 4923 4924 def _parse_compress(self) -> exp.CompressColumnConstraint: 4925 if self._match(TokenType.L_PAREN, advance=False): 4926 return self.expression( 4927 exp.CompressColumnConstraint, this=self._parse_wrapped_csv(self._parse_bitwise) 4928 ) 4929 4930 return self.expression(exp.CompressColumnConstraint, this=self._parse_bitwise()) 4931 4932 def _parse_generated_as_identity( 4933 self, 4934 ) -> ( 4935 exp.GeneratedAsIdentityColumnConstraint 4936 | exp.ComputedColumnConstraint 4937 | exp.GeneratedAsRowColumnConstraint 4938 ): 4939 if self._match_text_seq("BY", "DEFAULT"): 4940 on_null = self._match_pair(TokenType.ON, TokenType.NULL) 4941 this = self.expression( 4942 
exp.GeneratedAsIdentityColumnConstraint, this=False, on_null=on_null 4943 ) 4944 else: 4945 self._match_text_seq("ALWAYS") 4946 this = self.expression(exp.GeneratedAsIdentityColumnConstraint, this=True) 4947 4948 self._match(TokenType.ALIAS) 4949 4950 if self._match_text_seq("ROW"): 4951 start = self._match_text_seq("START") 4952 if not start: 4953 self._match(TokenType.END) 4954 hidden = self._match_text_seq("HIDDEN") 4955 return self.expression(exp.GeneratedAsRowColumnConstraint, start=start, hidden=hidden) 4956 4957 identity = self._match_text_seq("IDENTITY") 4958 4959 if self._match(TokenType.L_PAREN): 4960 if self._match(TokenType.START_WITH): 4961 this.set("start", self._parse_bitwise()) 4962 if self._match_text_seq("INCREMENT", "BY"): 4963 this.set("increment", self._parse_bitwise()) 4964 if self._match_text_seq("MINVALUE"): 4965 this.set("minvalue", self._parse_bitwise()) 4966 if self._match_text_seq("MAXVALUE"): 4967 this.set("maxvalue", self._parse_bitwise()) 4968 4969 if self._match_text_seq("CYCLE"): 4970 this.set("cycle", True) 4971 elif self._match_text_seq("NO", "CYCLE"): 4972 this.set("cycle", False) 4973 4974 if not identity: 4975 this.set("expression", self._parse_range()) 4976 elif not this.args.get("start") and self._match(TokenType.NUMBER, advance=False): 4977 args = self._parse_csv(self._parse_bitwise) 4978 this.set("start", seq_get(args, 0)) 4979 this.set("increment", seq_get(args, 1)) 4980 4981 self._match_r_paren() 4982 4983 return this 4984 4985 def _parse_inline(self) -> exp.InlineLengthColumnConstraint: 4986 self._match_text_seq("LENGTH") 4987 return self.expression(exp.InlineLengthColumnConstraint, this=self._parse_bitwise()) 4988 4989 def _parse_not_constraint(self) -> t.Optional[exp.Expression]: 4990 if self._match_text_seq("NULL"): 4991 return self.expression(exp.NotNullColumnConstraint) 4992 if self._match_text_seq("CASESPECIFIC"): 4993 return self.expression(exp.CaseSpecificColumnConstraint, not_=True) 4994 if self._match_text_seq("FOR", "REPLICATION"): 4995 return self.expression(exp.NotForReplicationColumnConstraint) 4996 return None 4997 4998 def _parse_column_constraint(self) -> t.Optional[exp.Expression]: 4999 if self._match(TokenType.CONSTRAINT): 5000 this = self._parse_id_var() 5001 else: 5002 this = None 5003 5004 if self._match_texts(self.CONSTRAINT_PARSERS): 5005 return self.expression( 5006 exp.ColumnConstraint, 5007 this=this, 5008 kind=self.CONSTRAINT_PARSERS[self._prev.text.upper()](self), 5009 ) 5010 5011 return this 5012 5013 def _parse_constraint(self) -> t.Optional[exp.Expression]: 5014 if not self._match(TokenType.CONSTRAINT): 5015 return self._parse_unnamed_constraint(constraints=self.SCHEMA_UNNAMED_CONSTRAINTS) 5016 5017 return self.expression( 5018 exp.Constraint, 5019 this=self._parse_id_var(), 5020 expressions=self._parse_unnamed_constraints(), 5021 ) 5022 5023 def _parse_unnamed_constraints(self) -> t.List[exp.Expression]: 5024 constraints = [] 5025 while True: 5026 constraint = self._parse_unnamed_constraint() or self._parse_function() 5027 if not constraint: 5028 break 5029 constraints.append(constraint) 5030 5031 return constraints 5032 5033 def _parse_unnamed_constraint( 5034 self, constraints: t.Optional[t.Collection[str]] = None 5035 ) -> t.Optional[exp.Expression]: 5036 if self._match(TokenType.IDENTIFIER, advance=False) or not self._match_texts( 5037 constraints or self.CONSTRAINT_PARSERS 5038 ): 5039 return None 5040 5041 constraint = self._prev.text.upper() 5042 if constraint not in self.CONSTRAINT_PARSERS: 5043 
self.raise_error(f"No parser found for schema constraint {constraint}.") 5044 5045 return self.CONSTRAINT_PARSERS[constraint](self) 5046 5047 def _parse_unique(self) -> exp.UniqueColumnConstraint: 5048 self._match_text_seq("KEY") 5049 return self.expression( 5050 exp.UniqueColumnConstraint, 5051 this=self._parse_schema(self._parse_id_var(any_token=False)), 5052 index_type=self._match(TokenType.USING) and self._advance_any() and self._prev.text, 5053 on_conflict=self._parse_on_conflict(), 5054 ) 5055 5056 def _parse_key_constraint_options(self) -> t.List[str]: 5057 options = [] 5058 while True: 5059 if not self._curr: 5060 break 5061 5062 if self._match(TokenType.ON): 5063 action = None 5064 on = self._advance_any() and self._prev.text 5065 5066 if self._match_text_seq("NO", "ACTION"): 5067 action = "NO ACTION" 5068 elif self._match_text_seq("CASCADE"): 5069 action = "CASCADE" 5070 elif self._match_text_seq("RESTRICT"): 5071 action = "RESTRICT" 5072 elif self._match_pair(TokenType.SET, TokenType.NULL): 5073 action = "SET NULL" 5074 elif self._match_pair(TokenType.SET, TokenType.DEFAULT): 5075 action = "SET DEFAULT" 5076 else: 5077 self.raise_error("Invalid key constraint") 5078 5079 options.append(f"ON {on} {action}") 5080 elif self._match_text_seq("NOT", "ENFORCED"): 5081 options.append("NOT ENFORCED") 5082 elif self._match_text_seq("DEFERRABLE"): 5083 options.append("DEFERRABLE") 5084 elif self._match_text_seq("INITIALLY", "DEFERRED"): 5085 options.append("INITIALLY DEFERRED") 5086 elif self._match_text_seq("NORELY"): 5087 options.append("NORELY") 5088 elif self._match_text_seq("MATCH", "FULL"): 5089 options.append("MATCH FULL") 5090 else: 5091 break 5092 5093 return options 5094 5095 def _parse_references(self, match: bool = True) -> t.Optional[exp.Reference]: 5096 if match and not self._match(TokenType.REFERENCES): 5097 return None 5098 5099 expressions = None 5100 this = self._parse_table(schema=True) 5101 options = self._parse_key_constraint_options() 5102 return self.expression(exp.Reference, this=this, expressions=expressions, options=options) 5103 5104 def _parse_foreign_key(self) -> exp.ForeignKey: 5105 expressions = self._parse_wrapped_id_vars() 5106 reference = self._parse_references() 5107 options = {} 5108 5109 while self._match(TokenType.ON): 5110 if not self._match_set((TokenType.DELETE, TokenType.UPDATE)): 5111 self.raise_error("Expected DELETE or UPDATE") 5112 5113 kind = self._prev.text.lower() 5114 5115 if self._match_text_seq("NO", "ACTION"): 5116 action = "NO ACTION" 5117 elif self._match(TokenType.SET): 5118 self._match_set((TokenType.NULL, TokenType.DEFAULT)) 5119 action = "SET " + self._prev.text.upper() 5120 else: 5121 self._advance() 5122 action = self._prev.text.upper() 5123 5124 options[kind] = action 5125 5126 return self.expression( 5127 exp.ForeignKey, 5128 expressions=expressions, 5129 reference=reference, 5130 **options, # type: ignore 5131 ) 5132 5133 def _parse_primary_key_part(self) -> t.Optional[exp.Expression]: 5134 return self._parse_field() 5135 5136 def _parse_period_for_system_time(self) -> t.Optional[exp.PeriodForSystemTimeConstraint]: 5137 if not self._match(TokenType.TIMESTAMP_SNAPSHOT): 5138 self._retreat(self._index - 1) 5139 return None 5140 5141 id_vars = self._parse_wrapped_id_vars() 5142 return self.expression( 5143 exp.PeriodForSystemTimeConstraint, 5144 this=seq_get(id_vars, 0), 5145 expression=seq_get(id_vars, 1), 5146 ) 5147 5148 def _parse_primary_key( 5149 self, wrapped_optional: bool = False, in_props: bool = False 5150 ) -> 
exp.PrimaryKeyColumnConstraint | exp.PrimaryKey: 5151 desc = ( 5152 self._match_set((TokenType.ASC, TokenType.DESC)) 5153 and self._prev.token_type == TokenType.DESC 5154 ) 5155 5156 if not in_props and not self._match(TokenType.L_PAREN, advance=False): 5157 return self.expression(exp.PrimaryKeyColumnConstraint, desc=desc) 5158 5159 expressions = self._parse_wrapped_csv( 5160 self._parse_primary_key_part, optional=wrapped_optional 5161 ) 5162 options = self._parse_key_constraint_options() 5163 return self.expression(exp.PrimaryKey, expressions=expressions, options=options) 5164 5165 def _parse_bracket_key_value(self, is_map: bool = False) -> t.Optional[exp.Expression]: 5166 return self._parse_slice(self._parse_alias(self._parse_assignment(), explicit=True)) 5167 5168 def _parse_bracket(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 5169 if not self._match_set((TokenType.L_BRACKET, TokenType.L_BRACE)): 5170 return this 5171 5172 bracket_kind = self._prev.token_type 5173 expressions = self._parse_csv( 5174 lambda: self._parse_bracket_key_value(is_map=bracket_kind == TokenType.L_BRACE) 5175 ) 5176 5177 if bracket_kind == TokenType.L_BRACKET and not self._match(TokenType.R_BRACKET): 5178 self.raise_error("Expected ]") 5179 elif bracket_kind == TokenType.L_BRACE and not self._match(TokenType.R_BRACE): 5180 self.raise_error("Expected }") 5181 5182 # https://duckdb.org/docs/sql/data_types/struct.html#creating-structs 5183 if bracket_kind == TokenType.L_BRACE: 5184 this = self.expression(exp.Struct, expressions=self._kv_to_prop_eq(expressions)) 5185 elif not this or this.name.upper() == "ARRAY": 5186 this = self.expression(exp.Array, expressions=expressions) 5187 else: 5188 expressions = apply_index_offset(this, expressions, -self.dialect.INDEX_OFFSET) 5189 this = self.expression(exp.Bracket, this=this, expressions=expressions) 5190 5191 self._add_comments(this) 5192 return self._parse_bracket(this) 5193 5194 def _parse_slice(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 5195 if self._match(TokenType.COLON): 5196 return self.expression(exp.Slice, this=this, expression=self._parse_assignment()) 5197 return this 5198 5199 def _parse_case(self) -> t.Optional[exp.Expression]: 5200 ifs = [] 5201 default = None 5202 5203 comments = self._prev_comments 5204 expression = self._parse_assignment() 5205 5206 while self._match(TokenType.WHEN): 5207 this = self._parse_assignment() 5208 self._match(TokenType.THEN) 5209 then = self._parse_assignment() 5210 ifs.append(self.expression(exp.If, this=this, true=then)) 5211 5212 if self._match(TokenType.ELSE): 5213 default = self._parse_assignment() 5214 5215 if not self._match(TokenType.END): 5216 if isinstance(default, exp.Interval) and default.this.sql().upper() == "END": 5217 default = exp.column("interval") 5218 else: 5219 self.raise_error("Expected END after CASE", self._prev) 5220 5221 return self.expression( 5222 exp.Case, comments=comments, this=expression, ifs=ifs, default=default 5223 ) 5224 5225 def _parse_if(self) -> t.Optional[exp.Expression]: 5226 if self._match(TokenType.L_PAREN): 5227 args = self._parse_csv(self._parse_assignment) 5228 this = self.validate_expression(exp.If.from_arg_list(args), args) 5229 self._match_r_paren() 5230 else: 5231 index = self._index - 1 5232 5233 if self.NO_PAREN_IF_COMMANDS and index == 0: 5234 return self._parse_as_command(self._prev) 5235 5236 condition = self._parse_assignment() 5237 5238 if not condition: 5239 self._retreat(index) 5240 return None 5241 5242 
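# A brief sketch of the brace branch in _parse_bracket above (DuckDB syntax;
# the values are illustrative): a { ... } literal becomes an exp.Struct whose
# key-value pairs are normalized through _kv_to_prop_eq:
#
#   >>> import sqlglot
#   >>> from sqlglot import exp
#   >>> sqlglot.parse_one("SELECT {'a': 1}", read="duckdb").find(exp.Struct).sql("duckdb")
#   "{'a': 1}"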
self._match(TokenType.THEN) 5243 true = self._parse_assignment() 5244 false = self._parse_assignment() if self._match(TokenType.ELSE) else None 5245 self._match(TokenType.END) 5246 this = self.expression(exp.If, this=condition, true=true, false=false) 5247 5248 return this 5249 5250 def _parse_next_value_for(self) -> t.Optional[exp.Expression]: 5251 if not self._match_text_seq("VALUE", "FOR"): 5252 self._retreat(self._index - 1) 5253 return None 5254 5255 return self.expression( 5256 exp.NextValueFor, 5257 this=self._parse_column(), 5258 order=self._match(TokenType.OVER) and self._parse_wrapped(self._parse_order), 5259 ) 5260 5261 def _parse_extract(self) -> exp.Extract: 5262 this = self._parse_function() or self._parse_var() or self._parse_type() 5263 5264 if self._match(TokenType.FROM): 5265 return self.expression(exp.Extract, this=this, expression=self._parse_bitwise()) 5266 5267 if not self._match(TokenType.COMMA): 5268 self.raise_error("Expected FROM or comma after EXTRACT", self._prev) 5269 5270 return self.expression(exp.Extract, this=this, expression=self._parse_bitwise()) 5271 5272 def _parse_gap_fill(self) -> exp.GapFill: 5273 self._match(TokenType.TABLE) 5274 this = self._parse_table() 5275 5276 self._match(TokenType.COMMA) 5277 args = [this, *self._parse_csv(lambda: self._parse_lambda())] 5278 5279 gap_fill = exp.GapFill.from_arg_list(args) 5280 return self.validate_expression(gap_fill, args) 5281 5282 def _parse_cast(self, strict: bool, safe: t.Optional[bool] = None) -> exp.Expression: 5283 this = self._parse_assignment() 5284 5285 if not self._match(TokenType.ALIAS): 5286 if self._match(TokenType.COMMA): 5287 return self.expression(exp.CastToStrType, this=this, to=self._parse_string()) 5288 5289 self.raise_error("Expected AS after CAST") 5290 5291 fmt = None 5292 to = self._parse_types() 5293 5294 if self._match(TokenType.FORMAT): 5295 fmt_string = self._parse_string() 5296 fmt = self._parse_at_time_zone(fmt_string) 5297 5298 if not to: 5299 to = exp.DataType.build(exp.DataType.Type.UNKNOWN) 5300 if to.this in exp.DataType.TEMPORAL_TYPES: 5301 this = self.expression( 5302 exp.StrToDate if to.this == exp.DataType.Type.DATE else exp.StrToTime, 5303 this=this, 5304 format=exp.Literal.string( 5305 format_time( 5306 fmt_string.this if fmt_string else "", 5307 self.dialect.FORMAT_MAPPING or self.dialect.TIME_MAPPING, 5308 self.dialect.FORMAT_TRIE or self.dialect.TIME_TRIE, 5309 ) 5310 ), 5311 ) 5312 5313 if isinstance(fmt, exp.AtTimeZone) and isinstance(this, exp.StrToTime): 5314 this.set("zone", fmt.args["zone"]) 5315 return this 5316 elif not to: 5317 self.raise_error("Expected TYPE after CAST") 5318 elif isinstance(to, exp.Identifier): 5319 to = exp.DataType.build(to.name, udt=True) 5320 elif to.this == exp.DataType.Type.CHAR: 5321 if self._match(TokenType.CHARACTER_SET): 5322 to = self.expression(exp.CharacterSet, this=self._parse_var_or_string()) 5323 5324 return self.expression( 5325 exp.Cast if strict else exp.TryCast, 5326 this=this, 5327 to=to, 5328 format=fmt, 5329 safe=safe, 5330 action=self._parse_var_from_options(self.CAST_ACTIONS, raise_unmatched=False), 5331 ) 5332 5333 def _parse_string_agg(self) -> exp.Expression: 5334 if self._match(TokenType.DISTINCT): 5335 args: t.List[t.Optional[exp.Expression]] = [ 5336 self.expression(exp.Distinct, expressions=[self._parse_assignment()]) 5337 ] 5338 if self._match(TokenType.COMMA): 5339 args.extend(self._parse_csv(self._parse_assignment)) 5340 else: 5341 args = self._parse_csv(self._parse_assignment) # type: ignore 5342 5343 
index = self._index 5344 if not self._match(TokenType.R_PAREN) and args: 5345 # postgres: STRING_AGG([DISTINCT] expression, separator [ORDER BY expression1 {ASC | DESC} [, ...]]) 5346 # bigquery: STRING_AGG([DISTINCT] expression [, separator] [ORDER BY key [{ASC | DESC}] [, ... ]] [LIMIT n]) 5347 args[-1] = self._parse_limit(this=self._parse_order(this=args[-1])) 5348 return self.expression(exp.GroupConcat, this=args[0], separator=seq_get(args, 1)) 5349 5350 # Checks if we can parse an order clause: WITHIN GROUP (ORDER BY <order_by_expression_list> [ASC | DESC]). 5351 # This is done "manually", instead of letting _parse_window parse it into an exp.WithinGroup node, so that 5352 # the STRING_AGG call is parsed like in MySQL / SQLite and can thus be transpiled more easily to them. 5353 if not self._match_text_seq("WITHIN", "GROUP"): 5354 self._retreat(index) 5355 return self.validate_expression(exp.GroupConcat.from_arg_list(args), args) 5356 5357 self._match_l_paren() # The corresponding match_r_paren will be called in parse_function (caller) 5358 order = self._parse_order(this=seq_get(args, 0)) 5359 return self.expression(exp.GroupConcat, this=order, separator=seq_get(args, 1)) 5360 5361 def _parse_convert( 5362 self, strict: bool, safe: t.Optional[bool] = None 5363 ) -> t.Optional[exp.Expression]: 5364 this = self._parse_bitwise() 5365 5366 if self._match(TokenType.USING): 5367 to: t.Optional[exp.Expression] = self.expression( 5368 exp.CharacterSet, this=self._parse_var() 5369 ) 5370 elif self._match(TokenType.COMMA): 5371 to = self._parse_types() 5372 else: 5373 to = None 5374 5375 return self.expression(exp.Cast if strict else exp.TryCast, this=this, to=to, safe=safe) 5376 5377 def _parse_decode(self) -> t.Optional[exp.Decode | exp.Case]: 5378 """ 5379 There are generally two variants of the DECODE function: 5380 5381 - DECODE(bin, charset) 5382 - DECODE(expression, search, result [, search, result] ... [, default]) 5383 5384 The second variant will always be parsed into a CASE expression. Note that NULL 5385 needs special treatment, since we need to explicitly check for it with `IS NULL`, 5386 instead of relying on pattern matching. 
5387 """ 5388 args = self._parse_csv(self._parse_assignment) 5389 5390 if len(args) < 3: 5391 return self.expression(exp.Decode, this=seq_get(args, 0), charset=seq_get(args, 1)) 5392 5393 expression, *expressions = args 5394 if not expression: 5395 return None 5396 5397 ifs = [] 5398 for search, result in zip(expressions[::2], expressions[1::2]): 5399 if not search or not result: 5400 return None 5401 5402 if isinstance(search, exp.Literal): 5403 ifs.append( 5404 exp.If(this=exp.EQ(this=expression.copy(), expression=search), true=result) 5405 ) 5406 elif isinstance(search, exp.Null): 5407 ifs.append( 5408 exp.If(this=exp.Is(this=expression.copy(), expression=exp.Null()), true=result) 5409 ) 5410 else: 5411 cond = exp.or_( 5412 exp.EQ(this=expression.copy(), expression=search), 5413 exp.and_( 5414 exp.Is(this=expression.copy(), expression=exp.Null()), 5415 exp.Is(this=search.copy(), expression=exp.Null()), 5416 copy=False, 5417 ), 5418 copy=False, 5419 ) 5420 ifs.append(exp.If(this=cond, true=result)) 5421 5422 return exp.Case(ifs=ifs, default=expressions[-1] if len(expressions) % 2 == 1 else None) 5423 5424 def _parse_json_key_value(self) -> t.Optional[exp.JSONKeyValue]: 5425 self._match_text_seq("KEY") 5426 key = self._parse_column() 5427 self._match_set(self.JSON_KEY_VALUE_SEPARATOR_TOKENS) 5428 self._match_text_seq("VALUE") 5429 value = self._parse_bitwise() 5430 5431 if not key and not value: 5432 return None 5433 return self.expression(exp.JSONKeyValue, this=key, expression=value) 5434 5435 def _parse_format_json(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 5436 if not this or not self._match_text_seq("FORMAT", "JSON"): 5437 return this 5438 5439 return self.expression(exp.FormatJson, this=this) 5440 5441 def _parse_on_handling(self, on: str, *values: str) -> t.Optional[str]: 5442 # Parses the "X ON Y" syntax, i.e. NULL ON NULL (Oracle, T-SQL) 5443 for value in values: 5444 if self._match_text_seq(value, "ON", on): 5445 return f"{value} ON {on}" 5446 5447 return None 5448 5449 @t.overload 5450 def _parse_json_object(self, agg: Lit[False]) -> exp.JSONObject: ... 5451 5452 @t.overload 5453 def _parse_json_object(self, agg: Lit[True]) -> exp.JSONObjectAgg: ... 
5454 5455 def _parse_json_object(self, agg=False): 5456 star = self._parse_star() 5457 expressions = ( 5458 [star] 5459 if star 5460 else self._parse_csv(lambda: self._parse_format_json(self._parse_json_key_value())) 5461 ) 5462 null_handling = self._parse_on_handling("NULL", "NULL", "ABSENT") 5463 5464 unique_keys = None 5465 if self._match_text_seq("WITH", "UNIQUE"): 5466 unique_keys = True 5467 elif self._match_text_seq("WITHOUT", "UNIQUE"): 5468 unique_keys = False 5469 5470 self._match_text_seq("KEYS") 5471 5472 return_type = self._match_text_seq("RETURNING") and self._parse_format_json( 5473 self._parse_type() 5474 ) 5475 encoding = self._match_text_seq("ENCODING") and self._parse_var() 5476 5477 return self.expression( 5478 exp.JSONObjectAgg if agg else exp.JSONObject, 5479 expressions=expressions, 5480 null_handling=null_handling, 5481 unique_keys=unique_keys, 5482 return_type=return_type, 5483 encoding=encoding, 5484 ) 5485 5486 # Note: this is currently incomplete; it only implements the "JSON_value_column" part 5487 def _parse_json_column_def(self) -> exp.JSONColumnDef: 5488 if not self._match_text_seq("NESTED"): 5489 this = self._parse_id_var() 5490 kind = self._parse_types(allow_identifiers=False) 5491 nested = None 5492 else: 5493 this = None 5494 kind = None 5495 nested = True 5496 5497 path = self._match_text_seq("PATH") and self._parse_string() 5498 nested_schema = nested and self._parse_json_schema() 5499 5500 return self.expression( 5501 exp.JSONColumnDef, 5502 this=this, 5503 kind=kind, 5504 path=path, 5505 nested_schema=nested_schema, 5506 ) 5507 5508 def _parse_json_schema(self) -> exp.JSONSchema: 5509 self._match_text_seq("COLUMNS") 5510 return self.expression( 5511 exp.JSONSchema, 5512 expressions=self._parse_wrapped_csv(self._parse_json_column_def, optional=True), 5513 ) 5514 5515 def _parse_json_table(self) -> exp.JSONTable: 5516 this = self._parse_format_json(self._parse_bitwise()) 5517 path = self._match(TokenType.COMMA) and self._parse_string() 5518 error_handling = self._parse_on_handling("ERROR", "ERROR", "NULL") 5519 empty_handling = self._parse_on_handling("EMPTY", "ERROR", "NULL") 5520 schema = self._parse_json_schema() 5521 5522 return exp.JSONTable( 5523 this=this, 5524 schema=schema, 5525 path=path, 5526 error_handling=error_handling, 5527 empty_handling=empty_handling, 5528 ) 5529 5530 def _parse_match_against(self) -> exp.MatchAgainst: 5531 expressions = self._parse_csv(self._parse_column) 5532 5533 self._match_text_seq(")", "AGAINST", "(") 5534 5535 this = self._parse_string() 5536 5537 if self._match_text_seq("IN", "NATURAL", "LANGUAGE", "MODE"): 5538 modifier = "IN NATURAL LANGUAGE MODE" 5539 if self._match_text_seq("WITH", "QUERY", "EXPANSION"): 5540 modifier = f"{modifier} WITH QUERY EXPANSION" 5541 elif self._match_text_seq("IN", "BOOLEAN", "MODE"): 5542 modifier = "IN BOOLEAN MODE" 5543 elif self._match_text_seq("WITH", "QUERY", "EXPANSION"): 5544 modifier = "WITH QUERY EXPANSION" 5545 else: 5546 modifier = None 5547 5548 return self.expression( 5549 exp.MatchAgainst, this=this, expressions=expressions, modifier=modifier 5550 ) 5551 5552 # https://learn.microsoft.com/en-us/sql/t-sql/functions/openjson-transact-sql?view=sql-server-ver16 5553 def _parse_open_json(self) -> exp.OpenJSON: 5554 this = self._parse_bitwise() 5555 path = self._match(TokenType.COMMA) and self._parse_string() 5556 5557 def _parse_open_json_column_def() -> exp.OpenJSONColumnDef: 5558 this = self._parse_field(any_token=True) 5559 kind = self._parse_types() 5560 path = 
self._parse_string() 5561 as_json = self._match_pair(TokenType.ALIAS, TokenType.JSON) 5562 5563 return self.expression( 5564 exp.OpenJSONColumnDef, this=this, kind=kind, path=path, as_json=as_json 5565 ) 5566 5567 expressions = None 5568 if self._match_pair(TokenType.R_PAREN, TokenType.WITH): 5569 self._match_l_paren() 5570 expressions = self._parse_csv(_parse_open_json_column_def) 5571 5572 return self.expression(exp.OpenJSON, this=this, path=path, expressions=expressions) 5573 5574 def _parse_position(self, haystack_first: bool = False) -> exp.StrPosition: 5575 args = self._parse_csv(self._parse_bitwise) 5576 5577 if self._match(TokenType.IN): 5578 return self.expression( 5579 exp.StrPosition, this=self._parse_bitwise(), substr=seq_get(args, 0) 5580 ) 5581 5582 if haystack_first: 5583 haystack = seq_get(args, 0) 5584 needle = seq_get(args, 1) 5585 else: 5586 needle = seq_get(args, 0) 5587 haystack = seq_get(args, 1) 5588 5589 return self.expression( 5590 exp.StrPosition, this=haystack, substr=needle, position=seq_get(args, 2) 5591 ) 5592 5593 def _parse_predict(self) -> exp.Predict: 5594 self._match_text_seq("MODEL") 5595 this = self._parse_table() 5596 5597 self._match(TokenType.COMMA) 5598 self._match_text_seq("TABLE") 5599 5600 return self.expression( 5601 exp.Predict, 5602 this=this, 5603 expression=self._parse_table(), 5604 params_struct=self._match(TokenType.COMMA) and self._parse_bitwise(), 5605 ) 5606 5607 def _parse_join_hint(self, func_name: str) -> exp.JoinHint: 5608 args = self._parse_csv(self._parse_table) 5609 return exp.JoinHint(this=func_name.upper(), expressions=args) 5610 5611 def _parse_substring(self) -> exp.Substring: 5612 # Postgres supports the form: substring(string [from int] [for int]) 5613 # https://www.postgresql.org/docs/9.1/functions-string.html @ Table 9-6 5614 5615 args = t.cast(t.List[t.Optional[exp.Expression]], self._parse_csv(self._parse_bitwise)) 5616 5617 if self._match(TokenType.FROM): 5618 args.append(self._parse_bitwise()) 5619 if self._match(TokenType.FOR): 5620 if len(args) == 1: 5621 args.append(exp.Literal.number(1)) 5622 args.append(self._parse_bitwise()) 5623 5624 return self.validate_expression(exp.Substring.from_arg_list(args), args) 5625 5626 def _parse_trim(self) -> exp.Trim: 5627 # https://www.w3resource.com/sql/character-functions/trim.php 5628 # https://docs.oracle.com/javadb/10.8.3.0/ref/rreftrimfunc.html 5629 5630 position = None 5631 collation = None 5632 expression = None 5633 5634 if self._match_texts(self.TRIM_TYPES): 5635 position = self._prev.text.upper() 5636 5637 this = self._parse_bitwise() 5638 if self._match_set((TokenType.FROM, TokenType.COMMA)): 5639 invert_order = self._prev.token_type == TokenType.FROM or self.TRIM_PATTERN_FIRST 5640 expression = self._parse_bitwise() 5641 5642 if invert_order: 5643 this, expression = expression, this 5644 5645 if self._match(TokenType.COLLATE): 5646 collation = self._parse_bitwise() 5647 5648 return self.expression( 5649 exp.Trim, this=this, position=position, expression=expression, collation=collation 5650 ) 5651 5652 def _parse_window_clause(self) -> t.Optional[t.List[exp.Expression]]: 5653 return self._match(TokenType.WINDOW) and self._parse_csv(self._parse_named_window) 5654 5655 def _parse_named_window(self) -> t.Optional[exp.Expression]: 5656 return self._parse_window(self._parse_id_var(), alias=True) 5657 5658 def _parse_respect_or_ignore_nulls( 5659 self, this: t.Optional[exp.Expression] 5660 ) -> t.Optional[exp.Expression]: 5661 if self._match_text_seq("IGNORE", "NULLS"): 
5662 return self.expression(exp.IgnoreNulls, this=this) 5663 if self._match_text_seq("RESPECT", "NULLS"): 5664 return self.expression(exp.RespectNulls, this=this) 5665 return this 5666 5667 def _parse_having_max(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 5668 if self._match(TokenType.HAVING): 5669 self._match_texts(("MAX", "MIN")) 5670 max = self._prev.text.upper() != "MIN" 5671 return self.expression( 5672 exp.HavingMax, this=this, expression=self._parse_column(), max=max 5673 ) 5674 5675 return this 5676 5677 def _parse_window( 5678 self, this: t.Optional[exp.Expression], alias: bool = False 5679 ) -> t.Optional[exp.Expression]: 5680 func = this 5681 comments = func.comments if isinstance(func, exp.Expression) else None 5682 5683 if self._match_pair(TokenType.FILTER, TokenType.L_PAREN): 5684 self._match(TokenType.WHERE) 5685 this = self.expression( 5686 exp.Filter, this=this, expression=self._parse_where(skip_where_token=True) 5687 ) 5688 self._match_r_paren() 5689 5690 # T-SQL allows the OVER (...) syntax after WITHIN GROUP. 5691 # https://learn.microsoft.com/en-us/sql/t-sql/functions/percentile-disc-transact-sql?view=sql-server-ver16 5692 if self._match_text_seq("WITHIN", "GROUP"): 5693 order = self._parse_wrapped(self._parse_order) 5694 this = self.expression(exp.WithinGroup, this=this, expression=order) 5695 5696 # SQL spec defines an optional [ { IGNORE | RESPECT } NULLS ] OVER 5697 # Some dialects choose to implement and some do not. 5698 # https://dev.mysql.com/doc/refman/8.0/en/window-function-descriptions.html 5699 5700 # There is some code above in _parse_lambda that handles 5701 # SELECT FIRST_VALUE(TABLE.COLUMN IGNORE|RESPECT NULLS) OVER ... 5702 5703 # The below changes handle 5704 # SELECT FIRST_VALUE(TABLE.COLUMN) IGNORE|RESPECT NULLS OVER ... 5705 5706 # Oracle allows both formats 5707 # (https://docs.oracle.com/en/database/oracle/oracle-database/19/sqlrf/img_text/first_value.html) 5708 # and Snowflake chose to do the same for familiarity 5709 # https://docs.snowflake.com/en/sql-reference/functions/first_value.html#usage-notes 5710 if isinstance(this, exp.AggFunc): 5711 ignore_respect = this.find(exp.IgnoreNulls, exp.RespectNulls) 5712 5713 if ignore_respect and ignore_respect is not this: 5714 ignore_respect.replace(ignore_respect.this) 5715 this = self.expression(ignore_respect.__class__, this=this) 5716 5717 this = self._parse_respect_or_ignore_nulls(this) 5718 5719 # bigquery select from window x AS (partition by ...) 
5720 if alias: 5721 over = None 5722 self._match(TokenType.ALIAS) 5723 elif not self._match_set(self.WINDOW_BEFORE_PAREN_TOKENS): 5724 return this 5725 else: 5726 over = self._prev.text.upper() 5727 5728 if comments and isinstance(func, exp.Expression): 5729 func.pop_comments() 5730 5731 if not self._match(TokenType.L_PAREN): 5732 return self.expression( 5733 exp.Window, 5734 comments=comments, 5735 this=this, 5736 alias=self._parse_id_var(False), 5737 over=over, 5738 ) 5739 5740 window_alias = self._parse_id_var(any_token=False, tokens=self.WINDOW_ALIAS_TOKENS) 5741 5742 first = self._match(TokenType.FIRST) 5743 if self._match_text_seq("LAST"): 5744 first = False 5745 5746 partition, order = self._parse_partition_and_order() 5747 kind = self._match_set((TokenType.ROWS, TokenType.RANGE)) and self._prev.text 5748 5749 if kind: 5750 self._match(TokenType.BETWEEN) 5751 start = self._parse_window_spec() 5752 self._match(TokenType.AND) 5753 end = self._parse_window_spec() 5754 5755 spec = self.expression( 5756 exp.WindowSpec, 5757 kind=kind, 5758 start=start["value"], 5759 start_side=start["side"], 5760 end=end["value"], 5761 end_side=end["side"], 5762 ) 5763 else: 5764 spec = None 5765 5766 self._match_r_paren() 5767 5768 window = self.expression( 5769 exp.Window, 5770 comments=comments, 5771 this=this, 5772 partition_by=partition, 5773 order=order, 5774 spec=spec, 5775 alias=window_alias, 5776 over=over, 5777 first=first, 5778 ) 5779 5780 # This covers Oracle's FIRST/LAST syntax: aggregate KEEP (...) OVER (...) 5781 if self._match_set(self.WINDOW_BEFORE_PAREN_TOKENS, advance=False): 5782 return self._parse_window(window, alias=alias) 5783 5784 return window 5785 5786 def _parse_partition_and_order( 5787 self, 5788 ) -> t.Tuple[t.List[exp.Expression], t.Optional[exp.Expression]]: 5789 return self._parse_partition_by(), self._parse_order() 5790 5791 def _parse_window_spec(self) -> t.Dict[str, t.Optional[str | exp.Expression]]: 5792 self._match(TokenType.BETWEEN) 5793 5794 return { 5795 "value": ( 5796 (self._match_text_seq("UNBOUNDED") and "UNBOUNDED") 5797 or (self._match_text_seq("CURRENT", "ROW") and "CURRENT ROW") 5798 or self._parse_bitwise() 5799 ), 5800 "side": self._match_texts(self.WINDOW_SIDES) and self._prev.text, 5801 } 5802 5803 def _parse_alias( 5804 self, this: t.Optional[exp.Expression], explicit: bool = False 5805 ) -> t.Optional[exp.Expression]: 5806 any_token = self._match(TokenType.ALIAS) 5807 comments = self._prev_comments or [] 5808 5809 if explicit and not any_token: 5810 return this 5811 5812 if self._match(TokenType.L_PAREN): 5813 aliases = self.expression( 5814 exp.Aliases, 5815 comments=comments, 5816 this=this, 5817 expressions=self._parse_csv(lambda: self._parse_id_var(any_token)), 5818 ) 5819 self._match_r_paren(aliases) 5820 return aliases 5821 5822 alias = self._parse_id_var(any_token, tokens=self.ALIAS_TOKENS) or ( 5823 self.STRING_ALIASES and self._parse_string_as_identifier() 5824 ) 5825 5826 if alias: 5827 comments.extend(alias.pop_comments()) 5828 this = self.expression(exp.Alias, comments=comments, this=this, alias=alias) 5829 column = this.this 5830 5831 # Moves the comment next to the alias in `expr /* comment */ AS alias` 5832 if not this.comments and column and column.comments: 5833 this.comments = column.pop_comments() 5834 5835 return this 5836 5837 def _parse_id_var( 5838 self, 5839 any_token: bool = True, 5840 tokens: t.Optional[t.Collection[TokenType]] = None, 5841 ) -> t.Optional[exp.Expression]: 5842 expression = self._parse_identifier() 5843 if 
not expression and ( 5844 (any_token and self._advance_any()) or self._match_set(tokens or self.ID_VAR_TOKENS) 5845 ): 5846 quoted = self._prev.token_type == TokenType.STRING 5847 expression = self.expression(exp.Identifier, this=self._prev.text, quoted=quoted) 5848 5849 return expression 5850 5851 def _parse_string(self) -> t.Optional[exp.Expression]: 5852 if self._match_set(self.STRING_PARSERS): 5853 return self.STRING_PARSERS[self._prev.token_type](self, self._prev) 5854 return self._parse_placeholder() 5855 5856 def _parse_string_as_identifier(self) -> t.Optional[exp.Identifier]: 5857 return exp.to_identifier(self._match(TokenType.STRING) and self._prev.text, quoted=True) 5858 5859 def _parse_number(self) -> t.Optional[exp.Expression]: 5860 if self._match_set(self.NUMERIC_PARSERS): 5861 return self.NUMERIC_PARSERS[self._prev.token_type](self, self._prev) 5862 return self._parse_placeholder() 5863 5864 def _parse_identifier(self) -> t.Optional[exp.Expression]: 5865 if self._match(TokenType.IDENTIFIER): 5866 return self.expression(exp.Identifier, this=self._prev.text, quoted=True) 5867 return self._parse_placeholder() 5868 5869 def _parse_var( 5870 self, 5871 any_token: bool = False, 5872 tokens: t.Optional[t.Collection[TokenType]] = None, 5873 upper: bool = False, 5874 ) -> t.Optional[exp.Expression]: 5875 if ( 5876 (any_token and self._advance_any()) 5877 or self._match(TokenType.VAR) 5878 or (self._match_set(tokens) if tokens else False) 5879 ): 5880 return self.expression( 5881 exp.Var, this=self._prev.text.upper() if upper else self._prev.text 5882 ) 5883 return self._parse_placeholder() 5884 5885 def _advance_any(self, ignore_reserved: bool = False) -> t.Optional[Token]: 5886 if self._curr and (ignore_reserved or self._curr.token_type not in self.RESERVED_TOKENS): 5887 self._advance() 5888 return self._prev 5889 return None 5890 5891 def _parse_var_or_string(self) -> t.Optional[exp.Expression]: 5892 return self._parse_var() or self._parse_string() 5893 5894 def _parse_primary_or_var(self) -> t.Optional[exp.Expression]: 5895 return self._parse_primary() or self._parse_var(any_token=True) 5896 5897 def _parse_null(self) -> t.Optional[exp.Expression]: 5898 if self._match_set(self.NULL_TOKENS): 5899 return self.PRIMARY_PARSERS[TokenType.NULL](self, self._prev) 5900 return self._parse_placeholder() 5901 5902 def _parse_boolean(self) -> t.Optional[exp.Expression]: 5903 if self._match(TokenType.TRUE): 5904 return self.PRIMARY_PARSERS[TokenType.TRUE](self, self._prev) 5905 if self._match(TokenType.FALSE): 5906 return self.PRIMARY_PARSERS[TokenType.FALSE](self, self._prev) 5907 return self._parse_placeholder() 5908 5909 def _parse_star(self) -> t.Optional[exp.Expression]: 5910 if self._match(TokenType.STAR): 5911 return self.PRIMARY_PARSERS[TokenType.STAR](self, self._prev) 5912 return self._parse_placeholder() 5913 5914 def _parse_parameter(self) -> exp.Parameter: 5915 this = self._parse_identifier() or self._parse_primary_or_var() 5916 return self.expression(exp.Parameter, this=this) 5917 5918 def _parse_placeholder(self) -> t.Optional[exp.Expression]: 5919 if self._match_set(self.PLACEHOLDER_PARSERS): 5920 placeholder = self.PLACEHOLDER_PARSERS[self._prev.token_type](self) 5921 if placeholder: 5922 return placeholder 5923 self._advance(-1) 5924 return None 5925 5926 def _parse_star_op(self, *keywords: str) -> t.Optional[t.List[exp.Expression]]: 5927 if not self._match_texts(keywords): 5928 return None 5929 if self._match(TokenType.L_PAREN, advance=False): 5930 return 
self._parse_wrapped_csv(self._parse_expression) 5931 5932 expression = self._parse_expression() 5933 return [expression] if expression else None 5934 5935 def _parse_csv( 5936 self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA 5937 ) -> t.List[exp.Expression]: 5938 parse_result = parse_method() 5939 items = [parse_result] if parse_result is not None else [] 5940 5941 while self._match(sep): 5942 self._add_comments(parse_result) 5943 parse_result = parse_method() 5944 if parse_result is not None: 5945 items.append(parse_result) 5946 5947 return items 5948 5949 def _parse_tokens( 5950 self, parse_method: t.Callable, expressions: t.Dict 5951 ) -> t.Optional[exp.Expression]: 5952 this = parse_method() 5953 5954 while self._match_set(expressions): 5955 this = self.expression( 5956 expressions[self._prev.token_type], 5957 this=this, 5958 comments=self._prev_comments, 5959 expression=parse_method(), 5960 ) 5961 5962 return this 5963 5964 def _parse_wrapped_id_vars(self, optional: bool = False) -> t.List[exp.Expression]: 5965 return self._parse_wrapped_csv(self._parse_id_var, optional=optional) 5966 5967 def _parse_wrapped_csv( 5968 self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA, optional: bool = False 5969 ) -> t.List[exp.Expression]: 5970 return self._parse_wrapped( 5971 lambda: self._parse_csv(parse_method, sep=sep), optional=optional 5972 ) 5973 5974 def _parse_wrapped(self, parse_method: t.Callable, optional: bool = False) -> t.Any: 5975 wrapped = self._match(TokenType.L_PAREN) 5976 if not wrapped and not optional: 5977 self.raise_error("Expecting (") 5978 parse_result = parse_method() 5979 if wrapped: 5980 self._match_r_paren() 5981 return parse_result 5982 5983 def _parse_expressions(self) -> t.List[exp.Expression]: 5984 return self._parse_csv(self._parse_expression) 5985 5986 def _parse_select_or_expression(self, alias: bool = False) -> t.Optional[exp.Expression]: 5987 return self._parse_select() or self._parse_set_operations( 5988 self._parse_expression() if alias else self._parse_assignment() 5989 ) 5990 5991 def _parse_ddl_select(self) -> t.Optional[exp.Expression]: 5992 return self._parse_query_modifiers( 5993 self._parse_set_operations(self._parse_select(nested=True, parse_subquery_alias=False)) 5994 ) 5995 5996 def _parse_transaction(self) -> exp.Transaction | exp.Command: 5997 this = None 5998 if self._match_texts(self.TRANSACTION_KIND): 5999 this = self._prev.text 6000 6001 self._match_texts(("TRANSACTION", "WORK")) 6002 6003 modes = [] 6004 while True: 6005 mode = [] 6006 while self._match(TokenType.VAR): 6007 mode.append(self._prev.text) 6008 6009 if mode: 6010 modes.append(" ".join(mode)) 6011 if not self._match(TokenType.COMMA): 6012 break 6013 6014 return self.expression(exp.Transaction, this=this, modes=modes) 6015 6016 def _parse_commit_or_rollback(self) -> exp.Commit | exp.Rollback: 6017 chain = None 6018 savepoint = None 6019 is_rollback = self._prev.token_type == TokenType.ROLLBACK 6020 6021 self._match_texts(("TRANSACTION", "WORK")) 6022 6023 if self._match_text_seq("TO"): 6024 self._match_text_seq("SAVEPOINT") 6025 savepoint = self._parse_id_var() 6026 6027 if self._match(TokenType.AND): 6028 chain = not self._match_text_seq("NO") 6029 self._match_text_seq("CHAIN") 6030 6031 if is_rollback: 6032 return self.expression(exp.Rollback, savepoint=savepoint) 6033 6034 return self.expression(exp.Commit, chain=chain) 6035 6036 def _parse_refresh(self) -> exp.Refresh: 6037 self._match(TokenType.TABLE) 6038 return self.expression(exp.Refresh, 
this=self._parse_string() or self._parse_table()) 6039 6040 def _parse_add_column(self) -> t.Optional[exp.Expression]: 6041 if not self._match_text_seq("ADD"): 6042 return None 6043 6044 self._match(TokenType.COLUMN) 6045 exists_column = self._parse_exists(not_=True) 6046 expression = self._parse_field_def() 6047 6048 if expression: 6049 expression.set("exists", exists_column) 6050 6051 # https://docs.databricks.com/delta/update-schema.html#explicitly-update-schema-to-add-columns 6052 if self._match_texts(("FIRST", "AFTER")): 6053 position = self._prev.text 6054 column_position = self.expression( 6055 exp.ColumnPosition, this=self._parse_column(), position=position 6056 ) 6057 expression.set("position", column_position) 6058 6059 return expression 6060 6061 def _parse_drop_column(self) -> t.Optional[exp.Drop | exp.Command]: 6062 drop = self._match(TokenType.DROP) and self._parse_drop() 6063 if drop and not isinstance(drop, exp.Command): 6064 drop.set("kind", drop.args.get("kind", "COLUMN")) 6065 return drop 6066 6067 # https://docs.aws.amazon.com/athena/latest/ug/alter-table-drop-partition.html 6068 def _parse_drop_partition(self, exists: t.Optional[bool] = None) -> exp.DropPartition: 6069 return self.expression( 6070 exp.DropPartition, expressions=self._parse_csv(self._parse_partition), exists=exists 6071 ) 6072 6073 def _parse_alter_table_add(self) -> t.List[exp.Expression]: 6074 index = self._index - 1 6075 6076 if self._match_set(self.ADD_CONSTRAINT_TOKENS, advance=False): 6077 return self._parse_csv( 6078 lambda: self.expression( 6079 exp.AddConstraint, expressions=self._parse_csv(self._parse_constraint) 6080 ) 6081 ) 6082 6083 self._retreat(index) 6084 if not self.ALTER_TABLE_ADD_REQUIRED_FOR_EACH_COLUMN and self._match_text_seq("ADD"): 6085 return self._parse_wrapped_csv(self._parse_field_def, optional=True) 6086 return self._parse_wrapped_csv(self._parse_add_column, optional=True) 6087 6088 def _parse_alter_table_alter(self) -> t.Optional[exp.Expression]: 6089 if self._match_texts(self.ALTER_ALTER_PARSERS): 6090 return self.ALTER_ALTER_PARSERS[self._prev.text.upper()](self) 6091 6092 # Many dialects support the ALTER [COLUMN] syntax, so if there is no 6093 # keyword after ALTER we default to parsing this statement 6094 self._match(TokenType.COLUMN) 6095 column = self._parse_field(any_token=True) 6096 6097 if self._match_pair(TokenType.DROP, TokenType.DEFAULT): 6098 return self.expression(exp.AlterColumn, this=column, drop=True) 6099 if self._match_pair(TokenType.SET, TokenType.DEFAULT): 6100 return self.expression(exp.AlterColumn, this=column, default=self._parse_assignment()) 6101 if self._match(TokenType.COMMENT): 6102 return self.expression(exp.AlterColumn, this=column, comment=self._parse_string()) 6103 if self._match_text_seq("DROP", "NOT", "NULL"): 6104 return self.expression( 6105 exp.AlterColumn, 6106 this=column, 6107 drop=True, 6108 allow_null=True, 6109 ) 6110 if self._match_text_seq("SET", "NOT", "NULL"): 6111 return self.expression( 6112 exp.AlterColumn, 6113 this=column, 6114 allow_null=False, 6115 ) 6116 self._match_text_seq("SET", "DATA") 6117 self._match_text_seq("TYPE") 6118 return self.expression( 6119 exp.AlterColumn, 6120 this=column, 6121 dtype=self._parse_types(), 6122 collate=self._match(TokenType.COLLATE) and self._parse_term(), 6123 using=self._match(TokenType.USING) and self._parse_assignment(), 6124 ) 6125 6126 def _parse_alter_diststyle(self) -> exp.AlterDistStyle: 6127 if self._match_texts(("ALL", "EVEN", "AUTO")): 6128 return 
self.expression(exp.AlterDistStyle, this=exp.var(self._prev.text.upper())) 6129 6130 self._match_text_seq("KEY", "DISTKEY") 6131 return self.expression(exp.AlterDistStyle, this=self._parse_column()) 6132 6133 def _parse_alter_sortkey(self, compound: t.Optional[bool] = None) -> exp.AlterSortKey: 6134 if compound: 6135 self._match_text_seq("SORTKEY") 6136 6137 if self._match(TokenType.L_PAREN, advance=False): 6138 return self.expression( 6139 exp.AlterSortKey, expressions=self._parse_wrapped_id_vars(), compound=compound 6140 ) 6141 6142 self._match_texts(("AUTO", "NONE")) 6143 return self.expression( 6144 exp.AlterSortKey, this=exp.var(self._prev.text.upper()), compound=compound 6145 ) 6146 6147 def _parse_alter_table_drop(self) -> t.List[exp.Expression]: 6148 index = self._index - 1 6149 6150 partition_exists = self._parse_exists() 6151 if self._match(TokenType.PARTITION, advance=False): 6152 return self._parse_csv(lambda: self._parse_drop_partition(exists=partition_exists)) 6153 6154 self._retreat(index) 6155 return self._parse_csv(self._parse_drop_column) 6156 6157 def _parse_alter_table_rename(self) -> t.Optional[exp.RenameTable | exp.RenameColumn]: 6158 if self._match(TokenType.COLUMN): 6159 exists = self._parse_exists() 6160 old_column = self._parse_column() 6161 to = self._match_text_seq("TO") 6162 new_column = self._parse_column() 6163 6164 if old_column is None or to is None or new_column is None: 6165 return None 6166 6167 return self.expression(exp.RenameColumn, this=old_column, to=new_column, exists=exists) 6168 6169 self._match_text_seq("TO") 6170 return self.expression(exp.RenameTable, this=self._parse_table(schema=True)) 6171 6172 def _parse_alter_table_set(self) -> exp.AlterSet: 6173 alter_set = self.expression(exp.AlterSet) 6174 6175 if self._match(TokenType.L_PAREN, advance=False) or self._match_text_seq( 6176 "TABLE", "PROPERTIES" 6177 ): 6178 alter_set.set("expressions", self._parse_wrapped_csv(self._parse_assignment)) 6179 elif self._match_text_seq("FILESTREAM_ON", advance=False): 6180 alter_set.set("expressions", [self._parse_assignment()]) 6181 elif self._match_texts(("LOGGED", "UNLOGGED")): 6182 alter_set.set("option", exp.var(self._prev.text.upper())) 6183 elif self._match_text_seq("WITHOUT") and self._match_texts(("CLUSTER", "OIDS")): 6184 alter_set.set("option", exp.var(f"WITHOUT {self._prev.text.upper()}")) 6185 elif self._match_text_seq("LOCATION"): 6186 alter_set.set("location", self._parse_field()) 6187 elif self._match_text_seq("ACCESS", "METHOD"): 6188 alter_set.set("access_method", self._parse_field()) 6189 elif self._match_text_seq("TABLESPACE"): 6190 alter_set.set("tablespace", self._parse_field()) 6191 elif self._match_text_seq("FILE", "FORMAT") or self._match_text_seq("FILEFORMAT"): 6192 alter_set.set("file_format", [self._parse_field()]) 6193 elif self._match_text_seq("STAGE_FILE_FORMAT"): 6194 alter_set.set("file_format", self._parse_wrapped_options()) 6195 elif self._match_text_seq("STAGE_COPY_OPTIONS"): 6196 alter_set.set("copy_options", self._parse_wrapped_options()) 6197 elif self._match_text_seq("TAG") or self._match_text_seq("TAGS"): 6198 alter_set.set("tag", self._parse_csv(self._parse_assignment)) 6199 else: 6200 if self._match_text_seq("SERDE"): 6201 alter_set.set("serde", self._parse_field()) 6202 6203 alter_set.set("expressions", [self._parse_properties()]) 6204 6205 return alter_set 6206 6207 def _parse_alter(self) -> exp.AlterTable | exp.Command: 6208 start = self._prev 6209 6210 if not self._match(TokenType.TABLE): 6211 return 
self._parse_as_command(start) 6212 6213 exists = self._parse_exists() 6214 only = self._match_text_seq("ONLY") 6215 this = self._parse_table(schema=True) 6216 cluster = self._parse_on_property() if self._match(TokenType.ON) else None 6217 6218 if self._next: 6219 self._advance() 6220 6221 parser = self.ALTER_PARSERS.get(self._prev.text.upper()) if self._prev else None 6222 if parser: 6223 actions = ensure_list(parser(self)) 6224 options = self._parse_csv(self._parse_property) 6225 6226 if not self._curr and actions: 6227 return self.expression( 6228 exp.AlterTable, 6229 this=this, 6230 exists=exists, 6231 actions=actions, 6232 only=only, 6233 options=options, 6234 cluster=cluster, 6235 ) 6236 6237 return self._parse_as_command(start) 6238 6239 def _parse_merge(self) -> exp.Merge: 6240 self._match(TokenType.INTO) 6241 target = self._parse_table() 6242 6243 if target and self._match(TokenType.ALIAS, advance=False): 6244 target.set("alias", self._parse_table_alias()) 6245 6246 self._match(TokenType.USING) 6247 using = self._parse_table() 6248 6249 self._match(TokenType.ON) 6250 on = self._parse_assignment() 6251 6252 return self.expression( 6253 exp.Merge, 6254 this=target, 6255 using=using, 6256 on=on, 6257 expressions=self._parse_when_matched(), 6258 ) 6259 6260 def _parse_when_matched(self) -> t.List[exp.When]: 6261 whens = [] 6262 6263 while self._match(TokenType.WHEN): 6264 matched = not self._match(TokenType.NOT) 6265 self._match_text_seq("MATCHED") 6266 source = ( 6267 False 6268 if self._match_text_seq("BY", "TARGET") 6269 else self._match_text_seq("BY", "SOURCE") 6270 ) 6271 condition = self._parse_assignment() if self._match(TokenType.AND) else None 6272 6273 self._match(TokenType.THEN) 6274 6275 if self._match(TokenType.INSERT): 6276 _this = self._parse_star() 6277 if _this: 6278 then: t.Optional[exp.Expression] = self.expression(exp.Insert, this=_this) 6279 else: 6280 then = self.expression( 6281 exp.Insert, 6282 this=self._parse_value(), 6283 expression=self._match_text_seq("VALUES") and self._parse_value(), 6284 ) 6285 elif self._match(TokenType.UPDATE): 6286 expressions = self._parse_star() 6287 if expressions: 6288 then = self.expression(exp.Update, expressions=expressions) 6289 else: 6290 then = self.expression( 6291 exp.Update, 6292 expressions=self._match(TokenType.SET) 6293 and self._parse_csv(self._parse_equality), 6294 ) 6295 elif self._match(TokenType.DELETE): 6296 then = self.expression(exp.Var, this=self._prev.text) 6297 else: 6298 then = None 6299 6300 whens.append( 6301 self.expression( 6302 exp.When, 6303 matched=matched, 6304 source=source, 6305 condition=condition, 6306 then=then, 6307 ) 6308 ) 6309 return whens 6310 6311 def _parse_show(self) -> t.Optional[exp.Expression]: 6312 parser = self._find_parser(self.SHOW_PARSERS, self.SHOW_TRIE) 6313 if parser: 6314 return parser(self) 6315 return self._parse_as_command(self._prev) 6316 6317 def _parse_set_item_assignment( 6318 self, kind: t.Optional[str] = None 6319 ) -> t.Optional[exp.Expression]: 6320 index = self._index 6321 6322 if kind in ("GLOBAL", "SESSION") and self._match_text_seq("TRANSACTION"): 6323 return self._parse_set_transaction(global_=kind == "GLOBAL") 6324 6325 left = self._parse_primary() or self._parse_column() 6326 assignment_delimiter = self._match_texts(("=", "TO")) 6327 6328 if not left or (self.SET_REQUIRES_ASSIGNMENT_DELIMITER and not assignment_delimiter): 6329 self._retreat(index) 6330 return None 6331 6332 right = self._parse_statement() or self._parse_id_var() 6333 if isinstance(right, 
(exp.Column, exp.Identifier)): 6334 right = exp.var(right.name) 6335 6336 this = self.expression(exp.EQ, this=left, expression=right) 6337 return self.expression(exp.SetItem, this=this, kind=kind) 6338 6339 def _parse_set_transaction(self, global_: bool = False) -> exp.Expression: 6340 self._match_text_seq("TRANSACTION") 6341 characteristics = self._parse_csv( 6342 lambda: self._parse_var_from_options(self.TRANSACTION_CHARACTERISTICS) 6343 ) 6344 return self.expression( 6345 exp.SetItem, 6346 expressions=characteristics, 6347 kind="TRANSACTION", 6348 **{"global": global_}, # type: ignore 6349 ) 6350 6351 def _parse_set_item(self) -> t.Optional[exp.Expression]: 6352 parser = self._find_parser(self.SET_PARSERS, self.SET_TRIE) 6353 return parser(self) if parser else self._parse_set_item_assignment(kind=None) 6354 6355 def _parse_set(self, unset: bool = False, tag: bool = False) -> exp.Set | exp.Command: 6356 index = self._index 6357 set_ = self.expression( 6358 exp.Set, expressions=self._parse_csv(self._parse_set_item), unset=unset, tag=tag 6359 ) 6360 6361 if self._curr: 6362 self._retreat(index) 6363 return self._parse_as_command(self._prev) 6364 6365 return set_ 6366 6367 def _parse_var_from_options( 6368 self, options: OPTIONS_TYPE, raise_unmatched: bool = True 6369 ) -> t.Optional[exp.Var]: 6370 start = self._curr 6371 if not start: 6372 return None 6373 6374 option = start.text.upper() 6375 continuations = options.get(option) 6376 6377 index = self._index 6378 self._advance() 6379 for keywords in continuations or []: 6380 if isinstance(keywords, str): 6381 keywords = (keywords,) 6382 6383 if self._match_text_seq(*keywords): 6384 option = f"{option} {' '.join(keywords)}" 6385 break 6386 else: 6387 if continuations or continuations is None: 6388 if raise_unmatched: 6389 self.raise_error(f"Unknown option {option}") 6390 6391 self._retreat(index) 6392 return None 6393 6394 return exp.var(option) 6395 6396 def _parse_as_command(self, start: Token) -> exp.Command: 6397 while self._curr: 6398 self._advance() 6399 text = self._find_sql(start, self._prev) 6400 size = len(start.text) 6401 self._warn_unsupported() 6402 return exp.Command(this=text[:size], expression=text[size:]) 6403 6404 def _parse_dict_property(self, this: str) -> exp.DictProperty: 6405 settings = [] 6406 6407 self._match_l_paren() 6408 kind = self._parse_id_var() 6409 6410 if self._match(TokenType.L_PAREN): 6411 while True: 6412 key = self._parse_id_var() 6413 value = self._parse_primary() 6414 6415 if not key and value is None: 6416 break 6417 settings.append(self.expression(exp.DictSubProperty, this=key, value=value)) 6418 self._match(TokenType.R_PAREN) 6419 6420 self._match_r_paren() 6421 6422 return self.expression( 6423 exp.DictProperty, 6424 this=this, 6425 kind=kind.this if kind else None, 6426 settings=settings, 6427 ) 6428 6429 def _parse_dict_range(self, this: str) -> exp.DictRange: 6430 self._match_l_paren() 6431 has_min = self._match_text_seq("MIN") 6432 if has_min: 6433 min = self._parse_var() or self._parse_primary() 6434 self._match_text_seq("MAX") 6435 max = self._parse_var() or self._parse_primary() 6436 else: 6437 max = self._parse_var() or self._parse_primary() 6438 min = exp.Literal.number(0) 6439 self._match_r_paren() 6440 return self.expression(exp.DictRange, this=this, min=min, max=max) 6441 6442 def _parse_comprehension( 6443 self, this: t.Optional[exp.Expression] 6444 ) -> t.Optional[exp.Comprehension]: 6445 index = self._index 6446 expression = self._parse_column() 6447 if not 
self._match(TokenType.IN): 6448 self._retreat(index - 1) 6449 return None 6450 iterator = self._parse_column() 6451 condition = self._parse_assignment() if self._match_text_seq("IF") else None 6452 return self.expression( 6453 exp.Comprehension, 6454 this=this, 6455 expression=expression, 6456 iterator=iterator, 6457 condition=condition, 6458 ) 6459 6460 def _parse_heredoc(self) -> t.Optional[exp.Heredoc]: 6461 if self._match(TokenType.HEREDOC_STRING): 6462 return self.expression(exp.Heredoc, this=self._prev.text) 6463 6464 if not self._match_text_seq("$"): 6465 return None 6466 6467 tags = ["$"] 6468 tag_text = None 6469 6470 if self._is_connected(): 6471 self._advance() 6472 tags.append(self._prev.text.upper()) 6473 else: 6474 self.raise_error("No closing $ found") 6475 6476 if tags[-1] != "$": 6477 if self._is_connected() and self._match_text_seq("$"): 6478 tag_text = tags[-1] 6479 tags.append("$") 6480 else: 6481 self.raise_error("No closing $ found") 6482 6483 heredoc_start = self._curr 6484 6485 while self._curr: 6486 if self._match_text_seq(*tags, advance=False): 6487 this = self._find_sql(heredoc_start, self._prev) 6488 self._advance(len(tags)) 6489 return self.expression(exp.Heredoc, this=this, tag=tag_text) 6490 6491 self._advance() 6492 6493 self.raise_error(f"No closing {''.join(tags)} found") 6494 return None 6495 6496 def _find_parser( 6497 self, parsers: t.Dict[str, t.Callable], trie: t.Dict 6498 ) -> t.Optional[t.Callable]: 6499 if not self._curr: 6500 return None 6501 6502 index = self._index 6503 this = [] 6504 while True: 6505 # The current token might be multiple words 6506 curr = self._curr.text.upper() 6507 key = curr.split(" ") 6508 this.append(curr) 6509 6510 self._advance() 6511 result, trie = in_trie(trie, key) 6512 if result == TrieResult.FAILED: 6513 break 6514 6515 if result == TrieResult.EXISTS: 6516 subparser = parsers[" ".join(this)] 6517 return subparser 6518 6519 self._retreat(index) 6520 return None 6521 6522 def _match(self, token_type, advance=True, expression=None): 6523 if not self._curr: 6524 return None 6525 6526 if self._curr.token_type == token_type: 6527 if advance: 6528 self._advance() 6529 self._add_comments(expression) 6530 return True 6531 6532 return None 6533 6534 def _match_set(self, types, advance=True): 6535 if not self._curr: 6536 return None 6537 6538 if self._curr.token_type in types: 6539 if advance: 6540 self._advance() 6541 return True 6542 6543 return None 6544 6545 def _match_pair(self, token_type_a, token_type_b, advance=True): 6546 if not self._curr or not self._next: 6547 return None 6548 6549 if self._curr.token_type == token_type_a and self._next.token_type == token_type_b: 6550 if advance: 6551 self._advance(2) 6552 return True 6553 6554 return None 6555 6556 def _match_l_paren(self, expression: t.Optional[exp.Expression] = None) -> None: 6557 if not self._match(TokenType.L_PAREN, expression=expression): 6558 self.raise_error("Expecting (") 6559 6560 def _match_r_paren(self, expression: t.Optional[exp.Expression] = None) -> None: 6561 if not self._match(TokenType.R_PAREN, expression=expression): 6562 self.raise_error("Expecting )") 6563 6564 def _match_texts(self, texts, advance=True): 6565 if self._curr and self._curr.text.upper() in texts: 6566 if advance: 6567 self._advance() 6568 return True 6569 return None 6570 6571 def _match_text_seq(self, *texts, advance=True): 6572 index = self._index 6573 for text in texts: 6574 if self._curr and self._curr.text.upper() == text: 6575 self._advance() 6576 else: 6577 
self._retreat(index) 6578 return None 6579 6580 if not advance: 6581 self._retreat(index) 6582 6583 return True 6584 6585 def _replace_lambda( 6586 self, node: t.Optional[exp.Expression], expressions: t.List[exp.Expression] 6587 ) -> t.Optional[exp.Expression]: 6588 if not node: 6589 return node 6590 6591 lambda_types = {e.name: e.args.get("to") or False for e in expressions} 6592 6593 for column in node.find_all(exp.Column): 6594 typ = lambda_types.get(column.parts[0].name) 6595 if typ is not None: 6596 dot_or_id = column.to_dot() if column.table else column.this 6597 6598 if typ: 6599 dot_or_id = self.expression( 6600 exp.Cast, 6601 this=dot_or_id, 6602 to=typ, 6603 ) 6604 6605 parent = column.parent 6606 6607 while isinstance(parent, exp.Dot): 6608 if not isinstance(parent.parent, exp.Dot): 6609 parent.replace(dot_or_id) 6610 break 6611 parent = parent.parent 6612 else: 6613 if column is node: 6614 node = dot_or_id 6615 else: 6616 column.replace(dot_or_id) 6617 return node 6618 6619 def _parse_truncate_table(self) -> t.Optional[exp.TruncateTable] | exp.Expression: 6620 start = self._prev 6621 6622 # Not to be confused with TRUNCATE(number, decimals) function call 6623 if self._match(TokenType.L_PAREN): 6624 self._retreat(self._index - 2) 6625 return self._parse_function() 6626 6627 # Clickhouse supports TRUNCATE DATABASE as well 6628 is_database = self._match(TokenType.DATABASE) 6629 6630 self._match(TokenType.TABLE) 6631 6632 exists = self._parse_exists(not_=False) 6633 6634 expressions = self._parse_csv( 6635 lambda: self._parse_table(schema=True, is_db_reference=is_database) 6636 ) 6637 6638 cluster = self._parse_on_property() if self._match(TokenType.ON) else None 6639 6640 if self._match_text_seq("RESTART", "IDENTITY"): 6641 identity = "RESTART" 6642 elif self._match_text_seq("CONTINUE", "IDENTITY"): 6643 identity = "CONTINUE" 6644 else: 6645 identity = None 6646 6647 if self._match_text_seq("CASCADE") or self._match_text_seq("RESTRICT"): 6648 option = self._prev.text 6649 else: 6650 option = None 6651 6652 partition = self._parse_partition() 6653 6654 # Fallback case 6655 if self._curr: 6656 return self._parse_as_command(start) 6657 6658 return self.expression( 6659 exp.TruncateTable, 6660 expressions=expressions, 6661 is_database=is_database, 6662 exists=exists, 6663 cluster=cluster, 6664 identity=identity, 6665 option=option, 6666 partition=partition, 6667 ) 6668 6669 def _parse_with_operator(self) -> t.Optional[exp.Expression]: 6670 this = self._parse_ordered(self._parse_opclass) 6671 6672 if not self._match(TokenType.WITH): 6673 return this 6674 6675 op = self._parse_var(any_token=True) 6676 6677 return self.expression(exp.WithOperator, this=this, op=op) 6678 6679 def _parse_wrapped_options(self) -> t.List[t.Optional[exp.Expression]]: 6680 self._match(TokenType.EQ) 6681 self._match(TokenType.L_PAREN) 6682 6683 opts: t.List[t.Optional[exp.Expression]] = [] 6684 while self._curr and not self._match(TokenType.R_PAREN): 6685 if self._match_text_seq("FORMAT_NAME", "="): 6686 # The FORMAT_NAME can be set to an identifier for Snowflake and T-SQL, 6687 # so we parse it separately to use _parse_field() 6688 prop = self.expression( 6689 exp.Property, this=exp.var("FORMAT_NAME"), value=self._parse_field() 6690 ) 6691 opts.append(prop) 6692 else: 6693 opts.append(self._parse_property()) 6694 6695 self._match(TokenType.COMMA) 6696 6697 return opts 6698 6699 def _parse_copy_parameters(self) -> t.List[exp.CopyParameter]: 6700 sep = TokenType.COMMA if self.dialect.COPY_PARAMS_ARE_CSV else 
None 6701 6702 options = [] 6703 while self._curr and not self._match(TokenType.R_PAREN, advance=False): 6704 option = self._parse_var(any_token=True) 6705 prev = self._prev.text.upper() 6706 6707 # Different dialects might separate options and values by white space, "=" and "AS" 6708 self._match(TokenType.EQ) 6709 self._match(TokenType.ALIAS) 6710 6711 param = self.expression(exp.CopyParameter, this=option) 6712 6713 if prev in self.COPY_INTO_VARLEN_OPTIONS and self._match( 6714 TokenType.L_PAREN, advance=False 6715 ): 6716 # Snowflake FILE_FORMAT case, Databricks COPY & FORMAT options 6717 param.set("expressions", self._parse_wrapped_options()) 6718 elif prev == "FILE_FORMAT": 6719 # T-SQL's external file format case 6720 param.set("expression", self._parse_field()) 6721 else: 6722 param.set("expression", self._parse_unquoted_field()) 6723 6724 options.append(param) 6725 self._match(sep) 6726 6727 return options 6728 6729 def _parse_credentials(self) -> t.Optional[exp.Credentials]: 6730 expr = self.expression(exp.Credentials) 6731 6732 if self._match_text_seq("STORAGE_INTEGRATION", "="): 6733 expr.set("storage", self._parse_field()) 6734 if self._match_text_seq("CREDENTIALS"): 6735 # Snowflake case: CREDENTIALS = (...), Redshift case: CREDENTIALS <string> 6736 creds = ( 6737 self._parse_wrapped_options() if self._match(TokenType.EQ) else self._parse_field() 6738 ) 6739 expr.set("credentials", creds) 6740 if self._match_text_seq("ENCRYPTION"): 6741 expr.set("encryption", self._parse_wrapped_options()) 6742 if self._match_text_seq("IAM_ROLE"): 6743 expr.set("iam_role", self._parse_field()) 6744 if self._match_text_seq("REGION"): 6745 expr.set("region", self._parse_field()) 6746 6747 return expr 6748 6749 def _parse_file_location(self) -> t.Optional[exp.Expression]: 6750 return self._parse_field() 6751 6752 def _parse_copy(self) -> exp.Copy | exp.Command: 6753 start = self._prev 6754 6755 self._match(TokenType.INTO) 6756 6757 this = ( 6758 self._parse_select(nested=True, parse_subquery_alias=False) 6759 if self._match(TokenType.L_PAREN, advance=False) 6760 else self._parse_table(schema=True) 6761 ) 6762 6763 kind = self._match(TokenType.FROM) or not self._match_text_seq("TO") 6764 6765 files = self._parse_csv(self._parse_file_location) 6766 credentials = self._parse_credentials() 6767 6768 self._match_text_seq("WITH") 6769 6770 params = self._parse_wrapped(self._parse_copy_parameters, optional=True) 6771 6772 # Fallback case 6773 if self._curr: 6774 return self._parse_as_command(start) 6775 6776 return self.expression( 6777 exp.Copy, 6778 this=this, 6779 kind=kind, 6780 credentials=credentials, 6781 files=files, 6782 params=params, 6783 )
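The _parse_decode docstring above states that the multi-argument variant is always lowered to a CASE expression, with NULL searches turned into explicit IS NULL tests. A minimal sketch of that behavior, assuming the Oracle dialect routes DECODE through this method; the printed output is illustrative, not verbatim:

# Illustrative sketch (not part of the module source): the search/result
# variant of DECODE parses straight into a CASE expression, and a NULL
# search becomes an explicit IS NULL test rather than an equality check.
from sqlglot import exp, parse_one

ast = parse_one("SELECT DECODE(x, 1, 'one', NULL, 'none', 'other') FROM t", read="oracle")
case = ast.find(exp.Case)

assert case is not None
# roughly: CASE WHEN x = 1 THEN 'one' WHEN x IS NULL THEN 'none' ELSE 'other' END
print(case.sql())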
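The comments in _parse_string_agg describe the Postgres and BigQuery argument shapes and note that WITHIN GROUP is parsed manually so the call stays easy to transpile to MySQL / SQLite. A sketch of both spellings landing on exp.GroupConcat; outputs are approximate:

# Illustrative sketch: the inline ORDER BY form (Postgres) and the
# WITHIN GROUP form (T-SQL) both normalize to exp.GroupConcat.
from sqlglot import exp, parse_one, transpile

inline = parse_one("SELECT STRING_AGG(x, ',' ORDER BY x) FROM t", read="postgres")
within = parse_one("SELECT STRING_AGG(x, ',') WITHIN GROUP (ORDER BY x) FROM t", read="tsql")

assert inline.find(exp.GroupConcat) is not None
assert within.find(exp.GroupConcat) is not None

# roughly: SELECT GROUP_CONCAT(x SEPARATOR ',') FROM t
print(transpile("SELECT STRING_AGG(x, ',') FROM t", read="postgres", write="mysql")[0])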
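_parse_window's comments note that Oracle and Snowflake accept IGNORE | RESPECT NULLS both inside the function call and after it. Under that assumption, both placements should normalize to the same wrapper node:

# Illustrative sketch: the two IGNORE NULLS placements discussed in
# _parse_window should each yield an exp.IgnoreNulls node around FIRST_VALUE.
from sqlglot import exp, parse_one

inside = parse_one("SELECT FIRST_VALUE(x IGNORE NULLS) OVER (ORDER BY y) FROM t", read="snowflake")
after = parse_one("SELECT FIRST_VALUE(x) IGNORE NULLS OVER (ORDER BY y) FROM t", read="snowflake")

assert inside.find(exp.IgnoreNulls) is not None
assert after.find(exp.IgnoreNulls) is not None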
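_parse_substring folds the Postgres substring(string FROM start FOR length) form, cited in its comment, into a plain three-argument exp.Substring. A small sketch; the regenerated SQL is approximate:

# Illustrative sketch: the FROM/FOR keywords are consumed and appended to
# the positional argument list, so the node round-trips as SUBSTRING(s, 2, 3).
from sqlglot import exp, parse_one

node = parse_one("SELECT SUBSTRING('abcdef' FROM 2 FOR 3)", read="postgres").find(exp.Substring)
assert node is not None
print(node.sql(dialect="duckdb"))  # roughly: SUBSTRING('abcdef', 2, 3)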
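Per the DuckDB link in _parse_bracket, a braced key/value literal is parsed into exp.Struct rather than a map or an array access. A sketch, assuming the DuckDB dialect tokenizes the braces as the L_BRACE branch above expects:

# Illustrative sketch: DuckDB's {'a': 1, 'b': 2} struct literal takes the
# L_BRACE branch of _parse_bracket and comes back as an exp.Struct node.
from sqlglot import exp, parse_one

struct = parse_one("SELECT {'a': 1, 'b': 2} AS s", read="duckdb").find(exp.Struct)
assert struct is not None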
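The fallback at the end of _parse_alter means an unrecognized action degrades to a generic exp.Command instead of failing outright; FROBNICATE WIDGETS below is a deliberately bogus action used only for illustration:

# Illustrative sketch: a known ALTER TABLE action parses into exp.AlterTable,
# while an unknown one falls back to exp.Command via _parse_as_command
# (which also logs an "unsupported" warning).
from sqlglot import exp, parse_one

assert isinstance(parse_one("ALTER TABLE t ADD COLUMN c INT"), exp.AlterTable)
assert isinstance(parse_one("ALTER TABLE t FROBNICATE WIDGETS"), exp.Command)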
122class Parser(metaclass=_Parser): 123 """ 124 Parser consumes a list of tokens produced by the Tokenizer and produces a parsed syntax tree. 125 126 Args: 127 error_level: The desired error level. 128 Default: ErrorLevel.IMMEDIATE 129 error_message_context: The amount of context to capture from a query string when displaying 130 the error message (in number of characters). 131 Default: 100 132 max_errors: Maximum number of error messages to include in a raised ParseError. 133 This is only relevant if error_level is ErrorLevel.RAISE. 134 Default: 3 135 """ 136 137 FUNCTIONS: t.Dict[str, t.Callable] = { 138 **{name: func.from_arg_list for name, func in exp.FUNCTION_BY_NAME.items()}, 139 "CONCAT": lambda args, dialect: exp.Concat( 140 expressions=args, 141 safe=not dialect.STRICT_STRING_CONCAT, 142 coalesce=dialect.CONCAT_COALESCE, 143 ), 144 "CONCAT_WS": lambda args, dialect: exp.ConcatWs( 145 expressions=args, 146 safe=not dialect.STRICT_STRING_CONCAT, 147 coalesce=dialect.CONCAT_COALESCE, 148 ), 149 "DATE_TO_DATE_STR": lambda args: exp.Cast( 150 this=seq_get(args, 0), 151 to=exp.DataType(this=exp.DataType.Type.TEXT), 152 ), 153 "GLOB": lambda args: exp.Glob(this=seq_get(args, 1), expression=seq_get(args, 0)), 154 "JSON_EXTRACT": build_extract_json_with_path(exp.JSONExtract), 155 "JSON_EXTRACT_SCALAR": build_extract_json_with_path(exp.JSONExtractScalar), 156 "JSON_EXTRACT_PATH_TEXT": build_extract_json_with_path(exp.JSONExtractScalar), 157 "LIKE": build_like, 158 "LOG": build_logarithm, 159 "LOG2": lambda args: exp.Log(this=exp.Literal.number(2), expression=seq_get(args, 0)), 160 "LOG10": lambda args: exp.Log(this=exp.Literal.number(10), expression=seq_get(args, 0)), 161 "MOD": build_mod, 162 "TIME_TO_TIME_STR": lambda args: exp.Cast( 163 this=seq_get(args, 0), 164 to=exp.DataType(this=exp.DataType.Type.TEXT), 165 ), 166 "TS_OR_DS_TO_DATE_STR": lambda args: exp.Substring( 167 this=exp.Cast( 168 this=seq_get(args, 0), 169 to=exp.DataType(this=exp.DataType.Type.TEXT), 170 ), 171 start=exp.Literal.number(1), 172 length=exp.Literal.number(10), 173 ), 174 "VAR_MAP": build_var_map, 175 "LOWER": build_lower, 176 "UPPER": build_upper, 177 "HEX": build_hex, 178 "TO_HEX": build_hex, 179 } 180 181 NO_PAREN_FUNCTIONS = { 182 TokenType.CURRENT_DATE: exp.CurrentDate, 183 TokenType.CURRENT_DATETIME: exp.CurrentDate, 184 TokenType.CURRENT_TIME: exp.CurrentTime, 185 TokenType.CURRENT_TIMESTAMP: exp.CurrentTimestamp, 186 TokenType.CURRENT_USER: exp.CurrentUser, 187 } 188 189 STRUCT_TYPE_TOKENS = { 190 TokenType.NESTED, 191 TokenType.OBJECT, 192 TokenType.STRUCT, 193 } 194 195 NESTED_TYPE_TOKENS = { 196 TokenType.ARRAY, 197 TokenType.LOWCARDINALITY, 198 TokenType.MAP, 199 TokenType.NULLABLE, 200 *STRUCT_TYPE_TOKENS, 201 } 202 203 ENUM_TYPE_TOKENS = { 204 TokenType.ENUM, 205 TokenType.ENUM8, 206 TokenType.ENUM16, 207 } 208 209 AGGREGATE_TYPE_TOKENS = { 210 TokenType.AGGREGATEFUNCTION, 211 TokenType.SIMPLEAGGREGATEFUNCTION, 212 } 213 214 TYPE_TOKENS = { 215 TokenType.BIT, 216 TokenType.BOOLEAN, 217 TokenType.TINYINT, 218 TokenType.UTINYINT, 219 TokenType.SMALLINT, 220 TokenType.USMALLINT, 221 TokenType.INT, 222 TokenType.UINT, 223 TokenType.BIGINT, 224 TokenType.UBIGINT, 225 TokenType.INT128, 226 TokenType.UINT128, 227 TokenType.INT256, 228 TokenType.UINT256, 229 TokenType.MEDIUMINT, 230 TokenType.UMEDIUMINT, 231 TokenType.FIXEDSTRING, 232 TokenType.FLOAT, 233 TokenType.DOUBLE, 234 TokenType.CHAR, 235 TokenType.NCHAR, 236 TokenType.VARCHAR, 237 TokenType.NVARCHAR, 238 TokenType.BPCHAR, 239 TokenType.TEXT, 240 
TokenType.MEDIUMTEXT, 241 TokenType.LONGTEXT, 242 TokenType.MEDIUMBLOB, 243 TokenType.LONGBLOB, 244 TokenType.BINARY, 245 TokenType.VARBINARY, 246 TokenType.JSON, 247 TokenType.JSONB, 248 TokenType.INTERVAL, 249 TokenType.TINYBLOB, 250 TokenType.TINYTEXT, 251 TokenType.TIME, 252 TokenType.TIMETZ, 253 TokenType.TIMESTAMP, 254 TokenType.TIMESTAMP_S, 255 TokenType.TIMESTAMP_MS, 256 TokenType.TIMESTAMP_NS, 257 TokenType.TIMESTAMPTZ, 258 TokenType.TIMESTAMPLTZ, 259 TokenType.TIMESTAMPNTZ, 260 TokenType.DATETIME, 261 TokenType.DATETIME64, 262 TokenType.DATE, 263 TokenType.DATE32, 264 TokenType.INT4RANGE, 265 TokenType.INT4MULTIRANGE, 266 TokenType.INT8RANGE, 267 TokenType.INT8MULTIRANGE, 268 TokenType.NUMRANGE, 269 TokenType.NUMMULTIRANGE, 270 TokenType.TSRANGE, 271 TokenType.TSMULTIRANGE, 272 TokenType.TSTZRANGE, 273 TokenType.TSTZMULTIRANGE, 274 TokenType.DATERANGE, 275 TokenType.DATEMULTIRANGE, 276 TokenType.DECIMAL, 277 TokenType.UDECIMAL, 278 TokenType.BIGDECIMAL, 279 TokenType.UUID, 280 TokenType.GEOGRAPHY, 281 TokenType.GEOMETRY, 282 TokenType.HLLSKETCH, 283 TokenType.HSTORE, 284 TokenType.PSEUDO_TYPE, 285 TokenType.SUPER, 286 TokenType.SERIAL, 287 TokenType.SMALLSERIAL, 288 TokenType.BIGSERIAL, 289 TokenType.XML, 290 TokenType.YEAR, 291 TokenType.UNIQUEIDENTIFIER, 292 TokenType.USERDEFINED, 293 TokenType.MONEY, 294 TokenType.SMALLMONEY, 295 TokenType.ROWVERSION, 296 TokenType.IMAGE, 297 TokenType.VARIANT, 298 TokenType.OBJECT, 299 TokenType.OBJECT_IDENTIFIER, 300 TokenType.INET, 301 TokenType.IPADDRESS, 302 TokenType.IPPREFIX, 303 TokenType.IPV4, 304 TokenType.IPV6, 305 TokenType.UNKNOWN, 306 TokenType.NULL, 307 TokenType.NAME, 308 TokenType.TDIGEST, 309 *ENUM_TYPE_TOKENS, 310 *NESTED_TYPE_TOKENS, 311 *AGGREGATE_TYPE_TOKENS, 312 } 313 314 SIGNED_TO_UNSIGNED_TYPE_TOKEN = { 315 TokenType.BIGINT: TokenType.UBIGINT, 316 TokenType.INT: TokenType.UINT, 317 TokenType.MEDIUMINT: TokenType.UMEDIUMINT, 318 TokenType.SMALLINT: TokenType.USMALLINT, 319 TokenType.TINYINT: TokenType.UTINYINT, 320 TokenType.DECIMAL: TokenType.UDECIMAL, 321 } 322 323 SUBQUERY_PREDICATES = { 324 TokenType.ANY: exp.Any, 325 TokenType.ALL: exp.All, 326 TokenType.EXISTS: exp.Exists, 327 TokenType.SOME: exp.Any, 328 } 329 330 RESERVED_TOKENS = { 331 *Tokenizer.SINGLE_TOKENS.values(), 332 TokenType.SELECT, 333 } - {TokenType.IDENTIFIER} 334 335 DB_CREATABLES = { 336 TokenType.DATABASE, 337 TokenType.DICTIONARY, 338 TokenType.MODEL, 339 TokenType.SCHEMA, 340 TokenType.SEQUENCE, 341 TokenType.STORAGE_INTEGRATION, 342 TokenType.TABLE, 343 TokenType.TAG, 344 TokenType.VIEW, 345 TokenType.WAREHOUSE, 346 TokenType.STREAMLIT, 347 } 348 349 CREATABLES = { 350 TokenType.COLUMN, 351 TokenType.CONSTRAINT, 352 TokenType.FOREIGN_KEY, 353 TokenType.FUNCTION, 354 TokenType.INDEX, 355 TokenType.PROCEDURE, 356 *DB_CREATABLES, 357 } 358 359 # Tokens that can represent identifiers 360 ID_VAR_TOKENS = { 361 TokenType.VAR, 362 TokenType.ANTI, 363 TokenType.APPLY, 364 TokenType.ASC, 365 TokenType.ASOF, 366 TokenType.AUTO_INCREMENT, 367 TokenType.BEGIN, 368 TokenType.BPCHAR, 369 TokenType.CACHE, 370 TokenType.CASE, 371 TokenType.COLLATE, 372 TokenType.COMMAND, 373 TokenType.COMMENT, 374 TokenType.COMMIT, 375 TokenType.CONSTRAINT, 376 TokenType.COPY, 377 TokenType.DEFAULT, 378 TokenType.DELETE, 379 TokenType.DESC, 380 TokenType.DESCRIBE, 381 TokenType.DICTIONARY, 382 TokenType.DIV, 383 TokenType.END, 384 TokenType.EXECUTE, 385 TokenType.ESCAPE, 386 TokenType.FALSE, 387 TokenType.FIRST, 388 TokenType.FILTER, 389 TokenType.FINAL, 390 TokenType.FORMAT, 
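# [Editor's note -- illustrative sketch, not part of the original source.]
# Because these keyword tokens are also listed in ID_VAR_TOKENS, they can
# still be parsed as plain identifiers where the grammar allows it:
#
#     >>> import sqlglot
#     >>> sqlglot.parse_one("SELECT format FROM t").expressions[0].name
#     'format'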
391 TokenType.FULL, 392 TokenType.IDENTIFIER, 393 TokenType.IS, 394 TokenType.ISNULL, 395 TokenType.INTERVAL, 396 TokenType.KEEP, 397 TokenType.KILL, 398 TokenType.LEFT, 399 TokenType.LOAD, 400 TokenType.MERGE, 401 TokenType.NATURAL, 402 TokenType.NEXT, 403 TokenType.OFFSET, 404 TokenType.OPERATOR, 405 TokenType.ORDINALITY, 406 TokenType.OVERLAPS, 407 TokenType.OVERWRITE, 408 TokenType.PARTITION, 409 TokenType.PERCENT, 410 TokenType.PIVOT, 411 TokenType.PRAGMA, 412 TokenType.RANGE, 413 TokenType.RECURSIVE, 414 TokenType.REFERENCES, 415 TokenType.REFRESH, 416 TokenType.REPLACE, 417 TokenType.RIGHT, 418 TokenType.ROLLUP, 419 TokenType.ROW, 420 TokenType.ROWS, 421 TokenType.SEMI, 422 TokenType.SET, 423 TokenType.SETTINGS, 424 TokenType.SHOW, 425 TokenType.TEMPORARY, 426 TokenType.TOP, 427 TokenType.TRUE, 428 TokenType.TRUNCATE, 429 TokenType.UNIQUE, 430 TokenType.UNNEST, 431 TokenType.UNPIVOT, 432 TokenType.UPDATE, 433 TokenType.USE, 434 TokenType.VOLATILE, 435 TokenType.WINDOW, 436 *CREATABLES, 437 *SUBQUERY_PREDICATES, 438 *TYPE_TOKENS, 439 *NO_PAREN_FUNCTIONS, 440 } 441 442 INTERVAL_VARS = ID_VAR_TOKENS - {TokenType.END} 443 444 TABLE_ALIAS_TOKENS = ID_VAR_TOKENS - { 445 TokenType.ANTI, 446 TokenType.APPLY, 447 TokenType.ASOF, 448 TokenType.FULL, 449 TokenType.LEFT, 450 TokenType.LOCK, 451 TokenType.NATURAL, 452 TokenType.OFFSET, 453 TokenType.RIGHT, 454 TokenType.SEMI, 455 TokenType.WINDOW, 456 } 457 458 ALIAS_TOKENS = ID_VAR_TOKENS 459 460 COMMENT_TABLE_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - {TokenType.IS} 461 462 UPDATE_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - {TokenType.SET} 463 464 TRIM_TYPES = {"LEADING", "TRAILING", "BOTH"} 465 466 FUNC_TOKENS = { 467 TokenType.COLLATE, 468 TokenType.COMMAND, 469 TokenType.CURRENT_DATE, 470 TokenType.CURRENT_DATETIME, 471 TokenType.CURRENT_TIMESTAMP, 472 TokenType.CURRENT_TIME, 473 TokenType.CURRENT_USER, 474 TokenType.FILTER, 475 TokenType.FIRST, 476 TokenType.FORMAT, 477 TokenType.GLOB, 478 TokenType.IDENTIFIER, 479 TokenType.INDEX, 480 TokenType.ISNULL, 481 TokenType.ILIKE, 482 TokenType.INSERT, 483 TokenType.LIKE, 484 TokenType.MERGE, 485 TokenType.OFFSET, 486 TokenType.PRIMARY_KEY, 487 TokenType.RANGE, 488 TokenType.REPLACE, 489 TokenType.RLIKE, 490 TokenType.ROW, 491 TokenType.UNNEST, 492 TokenType.VAR, 493 TokenType.LEFT, 494 TokenType.RIGHT, 495 TokenType.SEQUENCE, 496 TokenType.DATE, 497 TokenType.DATETIME, 498 TokenType.TABLE, 499 TokenType.TIMESTAMP, 500 TokenType.TIMESTAMPTZ, 501 TokenType.TRUNCATE, 502 TokenType.WINDOW, 503 TokenType.XOR, 504 *TYPE_TOKENS, 505 *SUBQUERY_PREDICATES, 506 } 507 508 CONJUNCTION: t.Dict[TokenType, t.Type[exp.Expression]] = { 509 TokenType.AND: exp.And, 510 } 511 512 ASSIGNMENT: t.Dict[TokenType, t.Type[exp.Expression]] = { 513 TokenType.COLON_EQ: exp.PropertyEQ, 514 } 515 516 DISJUNCTION: t.Dict[TokenType, t.Type[exp.Expression]] = { 517 TokenType.OR: exp.Or, 518 } 519 520 EQUALITY = { 521 TokenType.EQ: exp.EQ, 522 TokenType.NEQ: exp.NEQ, 523 TokenType.NULLSAFE_EQ: exp.NullSafeEQ, 524 } 525 526 COMPARISON = { 527 TokenType.GT: exp.GT, 528 TokenType.GTE: exp.GTE, 529 TokenType.LT: exp.LT, 530 TokenType.LTE: exp.LTE, 531 } 532 533 BITWISE = { 534 TokenType.AMP: exp.BitwiseAnd, 535 TokenType.CARET: exp.BitwiseXor, 536 TokenType.PIPE: exp.BitwiseOr, 537 } 538 539 TERM = { 540 TokenType.DASH: exp.Sub, 541 TokenType.PLUS: exp.Add, 542 TokenType.MOD: exp.Mod, 543 TokenType.COLLATE: exp.Collate, 544 } 545 546 FACTOR = { 547 TokenType.DIV: exp.IntDiv, 548 TokenType.LR_ARROW: exp.Distance, 549 TokenType.SLASH: exp.Div, 550 
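# [Editor's note -- illustrative sketch, not part of the original source.]
# The EQUALITY/COMPARISON/BITWISE/TERM/FACTOR tables drive a conventional
# precedence climb, so multiplication binds tighter than addition:
#
#     >>> import sqlglot
#     >>> type(sqlglot.parse_one("a + b * c")).__name__
#     'Add'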
TokenType.STAR: exp.Mul, 551 } 552 553 EXPONENT: t.Dict[TokenType, t.Type[exp.Expression]] = {} 554 555 TIMES = { 556 TokenType.TIME, 557 TokenType.TIMETZ, 558 } 559 560 TIMESTAMPS = { 561 TokenType.TIMESTAMP, 562 TokenType.TIMESTAMPTZ, 563 TokenType.TIMESTAMPLTZ, 564 *TIMES, 565 } 566 567 SET_OPERATIONS = { 568 TokenType.UNION, 569 TokenType.INTERSECT, 570 TokenType.EXCEPT, 571 } 572 573 JOIN_METHODS = { 574 TokenType.ASOF, 575 TokenType.NATURAL, 576 TokenType.POSITIONAL, 577 } 578 579 JOIN_SIDES = { 580 TokenType.LEFT, 581 TokenType.RIGHT, 582 TokenType.FULL, 583 } 584 585 JOIN_KINDS = { 586 TokenType.INNER, 587 TokenType.OUTER, 588 TokenType.CROSS, 589 TokenType.SEMI, 590 TokenType.ANTI, 591 } 592 593 JOIN_HINTS: t.Set[str] = set() 594 595 LAMBDAS = { 596 TokenType.ARROW: lambda self, expressions: self.expression( 597 exp.Lambda, 598 this=self._replace_lambda( 599 self._parse_assignment(), 600 expressions, 601 ), 602 expressions=expressions, 603 ), 604 TokenType.FARROW: lambda self, expressions: self.expression( 605 exp.Kwarg, 606 this=exp.var(expressions[0].name), 607 expression=self._parse_assignment(), 608 ), 609 } 610 611 COLUMN_OPERATORS = { 612 TokenType.DOT: None, 613 TokenType.DCOLON: lambda self, this, to: self.expression( 614 exp.Cast if self.STRICT_CAST else exp.TryCast, 615 this=this, 616 to=to, 617 ), 618 TokenType.ARROW: lambda self, this, path: self.expression( 619 exp.JSONExtract, 620 this=this, 621 expression=self.dialect.to_json_path(path), 622 only_json_types=self.JSON_ARROWS_REQUIRE_JSON_TYPE, 623 ), 624 TokenType.DARROW: lambda self, this, path: self.expression( 625 exp.JSONExtractScalar, 626 this=this, 627 expression=self.dialect.to_json_path(path), 628 only_json_types=self.JSON_ARROWS_REQUIRE_JSON_TYPE, 629 ), 630 TokenType.HASH_ARROW: lambda self, this, path: self.expression( 631 exp.JSONBExtract, 632 this=this, 633 expression=path, 634 ), 635 TokenType.DHASH_ARROW: lambda self, this, path: self.expression( 636 exp.JSONBExtractScalar, 637 this=this, 638 expression=path, 639 ), 640 TokenType.PLACEHOLDER: lambda self, this, key: self.expression( 641 exp.JSONBContains, 642 this=this, 643 expression=key, 644 ), 645 } 646 647 EXPRESSION_PARSERS = { 648 exp.Cluster: lambda self: self._parse_sort(exp.Cluster, TokenType.CLUSTER_BY), 649 exp.Column: lambda self: self._parse_column(), 650 exp.Condition: lambda self: self._parse_assignment(), 651 exp.DataType: lambda self: self._parse_types(allow_identifiers=False), 652 exp.Expression: lambda self: self._parse_expression(), 653 exp.From: lambda self: self._parse_from(joins=True), 654 exp.Group: lambda self: self._parse_group(), 655 exp.Having: lambda self: self._parse_having(), 656 exp.Identifier: lambda self: self._parse_id_var(), 657 exp.Join: lambda self: self._parse_join(), 658 exp.Lambda: lambda self: self._parse_lambda(), 659 exp.Lateral: lambda self: self._parse_lateral(), 660 exp.Limit: lambda self: self._parse_limit(), 661 exp.Offset: lambda self: self._parse_offset(), 662 exp.Order: lambda self: self._parse_order(), 663 exp.Ordered: lambda self: self._parse_ordered(), 664 exp.Properties: lambda self: self._parse_properties(), 665 exp.Qualify: lambda self: self._parse_qualify(), 666 exp.Returning: lambda self: self._parse_returning(), 667 exp.Sort: lambda self: self._parse_sort(exp.Sort, TokenType.SORT_BY), 668 exp.Table: lambda self: self._parse_table_parts(), 669 exp.TableAlias: lambda self: self._parse_table_alias(), 670 exp.When: lambda self: seq_get(self._parse_when_matched(), 0), 671 exp.Where: lambda self: 
self._parse_where(), 672 exp.Window: lambda self: self._parse_named_window(), 673 exp.With: lambda self: self._parse_with(), 674 "JOIN_TYPE": lambda self: self._parse_join_parts(), 675 } 676 677 STATEMENT_PARSERS = { 678 TokenType.ALTER: lambda self: self._parse_alter(), 679 TokenType.BEGIN: lambda self: self._parse_transaction(), 680 TokenType.CACHE: lambda self: self._parse_cache(), 681 TokenType.COMMENT: lambda self: self._parse_comment(), 682 TokenType.COMMIT: lambda self: self._parse_commit_or_rollback(), 683 TokenType.COPY: lambda self: self._parse_copy(), 684 TokenType.CREATE: lambda self: self._parse_create(), 685 TokenType.DELETE: lambda self: self._parse_delete(), 686 TokenType.DESC: lambda self: self._parse_describe(), 687 TokenType.DESCRIBE: lambda self: self._parse_describe(), 688 TokenType.DROP: lambda self: self._parse_drop(), 689 TokenType.INSERT: lambda self: self._parse_insert(), 690 TokenType.KILL: lambda self: self._parse_kill(), 691 TokenType.LOAD: lambda self: self._parse_load(), 692 TokenType.MERGE: lambda self: self._parse_merge(), 693 TokenType.PIVOT: lambda self: self._parse_simplified_pivot(), 694 TokenType.PRAGMA: lambda self: self.expression(exp.Pragma, this=self._parse_expression()), 695 TokenType.REFRESH: lambda self: self._parse_refresh(), 696 TokenType.ROLLBACK: lambda self: self._parse_commit_or_rollback(), 697 TokenType.SET: lambda self: self._parse_set(), 698 TokenType.TRUNCATE: lambda self: self._parse_truncate_table(), 699 TokenType.UNCACHE: lambda self: self._parse_uncache(), 700 TokenType.UPDATE: lambda self: self._parse_update(), 701 TokenType.USE: lambda self: self.expression( 702 exp.Use, 703 kind=self._parse_var_from_options(self.USABLES, raise_unmatched=False), 704 this=self._parse_table(schema=False), 705 ), 706 TokenType.SEMICOLON: lambda self: self.expression(exp.Semicolon), 707 } 708 709 UNARY_PARSERS = { 710 TokenType.PLUS: lambda self: self._parse_unary(), # Unary + is handled as a no-op 711 TokenType.NOT: lambda self: self.expression(exp.Not, this=self._parse_equality()), 712 TokenType.TILDA: lambda self: self.expression(exp.BitwiseNot, this=self._parse_unary()), 713 TokenType.DASH: lambda self: self.expression(exp.Neg, this=self._parse_unary()), 714 TokenType.PIPE_SLASH: lambda self: self.expression(exp.Sqrt, this=self._parse_unary()), 715 TokenType.DPIPE_SLASH: lambda self: self.expression(exp.Cbrt, this=self._parse_unary()), 716 } 717 718 STRING_PARSERS = { 719 TokenType.HEREDOC_STRING: lambda self, token: self.expression( 720 exp.RawString, this=token.text 721 ), 722 TokenType.NATIONAL_STRING: lambda self, token: self.expression( 723 exp.National, this=token.text 724 ), 725 TokenType.RAW_STRING: lambda self, token: self.expression(exp.RawString, this=token.text), 726 TokenType.STRING: lambda self, token: self.expression( 727 exp.Literal, this=token.text, is_string=True 728 ), 729 TokenType.UNICODE_STRING: lambda self, token: self.expression( 730 exp.UnicodeString, 731 this=token.text, 732 escape=self._match_text_seq("UESCAPE") and self._parse_string(), 733 ), 734 } 735 736 NUMERIC_PARSERS = { 737 TokenType.BIT_STRING: lambda self, token: self.expression(exp.BitString, this=token.text), 738 TokenType.BYTE_STRING: lambda self, token: self.expression(exp.ByteString, this=token.text), 739 TokenType.HEX_STRING: lambda self, token: self.expression(exp.HexString, this=token.text), 740 TokenType.NUMBER: lambda self, token: self.expression( 741 exp.Literal, this=token.text, is_string=False 742 ), 743 } 744 745 PRIMARY_PARSERS = { 746 
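# [Editor's note -- illustrative sketch, not part of the original source.]
# Primary parsers turn a single token into a leaf expression; the string and
# numeric tables merged in below cover the literal cases:
#
#     >>> import sqlglot
#     >>> sqlglot.parse_one("'hi'").is_string
#     True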
**STRING_PARSERS, 747 **NUMERIC_PARSERS, 748 TokenType.INTRODUCER: lambda self, token: self._parse_introducer(token), 749 TokenType.NULL: lambda self, _: self.expression(exp.Null), 750 TokenType.TRUE: lambda self, _: self.expression(exp.Boolean, this=True), 751 TokenType.FALSE: lambda self, _: self.expression(exp.Boolean, this=False), 752 TokenType.SESSION_PARAMETER: lambda self, _: self._parse_session_parameter(), 753 TokenType.STAR: lambda self, _: self.expression( 754 exp.Star, 755 **{ 756 "except": self._parse_star_op("EXCEPT", "EXCLUDE"), 757 "replace": self._parse_star_op("REPLACE"), 758 "rename": self._parse_star_op("RENAME"), 759 }, 760 ), 761 } 762 763 PLACEHOLDER_PARSERS = { 764 TokenType.PLACEHOLDER: lambda self: self.expression(exp.Placeholder), 765 TokenType.PARAMETER: lambda self: self._parse_parameter(), 766 TokenType.COLON: lambda self: ( 767 self.expression(exp.Placeholder, this=self._prev.text) 768 if self._match(TokenType.NUMBER) or self._match_set(self.ID_VAR_TOKENS) 769 else None 770 ), 771 } 772 773 RANGE_PARSERS = { 774 TokenType.BETWEEN: lambda self, this: self._parse_between(this), 775 TokenType.GLOB: binary_range_parser(exp.Glob), 776 TokenType.ILIKE: binary_range_parser(exp.ILike), 777 TokenType.IN: lambda self, this: self._parse_in(this), 778 TokenType.IRLIKE: binary_range_parser(exp.RegexpILike), 779 TokenType.IS: lambda self, this: self._parse_is(this), 780 TokenType.LIKE: binary_range_parser(exp.Like), 781 TokenType.OVERLAPS: binary_range_parser(exp.Overlaps), 782 TokenType.RLIKE: binary_range_parser(exp.RegexpLike), 783 TokenType.SIMILAR_TO: binary_range_parser(exp.SimilarTo), 784 TokenType.FOR: lambda self, this: self._parse_comprehension(this), 785 } 786 787 PROPERTY_PARSERS: t.Dict[str, t.Callable] = { 788 "ALLOWED_VALUES": lambda self: self.expression( 789 exp.AllowedValuesProperty, expressions=self._parse_csv(self._parse_primary) 790 ), 791 "ALGORITHM": lambda self: self._parse_property_assignment(exp.AlgorithmProperty), 792 "AUTO": lambda self: self._parse_auto_property(), 793 "AUTO_INCREMENT": lambda self: self._parse_property_assignment(exp.AutoIncrementProperty), 794 "BACKUP": lambda self: self.expression( 795 exp.BackupProperty, this=self._parse_var(any_token=True) 796 ), 797 "BLOCKCOMPRESSION": lambda self: self._parse_blockcompression(), 798 "CHARSET": lambda self, **kwargs: self._parse_character_set(**kwargs), 799 "CHARACTER SET": lambda self, **kwargs: self._parse_character_set(**kwargs), 800 "CHECKSUM": lambda self: self._parse_checksum(), 801 "CLUSTER BY": lambda self: self._parse_cluster(), 802 "CLUSTERED": lambda self: self._parse_clustered_by(), 803 "COLLATE": lambda self, **kwargs: self._parse_property_assignment( 804 exp.CollateProperty, **kwargs 805 ), 806 "COMMENT": lambda self: self._parse_property_assignment(exp.SchemaCommentProperty), 807 "CONTAINS": lambda self: self._parse_contains_property(), 808 "COPY": lambda self: self._parse_copy_property(), 809 "DATABLOCKSIZE": lambda self, **kwargs: self._parse_datablocksize(**kwargs), 810 "DATA_DELETION": lambda self: self._parse_data_deletion_property(), 811 "DEFINER": lambda self: self._parse_definer(), 812 "DETERMINISTIC": lambda self: self.expression( 813 exp.StabilityProperty, this=exp.Literal.string("IMMUTABLE") 814 ), 815 "DISTKEY": lambda self: self._parse_distkey(), 816 "DISTSTYLE": lambda self: self._parse_property_assignment(exp.DistStyleProperty), 817 "ENGINE": lambda self: self._parse_property_assignment(exp.EngineProperty), 818 "EXECUTE": lambda self: 
self._parse_property_assignment(exp.ExecuteAsProperty), 819 "EXTERNAL": lambda self: self.expression(exp.ExternalProperty), 820 "FALLBACK": lambda self, **kwargs: self._parse_fallback(**kwargs), 821 "FORMAT": lambda self: self._parse_property_assignment(exp.FileFormatProperty), 822 "FREESPACE": lambda self: self._parse_freespace(), 823 "GLOBAL": lambda self: self.expression(exp.GlobalProperty), 824 "HEAP": lambda self: self.expression(exp.HeapProperty), 825 "ICEBERG": lambda self: self.expression(exp.IcebergProperty), 826 "IMMUTABLE": lambda self: self.expression( 827 exp.StabilityProperty, this=exp.Literal.string("IMMUTABLE") 828 ), 829 "INHERITS": lambda self: self.expression( 830 exp.InheritsProperty, expressions=self._parse_wrapped_csv(self._parse_table) 831 ), 832 "INPUT": lambda self: self.expression(exp.InputModelProperty, this=self._parse_schema()), 833 "JOURNAL": lambda self, **kwargs: self._parse_journal(**kwargs), 834 "LANGUAGE": lambda self: self._parse_property_assignment(exp.LanguageProperty), 835 "LAYOUT": lambda self: self._parse_dict_property(this="LAYOUT"), 836 "LIFETIME": lambda self: self._parse_dict_range(this="LIFETIME"), 837 "LIKE": lambda self: self._parse_create_like(), 838 "LOCATION": lambda self: self._parse_property_assignment(exp.LocationProperty), 839 "LOCK": lambda self: self._parse_locking(), 840 "LOCKING": lambda self: self._parse_locking(), 841 "LOG": lambda self, **kwargs: self._parse_log(**kwargs), 842 "MATERIALIZED": lambda self: self.expression(exp.MaterializedProperty), 843 "MERGEBLOCKRATIO": lambda self, **kwargs: self._parse_mergeblockratio(**kwargs), 844 "MODIFIES": lambda self: self._parse_modifies_property(), 845 "MULTISET": lambda self: self.expression(exp.SetProperty, multi=True), 846 "NO": lambda self: self._parse_no_property(), 847 "ON": lambda self: self._parse_on_property(), 848 "ORDER BY": lambda self: self._parse_order(skip_order_token=True), 849 "OUTPUT": lambda self: self.expression(exp.OutputModelProperty, this=self._parse_schema()), 850 "PARTITION": lambda self: self._parse_partitioned_of(), 851 "PARTITION BY": lambda self: self._parse_partitioned_by(), 852 "PARTITIONED BY": lambda self: self._parse_partitioned_by(), 853 "PARTITIONED_BY": lambda self: self._parse_partitioned_by(), 854 "PRIMARY KEY": lambda self: self._parse_primary_key(in_props=True), 855 "RANGE": lambda self: self._parse_dict_range(this="RANGE"), 856 "READS": lambda self: self._parse_reads_property(), 857 "REMOTE": lambda self: self._parse_remote_with_connection(), 858 "RETURNS": lambda self: self._parse_returns(), 859 "STRICT": lambda self: self.expression(exp.StrictProperty), 860 "ROW": lambda self: self._parse_row(), 861 "ROW_FORMAT": lambda self: self._parse_property_assignment(exp.RowFormatProperty), 862 "SAMPLE": lambda self: self.expression( 863 exp.SampleProperty, this=self._match_text_seq("BY") and self._parse_bitwise() 864 ), 865 "SET": lambda self: self.expression(exp.SetProperty, multi=False), 866 "SETTINGS": lambda self: self.expression( 867 exp.SettingsProperty, expressions=self._parse_csv(self._parse_set_item) 868 ), 869 "SHARING": lambda self: self._parse_property_assignment(exp.SharingProperty), 870 "SORTKEY": lambda self: self._parse_sortkey(), 871 "SOURCE": lambda self: self._parse_dict_property(this="SOURCE"), 872 "STABLE": lambda self: self.expression( 873 exp.StabilityProperty, this=exp.Literal.string("STABLE") 874 ), 875 "STORED": lambda self: self._parse_stored(), 876 "SYSTEM_VERSIONING": lambda self: self._parse_system_versioning_property(), 
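# [Editor's note -- illustrative sketch, not part of the original source.]
# Keys in PROPERTY_PARSERS are the uppercased keyword text matched by
# _parse_property, so a dialect's parser can register extra properties by
# extending the dict, e.g. (hypothetical MYPROP keyword):
#
#     PROPERTY_PARSERS = {
#         **Parser.PROPERTY_PARSERS,
#         "MYPROP": lambda self: self.expression(
#             exp.Property, this=exp.var("MYPROP"), value=self._parse_string()
#         ),
#     }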
877 "TBLPROPERTIES": lambda self: self._parse_wrapped_properties(), 878 "TEMP": lambda self: self.expression(exp.TemporaryProperty), 879 "TEMPORARY": lambda self: self.expression(exp.TemporaryProperty), 880 "TO": lambda self: self._parse_to_table(), 881 "TRANSIENT": lambda self: self.expression(exp.TransientProperty), 882 "TRANSFORM": lambda self: self.expression( 883 exp.TransformModelProperty, expressions=self._parse_wrapped_csv(self._parse_expression) 884 ), 885 "TTL": lambda self: self._parse_ttl(), 886 "USING": lambda self: self._parse_property_assignment(exp.FileFormatProperty), 887 "UNLOGGED": lambda self: self.expression(exp.UnloggedProperty), 888 "VOLATILE": lambda self: self._parse_volatile_property(), 889 "WITH": lambda self: self._parse_with_property(), 890 } 891 892 CONSTRAINT_PARSERS = { 893 "AUTOINCREMENT": lambda self: self._parse_auto_increment(), 894 "AUTO_INCREMENT": lambda self: self._parse_auto_increment(), 895 "CASESPECIFIC": lambda self: self.expression(exp.CaseSpecificColumnConstraint, not_=False), 896 "CHARACTER SET": lambda self: self.expression( 897 exp.CharacterSetColumnConstraint, this=self._parse_var_or_string() 898 ), 899 "CHECK": lambda self: self.expression( 900 exp.CheckColumnConstraint, 901 this=self._parse_wrapped(self._parse_assignment), 902 enforced=self._match_text_seq("ENFORCED"), 903 ), 904 "COLLATE": lambda self: self.expression( 905 exp.CollateColumnConstraint, this=self._parse_var(any_token=True) 906 ), 907 "COMMENT": lambda self: self.expression( 908 exp.CommentColumnConstraint, this=self._parse_string() 909 ), 910 "COMPRESS": lambda self: self._parse_compress(), 911 "CLUSTERED": lambda self: self.expression( 912 exp.ClusteredColumnConstraint, this=self._parse_wrapped_csv(self._parse_ordered) 913 ), 914 "NONCLUSTERED": lambda self: self.expression( 915 exp.NonClusteredColumnConstraint, this=self._parse_wrapped_csv(self._parse_ordered) 916 ), 917 "DEFAULT": lambda self: self.expression( 918 exp.DefaultColumnConstraint, this=self._parse_bitwise() 919 ), 920 "ENCODE": lambda self: self.expression(exp.EncodeColumnConstraint, this=self._parse_var()), 921 "EPHEMERAL": lambda self: self.expression( 922 exp.EphemeralColumnConstraint, this=self._parse_bitwise() 923 ), 924 "EXCLUDE": lambda self: self.expression( 925 exp.ExcludeColumnConstraint, this=self._parse_index_params() 926 ), 927 "FOREIGN KEY": lambda self: self._parse_foreign_key(), 928 "FORMAT": lambda self: self.expression( 929 exp.DateFormatColumnConstraint, this=self._parse_var_or_string() 930 ), 931 "GENERATED": lambda self: self._parse_generated_as_identity(), 932 "IDENTITY": lambda self: self._parse_auto_increment(), 933 "INLINE": lambda self: self._parse_inline(), 934 "LIKE": lambda self: self._parse_create_like(), 935 "NOT": lambda self: self._parse_not_constraint(), 936 "NULL": lambda self: self.expression(exp.NotNullColumnConstraint, allow_null=True), 937 "ON": lambda self: ( 938 self._match(TokenType.UPDATE) 939 and self.expression(exp.OnUpdateColumnConstraint, this=self._parse_function()) 940 ) 941 or self.expression(exp.OnProperty, this=self._parse_id_var()), 942 "PATH": lambda self: self.expression(exp.PathColumnConstraint, this=self._parse_string()), 943 "PERIOD": lambda self: self._parse_period_for_system_time(), 944 "PRIMARY KEY": lambda self: self._parse_primary_key(), 945 "REFERENCES": lambda self: self._parse_references(match=False), 946 "TITLE": lambda self: self.expression( 947 exp.TitleColumnConstraint, this=self._parse_var_or_string() 948 ), 949 "TTL": lambda self: 
self.expression(exp.MergeTreeTTL, expressions=[self._parse_bitwise()]), 950 "UNIQUE": lambda self: self._parse_unique(), 951 "UPPERCASE": lambda self: self.expression(exp.UppercaseColumnConstraint), 952 "WITH": lambda self: self.expression( 953 exp.Properties, expressions=self._parse_wrapped_properties() 954 ), 955 } 956 957 ALTER_PARSERS = { 958 "ADD": lambda self: self._parse_alter_table_add(), 959 "ALTER": lambda self: self._parse_alter_table_alter(), 960 "CLUSTER BY": lambda self: self._parse_cluster(wrapped=True), 961 "DELETE": lambda self: self.expression(exp.Delete, where=self._parse_where()), 962 "DROP": lambda self: self._parse_alter_table_drop(), 963 "RENAME": lambda self: self._parse_alter_table_rename(), 964 "SET": lambda self: self._parse_alter_table_set(), 965 } 966 967 ALTER_ALTER_PARSERS = { 968 "DISTKEY": lambda self: self._parse_alter_diststyle(), 969 "DISTSTYLE": lambda self: self._parse_alter_diststyle(), 970 "SORTKEY": lambda self: self._parse_alter_sortkey(), 971 "COMPOUND": lambda self: self._parse_alter_sortkey(compound=True), 972 } 973 974 SCHEMA_UNNAMED_CONSTRAINTS = { 975 "CHECK", 976 "EXCLUDE", 977 "FOREIGN KEY", 978 "LIKE", 979 "PERIOD", 980 "PRIMARY KEY", 981 "UNIQUE", 982 } 983 984 NO_PAREN_FUNCTION_PARSERS = { 985 "ANY": lambda self: self.expression(exp.Any, this=self._parse_bitwise()), 986 "CASE": lambda self: self._parse_case(), 987 "IF": lambda self: self._parse_if(), 988 "NEXT": lambda self: self._parse_next_value_for(), 989 } 990 991 INVALID_FUNC_NAME_TOKENS = { 992 TokenType.IDENTIFIER, 993 TokenType.STRING, 994 } 995 996 FUNCTIONS_WITH_ALIASED_ARGS = {"STRUCT"} 997 998 KEY_VALUE_DEFINITIONS = (exp.Alias, exp.EQ, exp.PropertyEQ, exp.Slice) 999 1000 FUNCTION_PARSERS = { 1001 "CAST": lambda self: self._parse_cast(self.STRICT_CAST), 1002 "CONVERT": lambda self: self._parse_convert(self.STRICT_CAST), 1003 "DECODE": lambda self: self._parse_decode(), 1004 "EXTRACT": lambda self: self._parse_extract(), 1005 "GAP_FILL": lambda self: self._parse_gap_fill(), 1006 "JSON_OBJECT": lambda self: self._parse_json_object(), 1007 "JSON_OBJECTAGG": lambda self: self._parse_json_object(agg=True), 1008 "JSON_TABLE": lambda self: self._parse_json_table(), 1009 "MATCH": lambda self: self._parse_match_against(), 1010 "OPENJSON": lambda self: self._parse_open_json(), 1011 "POSITION": lambda self: self._parse_position(), 1012 "PREDICT": lambda self: self._parse_predict(), 1013 "SAFE_CAST": lambda self: self._parse_cast(False, safe=True), 1014 "STRING_AGG": lambda self: self._parse_string_agg(), 1015 "SUBSTRING": lambda self: self._parse_substring(), 1016 "TRIM": lambda self: self._parse_trim(), 1017 "TRY_CAST": lambda self: self._parse_cast(False, safe=True), 1018 "TRY_CONVERT": lambda self: self._parse_convert(False, safe=True), 1019 } 1020 1021 QUERY_MODIFIER_PARSERS = { 1022 TokenType.MATCH_RECOGNIZE: lambda self: ("match", self._parse_match_recognize()), 1023 TokenType.PREWHERE: lambda self: ("prewhere", self._parse_prewhere()), 1024 TokenType.WHERE: lambda self: ("where", self._parse_where()), 1025 TokenType.GROUP_BY: lambda self: ("group", self._parse_group()), 1026 TokenType.HAVING: lambda self: ("having", self._parse_having()), 1027 TokenType.QUALIFY: lambda self: ("qualify", self._parse_qualify()), 1028 TokenType.WINDOW: lambda self: ("windows", self._parse_window_clause()), 1029 TokenType.ORDER_BY: lambda self: ("order", self._parse_order()), 1030 TokenType.LIMIT: lambda self: ("limit", self._parse_limit()), 1031 TokenType.FETCH: lambda self: ("limit", 
self._parse_limit()), 1032 TokenType.OFFSET: lambda self: ("offset", self._parse_offset()), 1033 TokenType.FOR: lambda self: ("locks", self._parse_locks()), 1034 TokenType.LOCK: lambda self: ("locks", self._parse_locks()), 1035 TokenType.TABLE_SAMPLE: lambda self: ("sample", self._parse_table_sample(as_modifier=True)), 1036 TokenType.USING: lambda self: ("sample", self._parse_table_sample(as_modifier=True)), 1037 TokenType.CLUSTER_BY: lambda self: ( 1038 "cluster", 1039 self._parse_sort(exp.Cluster, TokenType.CLUSTER_BY), 1040 ), 1041 TokenType.DISTRIBUTE_BY: lambda self: ( 1042 "distribute", 1043 self._parse_sort(exp.Distribute, TokenType.DISTRIBUTE_BY), 1044 ), 1045 TokenType.SORT_BY: lambda self: ("sort", self._parse_sort(exp.Sort, TokenType.SORT_BY)), 1046 TokenType.CONNECT_BY: lambda self: ("connect", self._parse_connect(skip_start_token=True)), 1047 TokenType.START_WITH: lambda self: ("connect", self._parse_connect()), 1048 } 1049 1050 SET_PARSERS = { 1051 "GLOBAL": lambda self: self._parse_set_item_assignment("GLOBAL"), 1052 "LOCAL": lambda self: self._parse_set_item_assignment("LOCAL"), 1053 "SESSION": lambda self: self._parse_set_item_assignment("SESSION"), 1054 "TRANSACTION": lambda self: self._parse_set_transaction(), 1055 } 1056 1057 SHOW_PARSERS: t.Dict[str, t.Callable] = {} 1058 1059 TYPE_LITERAL_PARSERS = { 1060 exp.DataType.Type.JSON: lambda self, this, _: self.expression(exp.ParseJSON, this=this), 1061 } 1062 1063 TYPE_CONVERTER: t.Dict[exp.DataType.Type, t.Callable[[exp.DataType], exp.DataType]] = {} 1064 1065 DDL_SELECT_TOKENS = {TokenType.SELECT, TokenType.WITH, TokenType.L_PAREN} 1066 1067 PRE_VOLATILE_TOKENS = {TokenType.CREATE, TokenType.REPLACE, TokenType.UNIQUE} 1068 1069 TRANSACTION_KIND = {"DEFERRED", "IMMEDIATE", "EXCLUSIVE"} 1070 TRANSACTION_CHARACTERISTICS: OPTIONS_TYPE = { 1071 "ISOLATION": ( 1072 ("LEVEL", "REPEATABLE", "READ"), 1073 ("LEVEL", "READ", "COMMITTED"), 1074 ("LEVEL", "READ", "UNCOMMITTED"), 1075 ("LEVEL", "SERIALIZABLE"), 1076 ), 1077 "READ": ("WRITE", "ONLY"), 1078 } 1079 1080 CONFLICT_ACTIONS: OPTIONS_TYPE = dict.fromkeys( 1081 ("ABORT", "FAIL", "IGNORE", "REPLACE", "ROLLBACK", "UPDATE"), tuple() 1082 ) 1083 CONFLICT_ACTIONS["DO"] = ("NOTHING", "UPDATE") 1084 1085 CREATE_SEQUENCE: OPTIONS_TYPE = { 1086 "SCALE": ("EXTEND", "NOEXTEND"), 1087 "SHARD": ("EXTEND", "NOEXTEND"), 1088 "NO": ("CYCLE", "CACHE", "MAXVALUE", "MINVALUE"), 1089 **dict.fromkeys( 1090 ( 1091 "SESSION", 1092 "GLOBAL", 1093 "KEEP", 1094 "NOKEEP", 1095 "ORDER", 1096 "NOORDER", 1097 "NOCACHE", 1098 "CYCLE", 1099 "NOCYCLE", 1100 "NOMINVALUE", 1101 "NOMAXVALUE", 1102 "NOSCALE", 1103 "NOSHARD", 1104 ), 1105 tuple(), 1106 ), 1107 } 1108 1109 ISOLATED_LOADING_OPTIONS: OPTIONS_TYPE = {"FOR": ("ALL", "INSERT", "NONE")} 1110 1111 USABLES: OPTIONS_TYPE = dict.fromkeys(("ROLE", "WAREHOUSE", "DATABASE", "SCHEMA"), tuple()) 1112 1113 CAST_ACTIONS: OPTIONS_TYPE = dict.fromkeys(("RENAME", "ADD"), ("FIELDS",)) 1114 1115 INSERT_ALTERNATIVES = {"ABORT", "FAIL", "IGNORE", "REPLACE", "ROLLBACK"} 1116 1117 CLONE_KEYWORDS = {"CLONE", "COPY"} 1118 HISTORICAL_DATA_KIND = {"TIMESTAMP", "OFFSET", "STATEMENT", "STREAM"} 1119 1120 OPCLASS_FOLLOW_KEYWORDS = {"ASC", "DESC", "NULLS", "WITH"} 1121 1122 OPTYPE_FOLLOW_TOKENS = {TokenType.COMMA, TokenType.R_PAREN} 1123 1124 TABLE_INDEX_HINT_TOKENS = {TokenType.FORCE, TokenType.IGNORE, TokenType.USE} 1125 1126 VIEW_ATTRIBUTES = {"ENCRYPTION", "SCHEMABINDING", "VIEW_METADATA"} 1127 1128 WINDOW_ALIAS_TOKENS = ID_VAR_TOKENS - {TokenType.ROWS} 1129
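# [Editor's note -- illustrative sketch, not part of the original source.]
# Token sets for specific contexts are derived from ID_VAR_TOKENS by set
# difference, so a subclass can adjust one context without touching the
# others, e.g. (hypothetical dialect tweak):
#
#     class MyParser(Parser):
#         WINDOW_ALIAS_TOKENS = Parser.WINDOW_ALIAS_TOKENS - {TokenType.FORMAT}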
WINDOW_BEFORE_PAREN_TOKENS = {TokenType.OVER} 1130 WINDOW_SIDES = {"FOLLOWING", "PRECEDING"} 1131 1132 JSON_KEY_VALUE_SEPARATOR_TOKENS = {TokenType.COLON, TokenType.COMMA, TokenType.IS} 1133 1134 FETCH_TOKENS = ID_VAR_TOKENS - {TokenType.ROW, TokenType.ROWS, TokenType.PERCENT} 1135 1136 ADD_CONSTRAINT_TOKENS = {TokenType.CONSTRAINT, TokenType.PRIMARY_KEY, TokenType.FOREIGN_KEY} 1137 1138 DISTINCT_TOKENS = {TokenType.DISTINCT} 1139 1140 NULL_TOKENS = {TokenType.NULL} 1141 1142 UNNEST_OFFSET_ALIAS_TOKENS = ID_VAR_TOKENS - SET_OPERATIONS 1143 1144 SELECT_START_TOKENS = {TokenType.L_PAREN, TokenType.WITH, TokenType.SELECT} 1145 1146 COPY_INTO_VARLEN_OPTIONS = {"FILE_FORMAT", "COPY_OPTIONS", "FORMAT_OPTIONS", "CREDENTIAL"} 1147 1148 STRICT_CAST = True 1149 1150 PREFIXED_PIVOT_COLUMNS = False 1151 IDENTIFY_PIVOT_STRINGS = False 1152 1153 LOG_DEFAULTS_TO_LN = False 1154 1155 # Whether ADD is present for each column added by ALTER TABLE 1156 ALTER_TABLE_ADD_REQUIRED_FOR_EACH_COLUMN = True 1157 1158 # Whether the table sample clause expects CSV syntax 1159 TABLESAMPLE_CSV = False 1160 1161 # The default method used for table sampling 1162 DEFAULT_SAMPLING_METHOD: t.Optional[str] = None 1163 1164 # Whether the SET command needs a delimiter (e.g. "=") for assignments 1165 SET_REQUIRES_ASSIGNMENT_DELIMITER = True 1166 1167 # Whether the TRIM function expects the characters to trim as its first argument 1168 TRIM_PATTERN_FIRST = False 1169 1170 # Whether string aliases are supported `SELECT COUNT(*) 'count'` 1171 STRING_ALIASES = False 1172 1173 # Whether query modifiers such as LIMIT are attached to the UNION node (vs its right operand) 1174 MODIFIERS_ATTACHED_TO_UNION = True 1175 UNION_MODIFIERS = {"order", "limit", "offset"} 1176 1177 # Whether to parse IF statements that aren't followed by a left parenthesis as commands 1178 NO_PAREN_IF_COMMANDS = True 1179 1180 # Whether the -> and ->> operators expect documents of type JSON (e.g. Postgres) 1181 JSON_ARROWS_REQUIRE_JSON_TYPE = False 1182 1183 # Whether the `:` operator is used to extract a value from a JSON document 1184 COLON_IS_JSON_EXTRACT = False 1185 1186 # Whether or not a VALUES keyword needs to be followed by '(' to form a VALUES clause. 1187 # If this is True and '(' is not found, the keyword will be treated as an identifier 1188 VALUES_FOLLOWED_BY_PAREN = True 1189 1190 # Whether implicit unnesting is supported, e.g. 
SELECT 1 FROM y.z AS z, z.a (Redshift) 1191 SUPPORTS_IMPLICIT_UNNEST = False 1192 1193 # Whether or not interval spans are supported, INTERVAL 1 YEAR TO MONTHS 1194 INTERVAL_SPANS = True 1195 1196 # Whether a PARTITION clause can follow a table reference 1197 SUPPORTS_PARTITION_SELECTION = False 1198 1199 __slots__ = ( 1200 "error_level", 1201 "error_message_context", 1202 "max_errors", 1203 "dialect", 1204 "sql", 1205 "errors", 1206 "_tokens", 1207 "_index", 1208 "_curr", 1209 "_next", 1210 "_prev", 1211 "_prev_comments", 1212 ) 1213 1214 # Autofilled 1215 SHOW_TRIE: t.Dict = {} 1216 SET_TRIE: t.Dict = {} 1217 1218 def __init__( 1219 self, 1220 error_level: t.Optional[ErrorLevel] = None, 1221 error_message_context: int = 100, 1222 max_errors: int = 3, 1223 dialect: DialectType = None, 1224 ): 1225 from sqlglot.dialects import Dialect 1226 1227 self.error_level = error_level or ErrorLevel.IMMEDIATE 1228 self.error_message_context = error_message_context 1229 self.max_errors = max_errors 1230 self.dialect = Dialect.get_or_raise(dialect) 1231 self.reset() 1232 1233 def reset(self): 1234 self.sql = "" 1235 self.errors = [] 1236 self._tokens = [] 1237 self._index = 0 1238 self._curr = None 1239 self._next = None 1240 self._prev = None 1241 self._prev_comments = None 1242 1243 def parse( 1244 self, raw_tokens: t.List[Token], sql: t.Optional[str] = None 1245 ) -> t.List[t.Optional[exp.Expression]]: 1246 """ 1247 Parses a list of tokens and returns a list of syntax trees, one tree 1248 per parsed SQL statement. 1249 1250 Args: 1251 raw_tokens: The list of tokens. 1252 sql: The original SQL string, used to produce helpful debug messages. 1253 1254 Returns: 1255 The list of the produced syntax trees. 1256 """ 1257 return self._parse( 1258 parse_method=self.__class__._parse_statement, raw_tokens=raw_tokens, sql=sql 1259 ) 1260 1261 def parse_into( 1262 self, 1263 expression_types: exp.IntoType, 1264 raw_tokens: t.List[Token], 1265 sql: t.Optional[str] = None, 1266 ) -> t.List[t.Optional[exp.Expression]]: 1267 """ 1268 Parses a list of tokens into a given Expression type. If a collection of Expression 1269 types is given instead, this method will try to parse the token list into each one 1270 of them, stopping at the first for which the parsing succeeds. 1271 1272 Args: 1273 expression_types: The expression type(s) to try and parse the token list into. 1274 raw_tokens: The list of tokens. 1275 sql: The original SQL string, used to produce helpful debug messages. 1276 1277 Returns: 1278 The target Expression. 
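Example (editor's illustration, not part of the original docstring; uses the
public parse_one wrapper, which delegates to this method):
    >>> import sqlglot
    >>> from sqlglot import exp
    >>> isinstance(sqlglot.parse_one("tbl", into=exp.Table), exp.Table)
    True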
1279 """ 1280 errors = [] 1281 for expression_type in ensure_list(expression_types): 1282 parser = self.EXPRESSION_PARSERS.get(expression_type) 1283 if not parser: 1284 raise TypeError(f"No parser registered for {expression_type}") 1285 1286 try: 1287 return self._parse(parser, raw_tokens, sql) 1288 except ParseError as e: 1289 e.errors[0]["into_expression"] = expression_type 1290 errors.append(e) 1291 1292 raise ParseError( 1293 f"Failed to parse '{sql or raw_tokens}' into {expression_types}", 1294 errors=merge_errors(errors), 1295 ) from errors[-1] 1296 1297 def _parse( 1298 self, 1299 parse_method: t.Callable[[Parser], t.Optional[exp.Expression]], 1300 raw_tokens: t.List[Token], 1301 sql: t.Optional[str] = None, 1302 ) -> t.List[t.Optional[exp.Expression]]: 1303 self.reset() 1304 self.sql = sql or "" 1305 1306 total = len(raw_tokens) 1307 chunks: t.List[t.List[Token]] = [[]] 1308 1309 for i, token in enumerate(raw_tokens): 1310 if token.token_type == TokenType.SEMICOLON: 1311 if token.comments: 1312 chunks.append([token]) 1313 1314 if i < total - 1: 1315 chunks.append([]) 1316 else: 1317 chunks[-1].append(token) 1318 1319 expressions = [] 1320 1321 for tokens in chunks: 1322 self._index = -1 1323 self._tokens = tokens 1324 self._advance() 1325 1326 expressions.append(parse_method(self)) 1327 1328 if self._index < len(self._tokens): 1329 self.raise_error("Invalid expression / Unexpected token") 1330 1331 self.check_errors() 1332 1333 return expressions 1334 1335 def check_errors(self) -> None: 1336 """Logs or raises any found errors, depending on the chosen error level setting.""" 1337 if self.error_level == ErrorLevel.WARN: 1338 for error in self.errors: 1339 logger.error(str(error)) 1340 elif self.error_level == ErrorLevel.RAISE and self.errors: 1341 raise ParseError( 1342 concat_messages(self.errors, self.max_errors), 1343 errors=merge_errors(self.errors), 1344 ) 1345 1346 def raise_error(self, message: str, token: t.Optional[Token] = None) -> None: 1347 """ 1348 Appends an error in the list of recorded errors or raises it, depending on the chosen 1349 error level setting. 1350 """ 1351 token = token or self._curr or self._prev or Token.string("") 1352 start = token.start 1353 end = token.end + 1 1354 start_context = self.sql[max(start - self.error_message_context, 0) : start] 1355 highlight = self.sql[start:end] 1356 end_context = self.sql[end : end + self.error_message_context] 1357 1358 error = ParseError.new( 1359 f"{message}. Line {token.line}, Col: {token.col}.\n" 1360 f" {start_context}\033[4m{highlight}\033[0m{end_context}", 1361 description=message, 1362 line=token.line, 1363 col=token.col, 1364 start_context=start_context, 1365 highlight=highlight, 1366 end_context=end_context, 1367 ) 1368 1369 if self.error_level == ErrorLevel.IMMEDIATE: 1370 raise error 1371 1372 self.errors.append(error) 1373 1374 def expression( 1375 self, exp_class: t.Type[E], comments: t.Optional[t.List[str]] = None, **kwargs 1376 ) -> E: 1377 """ 1378 Creates a new, validated Expression. 1379 1380 Args: 1381 exp_class: The expression class to instantiate. 1382 comments: An optional list of comments to attach to the expression. 1383 kwargs: The arguments to set for the expression along with their respective values. 1384 1385 Returns: 1386 The target expression. 
1387 """ 1388 instance = exp_class(**kwargs) 1389 instance.add_comments(comments) if comments else self._add_comments(instance) 1390 return self.validate_expression(instance) 1391 1392 def _add_comments(self, expression: t.Optional[exp.Expression]) -> None: 1393 if expression and self._prev_comments: 1394 expression.add_comments(self._prev_comments) 1395 self._prev_comments = None 1396 1397 def validate_expression(self, expression: E, args: t.Optional[t.List] = None) -> E: 1398 """ 1399 Validates an Expression, making sure that all its mandatory arguments are set. 1400 1401 Args: 1402 expression: The expression to validate. 1403 args: An optional list of items that was used to instantiate the expression, if it's a Func. 1404 1405 Returns: 1406 The validated expression. 1407 """ 1408 if self.error_level != ErrorLevel.IGNORE: 1409 for error_message in expression.error_messages(args): 1410 self.raise_error(error_message) 1411 1412 return expression 1413 1414 def _find_sql(self, start: Token, end: Token) -> str: 1415 return self.sql[start.start : end.end + 1] 1416 1417 def _is_connected(self) -> bool: 1418 return self._prev and self._curr and self._prev.end + 1 == self._curr.start 1419 1420 def _advance(self, times: int = 1) -> None: 1421 self._index += times 1422 self._curr = seq_get(self._tokens, self._index) 1423 self._next = seq_get(self._tokens, self._index + 1) 1424 1425 if self._index > 0: 1426 self._prev = self._tokens[self._index - 1] 1427 self._prev_comments = self._prev.comments 1428 else: 1429 self._prev = None 1430 self._prev_comments = None 1431 1432 def _retreat(self, index: int) -> None: 1433 if index != self._index: 1434 self._advance(index - self._index) 1435 1436 def _warn_unsupported(self) -> None: 1437 if len(self._tokens) <= 1: 1438 return 1439 1440 # We use _find_sql because self.sql may comprise multiple chunks, and we're only 1441 # interested in emitting a warning for the one being currently processed. 1442 sql = self._find_sql(self._tokens[0], self._tokens[-1])[: self.error_message_context] 1443 1444 logger.warning( 1445 f"'{sql}' contains unsupported syntax. Falling back to parsing as a 'Command'." 1446 ) 1447 1448 def _parse_command(self) -> exp.Command: 1449 self._warn_unsupported() 1450 return self.expression( 1451 exp.Command, this=self._prev.text.upper(), expression=self._parse_string() 1452 ) 1453 1454 def _try_parse(self, parse_method: t.Callable[[], T], retreat: bool = False) -> t.Optional[T]: 1455 """ 1456 Attemps to backtrack if a parse function that contains a try/catch internally raises an error. 
This behavior can 1457 be different depending on the user-set ErrorLevel, so _try_parse aims to solve this by setting & resetting 1458 the parser state accordingly. 1459 """ 1460 index = self._index 1461 error_level = self.error_level 1462 1463 self.error_level = ErrorLevel.IMMEDIATE 1464 try: 1465 this = parse_method() 1466 except ParseError: 1467 this = None 1468 finally: 1469 if not this or retreat: 1470 self._retreat(index) 1471 self.error_level = error_level 1472 1473 return this 1474 1475 def _parse_comment(self, allow_exists: bool = True) -> exp.Expression: 1476 start = self._prev 1477 exists = self._parse_exists() if allow_exists else None 1478 1479 self._match(TokenType.ON) 1480 1481 materialized = self._match_text_seq("MATERIALIZED") 1482 kind = self._match_set(self.CREATABLES) and self._prev 1483 if not kind: 1484 return self._parse_as_command(start) 1485 1486 if kind.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE): 1487 this = self._parse_user_defined_function(kind=kind.token_type) 1488 elif kind.token_type == TokenType.TABLE: 1489 this = self._parse_table(alias_tokens=self.COMMENT_TABLE_ALIAS_TOKENS) 1490 elif kind.token_type == TokenType.COLUMN: 1491 this = self._parse_column() 1492 else: 1493 this = self._parse_id_var() 1494 1495 self._match(TokenType.IS) 1496 1497 return self.expression( 1498 exp.Comment, 1499 this=this, 1500 kind=kind.text, 1501 expression=self._parse_string(), 1502 exists=exists, 1503 materialized=materialized, 1504 ) 1505 1506 def _parse_to_table( 1507 self, 1508 ) -> exp.ToTableProperty: 1509 table = self._parse_table_parts(schema=True) 1510 return self.expression(exp.ToTableProperty, this=table) 1511 1512 # https://clickhouse.com/docs/en/engines/table-engines/mergetree-family/mergetree#mergetree-table-ttl 1513 def _parse_ttl(self) -> exp.Expression: 1514 def _parse_ttl_action() -> t.Optional[exp.Expression]: 1515 this = self._parse_bitwise() 1516 1517 if self._match_text_seq("DELETE"): 1518 return self.expression(exp.MergeTreeTTLAction, this=this, delete=True) 1519 if self._match_text_seq("RECOMPRESS"): 1520 return self.expression( 1521 exp.MergeTreeTTLAction, this=this, recompress=self._parse_bitwise() 1522 ) 1523 if self._match_text_seq("TO", "DISK"): 1524 return self.expression( 1525 exp.MergeTreeTTLAction, this=this, to_disk=self._parse_string() 1526 ) 1527 if self._match_text_seq("TO", "VOLUME"): 1528 return self.expression( 1529 exp.MergeTreeTTLAction, this=this, to_volume=self._parse_string() 1530 ) 1531 1532 return this 1533 1534 expressions = self._parse_csv(_parse_ttl_action) 1535 where = self._parse_where() 1536 group = self._parse_group() 1537 1538 aggregates = None 1539 if group and self._match(TokenType.SET): 1540 aggregates = self._parse_csv(self._parse_set_item) 1541 1542 return self.expression( 1543 exp.MergeTreeTTL, 1544 expressions=expressions, 1545 where=where, 1546 group=group, 1547 aggregates=aggregates, 1548 ) 1549 1550 def _parse_statement(self) -> t.Optional[exp.Expression]: 1551 if self._curr is None: 1552 return None 1553 1554 if self._match_set(self.STATEMENT_PARSERS): 1555 return self.STATEMENT_PARSERS[self._prev.token_type](self) 1556 1557 if self._match_set(self.dialect.tokenizer.COMMANDS): 1558 return self._parse_command() 1559 1560 expression = self._parse_expression() 1561 expression = self._parse_set_operations(expression) if expression else self._parse_select() 1562 return self._parse_query_modifiers(expression) 1563 1564 def _parse_drop(self, exists: bool = False) -> exp.Drop | exp.Command: 1565 start =
self._prev 1566 temporary = self._match(TokenType.TEMPORARY) 1567 materialized = self._match_text_seq("MATERIALIZED") 1568 1569 kind = self._match_set(self.CREATABLES) and self._prev.text 1570 if not kind: 1571 return self._parse_as_command(start) 1572 1573 if_exists = exists or self._parse_exists() 1574 table = self._parse_table_parts( 1575 schema=True, is_db_reference=self._prev.token_type == TokenType.SCHEMA 1576 ) 1577 1578 cluster = self._parse_on_property() if self._match(TokenType.ON) else None 1579 1580 if self._match(TokenType.L_PAREN, advance=False): 1581 expressions = self._parse_wrapped_csv(self._parse_types) 1582 else: 1583 expressions = None 1584 1585 return self.expression( 1586 exp.Drop, 1587 comments=start.comments, 1588 exists=if_exists, 1589 this=table, 1590 expressions=expressions, 1591 kind=kind.upper(), 1592 temporary=temporary, 1593 materialized=materialized, 1594 cascade=self._match_text_seq("CASCADE"), 1595 constraints=self._match_text_seq("CONSTRAINTS"), 1596 purge=self._match_text_seq("PURGE"), 1597 cluster=cluster, 1598 ) 1599 1600 def _parse_exists(self, not_: bool = False) -> t.Optional[bool]: 1601 return ( 1602 self._match_text_seq("IF") 1603 and (not not_ or self._match(TokenType.NOT)) 1604 and self._match(TokenType.EXISTS) 1605 ) 1606 1607 def _parse_create(self) -> exp.Create | exp.Command: 1608 # Note: this can't be None because we've matched a statement parser 1609 start = self._prev 1610 comments = self._prev_comments 1611 1612 replace = ( 1613 start.token_type == TokenType.REPLACE 1614 or self._match_pair(TokenType.OR, TokenType.REPLACE) 1615 or self._match_pair(TokenType.OR, TokenType.ALTER) 1616 ) 1617 1618 unique = self._match(TokenType.UNIQUE) 1619 1620 if self._match_pair(TokenType.TABLE, TokenType.FUNCTION, advance=False): 1621 self._advance() 1622 1623 properties = None 1624 create_token = self._match_set(self.CREATABLES) and self._prev 1625 1626 if not create_token: 1627 # exp.Properties.Location.POST_CREATE 1628 properties = self._parse_properties() 1629 create_token = self._match_set(self.CREATABLES) and self._prev 1630 1631 if not properties or not create_token: 1632 return self._parse_as_command(start) 1633 1634 exists = self._parse_exists(not_=True) 1635 this = None 1636 expression: t.Optional[exp.Expression] = None 1637 indexes = None 1638 no_schema_binding = None 1639 begin = None 1640 end = None 1641 clone = None 1642 1643 def extend_props(temp_props: t.Optional[exp.Properties]) -> None: 1644 nonlocal properties 1645 if properties and temp_props: 1646 properties.expressions.extend(temp_props.expressions) 1647 elif temp_props: 1648 properties = temp_props 1649 1650 if create_token.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE): 1651 this = self._parse_user_defined_function(kind=create_token.token_type) 1652 1653 # exp.Properties.Location.POST_SCHEMA ("schema" here is the UDF's type signature) 1654 extend_props(self._parse_properties()) 1655 1656 expression = self._match(TokenType.ALIAS) and self._parse_heredoc() 1657 extend_props(self._parse_properties()) 1658 1659 if not expression: 1660 if self._match(TokenType.COMMAND): 1661 expression = self._parse_as_command(self._prev) 1662 else: 1663 begin = self._match(TokenType.BEGIN) 1664 return_ = self._match_text_seq("RETURN") 1665 1666 if self._match(TokenType.STRING, advance=False): 1667 # Takes care of BigQuery's JavaScript UDF definitions that end in an OPTIONS property 1668 # 
https://cloud.google.com/bigquery/docs/reference/standard-sql/data-definition-language#create_function_statement 1669 expression = self._parse_string() 1670 extend_props(self._parse_properties()) 1671 else: 1672 expression = self._parse_statement() 1673 1674 end = self._match_text_seq("END") 1675 1676 if return_: 1677 expression = self.expression(exp.Return, this=expression) 1678 elif create_token.token_type == TokenType.INDEX: 1679 # Postgres allows anonymous indexes, eg. CREATE INDEX IF NOT EXISTS ON t(c) 1680 if not self._match(TokenType.ON): 1681 index = self._parse_id_var() 1682 anonymous = False 1683 else: 1684 index = None 1685 anonymous = True 1686 1687 this = self._parse_index(index=index, anonymous=anonymous) 1688 elif create_token.token_type in self.DB_CREATABLES: 1689 table_parts = self._parse_table_parts( 1690 schema=True, is_db_reference=create_token.token_type == TokenType.SCHEMA 1691 ) 1692 1693 # exp.Properties.Location.POST_NAME 1694 self._match(TokenType.COMMA) 1695 extend_props(self._parse_properties(before=True)) 1696 1697 this = self._parse_schema(this=table_parts) 1698 1699 # exp.Properties.Location.POST_SCHEMA and POST_WITH 1700 extend_props(self._parse_properties()) 1701 1702 self._match(TokenType.ALIAS) 1703 if not self._match_set(self.DDL_SELECT_TOKENS, advance=False): 1704 # exp.Properties.Location.POST_ALIAS 1705 extend_props(self._parse_properties()) 1706 1707 if create_token.token_type == TokenType.SEQUENCE: 1708 expression = self._parse_types() 1709 extend_props(self._parse_properties()) 1710 else: 1711 expression = self._parse_ddl_select() 1712 1713 if create_token.token_type == TokenType.TABLE: 1714 # exp.Properties.Location.POST_EXPRESSION 1715 extend_props(self._parse_properties()) 1716 1717 indexes = [] 1718 while True: 1719 index = self._parse_index() 1720 1721 # exp.Properties.Location.POST_INDEX 1722 extend_props(self._parse_properties()) 1723 1724 if not index: 1725 break 1726 else: 1727 self._match(TokenType.COMMA) 1728 indexes.append(index) 1729 elif create_token.token_type == TokenType.VIEW: 1730 if self._match_text_seq("WITH", "NO", "SCHEMA", "BINDING"): 1731 no_schema_binding = True 1732 1733 shallow = self._match_text_seq("SHALLOW") 1734 1735 if self._match_texts(self.CLONE_KEYWORDS): 1736 copy = self._prev.text.lower() == "copy" 1737 clone = self.expression( 1738 exp.Clone, this=self._parse_table(schema=True), shallow=shallow, copy=copy 1739 ) 1740 1741 if self._curr and not self._match_set((TokenType.R_PAREN, TokenType.COMMA), advance=False): 1742 return self._parse_as_command(start) 1743 1744 return self.expression( 1745 exp.Create, 1746 comments=comments, 1747 this=this, 1748 kind=create_token.text.upper(), 1749 replace=replace, 1750 unique=unique, 1751 expression=expression, 1752 exists=exists, 1753 properties=properties, 1754 indexes=indexes, 1755 no_schema_binding=no_schema_binding, 1756 begin=begin, 1757 end=end, 1758 clone=clone, 1759 ) 1760 1761 def _parse_sequence_properties(self) -> t.Optional[exp.SequenceProperties]: 1762 seq = exp.SequenceProperties() 1763 1764 options = [] 1765 index = self._index 1766 1767 while self._curr: 1768 self._match(TokenType.COMMA) 1769 if self._match_text_seq("INCREMENT"): 1770 self._match_text_seq("BY") 1771 self._match_text_seq("=") 1772 seq.set("increment", self._parse_term()) 1773 elif self._match_text_seq("MINVALUE"): 1774 seq.set("minvalue", self._parse_term()) 1775 elif self._match_text_seq("MAXVALUE"): 1776 seq.set("maxvalue", self._parse_term()) 1777 elif self._match(TokenType.START_WITH) or 
self._match_text_seq("START"): 1778 self._match_text_seq("=") 1779 seq.set("start", self._parse_term()) 1780 elif self._match_text_seq("CACHE"): 1781 # T-SQL allows empty CACHE which is initialized dynamically 1782 seq.set("cache", self._parse_number() or True) 1783 elif self._match_text_seq("OWNED", "BY"): 1784 # "OWNED BY NONE" is the default 1785 seq.set("owned", None if self._match_text_seq("NONE") else self._parse_column()) 1786 else: 1787 opt = self._parse_var_from_options(self.CREATE_SEQUENCE, raise_unmatched=False) 1788 if opt: 1789 options.append(opt) 1790 else: 1791 break 1792 1793 seq.set("options", options if options else None) 1794 return None if self._index == index else seq 1795 1796 def _parse_property_before(self) -> t.Optional[exp.Expression]: 1797 # only used for teradata currently 1798 self._match(TokenType.COMMA) 1799 1800 kwargs = { 1801 "no": self._match_text_seq("NO"), 1802 "dual": self._match_text_seq("DUAL"), 1803 "before": self._match_text_seq("BEFORE"), 1804 "default": self._match_text_seq("DEFAULT"), 1805 "local": (self._match_text_seq("LOCAL") and "LOCAL") 1806 or (self._match_text_seq("NOT", "LOCAL") and "NOT LOCAL"), 1807 "after": self._match_text_seq("AFTER"), 1808 "minimum": self._match_texts(("MIN", "MINIMUM")), 1809 "maximum": self._match_texts(("MAX", "MAXIMUM")), 1810 } 1811 1812 if self._match_texts(self.PROPERTY_PARSERS): 1813 parser = self.PROPERTY_PARSERS[self._prev.text.upper()] 1814 try: 1815 return parser(self, **{k: v for k, v in kwargs.items() if v}) 1816 except TypeError: 1817 self.raise_error(f"Cannot parse property '{self._prev.text}'") 1818 1819 return None 1820 1821 def _parse_wrapped_properties(self) -> t.List[exp.Expression]: 1822 return self._parse_wrapped_csv(self._parse_property) 1823 1824 def _parse_property(self) -> t.Optional[exp.Expression]: 1825 if self._match_texts(self.PROPERTY_PARSERS): 1826 return self.PROPERTY_PARSERS[self._prev.text.upper()](self) 1827 1828 if self._match(TokenType.DEFAULT) and self._match_texts(self.PROPERTY_PARSERS): 1829 return self.PROPERTY_PARSERS[self._prev.text.upper()](self, default=True) 1830 1831 if self._match_text_seq("COMPOUND", "SORTKEY"): 1832 return self._parse_sortkey(compound=True) 1833 1834 if self._match_text_seq("SQL", "SECURITY"): 1835 return self.expression(exp.SqlSecurityProperty, definer=self._match_text_seq("DEFINER")) 1836 1837 index = self._index 1838 key = self._parse_column() 1839 1840 if not self._match(TokenType.EQ): 1841 self._retreat(index) 1842 return self._parse_sequence_properties() 1843 1844 # Transform the key to exp.Dot if it's dotted identifiers wrapped in exp.Column or to exp.Var otherwise 1845 if isinstance(key, exp.Column): 1846 key = key.to_dot() if len(key.parts) > 1 else exp.var(key.name) 1847 1848 value = self._parse_bitwise() or self._parse_var(any_token=True) 1849 1850 # Transform the value to exp.Var if it was parsed as exp.Column(exp.Identifier()) 1851 if isinstance(value, exp.Column): 1852 value = exp.var(value.name) 1853 1854 return self.expression(exp.Property, this=key, value=value) 1855 1856 def _parse_stored(self) -> exp.FileFormatProperty: 1857 self._match(TokenType.ALIAS) 1858 1859 input_format = self._parse_string() if self._match_text_seq("INPUTFORMAT") else None 1860 output_format = self._parse_string() if self._match_text_seq("OUTPUTFORMAT") else None 1861 1862 return self.expression( 1863 exp.FileFormatProperty, 1864 this=( 1865 self.expression( 1866 exp.InputOutputFormat, input_format=input_format, output_format=output_format 1867 ) 1868 if 
input_format or output_format 1869 else self._parse_var_or_string() or self._parse_number() or self._parse_id_var() 1870 ), 1871 ) 1872 1873 def _parse_unquoted_field(self) -> t.Optional[exp.Expression]: 1874 field = self._parse_field() 1875 if isinstance(field, exp.Identifier) and not field.quoted: 1876 field = exp.var(field) 1877 1878 return field 1879 1880 def _parse_property_assignment(self, exp_class: t.Type[E], **kwargs: t.Any) -> E: 1881 self._match(TokenType.EQ) 1882 self._match(TokenType.ALIAS) 1883 1884 return self.expression(exp_class, this=self._parse_unquoted_field(), **kwargs) 1885 1886 def _parse_properties(self, before: t.Optional[bool] = None) -> t.Optional[exp.Properties]: 1887 properties = [] 1888 while True: 1889 if before: 1890 prop = self._parse_property_before() 1891 else: 1892 prop = self._parse_property() 1893 if not prop: 1894 break 1895 for p in ensure_list(prop): 1896 properties.append(p) 1897 1898 if properties: 1899 return self.expression(exp.Properties, expressions=properties) 1900 1901 return None 1902 1903 def _parse_fallback(self, no: bool = False) -> exp.FallbackProperty: 1904 return self.expression( 1905 exp.FallbackProperty, no=no, protection=self._match_text_seq("PROTECTION") 1906 ) 1907 1908 def _parse_volatile_property(self) -> exp.VolatileProperty | exp.StabilityProperty: 1909 if self._index >= 2: 1910 pre_volatile_token = self._tokens[self._index - 2] 1911 else: 1912 pre_volatile_token = None 1913 1914 if pre_volatile_token and pre_volatile_token.token_type in self.PRE_VOLATILE_TOKENS: 1915 return exp.VolatileProperty() 1916 1917 return self.expression(exp.StabilityProperty, this=exp.Literal.string("VOLATILE")) 1918 1919 def _parse_retention_period(self) -> exp.Var: 1920 # Parse TSQL's HISTORY_RETENTION_PERIOD: {INFINITE | <number> DAY | DAYS | MONTH ...} 1921 number = self._parse_number() 1922 number_str = f"{number} " if number else "" 1923 unit = self._parse_var(any_token=True) 1924 return exp.var(f"{number_str}{unit}") 1925 1926 def _parse_system_versioning_property( 1927 self, with_: bool = False 1928 ) -> exp.WithSystemVersioningProperty: 1929 self._match(TokenType.EQ) 1930 prop = self.expression( 1931 exp.WithSystemVersioningProperty, 1932 **{ # type: ignore 1933 "on": True, 1934 "with": with_, 1935 }, 1936 ) 1937 1938 if self._match_text_seq("OFF"): 1939 prop.set("on", False) 1940 return prop 1941 1942 self._match(TokenType.ON) 1943 if self._match(TokenType.L_PAREN): 1944 while self._curr and not self._match(TokenType.R_PAREN): 1945 if self._match_text_seq("HISTORY_TABLE", "="): 1946 prop.set("this", self._parse_table_parts()) 1947 elif self._match_text_seq("DATA_CONSISTENCY_CHECK", "="): 1948 prop.set("data_consistency", self._advance_any() and self._prev.text.upper()) 1949 elif self._match_text_seq("HISTORY_RETENTION_PERIOD", "="): 1950 prop.set("retention_period", self._parse_retention_period()) 1951 1952 self._match(TokenType.COMMA) 1953 1954 return prop 1955 1956 def _parse_data_deletion_property(self) -> exp.DataDeletionProperty: 1957 self._match(TokenType.EQ) 1958 on = self._match_text_seq("ON") or not self._match_text_seq("OFF") 1959 prop = self.expression(exp.DataDeletionProperty, on=on) 1960 1961 if self._match(TokenType.L_PAREN): 1962 while self._curr and not self._match(TokenType.R_PAREN): 1963 if self._match_text_seq("FILTER_COLUMN", "="): 1964 prop.set("filter_column", self._parse_column()) 1965 elif self._match_text_seq("RETENTION_PERIOD", "="): 1966 prop.set("retention_period", self._parse_retention_period()) 1967 1968 
self._match(TokenType.COMMA) 1969 1970 return prop 1971 1972 def _parse_with_property(self) -> t.Optional[exp.Expression] | t.List[exp.Expression]: 1973 if self._match_text_seq("(", "SYSTEM_VERSIONING"): 1974 prop = self._parse_system_versioning_property(with_=True) 1975 self._match_r_paren() 1976 return prop 1977 1978 if self._match(TokenType.L_PAREN, advance=False): 1979 return self._parse_wrapped_properties() 1980 1981 if self._match_text_seq("JOURNAL"): 1982 return self._parse_withjournaltable() 1983 1984 if self._match_texts(self.VIEW_ATTRIBUTES): 1985 return self.expression(exp.ViewAttributeProperty, this=self._prev.text.upper()) 1986 1987 if self._match_text_seq("DATA"): 1988 return self._parse_withdata(no=False) 1989 elif self._match_text_seq("NO", "DATA"): 1990 return self._parse_withdata(no=True) 1991 1992 if self._match(TokenType.SERDE_PROPERTIES, advance=False): 1993 return self._parse_serde_properties(with_=True) 1994 1995 if not self._next: 1996 return None 1997 1998 return self._parse_withisolatedloading() 1999 2000 # https://dev.mysql.com/doc/refman/8.0/en/create-view.html 2001 def _parse_definer(self) -> t.Optional[exp.DefinerProperty]: 2002 self._match(TokenType.EQ) 2003 2004 user = self._parse_id_var() 2005 self._match(TokenType.PARAMETER) 2006 host = self._parse_id_var() or (self._match(TokenType.MOD) and self._prev.text) 2007 2008 if not user or not host: 2009 return None 2010 2011 return exp.DefinerProperty(this=f"{user}@{host}") 2012 2013 def _parse_withjournaltable(self) -> exp.WithJournalTableProperty: 2014 self._match(TokenType.TABLE) 2015 self._match(TokenType.EQ) 2016 return self.expression(exp.WithJournalTableProperty, this=self._parse_table_parts()) 2017 2018 def _parse_log(self, no: bool = False) -> exp.LogProperty: 2019 return self.expression(exp.LogProperty, no=no) 2020 2021 def _parse_journal(self, **kwargs) -> exp.JournalProperty: 2022 return self.expression(exp.JournalProperty, **kwargs) 2023 2024 def _parse_checksum(self) -> exp.ChecksumProperty: 2025 self._match(TokenType.EQ) 2026 2027 on = None 2028 if self._match(TokenType.ON): 2029 on = True 2030 elif self._match_text_seq("OFF"): 2031 on = False 2032 2033 return self.expression(exp.ChecksumProperty, on=on, default=self._match(TokenType.DEFAULT)) 2034 2035 def _parse_cluster(self, wrapped: bool = False) -> exp.Cluster: 2036 return self.expression( 2037 exp.Cluster, 2038 expressions=( 2039 self._parse_wrapped_csv(self._parse_ordered) 2040 if wrapped 2041 else self._parse_csv(self._parse_ordered) 2042 ), 2043 ) 2044 2045 def _parse_clustered_by(self) -> exp.ClusteredByProperty: 2046 self._match_text_seq("BY") 2047 2048 self._match_l_paren() 2049 expressions = self._parse_csv(self._parse_column) 2050 self._match_r_paren() 2051 2052 if self._match_text_seq("SORTED", "BY"): 2053 self._match_l_paren() 2054 sorted_by = self._parse_csv(self._parse_ordered) 2055 self._match_r_paren() 2056 else: 2057 sorted_by = None 2058 2059 self._match(TokenType.INTO) 2060 buckets = self._parse_number() 2061 self._match_text_seq("BUCKETS") 2062 2063 return self.expression( 2064 exp.ClusteredByProperty, 2065 expressions=expressions, 2066 sorted_by=sorted_by, 2067 buckets=buckets, 2068 ) 2069 2070 def _parse_copy_property(self) -> t.Optional[exp.CopyGrantsProperty]: 2071 if not self._match_text_seq("GRANTS"): 2072 self._retreat(self._index - 1) 2073 return None 2074 2075 return self.expression(exp.CopyGrantsProperty) 2076 2077 def _parse_freespace(self) -> exp.FreespaceProperty: 2078 self._match(TokenType.EQ) 2079 return 
self.expression( 2080 exp.FreespaceProperty, this=self._parse_number(), percent=self._match(TokenType.PERCENT) 2081 ) 2082 2083 def _parse_mergeblockratio( 2084 self, no: bool = False, default: bool = False 2085 ) -> exp.MergeBlockRatioProperty: 2086 if self._match(TokenType.EQ): 2087 return self.expression( 2088 exp.MergeBlockRatioProperty, 2089 this=self._parse_number(), 2090 percent=self._match(TokenType.PERCENT), 2091 ) 2092 2093 return self.expression(exp.MergeBlockRatioProperty, no=no, default=default) 2094 2095 def _parse_datablocksize( 2096 self, 2097 default: t.Optional[bool] = None, 2098 minimum: t.Optional[bool] = None, 2099 maximum: t.Optional[bool] = None, 2100 ) -> exp.DataBlocksizeProperty: 2101 self._match(TokenType.EQ) 2102 size = self._parse_number() 2103 2104 units = None 2105 if self._match_texts(("BYTES", "KBYTES", "KILOBYTES")): 2106 units = self._prev.text 2107 2108 return self.expression( 2109 exp.DataBlocksizeProperty, 2110 size=size, 2111 units=units, 2112 default=default, 2113 minimum=minimum, 2114 maximum=maximum, 2115 ) 2116 2117 def _parse_blockcompression(self) -> exp.BlockCompressionProperty: 2118 self._match(TokenType.EQ) 2119 always = self._match_text_seq("ALWAYS") 2120 manual = self._match_text_seq("MANUAL") 2121 never = self._match_text_seq("NEVER") 2122 default = self._match_text_seq("DEFAULT") 2123 2124 autotemp = None 2125 if self._match_text_seq("AUTOTEMP"): 2126 autotemp = self._parse_schema() 2127 2128 return self.expression( 2129 exp.BlockCompressionProperty, 2130 always=always, 2131 manual=manual, 2132 never=never, 2133 default=default, 2134 autotemp=autotemp, 2135 ) 2136 2137 def _parse_withisolatedloading(self) -> t.Optional[exp.IsolatedLoadingProperty]: 2138 index = self._index 2139 no = self._match_text_seq("NO") 2140 concurrent = self._match_text_seq("CONCURRENT") 2141 2142 if not self._match_text_seq("ISOLATED", "LOADING"): 2143 self._retreat(index) 2144 return None 2145 2146 target = self._parse_var_from_options(self.ISOLATED_LOADING_OPTIONS, raise_unmatched=False) 2147 return self.expression( 2148 exp.IsolatedLoadingProperty, no=no, concurrent=concurrent, target=target 2149 ) 2150 2151 def _parse_locking(self) -> exp.LockingProperty: 2152 if self._match(TokenType.TABLE): 2153 kind = "TABLE" 2154 elif self._match(TokenType.VIEW): 2155 kind = "VIEW" 2156 elif self._match(TokenType.ROW): 2157 kind = "ROW" 2158 elif self._match_text_seq("DATABASE"): 2159 kind = "DATABASE" 2160 else: 2161 kind = None 2162 2163 if kind in ("DATABASE", "TABLE", "VIEW"): 2164 this = self._parse_table_parts() 2165 else: 2166 this = None 2167 2168 if self._match(TokenType.FOR): 2169 for_or_in = "FOR" 2170 elif self._match(TokenType.IN): 2171 for_or_in = "IN" 2172 else: 2173 for_or_in = None 2174 2175 if self._match_text_seq("ACCESS"): 2176 lock_type = "ACCESS" 2177 elif self._match_texts(("EXCL", "EXCLUSIVE")): 2178 lock_type = "EXCLUSIVE" 2179 elif self._match_text_seq("SHARE"): 2180 lock_type = "SHARE" 2181 elif self._match_text_seq("READ"): 2182 lock_type = "READ" 2183 elif self._match_text_seq("WRITE"): 2184 lock_type = "WRITE" 2185 elif self._match_text_seq("CHECKSUM"): 2186 lock_type = "CHECKSUM" 2187 else: 2188 lock_type = None 2189 2190 override = self._match_text_seq("OVERRIDE") 2191 2192 return self.expression( 2193 exp.LockingProperty, 2194 this=this, 2195 kind=kind, 2196 for_or_in=for_or_in, 2197 lock_type=lock_type, 2198 override=override, 2199 ) 2200 2201 def _parse_partition_by(self) -> t.List[exp.Expression]: 2202 if 
self._match(TokenType.PARTITION_BY): 2203 return self._parse_csv(self._parse_assignment) 2204 return [] 2205 2206 def _parse_partition_bound_spec(self) -> exp.PartitionBoundSpec: 2207 def _parse_partition_bound_expr() -> t.Optional[exp.Expression]: 2208 if self._match_text_seq("MINVALUE"): 2209 return exp.var("MINVALUE") 2210 if self._match_text_seq("MAXVALUE"): 2211 return exp.var("MAXVALUE") 2212 return self._parse_bitwise() 2213 2214 this: t.Optional[exp.Expression | t.List[exp.Expression]] = None 2215 expression = None 2216 from_expressions = None 2217 to_expressions = None 2218 2219 if self._match(TokenType.IN): 2220 this = self._parse_wrapped_csv(self._parse_bitwise) 2221 elif self._match(TokenType.FROM): 2222 from_expressions = self._parse_wrapped_csv(_parse_partition_bound_expr) 2223 self._match_text_seq("TO") 2224 to_expressions = self._parse_wrapped_csv(_parse_partition_bound_expr) 2225 elif self._match_text_seq("WITH", "(", "MODULUS"): 2226 this = self._parse_number() 2227 self._match_text_seq(",", "REMAINDER") 2228 expression = self._parse_number() 2229 self._match_r_paren() 2230 else: 2231 self.raise_error("Failed to parse partition bound spec.") 2232 2233 return self.expression( 2234 exp.PartitionBoundSpec, 2235 this=this, 2236 expression=expression, 2237 from_expressions=from_expressions, 2238 to_expressions=to_expressions, 2239 ) 2240 2241 # https://www.postgresql.org/docs/current/sql-createtable.html 2242 def _parse_partitioned_of(self) -> t.Optional[exp.PartitionedOfProperty]: 2243 if not self._match_text_seq("OF"): 2244 self._retreat(self._index - 1) 2245 return None 2246 2247 this = self._parse_table(schema=True) 2248 2249 if self._match(TokenType.DEFAULT): 2250 expression: exp.Var | exp.PartitionBoundSpec = exp.var("DEFAULT") 2251 elif self._match_text_seq("FOR", "VALUES"): 2252 expression = self._parse_partition_bound_spec() 2253 else: 2254 self.raise_error("Expecting either DEFAULT or FOR VALUES clause.") 2255 2256 return self.expression(exp.PartitionedOfProperty, this=this, expression=expression) 2257 2258 def _parse_partitioned_by(self) -> exp.PartitionedByProperty: 2259 self._match(TokenType.EQ) 2260 return self.expression( 2261 exp.PartitionedByProperty, 2262 this=self._parse_schema() or self._parse_bracket(self._parse_field()), 2263 ) 2264 2265 def _parse_withdata(self, no: bool = False) -> exp.WithDataProperty: 2266 if self._match_text_seq("AND", "STATISTICS"): 2267 statistics = True 2268 elif self._match_text_seq("AND", "NO", "STATISTICS"): 2269 statistics = False 2270 else: 2271 statistics = None 2272 2273 return self.expression(exp.WithDataProperty, no=no, statistics=statistics) 2274 2275 def _parse_contains_property(self) -> t.Optional[exp.SqlReadWriteProperty]: 2276 if self._match_text_seq("SQL"): 2277 return self.expression(exp.SqlReadWriteProperty, this="CONTAINS SQL") 2278 return None 2279 2280 def _parse_modifies_property(self) -> t.Optional[exp.SqlReadWriteProperty]: 2281 if self._match_text_seq("SQL", "DATA"): 2282 return self.expression(exp.SqlReadWriteProperty, this="MODIFIES SQL DATA") 2283 return None 2284 2285 def _parse_no_property(self) -> t.Optional[exp.Expression]: 2286 if self._match_text_seq("PRIMARY", "INDEX"): 2287 return exp.NoPrimaryIndexProperty() 2288 if self._match_text_seq("SQL"): 2289 return self.expression(exp.SqlReadWriteProperty, this="NO SQL") 2290 return None 2291 2292 def _parse_on_property(self) -> t.Optional[exp.Expression]: 2293 if self._match_text_seq("COMMIT", "PRESERVE", "ROWS"): 2294 return exp.OnCommitProperty() 2295 
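# A small sketch of the partition-bound parsers above, assuming
# sqlglot.parse_one and Postgres input:
#
#   import sqlglot
#   ddl = sqlglot.parse_one(
#       "CREATE TABLE t1 PARTITION OF t FOR VALUES FROM (1) TO (10)",
#       read="postgres",
#   )
#   # The resulting exp.PartitionedOfProperty wraps a PartitionBoundSpec whose
#   # from_expressions/to_expressions hold the literals 1 and 10.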
if self._match_text_seq("COMMIT", "DELETE", "ROWS"): 2296 return exp.OnCommitProperty(delete=True) 2297 return self.expression(exp.OnProperty, this=self._parse_schema(self._parse_id_var())) 2298 2299 def _parse_reads_property(self) -> t.Optional[exp.SqlReadWriteProperty]: 2300 if self._match_text_seq("SQL", "DATA"): 2301 return self.expression(exp.SqlReadWriteProperty, this="READS SQL DATA") 2302 return None 2303 2304 def _parse_distkey(self) -> exp.DistKeyProperty: 2305 return self.expression(exp.DistKeyProperty, this=self._parse_wrapped(self._parse_id_var)) 2306 2307 def _parse_create_like(self) -> t.Optional[exp.LikeProperty]: 2308 table = self._parse_table(schema=True) 2309 2310 options = [] 2311 while self._match_texts(("INCLUDING", "EXCLUDING")): 2312 this = self._prev.text.upper() 2313 2314 id_var = self._parse_id_var() 2315 if not id_var: 2316 return None 2317 2318 options.append( 2319 self.expression(exp.Property, this=this, value=exp.var(id_var.this.upper())) 2320 ) 2321 2322 return self.expression(exp.LikeProperty, this=table, expressions=options) 2323 2324 def _parse_sortkey(self, compound: bool = False) -> exp.SortKeyProperty: 2325 return self.expression( 2326 exp.SortKeyProperty, this=self._parse_wrapped_id_vars(), compound=compound 2327 ) 2328 2329 def _parse_character_set(self, default: bool = False) -> exp.CharacterSetProperty: 2330 self._match(TokenType.EQ) 2331 return self.expression( 2332 exp.CharacterSetProperty, this=self._parse_var_or_string(), default=default 2333 ) 2334 2335 def _parse_remote_with_connection(self) -> exp.RemoteWithConnectionModelProperty: 2336 self._match_text_seq("WITH", "CONNECTION") 2337 return self.expression( 2338 exp.RemoteWithConnectionModelProperty, this=self._parse_table_parts() 2339 ) 2340 2341 def _parse_returns(self) -> exp.ReturnsProperty: 2342 value: t.Optional[exp.Expression] 2343 null = None 2344 is_table = self._match(TokenType.TABLE) 2345 2346 if is_table: 2347 if self._match(TokenType.LT): 2348 value = self.expression( 2349 exp.Schema, 2350 this="TABLE", 2351 expressions=self._parse_csv(self._parse_struct_types), 2352 ) 2353 if not self._match(TokenType.GT): 2354 self.raise_error("Expecting >") 2355 else: 2356 value = self._parse_schema(exp.var("TABLE")) 2357 elif self._match_text_seq("NULL", "ON", "NULL", "INPUT"): 2358 null = True 2359 value = None 2360 else: 2361 value = self._parse_types() 2362 2363 return self.expression(exp.ReturnsProperty, this=value, is_table=is_table, null=null) 2364 2365 def _parse_describe(self) -> exp.Describe: 2366 kind = self._match_set(self.CREATABLES) and self._prev.text 2367 style = self._match_texts(("EXTENDED", "FORMATTED", "HISTORY")) and self._prev.text.upper() 2368 if self._match(TokenType.DOT): 2369 style = None 2370 self._retreat(self._index - 2) 2371 this = self._parse_table(schema=True) 2372 properties = self._parse_properties() 2373 expressions = properties.expressions if properties else None 2374 return self.expression( 2375 exp.Describe, this=this, style=style, kind=kind, expressions=expressions 2376 ) 2377 2378 def _parse_insert(self) -> exp.Insert: 2379 comments = ensure_list(self._prev_comments) 2380 hint = self._parse_hint() 2381 overwrite = self._match(TokenType.OVERWRITE) 2382 ignore = self._match(TokenType.IGNORE) 2383 local = self._match_text_seq("LOCAL") 2384 alternative = None 2385 is_function = None 2386 2387 if self._match_text_seq("DIRECTORY"): 2388 this: t.Optional[exp.Expression] = self.expression( 2389 exp.Directory, 2390 this=self._parse_var_or_string(), 2391 
local=local, 2392 row_format=self._parse_row_format(match_row=True), 2393 ) 2394 else: 2395 if self._match(TokenType.OR): 2396 alternative = self._match_texts(self.INSERT_ALTERNATIVES) and self._prev.text 2397 2398 self._match(TokenType.INTO) 2399 comments += ensure_list(self._prev_comments) 2400 self._match(TokenType.TABLE) 2401 is_function = self._match(TokenType.FUNCTION) 2402 2403 this = ( 2404 self._parse_table(schema=True, parse_partition=True) 2405 if not is_function 2406 else self._parse_function() 2407 ) 2408 2409 returning = self._parse_returning() 2410 2411 return self.expression( 2412 exp.Insert, 2413 comments=comments, 2414 hint=hint, 2415 is_function=is_function, 2416 this=this, 2417 stored=self._match_text_seq("STORED") and self._parse_stored(), 2418 by_name=self._match_text_seq("BY", "NAME"), 2419 exists=self._parse_exists(), 2420 where=self._match_pair(TokenType.REPLACE, TokenType.WHERE) and self._parse_assignment(), 2421 expression=self._parse_derived_table_values() or self._parse_ddl_select(), 2422 conflict=self._parse_on_conflict(), 2423 returning=returning or self._parse_returning(), 2424 overwrite=overwrite, 2425 alternative=alternative, 2426 ignore=ignore, 2427 ) 2428 2429 def _parse_kill(self) -> exp.Kill: 2430 kind = exp.var(self._prev.text) if self._match_texts(("CONNECTION", "QUERY")) else None 2431 2432 return self.expression( 2433 exp.Kill, 2434 this=self._parse_primary(), 2435 kind=kind, 2436 ) 2437 2438 def _parse_on_conflict(self) -> t.Optional[exp.OnConflict]: 2439 conflict = self._match_text_seq("ON", "CONFLICT") 2440 duplicate = self._match_text_seq("ON", "DUPLICATE", "KEY") 2441 2442 if not conflict and not duplicate: 2443 return None 2444 2445 conflict_keys = None 2446 constraint = None 2447 2448 if conflict: 2449 if self._match_text_seq("ON", "CONSTRAINT"): 2450 constraint = self._parse_id_var() 2451 elif self._match(TokenType.L_PAREN): 2452 conflict_keys = self._parse_csv(self._parse_id_var) 2453 self._match_r_paren() 2454 2455 action = self._parse_var_from_options(self.CONFLICT_ACTIONS) 2456 if self._prev.token_type == TokenType.UPDATE: 2457 self._match(TokenType.SET) 2458 expressions = self._parse_csv(self._parse_equality) 2459 else: 2460 expressions = None 2461 2462 return self.expression( 2463 exp.OnConflict, 2464 duplicate=duplicate, 2465 expressions=expressions, 2466 action=action, 2467 conflict_keys=conflict_keys, 2468 constraint=constraint, 2469 ) 2470 2471 def _parse_returning(self) -> t.Optional[exp.Returning]: 2472 if not self._match(TokenType.RETURNING): 2473 return None 2474 return self.expression( 2475 exp.Returning, 2476 expressions=self._parse_csv(self._parse_expression), 2477 into=self._match(TokenType.INTO) and self._parse_table_part(), 2478 ) 2479 2480 def _parse_row(self) -> t.Optional[exp.RowFormatSerdeProperty | exp.RowFormatDelimitedProperty]: 2481 if not self._match(TokenType.FORMAT): 2482 return None 2483 return self._parse_row_format() 2484 2485 def _parse_serde_properties(self, with_: bool = False) -> t.Optional[exp.SerdeProperties]: 2486 index = self._index 2487 with_ = with_ or self._match_text_seq("WITH") 2488 2489 if not self._match(TokenType.SERDE_PROPERTIES): 2490 self._retreat(index) 2491 return None 2492 return self.expression( 2493 exp.SerdeProperties, 2494 **{ # type: ignore 2495 "expressions": self._parse_wrapped_properties(), 2496 "with": with_, 2497 }, 2498 ) 2499 2500 def _parse_row_format( 2501 self, match_row: bool = False 2502 ) -> t.Optional[exp.RowFormatSerdeProperty | exp.RowFormatDelimitedProperty]: 2503 
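# A quick sketch of _parse_insert and _parse_on_conflict working together,
# assuming sqlglot.parse_one:
#
#   import sqlglot
#   ins = sqlglot.parse_one(
#       "INSERT INTO t (a) VALUES (1) ON CONFLICT (a) DO NOTHING",
#       read="postgres",
#   )
#   ins.args["conflict"]  # exp.OnConflict; the (a) list lands in conflict_keys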
if match_row and not self._match_pair(TokenType.ROW, TokenType.FORMAT): 2504 return None 2505 2506 if self._match_text_seq("SERDE"): 2507 this = self._parse_string() 2508 2509 serde_properties = self._parse_serde_properties() 2510 2511 return self.expression( 2512 exp.RowFormatSerdeProperty, this=this, serde_properties=serde_properties 2513 ) 2514 2515 self._match_text_seq("DELIMITED") 2516 2517 kwargs = {} 2518 2519 if self._match_text_seq("FIELDS", "TERMINATED", "BY"): 2520 kwargs["fields"] = self._parse_string() 2521 if self._match_text_seq("ESCAPED", "BY"): 2522 kwargs["escaped"] = self._parse_string() 2523 if self._match_text_seq("COLLECTION", "ITEMS", "TERMINATED", "BY"): 2524 kwargs["collection_items"] = self._parse_string() 2525 if self._match_text_seq("MAP", "KEYS", "TERMINATED", "BY"): 2526 kwargs["map_keys"] = self._parse_string() 2527 if self._match_text_seq("LINES", "TERMINATED", "BY"): 2528 kwargs["lines"] = self._parse_string() 2529 if self._match_text_seq("NULL", "DEFINED", "AS"): 2530 kwargs["null"] = self._parse_string() 2531 2532 return self.expression(exp.RowFormatDelimitedProperty, **kwargs) # type: ignore 2533 2534 def _parse_load(self) -> exp.LoadData | exp.Command: 2535 if self._match_text_seq("DATA"): 2536 local = self._match_text_seq("LOCAL") 2537 self._match_text_seq("INPATH") 2538 inpath = self._parse_string() 2539 overwrite = self._match(TokenType.OVERWRITE) 2540 self._match_pair(TokenType.INTO, TokenType.TABLE) 2541 2542 return self.expression( 2543 exp.LoadData, 2544 this=self._parse_table(schema=True), 2545 local=local, 2546 overwrite=overwrite, 2547 inpath=inpath, 2548 partition=self._parse_partition(), 2549 input_format=self._match_text_seq("INPUTFORMAT") and self._parse_string(), 2550 serde=self._match_text_seq("SERDE") and self._parse_string(), 2551 ) 2552 return self._parse_as_command(self._prev) 2553 2554 def _parse_delete(self) -> exp.Delete: 2555 # This handles MySQL's "Multiple-Table Syntax" 2556 # https://dev.mysql.com/doc/refman/8.0/en/delete.html 2557 tables = None 2558 comments = self._prev_comments 2559 if not self._match(TokenType.FROM, advance=False): 2560 tables = self._parse_csv(self._parse_table) or None 2561 2562 returning = self._parse_returning() 2563 2564 return self.expression( 2565 exp.Delete, 2566 comments=comments, 2567 tables=tables, 2568 this=self._match(TokenType.FROM) and self._parse_table(joins=True), 2569 using=self._match(TokenType.USING) and self._parse_table(joins=True), 2570 where=self._parse_where(), 2571 returning=returning or self._parse_returning(), 2572 limit=self._parse_limit(), 2573 ) 2574 2575 def _parse_update(self) -> exp.Update: 2576 comments = self._prev_comments 2577 this = self._parse_table(joins=True, alias_tokens=self.UPDATE_ALIAS_TOKENS) 2578 expressions = self._match(TokenType.SET) and self._parse_csv(self._parse_equality) 2579 returning = self._parse_returning() 2580 return self.expression( 2581 exp.Update, 2582 comments=comments, 2583 **{ # type: ignore 2584 "this": this, 2585 "expressions": expressions, 2586 "from": self._parse_from(joins=True), 2587 "where": self._parse_where(), 2588 "returning": returning or self._parse_returning(), 2589 "order": self._parse_order(), 2590 "limit": self._parse_limit(), 2591 }, 2592 ) 2593 2594 def _parse_uncache(self) -> exp.Uncache: 2595 if not self._match(TokenType.TABLE): 2596 self.raise_error("Expecting TABLE after UNCACHE") 2597 2598 return self.expression( 2599 exp.Uncache, exists=self._parse_exists(), this=self._parse_table(schema=True) 2600 ) 2601 2602 def 
_parse_cache(self) -> exp.Cache: 2603 lazy = self._match_text_seq("LAZY") 2604 self._match(TokenType.TABLE) 2605 table = self._parse_table(schema=True) 2606 2607 options = [] 2608 if self._match_text_seq("OPTIONS"): 2609 self._match_l_paren() 2610 k = self._parse_string() 2611 self._match(TokenType.EQ) 2612 v = self._parse_string() 2613 options = [k, v] 2614 self._match_r_paren() 2615 2616 self._match(TokenType.ALIAS) 2617 return self.expression( 2618 exp.Cache, 2619 this=table, 2620 lazy=lazy, 2621 options=options, 2622 expression=self._parse_select(nested=True), 2623 ) 2624 2625 def _parse_partition(self) -> t.Optional[exp.Partition]: 2626 if not self._match(TokenType.PARTITION): 2627 return None 2628 2629 return self.expression( 2630 exp.Partition, expressions=self._parse_wrapped_csv(self._parse_assignment) 2631 ) 2632 2633 def _parse_value(self) -> t.Optional[exp.Tuple]: 2634 if self._match(TokenType.L_PAREN): 2635 expressions = self._parse_csv(self._parse_expression) 2636 self._match_r_paren() 2637 return self.expression(exp.Tuple, expressions=expressions) 2638 2639 # In some dialects we can have VALUES 1, 2 which results in 1 column & 2 rows. 2640 expression = self._parse_expression() 2641 if expression: 2642 return self.expression(exp.Tuple, expressions=[expression]) 2643 return None 2644 2645 def _parse_projections(self) -> t.List[exp.Expression]: 2646 return self._parse_expressions() 2647 2648 def _parse_select( 2649 self, 2650 nested: bool = False, 2651 table: bool = False, 2652 parse_subquery_alias: bool = True, 2653 parse_set_operation: bool = True, 2654 ) -> t.Optional[exp.Expression]: 2655 cte = self._parse_with() 2656 2657 if cte: 2658 this = self._parse_statement() 2659 2660 if not this: 2661 self.raise_error("Failed to parse any statement following CTE") 2662 return cte 2663 2664 if "with" in this.arg_types: 2665 this.set("with", cte) 2666 else: 2667 self.raise_error(f"{this.key} does not support CTE") 2668 this = cte 2669 2670 return this 2671 2672 # duckdb supports leading with FROM x 2673 from_ = self._parse_from() if self._match(TokenType.FROM, advance=False) else None 2674 2675 if self._match(TokenType.SELECT): 2676 comments = self._prev_comments 2677 2678 hint = self._parse_hint() 2679 all_ = self._match(TokenType.ALL) 2680 distinct = self._match_set(self.DISTINCT_TOKENS) 2681 2682 kind = ( 2683 self._match(TokenType.ALIAS) 2684 and self._match_texts(("STRUCT", "VALUE")) 2685 and self._prev.text.upper() 2686 ) 2687 2688 if distinct: 2689 distinct = self.expression( 2690 exp.Distinct, 2691 on=self._parse_value() if self._match(TokenType.ON) else None, 2692 ) 2693 2694 if all_ and distinct: 2695 self.raise_error("Cannot specify both ALL and DISTINCT after SELECT") 2696 2697 limit = self._parse_limit(top=True) 2698 projections = self._parse_projections() 2699 2700 this = self.expression( 2701 exp.Select, 2702 kind=kind, 2703 hint=hint, 2704 distinct=distinct, 2705 expressions=projections, 2706 limit=limit, 2707 ) 2708 this.comments = comments 2709 2710 into = self._parse_into() 2711 if into: 2712 this.set("into", into) 2713 2714 if not from_: 2715 from_ = self._parse_from() 2716 2717 if from_: 2718 this.set("from", from_) 2719 2720 this = self._parse_query_modifiers(this) 2721 elif (table or nested) and self._match(TokenType.L_PAREN): 2722 if self._match(TokenType.PIVOT): 2723 this = self._parse_simplified_pivot() 2724 elif self._match(TokenType.FROM): 2725 this = exp.select("*").from_( 2726 t.cast(exp.From, self._parse_from(skip_from_token=True)) 2727 ) 2728 else: 2729 
this = ( 2730 self._parse_table() 2731 if table 2732 else self._parse_select(nested=True, parse_set_operation=False) 2733 ) 2734 this = self._parse_query_modifiers(self._parse_set_operations(this)) 2735 2736 self._match_r_paren() 2737 2738 # We return early here so that the UNION isn't attached to the subquery by the 2739 # following call to _parse_set_operations, but instead becomes the parent node 2740 return self._parse_subquery(this, parse_alias=parse_subquery_alias) 2741 elif self._match(TokenType.VALUES, advance=False): 2742 this = self._parse_derived_table_values() 2743 elif from_: 2744 this = exp.select("*").from_(from_.this, copy=False) 2745 else: 2746 this = None 2747 2748 if parse_set_operation: 2749 return self._parse_set_operations(this) 2750 return this 2751 2752 def _parse_with(self, skip_with_token: bool = False) -> t.Optional[exp.With]: 2753 if not skip_with_token and not self._match(TokenType.WITH): 2754 return None 2755 2756 comments = self._prev_comments 2757 recursive = self._match(TokenType.RECURSIVE) 2758 2759 expressions = [] 2760 while True: 2761 expressions.append(self._parse_cte()) 2762 2763 if not self._match(TokenType.COMMA) and not self._match(TokenType.WITH): 2764 break 2765 else: 2766 self._match(TokenType.WITH) 2767 2768 return self.expression( 2769 exp.With, comments=comments, expressions=expressions, recursive=recursive 2770 ) 2771 2772 def _parse_cte(self) -> exp.CTE: 2773 alias = self._parse_table_alias(self.ID_VAR_TOKENS) 2774 if not alias or not alias.this: 2775 self.raise_error("Expected CTE to have alias") 2776 2777 self._match(TokenType.ALIAS) 2778 2779 if self._match_text_seq("NOT", "MATERIALIZED"): 2780 materialized = False 2781 elif self._match_text_seq("MATERIALIZED"): 2782 materialized = True 2783 else: 2784 materialized = None 2785 2786 return self.expression( 2787 exp.CTE, 2788 this=self._parse_wrapped(self._parse_statement), 2789 alias=alias, 2790 materialized=materialized, 2791 ) 2792 2793 def _parse_table_alias( 2794 self, alias_tokens: t.Optional[t.Collection[TokenType]] = None 2795 ) -> t.Optional[exp.TableAlias]: 2796 any_token = self._match(TokenType.ALIAS) 2797 alias = ( 2798 self._parse_id_var(any_token=any_token, tokens=alias_tokens or self.TABLE_ALIAS_TOKENS) 2799 or self._parse_string_as_identifier() 2800 ) 2801 2802 index = self._index 2803 if self._match(TokenType.L_PAREN): 2804 columns = self._parse_csv(self._parse_function_parameter) 2805 self._match_r_paren() if columns else self._retreat(index) 2806 else: 2807 columns = None 2808 2809 if not alias and not columns: 2810 return None 2811 2812 table_alias = self.expression(exp.TableAlias, this=alias, columns=columns) 2813 2814 # We bubble up comments from the Identifier to the TableAlias 2815 if isinstance(alias, exp.Identifier): 2816 table_alias.add_comments(alias.pop_comments()) 2817 2818 return table_alias 2819 2820 def _parse_subquery( 2821 self, this: t.Optional[exp.Expression], parse_alias: bool = True 2822 ) -> t.Optional[exp.Subquery]: 2823 if not this: 2824 return None 2825 2826 return self.expression( 2827 exp.Subquery, 2828 this=this, 2829 pivots=self._parse_pivots(), 2830 alias=self._parse_table_alias() if parse_alias else None, 2831 ) 2832 2833 def _implicit_unnests_to_explicit(self, this: E) -> E: 2834 from sqlglot.optimizer.normalize_identifiers import normalize_identifiers as _norm 2835 2836 refs = {_norm(this.args["from"].this.copy(), dialect=self.dialect).alias_or_name} 2837 for i, join in enumerate(this.args.get("joins") or []): 2838 table = join.this 2839 
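# _parse_with and _parse_cte above produce the exp.With node that
# _parse_select attaches to the enclosing statement, e.g.:
#
#   import sqlglot
#   q = sqlglot.parse_one("WITH c AS (SELECT 1 AS x) SELECT x FROM c")
#   q.args["with"]  # exp.With(expressions=[exp.CTE(...)]) on the Select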
normalized_table = table.copy() 2840 normalized_table.meta["maybe_column"] = True 2841 normalized_table = _norm(normalized_table, dialect=self.dialect) 2842 2843 if isinstance(table, exp.Table) and not join.args.get("on"): 2844 if normalized_table.parts[0].name in refs: 2845 table_as_column = table.to_column() 2846 unnest = exp.Unnest(expressions=[table_as_column]) 2847 2848 # Table.to_column creates a parent Alias node that we want to convert to 2849 # a TableAlias and attach to the Unnest, so it matches the parser's output 2850 if isinstance(table.args.get("alias"), exp.TableAlias): 2851 table_as_column.replace(table_as_column.this) 2852 exp.alias_(unnest, None, table=[table.args["alias"].this], copy=False) 2853 2854 table.replace(unnest) 2855 2856 refs.add(normalized_table.alias_or_name) 2857 2858 return this 2859 2860 def _parse_query_modifiers( 2861 self, this: t.Optional[exp.Expression] 2862 ) -> t.Optional[exp.Expression]: 2863 if isinstance(this, (exp.Query, exp.Table)): 2864 for join in self._parse_joins(): 2865 this.append("joins", join) 2866 for lateral in iter(self._parse_lateral, None): 2867 this.append("laterals", lateral) 2868 2869 while True: 2870 if self._match_set(self.QUERY_MODIFIER_PARSERS, advance=False): 2871 parser = self.QUERY_MODIFIER_PARSERS[self._curr.token_type] 2872 key, expression = parser(self) 2873 2874 if expression: 2875 this.set(key, expression) 2876 if key == "limit": 2877 offset = expression.args.pop("offset", None) 2878 2879 if offset: 2880 offset = exp.Offset(expression=offset) 2881 this.set("offset", offset) 2882 2883 limit_by_expressions = expression.expressions 2884 expression.set("expressions", None) 2885 offset.set("expressions", limit_by_expressions) 2886 continue 2887 break 2888 2889 if self.SUPPORTS_IMPLICIT_UNNEST and this and "from" in this.args: 2890 this = self._implicit_unnests_to_explicit(this) 2891 2892 return this 2893 2894 def _parse_hint(self) -> t.Optional[exp.Hint]: 2895 if self._match(TokenType.HINT): 2896 hints = [] 2897 for hint in iter( 2898 lambda: self._parse_csv( 2899 lambda: self._parse_function() or self._parse_var(upper=True) 2900 ), 2901 [], 2902 ): 2903 hints.extend(hint) 2904 2905 if not self._match_pair(TokenType.STAR, TokenType.SLASH): 2906 self.raise_error("Expected */ after HINT") 2907 2908 return self.expression(exp.Hint, expressions=hints) 2909 2910 return None 2911 2912 def _parse_into(self) -> t.Optional[exp.Into]: 2913 if not self._match(TokenType.INTO): 2914 return None 2915 2916 temp = self._match(TokenType.TEMPORARY) 2917 unlogged = self._match_text_seq("UNLOGGED") 2918 self._match(TokenType.TABLE) 2919 2920 return self.expression( 2921 exp.Into, this=self._parse_table(schema=True), temporary=temp, unlogged=unlogged 2922 ) 2923 2924 def _parse_from( 2925 self, joins: bool = False, skip_from_token: bool = False 2926 ) -> t.Optional[exp.From]: 2927 if not skip_from_token and not self._match(TokenType.FROM): 2928 return None 2929 2930 return self.expression( 2931 exp.From, comments=self._prev_comments, this=self._parse_table(joins=joins) 2932 ) 2933 2934 def _parse_match_recognize_measure(self) -> exp.MatchRecognizeMeasure: 2935 return self.expression( 2936 exp.MatchRecognizeMeasure, 2937 window_frame=self._match_texts(("FINAL", "RUNNING")) and self._prev.text.upper(), 2938 this=self._parse_expression(), 2939 ) 2940 2941 def _parse_match_recognize(self) -> t.Optional[exp.MatchRecognize]: 2942 if not self._match(TokenType.MATCH_RECOGNIZE): 2943 return None 2944 2945 self._match_l_paren() 2946 2947 partition = 
self._parse_partition_by() 2948 order = self._parse_order() 2949 2950 measures = ( 2951 self._parse_csv(self._parse_match_recognize_measure) 2952 if self._match_text_seq("MEASURES") 2953 else None 2954 ) 2955 2956 if self._match_text_seq("ONE", "ROW", "PER", "MATCH"): 2957 rows = exp.var("ONE ROW PER MATCH") 2958 elif self._match_text_seq("ALL", "ROWS", "PER", "MATCH"): 2959 text = "ALL ROWS PER MATCH" 2960 if self._match_text_seq("SHOW", "EMPTY", "MATCHES"): 2961 text += " SHOW EMPTY MATCHES" 2962 elif self._match_text_seq("OMIT", "EMPTY", "MATCHES"): 2963 text += " OMIT EMPTY MATCHES" 2964 elif self._match_text_seq("WITH", "UNMATCHED", "ROWS"): 2965 text += " WITH UNMATCHED ROWS" 2966 rows = exp.var(text) 2967 else: 2968 rows = None 2969 2970 if self._match_text_seq("AFTER", "MATCH", "SKIP"): 2971 text = "AFTER MATCH SKIP" 2972 if self._match_text_seq("PAST", "LAST", "ROW"): 2973 text += " PAST LAST ROW" 2974 elif self._match_text_seq("TO", "NEXT", "ROW"): 2975 text += " TO NEXT ROW" 2976 elif self._match_text_seq("TO", "FIRST"): 2977 text += f" TO FIRST {self._advance_any().text}" # type: ignore 2978 elif self._match_text_seq("TO", "LAST"): 2979 text += f" TO LAST {self._advance_any().text}" # type: ignore 2980 after = exp.var(text) 2981 else: 2982 after = None 2983 2984 if self._match_text_seq("PATTERN"): 2985 self._match_l_paren() 2986 2987 if not self._curr: 2988 self.raise_error("Expecting )", self._curr) 2989 2990 paren = 1 2991 start = self._curr 2992 2993 while self._curr and paren > 0: 2994 if self._curr.token_type == TokenType.L_PAREN: 2995 paren += 1 2996 if self._curr.token_type == TokenType.R_PAREN: 2997 paren -= 1 2998 2999 end = self._prev 3000 self._advance() 3001 3002 if paren > 0: 3003 self.raise_error("Expecting )", self._curr) 3004 3005 pattern = exp.var(self._find_sql(start, end)) 3006 else: 3007 pattern = None 3008 3009 define = ( 3010 self._parse_csv(self._parse_name_as_expression) 3011 if self._match_text_seq("DEFINE") 3012 else None 3013 ) 3014 3015 self._match_r_paren() 3016 3017 return self.expression( 3018 exp.MatchRecognize, 3019 partition_by=partition, 3020 order=order, 3021 measures=measures, 3022 rows=rows, 3023 after=after, 3024 pattern=pattern, 3025 define=define, 3026 alias=self._parse_table_alias(), 3027 ) 3028 3029 def _parse_lateral(self) -> t.Optional[exp.Lateral]: 3030 cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY) 3031 if not cross_apply and self._match_pair(TokenType.OUTER, TokenType.APPLY): 3032 cross_apply = False 3033 3034 if cross_apply is not None: 3035 this = self._parse_select(table=True) 3036 view = None 3037 outer = None 3038 elif self._match(TokenType.LATERAL): 3039 this = self._parse_select(table=True) 3040 view = self._match(TokenType.VIEW) 3041 outer = self._match(TokenType.OUTER) 3042 else: 3043 return None 3044 3045 if not this: 3046 this = ( 3047 self._parse_unnest() 3048 or self._parse_function() 3049 or self._parse_id_var(any_token=False) 3050 ) 3051 3052 while self._match(TokenType.DOT): 3053 this = exp.Dot( 3054 this=this, 3055 expression=self._parse_function() or self._parse_id_var(any_token=False), 3056 ) 3057 3058 if view: 3059 table = self._parse_id_var(any_token=False) 3060 columns = self._parse_csv(self._parse_id_var) if self._match(TokenType.ALIAS) else [] 3061 table_alias: t.Optional[exp.TableAlias] = self.expression( 3062 exp.TableAlias, this=table, columns=columns 3063 ) 3064 elif isinstance(this, (exp.Subquery, exp.Unnest)) and this.alias: 3065 # We move the alias from the lateral's child node to 
the lateral itself 3066 table_alias = this.args["alias"].pop() 3067 else: 3068 table_alias = self._parse_table_alias() 3069 3070 return self.expression( 3071 exp.Lateral, 3072 this=this, 3073 view=view, 3074 outer=outer, 3075 alias=table_alias, 3076 cross_apply=cross_apply, 3077 ) 3078 3079 def _parse_join_parts( 3080 self, 3081 ) -> t.Tuple[t.Optional[Token], t.Optional[Token], t.Optional[Token]]: 3082 return ( 3083 self._match_set(self.JOIN_METHODS) and self._prev, 3084 self._match_set(self.JOIN_SIDES) and self._prev, 3085 self._match_set(self.JOIN_KINDS) and self._prev, 3086 ) 3087 3088 def _parse_join( 3089 self, skip_join_token: bool = False, parse_bracket: bool = False 3090 ) -> t.Optional[exp.Join]: 3091 if self._match(TokenType.COMMA): 3092 return self.expression(exp.Join, this=self._parse_table()) 3093 3094 index = self._index 3095 method, side, kind = self._parse_join_parts() 3096 hint = self._prev.text if self._match_texts(self.JOIN_HINTS) else None 3097 join = self._match(TokenType.JOIN) 3098 3099 if not skip_join_token and not join: 3100 self._retreat(index) 3101 kind = None 3102 method = None 3103 side = None 3104 3105 outer_apply = self._match_pair(TokenType.OUTER, TokenType.APPLY, False) 3106 cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY, False) 3107 3108 if not skip_join_token and not join and not outer_apply and not cross_apply: 3109 return None 3110 3111 kwargs: t.Dict[str, t.Any] = {"this": self._parse_table(parse_bracket=parse_bracket)} 3112 3113 if method: 3114 kwargs["method"] = method.text 3115 if side: 3116 kwargs["side"] = side.text 3117 if kind: 3118 kwargs["kind"] = kind.text 3119 if hint: 3120 kwargs["hint"] = hint 3121 3122 if self._match(TokenType.MATCH_CONDITION): 3123 kwargs["match_condition"] = self._parse_wrapped(self._parse_comparison) 3124 3125 if self._match(TokenType.ON): 3126 kwargs["on"] = self._parse_assignment() 3127 elif self._match(TokenType.USING): 3128 kwargs["using"] = self._parse_wrapped_id_vars() 3129 elif not isinstance(kwargs["this"], exp.Unnest) and not ( 3130 kind and kind.token_type == TokenType.CROSS 3131 ): 3132 index = self._index 3133 joins: t.Optional[list] = list(self._parse_joins()) 3134 3135 if joins and self._match(TokenType.ON): 3136 kwargs["on"] = self._parse_assignment() 3137 elif joins and self._match(TokenType.USING): 3138 kwargs["using"] = self._parse_wrapped_id_vars() 3139 else: 3140 joins = None 3141 self._retreat(index) 3142 3143 kwargs["this"].set("joins", joins if joins else None) 3144 3145 comments = [c for token in (method, side, kind) if token for c in token.comments] 3146 return self.expression(exp.Join, comments=comments, **kwargs) 3147 3148 def _parse_opclass(self) -> t.Optional[exp.Expression]: 3149 this = self._parse_assignment() 3150 3151 if self._match_texts(self.OPCLASS_FOLLOW_KEYWORDS, advance=False): 3152 return this 3153 3154 if not self._match_set(self.OPTYPE_FOLLOW_TOKENS, advance=False): 3155 return self.expression(exp.Opclass, this=this, expression=self._parse_table_parts()) 3156 3157 return this 3158 3159 def _parse_index_params(self) -> exp.IndexParameters: 3160 using = self._parse_var(any_token=True) if self._match(TokenType.USING) else None 3161 3162 if self._match(TokenType.L_PAREN, advance=False): 3163 columns = self._parse_wrapped_csv(self._parse_with_operator) 3164 else: 3165 columns = None 3166 3167 include = self._parse_wrapped_id_vars() if self._match_text_seq("INCLUDE") else None 3168 partition_by = self._parse_partition_by() 3169 with_storage = 
self._match(TokenType.WITH) and self._parse_wrapped_properties() 3170 tablespace = ( 3171 self._parse_var(any_token=True) 3172 if self._match_text_seq("USING", "INDEX", "TABLESPACE") 3173 else None 3174 ) 3175 where = self._parse_where() 3176 3177 return self.expression( 3178 exp.IndexParameters, 3179 using=using, 3180 columns=columns, 3181 include=include, 3182 partition_by=partition_by, 3183 where=where, 3184 with_storage=with_storage, 3185 tablespace=tablespace, 3186 ) 3187 3188 def _parse_index( 3189 self, index: t.Optional[exp.Expression] = None, anonymous: bool = False 3190 ) -> t.Optional[exp.Index]: 3191 if index or anonymous: 3192 unique = None 3193 primary = None 3194 amp = None 3195 3196 self._match(TokenType.ON) 3197 self._match(TokenType.TABLE) # hive 3198 table = self._parse_table_parts(schema=True) 3199 else: 3200 unique = self._match(TokenType.UNIQUE) 3201 primary = self._match_text_seq("PRIMARY") 3202 amp = self._match_text_seq("AMP") 3203 3204 if not self._match(TokenType.INDEX): 3205 return None 3206 3207 index = self._parse_id_var() 3208 table = None 3209 3210 params = self._parse_index_params() 3211 3212 return self.expression( 3213 exp.Index, 3214 this=index, 3215 table=table, 3216 unique=unique, 3217 primary=primary, 3218 amp=amp, 3219 params=params, 3220 ) 3221 3222 def _parse_table_hints(self) -> t.Optional[t.List[exp.Expression]]: 3223 hints: t.List[exp.Expression] = [] 3224 if self._match_pair(TokenType.WITH, TokenType.L_PAREN): 3225 # https://learn.microsoft.com/en-us/sql/t-sql/queries/hints-transact-sql-table?view=sql-server-ver16 3226 hints.append( 3227 self.expression( 3228 exp.WithTableHint, 3229 expressions=self._parse_csv( 3230 lambda: self._parse_function() or self._parse_var(any_token=True) 3231 ), 3232 ) 3233 ) 3234 self._match_r_paren() 3235 else: 3236 # https://dev.mysql.com/doc/refman/8.0/en/index-hints.html 3237 while self._match_set(self.TABLE_INDEX_HINT_TOKENS): 3238 hint = exp.IndexTableHint(this=self._prev.text.upper()) 3239 3240 self._match_texts(("INDEX", "KEY")) 3241 if self._match(TokenType.FOR): 3242 hint.set("target", self._advance_any() and self._prev.text.upper()) 3243 3244 hint.set("expressions", self._parse_wrapped_id_vars()) 3245 hints.append(hint) 3246 3247 return hints or None 3248 3249 def _parse_table_part(self, schema: bool = False) -> t.Optional[exp.Expression]: 3250 return ( 3251 (not schema and self._parse_function(optional_parens=False)) 3252 or self._parse_id_var(any_token=False) 3253 or self._parse_string_as_identifier() 3254 or self._parse_placeholder() 3255 ) 3256 3257 def _parse_table_parts( 3258 self, schema: bool = False, is_db_reference: bool = False, wildcard: bool = False 3259 ) -> exp.Table: 3260 catalog = None 3261 db = None 3262 table: t.Optional[exp.Expression | str] = self._parse_table_part(schema=schema) 3263 3264 while self._match(TokenType.DOT): 3265 if catalog: 3266 # This allows nesting the table in arbitrarily many dot expressions if needed 3267 table = self.expression( 3268 exp.Dot, this=table, expression=self._parse_table_part(schema=schema) 3269 ) 3270 else: 3271 catalog = db 3272 db = table 3273 # "" used for tsql FROM a..b case 3274 table = self._parse_table_part(schema=schema) or "" 3275 3276 if ( 3277 wildcard 3278 and self._is_connected() 3279 and (isinstance(table, exp.Identifier) or not table) 3280 and self._match(TokenType.STAR) 3281 ): 3282 if isinstance(table, exp.Identifier): 3283 table.args["this"] += "*" 3284 else: 3285 table = exp.Identifier(this="*") 3286 3287 # We bubble up comments 
from the Identifier to the Table 3288 comments = table.pop_comments() if isinstance(table, exp.Expression) else None 3289 3290 if is_db_reference: 3291 catalog = db 3292 db = table 3293 table = None 3294 3295 if not table and not is_db_reference: 3296 self.raise_error(f"Expected table name but got {self._curr}") 3297 if not db and is_db_reference: 3298 self.raise_error(f"Expected database name but got {self._curr}") 3299 3300 return self.expression( 3301 exp.Table, 3302 comments=comments, 3303 this=table, 3304 db=db, 3305 catalog=catalog, 3306 pivots=self._parse_pivots(), 3307 ) 3308 3309 def _parse_table( 3310 self, 3311 schema: bool = False, 3312 joins: bool = False, 3313 alias_tokens: t.Optional[t.Collection[TokenType]] = None, 3314 parse_bracket: bool = False, 3315 is_db_reference: bool = False, 3316 parse_partition: bool = False, 3317 ) -> t.Optional[exp.Expression]: 3318 lateral = self._parse_lateral() 3319 if lateral: 3320 return lateral 3321 3322 unnest = self._parse_unnest() 3323 if unnest: 3324 return unnest 3325 3326 values = self._parse_derived_table_values() 3327 if values: 3328 return values 3329 3330 subquery = self._parse_select(table=True) 3331 if subquery: 3332 if not subquery.args.get("pivots"): 3333 subquery.set("pivots", self._parse_pivots()) 3334 return subquery 3335 3336 bracket = parse_bracket and self._parse_bracket(None) 3337 bracket = self.expression(exp.Table, this=bracket) if bracket else None 3338 3339 only = self._match(TokenType.ONLY) 3340 3341 this = t.cast( 3342 exp.Expression, 3343 bracket 3344 or self._parse_bracket( 3345 self._parse_table_parts(schema=schema, is_db_reference=is_db_reference) 3346 ), 3347 ) 3348 3349 if only: 3350 this.set("only", only) 3351 3352 # Postgres supports a wildcard (table) suffix operator, which is a no-op in this context 3353 self._match_text_seq("*") 3354 3355 parse_partition = parse_partition or self.SUPPORTS_PARTITION_SELECTION 3356 if parse_partition and self._match(TokenType.PARTITION, advance=False): 3357 this.set("partition", self._parse_partition()) 3358 3359 if schema: 3360 return self._parse_schema(this=this) 3361 3362 version = self._parse_version() 3363 3364 if version: 3365 this.set("version", version) 3366 3367 if self.dialect.ALIAS_POST_TABLESAMPLE: 3368 table_sample = self._parse_table_sample() 3369 3370 alias = self._parse_table_alias(alias_tokens=alias_tokens or self.TABLE_ALIAS_TOKENS) 3371 if alias: 3372 this.set("alias", alias) 3373 3374 if isinstance(this, exp.Table) and self._match_text_seq("AT"): 3375 return self.expression( 3376 exp.AtIndex, this=this.to_column(copy=False), expression=self._parse_id_var() 3377 ) 3378 3379 this.set("hints", self._parse_table_hints()) 3380 3381 if not this.args.get("pivots"): 3382 this.set("pivots", self._parse_pivots()) 3383 3384 if not self.dialect.ALIAS_POST_TABLESAMPLE: 3385 table_sample = self._parse_table_sample() 3386 3387 if table_sample: 3388 table_sample.set("this", this) 3389 this = table_sample 3390 3391 if joins: 3392 for join in self._parse_joins(): 3393 this.append("joins", join) 3394 3395 if self._match_pair(TokenType.WITH, TokenType.ORDINALITY): 3396 this.set("ordinality", True) 3397 this.set("alias", self._parse_table_alias()) 3398 3399 return this 3400 3401 def _parse_version(self) -> t.Optional[exp.Version]: 3402 if self._match(TokenType.TIMESTAMP_SNAPSHOT): 3403 this = "TIMESTAMP" 3404 elif self._match(TokenType.VERSION_SNAPSHOT): 3405 this = "VERSION" 3406 else: 3407 return None 3408 3409 if self._match_set((TokenType.FROM, TokenType.BETWEEN)): 
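# _parse_table_parts above resolves dotted references into catalog/db/table,
# e.g. (assuming sqlglot.parse_one):
#
#   import sqlglot
#   tbl = sqlglot.parse_one("SELECT * FROM c.d.t").args["from"].this
#   (tbl.catalog, tbl.db, tbl.name)  # ("c", "d", "t")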
3410 kind = self._prev.text.upper() 3411 start = self._parse_bitwise() 3412 self._match_texts(("TO", "AND")) 3413 end = self._parse_bitwise() 3414 expression: t.Optional[exp.Expression] = self.expression( 3415 exp.Tuple, expressions=[start, end] 3416 ) 3417 elif self._match_text_seq("CONTAINED", "IN"): 3418 kind = "CONTAINED IN" 3419 expression = self.expression( 3420 exp.Tuple, expressions=self._parse_wrapped_csv(self._parse_bitwise) 3421 ) 3422 elif self._match(TokenType.ALL): 3423 kind = "ALL" 3424 expression = None 3425 else: 3426 self._match_text_seq("AS", "OF") 3427 kind = "AS OF" 3428 expression = self._parse_type() 3429 3430 return self.expression(exp.Version, this=this, expression=expression, kind=kind) 3431 3432 def _parse_unnest(self, with_alias: bool = True) -> t.Optional[exp.Unnest]: 3433 if not self._match(TokenType.UNNEST): 3434 return None 3435 3436 expressions = self._parse_wrapped_csv(self._parse_equality) 3437 offset = self._match_pair(TokenType.WITH, TokenType.ORDINALITY) 3438 3439 alias = self._parse_table_alias() if with_alias else None 3440 3441 if alias: 3442 if self.dialect.UNNEST_COLUMN_ONLY: 3443 if alias.args.get("columns"): 3444 self.raise_error("Unexpected extra column alias in unnest.") 3445 3446 alias.set("columns", [alias.this]) 3447 alias.set("this", None) 3448 3449 columns = alias.args.get("columns") or [] 3450 if offset and len(expressions) < len(columns): 3451 offset = columns.pop() 3452 3453 if not offset and self._match_pair(TokenType.WITH, TokenType.OFFSET): 3454 self._match(TokenType.ALIAS) 3455 offset = self._parse_id_var( 3456 any_token=False, tokens=self.UNNEST_OFFSET_ALIAS_TOKENS 3457 ) or exp.to_identifier("offset") 3458 3459 return self.expression(exp.Unnest, expressions=expressions, alias=alias, offset=offset) 3460 3461 def _parse_derived_table_values(self) -> t.Optional[exp.Values]: 3462 is_derived = self._match_pair(TokenType.L_PAREN, TokenType.VALUES) 3463 if not is_derived and not self._match_text_seq("VALUES"): 3464 return None 3465 3466 expressions = self._parse_csv(self._parse_value) 3467 alias = self._parse_table_alias() 3468 3469 if is_derived: 3470 self._match_r_paren() 3471 3472 return self.expression( 3473 exp.Values, expressions=expressions, alias=alias or self._parse_table_alias() 3474 ) 3475 3476 def _parse_table_sample(self, as_modifier: bool = False) -> t.Optional[exp.TableSample]: 3477 if not self._match(TokenType.TABLE_SAMPLE) and not ( 3478 as_modifier and self._match_text_seq("USING", "SAMPLE") 3479 ): 3480 return None 3481 3482 bucket_numerator = None 3483 bucket_denominator = None 3484 bucket_field = None 3485 percent = None 3486 size = None 3487 seed = None 3488 3489 method = self._parse_var(tokens=(TokenType.ROW,), upper=True) 3490 matched_l_paren = self._match(TokenType.L_PAREN) 3491 3492 if self.TABLESAMPLE_CSV: 3493 num = None 3494 expressions = self._parse_csv(self._parse_primary) 3495 else: 3496 expressions = None 3497 num = ( 3498 self._parse_factor() 3499 if self._match(TokenType.NUMBER, advance=False) 3500 else self._parse_primary() or self._parse_placeholder() 3501 ) 3502 3503 if self._match_text_seq("BUCKET"): 3504 bucket_numerator = self._parse_number() 3505 self._match_text_seq("OUT", "OF") 3506 bucket_denominator = self._parse_number() 3507 self._match(TokenType.ON) 3508 bucket_field = self._parse_field() 3509 elif self._match_set((TokenType.PERCENT, TokenType.MOD)): 3510 percent = num 3511 elif self._match(TokenType.ROWS) or not self.dialect.TABLESAMPLE_SIZE_IS_PERCENT: 3512 size = 
num 3513 else: 3514 percent = num 3515 3516 if matched_l_paren: 3517 self._match_r_paren() 3518 3519 if self._match(TokenType.L_PAREN): 3520 method = self._parse_var(upper=True) 3521 seed = self._match(TokenType.COMMA) and self._parse_number() 3522 self._match_r_paren() 3523 elif self._match_texts(("SEED", "REPEATABLE")): 3524 seed = self._parse_wrapped(self._parse_number) 3525 3526 if not method and self.DEFAULT_SAMPLING_METHOD: 3527 method = exp.var(self.DEFAULT_SAMPLING_METHOD) 3528 3529 return self.expression( 3530 exp.TableSample, 3531 expressions=expressions, 3532 method=method, 3533 bucket_numerator=bucket_numerator, 3534 bucket_denominator=bucket_denominator, 3535 bucket_field=bucket_field, 3536 percent=percent, 3537 size=size, 3538 seed=seed, 3539 ) 3540 3541 def _parse_pivots(self) -> t.Optional[t.List[exp.Pivot]]: 3542 return list(iter(self._parse_pivot, None)) or None 3543 3544 def _parse_joins(self) -> t.Iterator[exp.Join]: 3545 return iter(self._parse_join, None) 3546 3547 # https://duckdb.org/docs/sql/statements/pivot 3548 def _parse_simplified_pivot(self) -> exp.Pivot: 3549 def _parse_on() -> t.Optional[exp.Expression]: 3550 this = self._parse_bitwise() 3551 return self._parse_in(this) if self._match(TokenType.IN) else this 3552 3553 this = self._parse_table() 3554 expressions = self._match(TokenType.ON) and self._parse_csv(_parse_on) 3555 using = self._match(TokenType.USING) and self._parse_csv( 3556 lambda: self._parse_alias(self._parse_function()) 3557 ) 3558 group = self._parse_group() 3559 return self.expression( 3560 exp.Pivot, this=this, expressions=expressions, using=using, group=group 3561 ) 3562 3563 def _parse_pivot_in(self) -> exp.In: 3564 def _parse_aliased_expression() -> t.Optional[exp.Expression]: 3565 this = self._parse_assignment() 3566 3567 self._match(TokenType.ALIAS) 3568 alias = self._parse_field() 3569 if alias: 3570 return self.expression(exp.PivotAlias, this=this, alias=alias) 3571 3572 return this 3573 3574 value = self._parse_column() 3575 3576 if not self._match_pair(TokenType.IN, TokenType.L_PAREN): 3577 self.raise_error("Expecting IN (") 3578 3579 aliased_expressions = self._parse_csv(_parse_aliased_expression) 3580 3581 self._match_r_paren() 3582 return self.expression(exp.In, this=value, expressions=aliased_expressions) 3583 3584 def _parse_pivot(self) -> t.Optional[exp.Pivot]: 3585 index = self._index 3586 include_nulls = None 3587 3588 if self._match(TokenType.PIVOT): 3589 unpivot = False 3590 elif self._match(TokenType.UNPIVOT): 3591 unpivot = True 3592 3593 # https://docs.databricks.com/en/sql/language-manual/sql-ref-syntax-qry-select-unpivot.html#syntax 3594 if self._match_text_seq("INCLUDE", "NULLS"): 3595 include_nulls = True 3596 elif self._match_text_seq("EXCLUDE", "NULLS"): 3597 include_nulls = False 3598 else: 3599 return None 3600 3601 expressions = [] 3602 3603 if not self._match(TokenType.L_PAREN): 3604 self._retreat(index) 3605 return None 3606 3607 if unpivot: 3608 expressions = self._parse_csv(self._parse_column) 3609 else: 3610 expressions = self._parse_csv(lambda: self._parse_alias(self._parse_function())) 3611 3612 if not expressions: 3613 self.raise_error("Failed to parse PIVOT's aggregation list") 3614 3615 if not self._match(TokenType.FOR): 3616 self.raise_error("Expecting FOR") 3617 3618 field = self._parse_pivot_in() 3619 3620 self._match_r_paren() 3621 3622 pivot = self.expression( 3623 exp.Pivot, 3624 expressions=expressions, 3625 field=field, 3626 unpivot=unpivot, 3627 include_nulls=include_nulls, 3628 ) 3629 3630 
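# A sketch of the PIVOT path above, assuming sqlglot.parse_one and
# Snowflake-style syntax:
#
#   import sqlglot
#   q = sqlglot.parse_one(
#       "SELECT * FROM t PIVOT (SUM(v) FOR k IN ('a', 'b'))", read="snowflake"
#   )
#   q.args["from"].this.args["pivots"][0]  # exp.Pivot(unpivot=False, ...)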
if not self._match_set((TokenType.PIVOT, TokenType.UNPIVOT), advance=False): 3631 pivot.set("alias", self._parse_table_alias()) 3632 3633 if not unpivot: 3634 names = self._pivot_column_names(t.cast(t.List[exp.Expression], expressions)) 3635 3636 columns: t.List[exp.Expression] = [] 3637 for fld in pivot.args["field"].expressions: 3638 field_name = fld.sql() if self.IDENTIFY_PIVOT_STRINGS else fld.alias_or_name 3639 for name in names: 3640 if self.PREFIXED_PIVOT_COLUMNS: 3641 name = f"{name}_{field_name}" if name else field_name 3642 else: 3643 name = f"{field_name}_{name}" if name else field_name 3644 3645 columns.append(exp.to_identifier(name)) 3646 3647 pivot.set("columns", columns) 3648 3649 return pivot 3650 3651 def _pivot_column_names(self, aggregations: t.List[exp.Expression]) -> t.List[str]: 3652 return [agg.alias for agg in aggregations] 3653 3654 def _parse_prewhere(self, skip_where_token: bool = False) -> t.Optional[exp.PreWhere]: 3655 if not skip_where_token and not self._match(TokenType.PREWHERE): 3656 return None 3657 3658 return self.expression( 3659 exp.PreWhere, comments=self._prev_comments, this=self._parse_assignment() 3660 ) 3661 3662 def _parse_where(self, skip_where_token: bool = False) -> t.Optional[exp.Where]: 3663 if not skip_where_token and not self._match(TokenType.WHERE): 3664 return None 3665 3666 return self.expression( 3667 exp.Where, comments=self._prev_comments, this=self._parse_assignment() 3668 ) 3669 3670 def _parse_group(self, skip_group_by_token: bool = False) -> t.Optional[exp.Group]: 3671 if not skip_group_by_token and not self._match(TokenType.GROUP_BY): 3672 return None 3673 3674 elements: t.Dict[str, t.Any] = defaultdict(list) 3675 3676 if self._match(TokenType.ALL): 3677 elements["all"] = True 3678 elif self._match(TokenType.DISTINCT): 3679 elements["all"] = False 3680 3681 while True: 3682 expressions = self._parse_csv( 3683 lambda: None 3684 if self._match(TokenType.ROLLUP, advance=False) 3685 else self._parse_assignment() 3686 ) 3687 if expressions: 3688 elements["expressions"].extend(expressions) 3689 3690 grouping_sets = self._parse_grouping_sets() 3691 if grouping_sets: 3692 elements["grouping_sets"].extend(grouping_sets) 3693 3694 rollup = None 3695 cube = None 3696 totals = None 3697 3698 index = self._index 3699 with_ = self._match(TokenType.WITH) 3700 if self._match(TokenType.ROLLUP): 3701 rollup = with_ or self._parse_wrapped_csv(self._parse_column) 3702 elements["rollup"].extend(ensure_list(rollup)) 3703 3704 if self._match(TokenType.CUBE): 3705 cube = with_ or self._parse_wrapped_csv(self._parse_column) 3706 elements["cube"].extend(ensure_list(cube)) 3707 3708 if self._match_text_seq("TOTALS"): 3709 totals = True 3710 elements["totals"] = True # type: ignore 3711 3712 if not (grouping_sets or rollup or cube or totals): 3713 if with_: 3714 self._retreat(index) 3715 break 3716 3717 return self.expression(exp.Group, **elements) # type: ignore 3718 3719 def _parse_grouping_sets(self) -> t.Optional[t.List[exp.Expression]]: 3720 if not self._match(TokenType.GROUPING_SETS): 3721 return None 3722 3723 return self._parse_wrapped_csv(self._parse_grouping_set) 3724 3725 def _parse_grouping_set(self) -> t.Optional[exp.Expression]: 3726 if self._match(TokenType.L_PAREN): 3727 grouping_set = self._parse_csv(self._parse_column) 3728 self._match_r_paren() 3729 return self.expression(exp.Tuple, expressions=grouping_set) 3730 3731 return self._parse_column() 3732 3733 def _parse_having(self, skip_having_token: bool = False) -> t.Optional[exp.Having]: 
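# _parse_group above folds plain keys, GROUPING SETS, ROLLUP, and CUBE into a
# single exp.Group node, e.g.:
#
#   import sqlglot
#   q = sqlglot.parse_one(
#       "SELECT a, b, SUM(c) FROM t GROUP BY GROUPING SETS ((a), (a, b))"
#   )
#   q.args["group"].args["grouping_sets"]  # [Tuple(a), Tuple(a, b)]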
3734 if not skip_having_token and not self._match(TokenType.HAVING): 3735 return None 3736 return self.expression(exp.Having, this=self._parse_assignment()) 3737 3738 def _parse_qualify(self) -> t.Optional[exp.Qualify]: 3739 if not self._match(TokenType.QUALIFY): 3740 return None 3741 return self.expression(exp.Qualify, this=self._parse_assignment()) 3742 3743 def _parse_connect(self, skip_start_token: bool = False) -> t.Optional[exp.Connect]: 3744 if skip_start_token: 3745 start = None 3746 elif self._match(TokenType.START_WITH): 3747 start = self._parse_assignment() 3748 else: 3749 return None 3750 3751 self._match(TokenType.CONNECT_BY) 3752 nocycle = self._match_text_seq("NOCYCLE") 3753 self.NO_PAREN_FUNCTION_PARSERS["PRIOR"] = lambda self: self.expression( 3754 exp.Prior, this=self._parse_bitwise() 3755 ) 3756 connect = self._parse_assignment() 3757 self.NO_PAREN_FUNCTION_PARSERS.pop("PRIOR") 3758 3759 if not start and self._match(TokenType.START_WITH): 3760 start = self._parse_assignment() 3761 3762 return self.expression(exp.Connect, start=start, connect=connect, nocycle=nocycle) 3763 3764 def _parse_name_as_expression(self) -> exp.Alias: 3765 return self.expression( 3766 exp.Alias, 3767 alias=self._parse_id_var(any_token=True), 3768 this=self._match(TokenType.ALIAS) and self._parse_assignment(), 3769 ) 3770 3771 def _parse_interpolate(self) -> t.Optional[t.List[exp.Expression]]: 3772 if self._match_text_seq("INTERPOLATE"): 3773 return self._parse_wrapped_csv(self._parse_name_as_expression) 3774 return None 3775 3776 def _parse_order( 3777 self, this: t.Optional[exp.Expression] = None, skip_order_token: bool = False 3778 ) -> t.Optional[exp.Expression]: 3779 siblings = None 3780 if not skip_order_token and not self._match(TokenType.ORDER_BY): 3781 if not self._match(TokenType.ORDER_SIBLINGS_BY): 3782 return this 3783 3784 siblings = True 3785 3786 return self.expression( 3787 exp.Order, 3788 this=this, 3789 expressions=self._parse_csv(self._parse_ordered), 3790 interpolate=self._parse_interpolate(), 3791 siblings=siblings, 3792 ) 3793 3794 def _parse_sort(self, exp_class: t.Type[E], token: TokenType) -> t.Optional[E]: 3795 if not self._match(token): 3796 return None 3797 return self.expression(exp_class, expressions=self._parse_csv(self._parse_ordered)) 3798 3799 def _parse_ordered( 3800 self, parse_method: t.Optional[t.Callable] = None 3801 ) -> t.Optional[exp.Ordered]: 3802 this = parse_method() if parse_method else self._parse_assignment() 3803 if not this: 3804 return None 3805 3806 asc = self._match(TokenType.ASC) 3807 desc = self._match(TokenType.DESC) or (asc and False) 3808 3809 is_nulls_first = self._match_text_seq("NULLS", "FIRST") 3810 is_nulls_last = self._match_text_seq("NULLS", "LAST") 3811 3812 nulls_first = is_nulls_first or False 3813 explicitly_null_ordered = is_nulls_first or is_nulls_last 3814 3815 if ( 3816 not explicitly_null_ordered 3817 and ( 3818 (not desc and self.dialect.NULL_ORDERING == "nulls_are_small") 3819 or (desc and self.dialect.NULL_ORDERING != "nulls_are_small") 3820 ) 3821 and self.dialect.NULL_ORDERING != "nulls_are_last" 3822 ): 3823 nulls_first = True 3824 3825 if self._match_text_seq("WITH", "FILL"): 3826 with_fill = self.expression( 3827 exp.WithFill, 3828 **{ # type: ignore 3829 "from": self._match(TokenType.FROM) and self._parse_bitwise(), 3830 "to": self._match_text_seq("TO") and self._parse_bitwise(), 3831 "step": self._match_text_seq("STEP") and self._parse_bitwise(), 3832 }, 3833 ) 3834 else: 3835 with_fill = None 3836 3837 return 
self.expression( 3838 exp.Ordered, this=this, desc=desc, nulls_first=nulls_first, with_fill=with_fill 3839 ) 3840 3841 def _parse_limit( 3842 self, 3843 this: t.Optional[exp.Expression] = None, 3844 top: bool = False, 3845 skip_limit_token: bool = False, 3846 ) -> t.Optional[exp.Expression]: 3847 if skip_limit_token or self._match(TokenType.TOP if top else TokenType.LIMIT): 3848 comments = self._prev_comments 3849 if top: 3850 limit_paren = self._match(TokenType.L_PAREN) 3851 expression = self._parse_term() if limit_paren else self._parse_number() 3852 3853 if limit_paren: 3854 self._match_r_paren() 3855 else: 3856 expression = self._parse_term() 3857 3858 if self._match(TokenType.COMMA): 3859 offset = expression 3860 expression = self._parse_term() 3861 else: 3862 offset = None 3863 3864 limit_exp = self.expression( 3865 exp.Limit, 3866 this=this, 3867 expression=expression, 3868 offset=offset, 3869 comments=comments, 3870 expressions=self._parse_limit_by(), 3871 ) 3872 3873 return limit_exp 3874 3875 if self._match(TokenType.FETCH): 3876 direction = self._match_set((TokenType.FIRST, TokenType.NEXT)) 3877 direction = self._prev.text.upper() if direction else "FIRST" 3878 3879 count = self._parse_field(tokens=self.FETCH_TOKENS) 3880 percent = self._match(TokenType.PERCENT) 3881 3882 self._match_set((TokenType.ROW, TokenType.ROWS)) 3883 3884 only = self._match_text_seq("ONLY") 3885 with_ties = self._match_text_seq("WITH", "TIES") 3886 3887 if only and with_ties: 3888 self.raise_error("Cannot specify both ONLY and WITH TIES in FETCH clause") 3889 3890 return self.expression( 3891 exp.Fetch, 3892 direction=direction, 3893 count=count, 3894 percent=percent, 3895 with_ties=with_ties, 3896 ) 3897 3898 return this 3899 3900 def _parse_offset(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 3901 if not self._match(TokenType.OFFSET): 3902 return this 3903 3904 count = self._parse_term() 3905 self._match_set((TokenType.ROW, TokenType.ROWS)) 3906 3907 return self.expression( 3908 exp.Offset, this=this, expression=count, expressions=self._parse_limit_by() 3909 ) 3910 3911 def _parse_limit_by(self) -> t.Optional[t.List[exp.Expression]]: 3912 return self._match_text_seq("BY") and self._parse_csv(self._parse_bitwise) 3913 3914 def _parse_locks(self) -> t.List[exp.Lock]: 3915 locks = [] 3916 while True: 3917 if self._match_text_seq("FOR", "UPDATE"): 3918 update = True 3919 elif self._match_text_seq("FOR", "SHARE") or self._match_text_seq( 3920 "LOCK", "IN", "SHARE", "MODE" 3921 ): 3922 update = False 3923 else: 3924 break 3925 3926 expressions = None 3927 if self._match_text_seq("OF"): 3928 expressions = self._parse_csv(lambda: self._parse_table(schema=True)) 3929 3930 wait: t.Optional[bool | exp.Expression] = None 3931 if self._match_text_seq("NOWAIT"): 3932 wait = True 3933 elif self._match_text_seq("WAIT"): 3934 wait = self._parse_primary() 3935 elif self._match_text_seq("SKIP", "LOCKED"): 3936 wait = False 3937 3938 locks.append( 3939 self.expression(exp.Lock, update=update, expressions=expressions, wait=wait) 3940 ) 3941 3942 return locks 3943 3944 def _parse_set_operations(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 3945 while this and self._match_set(self.SET_OPERATIONS): 3946 token_type = self._prev.token_type 3947 3948 if token_type == TokenType.UNION: 3949 operation = exp.Union 3950 elif token_type == TokenType.EXCEPT: 3951 operation = exp.Except 3952 else: 3953 operation = exp.Intersect 3954 3955 comments = self._prev.comments 3956 
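            # Illustrative note (not part of the library source): the next line encodes
            # the SQL default that a bare set operation deduplicates. For example,
            # parsing "SELECT 1 UNION SELECT 2" should yield exp.Union(distinct=True),
            # while "SELECT 1 UNION ALL SELECT 2" should yield exp.Union(distinct=False).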
distinct = self._match(TokenType.DISTINCT) or not self._match(TokenType.ALL) 3957 by_name = self._match_text_seq("BY", "NAME") 3958 expression = self._parse_select(nested=True, parse_set_operation=False) 3959 3960 this = self.expression( 3961 operation, 3962 comments=comments, 3963 this=this, 3964 distinct=distinct, 3965 by_name=by_name, 3966 expression=expression, 3967 ) 3968 3969 if isinstance(this, exp.Union) and self.MODIFIERS_ATTACHED_TO_UNION: 3970 expression = this.expression 3971 3972 if expression: 3973 for arg in self.UNION_MODIFIERS: 3974 expr = expression.args.get(arg) 3975 if expr: 3976 this.set(arg, expr.pop()) 3977 3978 return this 3979 3980 def _parse_expression(self) -> t.Optional[exp.Expression]: 3981 return self._parse_alias(self._parse_assignment()) 3982 3983 def _parse_assignment(self) -> t.Optional[exp.Expression]: 3984 this = self._parse_disjunction() 3985 3986 while self._match_set(self.ASSIGNMENT): 3987 this = self.expression( 3988 self.ASSIGNMENT[self._prev.token_type], 3989 this=this, 3990 comments=self._prev_comments, 3991 expression=self._parse_assignment(), 3992 ) 3993 3994 return this 3995 3996 def _parse_disjunction(self) -> t.Optional[exp.Expression]: 3997 return self._parse_tokens(self._parse_conjunction, self.DISJUNCTION) 3998 3999 def _parse_conjunction(self) -> t.Optional[exp.Expression]: 4000 return self._parse_tokens(self._parse_equality, self.CONJUNCTION) 4001 4002 def _parse_equality(self) -> t.Optional[exp.Expression]: 4003 return self._parse_tokens(self._parse_comparison, self.EQUALITY) 4004 4005 def _parse_comparison(self) -> t.Optional[exp.Expression]: 4006 return self._parse_tokens(self._parse_range, self.COMPARISON) 4007 4008 def _parse_range(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 4009 this = this or self._parse_bitwise() 4010 negate = self._match(TokenType.NOT) 4011 4012 if self._match_set(self.RANGE_PARSERS): 4013 expression = self.RANGE_PARSERS[self._prev.token_type](self, this) 4014 if not expression: 4015 return this 4016 4017 this = expression 4018 elif self._match(TokenType.ISNULL): 4019 this = self.expression(exp.Is, this=this, expression=exp.Null()) 4020 4021 # Postgres supports ISNULL and NOTNULL for conditions. 
4022 # https://blog.andreiavram.ro/postgresql-null-composite-type/ 4023 if self._match(TokenType.NOTNULL): 4024 this = self.expression(exp.Is, this=this, expression=exp.Null()) 4025 this = self.expression(exp.Not, this=this) 4026 4027 if negate: 4028 this = self.expression(exp.Not, this=this) 4029 4030 if self._match(TokenType.IS): 4031 this = self._parse_is(this) 4032 4033 return this 4034 4035 def _parse_is(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 4036 index = self._index - 1 4037 negate = self._match(TokenType.NOT) 4038 4039 if self._match_text_seq("DISTINCT", "FROM"): 4040 klass = exp.NullSafeEQ if negate else exp.NullSafeNEQ 4041 return self.expression(klass, this=this, expression=self._parse_bitwise()) 4042 4043 expression = self._parse_null() or self._parse_boolean() 4044 if not expression: 4045 self._retreat(index) 4046 return None 4047 4048 this = self.expression(exp.Is, this=this, expression=expression) 4049 return self.expression(exp.Not, this=this) if negate else this 4050 4051 def _parse_in(self, this: t.Optional[exp.Expression], alias: bool = False) -> exp.In: 4052 unnest = self._parse_unnest(with_alias=False) 4053 if unnest: 4054 this = self.expression(exp.In, this=this, unnest=unnest) 4055 elif self._match_set((TokenType.L_PAREN, TokenType.L_BRACKET)): 4056 matched_l_paren = self._prev.token_type == TokenType.L_PAREN 4057 expressions = self._parse_csv(lambda: self._parse_select_or_expression(alias=alias)) 4058 4059 if len(expressions) == 1 and isinstance(expressions[0], exp.Query): 4060 this = self.expression(exp.In, this=this, query=expressions[0].subquery(copy=False)) 4061 else: 4062 this = self.expression(exp.In, this=this, expressions=expressions) 4063 4064 if matched_l_paren: 4065 self._match_r_paren(this) 4066 elif not self._match(TokenType.R_BRACKET, expression=this): 4067 self.raise_error("Expecting ]") 4068 else: 4069 this = self.expression(exp.In, this=this, field=self._parse_field()) 4070 4071 return this 4072 4073 def _parse_between(self, this: t.Optional[exp.Expression]) -> exp.Between: 4074 low = self._parse_bitwise() 4075 self._match(TokenType.AND) 4076 high = self._parse_bitwise() 4077 return self.expression(exp.Between, this=this, low=low, high=high) 4078 4079 def _parse_escape(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 4080 if not self._match(TokenType.ESCAPE): 4081 return this 4082 return self.expression(exp.Escape, this=this, expression=self._parse_string()) 4083 4084 def _parse_interval(self, match_interval: bool = True) -> t.Optional[exp.Add | exp.Interval]: 4085 index = self._index 4086 4087 if not self._match(TokenType.INTERVAL) and match_interval: 4088 return None 4089 4090 if self._match(TokenType.STRING, advance=False): 4091 this = self._parse_primary() 4092 else: 4093 this = self._parse_term() 4094 4095 if not this or ( 4096 isinstance(this, exp.Column) 4097 and not this.table 4098 and not this.this.quoted 4099 and this.name.upper() == "IS" 4100 ): 4101 self._retreat(index) 4102 return None 4103 4104 unit = self._parse_function() or ( 4105 not self._match(TokenType.ALIAS, advance=False) 4106 and self._parse_var(any_token=True, upper=True) 4107 ) 4108 4109 # Most dialects support, e.g., the form INTERVAL '5' day, thus we try to parse 4110 # each INTERVAL expression into this canonical form so it's easy to transpile 4111 if this and this.is_number: 4112 this = exp.Literal.string(this.name) 4113 elif this and this.is_string: 4114 parts = exp.INTERVAL_STRING_RE.findall(this.name) 4115 if 
len(parts) == 1: 4116 if unit: 4117 # Unconsume the eagerly-parsed unit, since the real unit was part of the string 4118 self._retreat(self._index - 1) 4119 4120 this = exp.Literal.string(parts[0][0]) 4121 unit = self.expression(exp.Var, this=parts[0][1].upper()) 4122 4123 if self.INTERVAL_SPANS and self._match_text_seq("TO"): 4124 unit = self.expression( 4125 exp.IntervalSpan, this=unit, expression=self._parse_var(any_token=True, upper=True) 4126 ) 4127 4128 interval = self.expression(exp.Interval, this=this, unit=unit) 4129 4130 index = self._index 4131 self._match(TokenType.PLUS) 4132 4133 # Convert INTERVAL 'val_1' unit_1 [+] ... [+] 'val_n' unit_n into a sum of intervals 4134 if self._match_set((TokenType.STRING, TokenType.NUMBER), advance=False): 4135 return self.expression( 4136 exp.Add, this=interval, expression=self._parse_interval(match_interval=False) 4137 ) 4138 4139 self._retreat(index) 4140 return interval 4141 4142 def _parse_bitwise(self) -> t.Optional[exp.Expression]: 4143 this = self._parse_term() 4144 4145 while True: 4146 if self._match_set(self.BITWISE): 4147 this = self.expression( 4148 self.BITWISE[self._prev.token_type], 4149 this=this, 4150 expression=self._parse_term(), 4151 ) 4152 elif self.dialect.DPIPE_IS_STRING_CONCAT and self._match(TokenType.DPIPE): 4153 this = self.expression( 4154 exp.DPipe, 4155 this=this, 4156 expression=self._parse_term(), 4157 safe=not self.dialect.STRICT_STRING_CONCAT, 4158 ) 4159 elif self._match(TokenType.DQMARK): 4160 this = self.expression(exp.Coalesce, this=this, expressions=self._parse_term()) 4161 elif self._match_pair(TokenType.LT, TokenType.LT): 4162 this = self.expression( 4163 exp.BitwiseLeftShift, this=this, expression=self._parse_term() 4164 ) 4165 elif self._match_pair(TokenType.GT, TokenType.GT): 4166 this = self.expression( 4167 exp.BitwiseRightShift, this=this, expression=self._parse_term() 4168 ) 4169 else: 4170 break 4171 4172 return this 4173 4174 def _parse_term(self) -> t.Optional[exp.Expression]: 4175 return self._parse_tokens(self._parse_factor, self.TERM) 4176 4177 def _parse_factor(self) -> t.Optional[exp.Expression]: 4178 parse_method = self._parse_exponent if self.EXPONENT else self._parse_unary 4179 this = parse_method() 4180 4181 while self._match_set(self.FACTOR): 4182 klass = self.FACTOR[self._prev.token_type] 4183 comments = self._prev_comments 4184 expression = parse_method() 4185 4186 if not expression and klass is exp.IntDiv and self._prev.text.isalpha(): 4187 self._retreat(self._index - 1) 4188 return this 4189 4190 this = self.expression(klass, this=this, comments=comments, expression=expression) 4191 4192 if isinstance(this, exp.Div): 4193 this.args["typed"] = self.dialect.TYPED_DIVISION 4194 this.args["safe"] = self.dialect.SAFE_DIVISION 4195 4196 return this 4197 4198 def _parse_exponent(self) -> t.Optional[exp.Expression]: 4199 return self._parse_tokens(self._parse_unary, self.EXPONENT) 4200 4201 def _parse_unary(self) -> t.Optional[exp.Expression]: 4202 if self._match_set(self.UNARY_PARSERS): 4203 return self.UNARY_PARSERS[self._prev.token_type](self) 4204 return self._parse_at_time_zone(self._parse_type()) 4205 4206 def _parse_type( 4207 self, parse_interval: bool = True, fallback_to_identifier: bool = False 4208 ) -> t.Optional[exp.Expression]: 4209 interval = parse_interval and self._parse_interval() 4210 if interval: 4211 return interval 4212 4213 index = self._index 4214 data_type = self._parse_types(check_func=True, allow_identifiers=False) 4215 4216 if data_type: 4217 index2 = 
self._index 4218 this = self._parse_primary() 4219 4220 if isinstance(this, exp.Literal): 4221 parser = self.TYPE_LITERAL_PARSERS.get(data_type.this) 4222 if parser: 4223 return parser(self, this, data_type) 4224 4225 return self.expression(exp.Cast, this=this, to=data_type) 4226 4227 # The expressions arg gets set by the parser when we have something like DECIMAL(38, 0) 4228 # in the input SQL. In that case, we'll produce these tokens: DECIMAL ( 38 , 0 ) 4229 # 4230 # If the index difference here is greater than 1, that means the parser itself must have 4231 # consumed additional tokens such as the DECIMAL scale and precision in the above example. 4232 # 4233 # If it's not greater than 1, then it must be 1, because we've consumed at least the type 4234 # keyword, meaning that the expressions arg of the DataType must have gotten set by a 4235 # callable in the TYPE_CONVERTERS mapping. For example, Snowflake converts DECIMAL to 4236 # DECIMAL(38, 0) in order to facilitate the data type's transpilation. 4237 # 4238 # In these cases, we don't really want to return the converted type, but instead retreat 4239 # and try to parse a Column or Identifier in the section below. 4240 if data_type.expressions and index2 - index > 1: 4241 self._retreat(index2) 4242 return self._parse_column_ops(data_type) 4243 4244 self._retreat(index) 4245 4246 if fallback_to_identifier: 4247 return self._parse_id_var() 4248 4249 this = self._parse_column() 4250 return this and self._parse_column_ops(this) 4251 4252 def _parse_type_size(self) -> t.Optional[exp.DataTypeParam]: 4253 this = self._parse_type() 4254 if not this: 4255 return None 4256 4257 if isinstance(this, exp.Column) and not this.table: 4258 this = exp.var(this.name.upper()) 4259 4260 return self.expression( 4261 exp.DataTypeParam, this=this, expression=self._parse_var(any_token=True) 4262 ) 4263 4264 def _parse_types( 4265 self, check_func: bool = False, schema: bool = False, allow_identifiers: bool = True 4266 ) -> t.Optional[exp.Expression]: 4267 index = self._index 4268 4269 this: t.Optional[exp.Expression] = None 4270 prefix = self._match_text_seq("SYSUDTLIB", ".") 4271 4272 if not self._match_set(self.TYPE_TOKENS): 4273 identifier = allow_identifiers and self._parse_id_var( 4274 any_token=False, tokens=(TokenType.VAR,) 4275 ) 4276 if identifier: 4277 tokens = self.dialect.tokenize(identifier.name) 4278 4279 if len(tokens) != 1: 4280 self.raise_error("Unexpected identifier", self._prev) 4281 4282 if tokens[0].token_type in self.TYPE_TOKENS: 4283 self._prev = tokens[0] 4284 elif self.dialect.SUPPORTS_USER_DEFINED_TYPES: 4285 type_name = identifier.name 4286 4287 while self._match(TokenType.DOT): 4288 type_name = f"{type_name}.{self._advance_any() and self._prev.text}" 4289 4290 this = exp.DataType.build(type_name, udt=True) 4291 else: 4292 self._retreat(self._index - 1) 4293 return None 4294 else: 4295 return None 4296 4297 type_token = self._prev.token_type 4298 4299 if type_token == TokenType.PSEUDO_TYPE: 4300 return self.expression(exp.PseudoType, this=self._prev.text.upper()) 4301 4302 if type_token == TokenType.OBJECT_IDENTIFIER: 4303 return self.expression(exp.ObjectIdentifier, this=self._prev.text.upper()) 4304 4305 nested = type_token in self.NESTED_TYPE_TOKENS 4306 is_struct = type_token in self.STRUCT_TYPE_TOKENS 4307 is_aggregate = type_token in self.AGGREGATE_TYPE_TOKENS 4308 expressions = None 4309 maybe_func = False 4310 4311 if self._match(TokenType.L_PAREN): 4312 if is_struct: 4313 expressions = self._parse_csv(lambda:
self._parse_struct_types(type_required=True)) 4314 elif nested: 4315 expressions = self._parse_csv( 4316 lambda: self._parse_types( 4317 check_func=check_func, schema=schema, allow_identifiers=allow_identifiers 4318 ) 4319 ) 4320 elif type_token in self.ENUM_TYPE_TOKENS: 4321 expressions = self._parse_csv(self._parse_equality) 4322 elif is_aggregate: 4323 func_or_ident = self._parse_function(anonymous=True) or self._parse_id_var( 4324 any_token=False, tokens=(TokenType.VAR,) 4325 ) 4326 if not func_or_ident or not self._match(TokenType.COMMA): 4327 return None 4328 expressions = self._parse_csv( 4329 lambda: self._parse_types( 4330 check_func=check_func, schema=schema, allow_identifiers=allow_identifiers 4331 ) 4332 ) 4333 expressions.insert(0, func_or_ident) 4334 else: 4335 expressions = self._parse_csv(self._parse_type_size) 4336 4337 if not expressions or not self._match(TokenType.R_PAREN): 4338 self._retreat(index) 4339 return None 4340 4341 maybe_func = True 4342 4343 values: t.Optional[t.List[exp.Expression]] = None 4344 4345 if nested and self._match(TokenType.LT): 4346 if is_struct: 4347 expressions = self._parse_csv(lambda: self._parse_struct_types(type_required=True)) 4348 else: 4349 expressions = self._parse_csv( 4350 lambda: self._parse_types( 4351 check_func=check_func, schema=schema, allow_identifiers=allow_identifiers 4352 ) 4353 ) 4354 4355 if not self._match(TokenType.GT): 4356 self.raise_error("Expecting >") 4357 4358 if self._match_set((TokenType.L_BRACKET, TokenType.L_PAREN)): 4359 values = self._parse_csv(self._parse_assignment) 4360 self._match_set((TokenType.R_BRACKET, TokenType.R_PAREN)) 4361 4362 if type_token in self.TIMESTAMPS: 4363 if self._match_text_seq("WITH", "TIME", "ZONE"): 4364 maybe_func = False 4365 tz_type = ( 4366 exp.DataType.Type.TIMETZ 4367 if type_token in self.TIMES 4368 else exp.DataType.Type.TIMESTAMPTZ 4369 ) 4370 this = exp.DataType(this=tz_type, expressions=expressions) 4371 elif self._match_text_seq("WITH", "LOCAL", "TIME", "ZONE"): 4372 maybe_func = False 4373 this = exp.DataType(this=exp.DataType.Type.TIMESTAMPLTZ, expressions=expressions) 4374 elif self._match_text_seq("WITHOUT", "TIME", "ZONE"): 4375 maybe_func = False 4376 elif type_token == TokenType.INTERVAL: 4377 unit = self._parse_var(upper=True) 4378 if unit: 4379 if self._match_text_seq("TO"): 4380 unit = exp.IntervalSpan(this=unit, expression=self._parse_var(upper=True)) 4381 4382 this = self.expression(exp.DataType, this=self.expression(exp.Interval, unit=unit)) 4383 else: 4384 this = self.expression(exp.DataType, this=exp.DataType.Type.INTERVAL) 4385 4386 if maybe_func and check_func: 4387 index2 = self._index 4388 peek = self._parse_string() 4389 4390 if not peek: 4391 self._retreat(index) 4392 return None 4393 4394 self._retreat(index2) 4395 4396 if not this: 4397 if self._match_text_seq("UNSIGNED"): 4398 unsigned_type_token = self.SIGNED_TO_UNSIGNED_TYPE_TOKEN.get(type_token) 4399 if not unsigned_type_token: 4400 self.raise_error(f"Cannot convert {type_token.value} to unsigned.") 4401 4402 type_token = unsigned_type_token or type_token 4403 4404 this = exp.DataType( 4405 this=exp.DataType.Type[type_token.value], 4406 expressions=expressions, 4407 nested=nested, 4408 values=values, 4409 prefix=prefix, 4410 ) 4411 elif expressions: 4412 this.set("expressions", expressions) 4413 4414 index = self._index 4415 4416 # Postgres supports the INT ARRAY[3] syntax as a synonym for INT[3] 4417 matched_array = self._match(TokenType.ARRAY) 4418 4419 while self._curr: 4420 
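            # Illustrative note (not part of the library source): this loop consumes
            # trailing array suffixes, so parsing a sketch like
            #   parse_one("CREATE TABLE t (x INT[3])", read="postgres")
            # is expected to wrap the INT type in a DataType of type ARRAY with
            # values=[3]; per the comment above, "x INT ARRAY[3]" should parse the same way.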
matched_l_bracket = self._match(TokenType.L_BRACKET) 4421 if not matched_l_bracket and not matched_array: 4422 break 4423 4424 matched_array = False 4425 values = self._parse_csv(self._parse_assignment) or None 4426 if values and not schema: 4427 self._retreat(index) 4428 break 4429 4430 this = exp.DataType( 4431 this=exp.DataType.Type.ARRAY, expressions=[this], values=values, nested=True 4432 ) 4433 self._match(TokenType.R_BRACKET) 4434 4435 if self.TYPE_CONVERTER and isinstance(this.this, exp.DataType.Type): 4436 converter = self.TYPE_CONVERTER.get(this.this) 4437 if converter: 4438 this = converter(t.cast(exp.DataType, this)) 4439 4440 return this 4441 4442 def _parse_struct_types(self, type_required: bool = False) -> t.Optional[exp.Expression]: 4443 index = self._index 4444 this = ( 4445 self._parse_type(parse_interval=False, fallback_to_identifier=True) 4446 or self._parse_id_var() 4447 ) 4448 self._match(TokenType.COLON) 4449 4450 if ( 4451 type_required 4452 and not isinstance(this, exp.DataType) 4453 and not self._match_set(self.TYPE_TOKENS, advance=False) 4454 ): 4455 self._retreat(index) 4456 return self._parse_types() 4457 4458 return self._parse_column_def(this) 4459 4460 def _parse_at_time_zone(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 4461 if not self._match_text_seq("AT", "TIME", "ZONE"): 4462 return this 4463 return self.expression(exp.AtTimeZone, this=this, zone=self._parse_unary()) 4464 4465 def _parse_column(self) -> t.Optional[exp.Expression]: 4466 this = self._parse_column_reference() 4467 return self._parse_column_ops(this) if this else self._parse_bracket(this) 4468 4469 def _parse_column_reference(self) -> t.Optional[exp.Expression]: 4470 this = self._parse_field() 4471 if ( 4472 not this 4473 and self._match(TokenType.VALUES, advance=False) 4474 and self.VALUES_FOLLOWED_BY_PAREN 4475 and (not self._next or self._next.token_type != TokenType.L_PAREN) 4476 ): 4477 this = self._parse_id_var() 4478 4479 if isinstance(this, exp.Identifier): 4480 # We bubble up comments from the Identifier to the Column 4481 this = self.expression(exp.Column, comments=this.pop_comments(), this=this) 4482 4483 return this 4484 4485 def _parse_colon_as_json_extract( 4486 self, this: t.Optional[exp.Expression] 4487 ) -> t.Optional[exp.Expression]: 4488 casts = [] 4489 json_path = [] 4490 4491 while self._match(TokenType.COLON): 4492 start_index = self._index 4493 path = self._parse_column_ops(self._parse_field(any_token=True)) 4494 4495 # The cast :: operator has a lower precedence than the extraction operator :, so 4496 # we rearrange the AST appropriately to avoid casting the JSON path 4497 while isinstance(path, exp.Cast): 4498 casts.append(path.to) 4499 path = path.this 4500 4501 if casts: 4502 dcolon_offset = next( 4503 i 4504 for i, t in enumerate(self._tokens[start_index:]) 4505 if t.token_type == TokenType.DCOLON 4506 ) 4507 end_token = self._tokens[start_index + dcolon_offset - 1] 4508 else: 4509 end_token = self._prev 4510 4511 if path: 4512 json_path.append(self._find_sql(self._tokens[start_index], end_token)) 4513 4514 if json_path: 4515 this = self.expression( 4516 exp.JSONExtract, 4517 this=this, 4518 expression=self.dialect.to_json_path(exp.Literal.string(".".join(json_path))), 4519 ) 4520 4521 while casts: 4522 this = self.expression(exp.Cast, this=this, to=casts.pop()) 4523 4524 return this 4525 4526 def _parse_column_ops(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 4527 this = self._parse_bracket(this) 4528 4529 while 
self._match_set(self.COLUMN_OPERATORS): 4530 op_token = self._prev.token_type 4531 op = self.COLUMN_OPERATORS.get(op_token) 4532 4533 if op_token == TokenType.DCOLON: 4534 field = self._parse_types() 4535 if not field: 4536 self.raise_error("Expected type") 4537 elif op and self._curr: 4538 field = self._parse_column_reference() 4539 else: 4540 field = self._parse_field(any_token=True, anonymous_func=True) 4541 4542 if isinstance(field, exp.Func) and this: 4543 # bigquery allows function calls like x.y.count(...) 4544 # SAFE.SUBSTR(...) 4545 # https://cloud.google.com/bigquery/docs/reference/standard-sql/functions-reference#function_call_rules 4546 this = exp.replace_tree( 4547 this, 4548 lambda n: ( 4549 self.expression(exp.Dot, this=n.args.get("table"), expression=n.this) 4550 if n.table 4551 else n.this 4552 ) 4553 if isinstance(n, exp.Column) 4554 else n, 4555 ) 4556 4557 if op: 4558 this = op(self, this, field) 4559 elif isinstance(this, exp.Column) and not this.args.get("catalog"): 4560 this = self.expression( 4561 exp.Column, 4562 this=field, 4563 table=this.this, 4564 db=this.args.get("table"), 4565 catalog=this.args.get("db"), 4566 ) 4567 else: 4568 this = self.expression(exp.Dot, this=this, expression=field) 4569 4570 this = self._parse_bracket(this) 4571 4572 return self._parse_colon_as_json_extract(this) if self.COLON_IS_JSON_EXTRACT else this 4573 4574 def _parse_primary(self) -> t.Optional[exp.Expression]: 4575 if self._match_set(self.PRIMARY_PARSERS): 4576 token_type = self._prev.token_type 4577 primary = self.PRIMARY_PARSERS[token_type](self, self._prev) 4578 4579 if token_type == TokenType.STRING: 4580 expressions = [primary] 4581 while self._match(TokenType.STRING): 4582 expressions.append(exp.Literal.string(self._prev.text)) 4583 4584 if len(expressions) > 1: 4585 return self.expression(exp.Concat, expressions=expressions) 4586 4587 return primary 4588 4589 if self._match_pair(TokenType.DOT, TokenType.NUMBER): 4590 return exp.Literal.number(f"0.{self._prev.text}") 4591 4592 if self._match(TokenType.L_PAREN): 4593 comments = self._prev_comments 4594 query = self._parse_select() 4595 4596 if query: 4597 expressions = [query] 4598 else: 4599 expressions = self._parse_expressions() 4600 4601 this = self._parse_query_modifiers(seq_get(expressions, 0)) 4602 4603 if not this and self._match(TokenType.R_PAREN, advance=False): 4604 this = self.expression(exp.Tuple) 4605 elif isinstance(this, exp.UNWRAPPED_QUERIES): 4606 this = self._parse_subquery(this=this, parse_alias=False) 4607 elif isinstance(this, exp.Subquery): 4608 this = self._parse_subquery( 4609 this=self._parse_set_operations(this), parse_alias=False 4610 ) 4611 elif len(expressions) > 1 or self._prev.token_type == TokenType.COMMA: 4612 this = self.expression(exp.Tuple, expressions=expressions) 4613 else: 4614 this = self.expression(exp.Paren, this=this) 4615 4616 if this: 4617 this.add_comments(comments) 4618 4619 self._match_r_paren(expression=this) 4620 return this 4621 4622 return None 4623 4624 def _parse_field( 4625 self, 4626 any_token: bool = False, 4627 tokens: t.Optional[t.Collection[TokenType]] = None, 4628 anonymous_func: bool = False, 4629 ) -> t.Optional[exp.Expression]: 4630 if anonymous_func: 4631 field = ( 4632 self._parse_function(anonymous=anonymous_func, any_token=any_token) 4633 or self._parse_primary() 4634 ) 4635 else: 4636 field = self._parse_primary() or self._parse_function( 4637 anonymous=anonymous_func, any_token=any_token 4638 ) 4639 return field or self._parse_id_var(any_token=any_token, 
tokens=tokens) 4640 4641 def _parse_function( 4642 self, 4643 functions: t.Optional[t.Dict[str, t.Callable]] = None, 4644 anonymous: bool = False, 4645 optional_parens: bool = True, 4646 any_token: bool = False, 4647 ) -> t.Optional[exp.Expression]: 4648 # This allows us to also parse {fn <function>} syntax (Snowflake, MySQL support this) 4649 # See: https://community.snowflake.com/s/article/SQL-Escape-Sequences 4650 fn_syntax = False 4651 if ( 4652 self._match(TokenType.L_BRACE, advance=False) 4653 and self._next 4654 and self._next.text.upper() == "FN" 4655 ): 4656 self._advance(2) 4657 fn_syntax = True 4658 4659 func = self._parse_function_call( 4660 functions=functions, 4661 anonymous=anonymous, 4662 optional_parens=optional_parens, 4663 any_token=any_token, 4664 ) 4665 4666 if fn_syntax: 4667 self._match(TokenType.R_BRACE) 4668 4669 return func 4670 4671 def _parse_function_call( 4672 self, 4673 functions: t.Optional[t.Dict[str, t.Callable]] = None, 4674 anonymous: bool = False, 4675 optional_parens: bool = True, 4676 any_token: bool = False, 4677 ) -> t.Optional[exp.Expression]: 4678 if not self._curr: 4679 return None 4680 4681 comments = self._curr.comments 4682 token_type = self._curr.token_type 4683 this = self._curr.text 4684 upper = this.upper() 4685 4686 parser = self.NO_PAREN_FUNCTION_PARSERS.get(upper) 4687 if optional_parens and parser and token_type not in self.INVALID_FUNC_NAME_TOKENS: 4688 self._advance() 4689 return self._parse_window(parser(self)) 4690 4691 if not self._next or self._next.token_type != TokenType.L_PAREN: 4692 if optional_parens and token_type in self.NO_PAREN_FUNCTIONS: 4693 self._advance() 4694 return self.expression(self.NO_PAREN_FUNCTIONS[token_type]) 4695 4696 return None 4697 4698 if any_token: 4699 if token_type in self.RESERVED_TOKENS: 4700 return None 4701 elif token_type not in self.FUNC_TOKENS: 4702 return None 4703 4704 self._advance(2) 4705 4706 parser = self.FUNCTION_PARSERS.get(upper) 4707 if parser and not anonymous: 4708 this = parser(self) 4709 else: 4710 subquery_predicate = self.SUBQUERY_PREDICATES.get(token_type) 4711 4712 if subquery_predicate and self._curr.token_type in (TokenType.SELECT, TokenType.WITH): 4713 this = self.expression(subquery_predicate, this=self._parse_select()) 4714 self._match_r_paren() 4715 return this 4716 4717 if functions is None: 4718 functions = self.FUNCTIONS 4719 4720 function = functions.get(upper) 4721 4722 alias = upper in self.FUNCTIONS_WITH_ALIASED_ARGS 4723 args = self._parse_csv(lambda: self._parse_lambda(alias=alias)) 4724 4725 if alias: 4726 args = self._kv_to_prop_eq(args) 4727 4728 if function and not anonymous: 4729 if "dialect" in function.__code__.co_varnames: 4730 func = function(args, dialect=self.dialect) 4731 else: 4732 func = function(args) 4733 4734 func = self.validate_expression(func, args) 4735 if not self.dialect.NORMALIZE_FUNCTIONS: 4736 func.meta["name"] = this 4737 4738 this = func 4739 else: 4740 if token_type == TokenType.IDENTIFIER: 4741 this = exp.Identifier(this=this, quoted=True) 4742 this = self.expression(exp.Anonymous, this=this, expressions=args) 4743 4744 if isinstance(this, exp.Expression): 4745 this.add_comments(comments) 4746 4747 self._match_r_paren(this) 4748 return self._parse_window(this) 4749 4750 def _kv_to_prop_eq(self, expressions: t.List[exp.Expression]) -> t.List[exp.Expression]: 4751 transformed = [] 4752 4753 for e in expressions: 4754 if isinstance(e, self.KEY_VALUE_DEFINITIONS): 4755 if isinstance(e, exp.Alias): 4756 e = 
self.expression(exp.PropertyEQ, this=e.args.get("alias"), expression=e.this) 4757 4758 if not isinstance(e, exp.PropertyEQ): 4759 e = self.expression( 4760 exp.PropertyEQ, this=exp.to_identifier(e.this.name), expression=e.expression 4761 ) 4762 4763 if isinstance(e.this, exp.Column): 4764 e.this.replace(e.this.this) 4765 4766 transformed.append(e) 4767 4768 return transformed 4769 4770 def _parse_function_parameter(self) -> t.Optional[exp.Expression]: 4771 return self._parse_column_def(self._parse_id_var()) 4772 4773 def _parse_user_defined_function( 4774 self, kind: t.Optional[TokenType] = None 4775 ) -> t.Optional[exp.Expression]: 4776 this = self._parse_id_var() 4777 4778 while self._match(TokenType.DOT): 4779 this = self.expression(exp.Dot, this=this, expression=self._parse_id_var()) 4780 4781 if not self._match(TokenType.L_PAREN): 4782 return this 4783 4784 expressions = self._parse_csv(self._parse_function_parameter) 4785 self._match_r_paren() 4786 return self.expression( 4787 exp.UserDefinedFunction, this=this, expressions=expressions, wrapped=True 4788 ) 4789 4790 def _parse_introducer(self, token: Token) -> exp.Introducer | exp.Identifier: 4791 literal = self._parse_primary() 4792 if literal: 4793 return self.expression(exp.Introducer, this=token.text, expression=literal) 4794 4795 return self.expression(exp.Identifier, this=token.text) 4796 4797 def _parse_session_parameter(self) -> exp.SessionParameter: 4798 kind = None 4799 this = self._parse_id_var() or self._parse_primary() 4800 4801 if this and self._match(TokenType.DOT): 4802 kind = this.name 4803 this = self._parse_var() or self._parse_primary() 4804 4805 return self.expression(exp.SessionParameter, this=this, kind=kind) 4806 4807 def _parse_lambda_arg(self) -> t.Optional[exp.Expression]: 4808 return self._parse_id_var() 4809 4810 def _parse_lambda(self, alias: bool = False) -> t.Optional[exp.Expression]: 4811 index = self._index 4812 4813 if self._match(TokenType.L_PAREN): 4814 expressions = t.cast( 4815 t.List[t.Optional[exp.Expression]], self._parse_csv(self._parse_lambda_arg) 4816 ) 4817 4818 if not self._match(TokenType.R_PAREN): 4819 self._retreat(index) 4820 else: 4821 expressions = [self._parse_lambda_arg()] 4822 4823 if self._match_set(self.LAMBDAS): 4824 return self.LAMBDAS[self._prev.token_type](self, expressions) 4825 4826 self._retreat(index) 4827 4828 this: t.Optional[exp.Expression] 4829 4830 if self._match(TokenType.DISTINCT): 4831 this = self.expression( 4832 exp.Distinct, expressions=self._parse_csv(self._parse_assignment) 4833 ) 4834 else: 4835 this = self._parse_select_or_expression(alias=alias) 4836 4837 return self._parse_limit( 4838 self._parse_order(self._parse_having_max(self._parse_respect_or_ignore_nulls(this))) 4839 ) 4840 4841 def _parse_schema(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 4842 index = self._index 4843 if not self._match(TokenType.L_PAREN): 4844 return this 4845 4846 # Disambiguate between schema and subquery/CTE, e.g. 
in INSERT INTO table (<expr>), 4847 # expr can be of both types 4848 if self._match_set(self.SELECT_START_TOKENS): 4849 self._retreat(index) 4850 return this 4851 args = self._parse_csv(lambda: self._parse_constraint() or self._parse_field_def()) 4852 self._match_r_paren() 4853 return self.expression(exp.Schema, this=this, expressions=args) 4854 4855 def _parse_field_def(self) -> t.Optional[exp.Expression]: 4856 return self._parse_column_def(self._parse_field(any_token=True)) 4857 4858 def _parse_column_def(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 4859 # column defs are not really columns, they're identifiers 4860 if isinstance(this, exp.Column): 4861 this = this.this 4862 4863 kind = self._parse_types(schema=True) 4864 4865 if self._match_text_seq("FOR", "ORDINALITY"): 4866 return self.expression(exp.ColumnDef, this=this, ordinality=True) 4867 4868 constraints: t.List[exp.Expression] = [] 4869 4870 if (not kind and self._match(TokenType.ALIAS)) or self._match_texts( 4871 ("ALIAS", "MATERIALIZED") 4872 ): 4873 persisted = self._prev.text.upper() == "MATERIALIZED" 4874 constraints.append( 4875 self.expression( 4876 exp.ComputedColumnConstraint, 4877 this=self._parse_assignment(), 4878 persisted=persisted or self._match_text_seq("PERSISTED"), 4879 not_null=self._match_pair(TokenType.NOT, TokenType.NULL), 4880 ) 4881 ) 4882 elif kind and self._match_pair(TokenType.ALIAS, TokenType.L_PAREN, advance=False): 4883 self._match(TokenType.ALIAS) 4884 constraints.append( 4885 self.expression(exp.TransformColumnConstraint, this=self._parse_field()) 4886 ) 4887 4888 while True: 4889 constraint = self._parse_column_constraint() 4890 if not constraint: 4891 break 4892 constraints.append(constraint) 4893 4894 if not kind and not constraints: 4895 return this 4896 4897 return self.expression(exp.ColumnDef, this=this, kind=kind, constraints=constraints) 4898 4899 def _parse_auto_increment( 4900 self, 4901 ) -> exp.GeneratedAsIdentityColumnConstraint | exp.AutoIncrementColumnConstraint: 4902 start = None 4903 increment = None 4904 4905 if self._match(TokenType.L_PAREN, advance=False): 4906 args = self._parse_wrapped_csv(self._parse_bitwise) 4907 start = seq_get(args, 0) 4908 increment = seq_get(args, 1) 4909 elif self._match_text_seq("START"): 4910 start = self._parse_bitwise() 4911 self._match_text_seq("INCREMENT") 4912 increment = self._parse_bitwise() 4913 4914 if start and increment: 4915 return exp.GeneratedAsIdentityColumnConstraint(start=start, increment=increment) 4916 4917 return exp.AutoIncrementColumnConstraint() 4918 4919 def _parse_auto_property(self) -> t.Optional[exp.AutoRefreshProperty]: 4920 if not self._match_text_seq("REFRESH"): 4921 self._retreat(self._index - 1) 4922 return None 4923 return self.expression(exp.AutoRefreshProperty, this=self._parse_var(upper=True)) 4924 4925 def _parse_compress(self) -> exp.CompressColumnConstraint: 4926 if self._match(TokenType.L_PAREN, advance=False): 4927 return self.expression( 4928 exp.CompressColumnConstraint, this=self._parse_wrapped_csv(self._parse_bitwise) 4929 ) 4930 4931 return self.expression(exp.CompressColumnConstraint, this=self._parse_bitwise()) 4932 4933 def _parse_generated_as_identity( 4934 self, 4935 ) -> ( 4936 exp.GeneratedAsIdentityColumnConstraint 4937 | exp.ComputedColumnConstraint 4938 | exp.GeneratedAsRowColumnConstraint 4939 ): 4940 if self._match_text_seq("BY", "DEFAULT"): 4941 on_null = self._match_pair(TokenType.ON, TokenType.NULL) 4942 this = self.expression( 4943 
exp.GeneratedAsIdentityColumnConstraint, this=False, on_null=on_null 4944 ) 4945 else: 4946 self._match_text_seq("ALWAYS") 4947 this = self.expression(exp.GeneratedAsIdentityColumnConstraint, this=True) 4948 4949 self._match(TokenType.ALIAS) 4950 4951 if self._match_text_seq("ROW"): 4952 start = self._match_text_seq("START") 4953 if not start: 4954 self._match(TokenType.END) 4955 hidden = self._match_text_seq("HIDDEN") 4956 return self.expression(exp.GeneratedAsRowColumnConstraint, start=start, hidden=hidden) 4957 4958 identity = self._match_text_seq("IDENTITY") 4959 4960 if self._match(TokenType.L_PAREN): 4961 if self._match(TokenType.START_WITH): 4962 this.set("start", self._parse_bitwise()) 4963 if self._match_text_seq("INCREMENT", "BY"): 4964 this.set("increment", self._parse_bitwise()) 4965 if self._match_text_seq("MINVALUE"): 4966 this.set("minvalue", self._parse_bitwise()) 4967 if self._match_text_seq("MAXVALUE"): 4968 this.set("maxvalue", self._parse_bitwise()) 4969 4970 if self._match_text_seq("CYCLE"): 4971 this.set("cycle", True) 4972 elif self._match_text_seq("NO", "CYCLE"): 4973 this.set("cycle", False) 4974 4975 if not identity: 4976 this.set("expression", self._parse_range()) 4977 elif not this.args.get("start") and self._match(TokenType.NUMBER, advance=False): 4978 args = self._parse_csv(self._parse_bitwise) 4979 this.set("start", seq_get(args, 0)) 4980 this.set("increment", seq_get(args, 1)) 4981 4982 self._match_r_paren() 4983 4984 return this 4985 4986 def _parse_inline(self) -> exp.InlineLengthColumnConstraint: 4987 self._match_text_seq("LENGTH") 4988 return self.expression(exp.InlineLengthColumnConstraint, this=self._parse_bitwise()) 4989 4990 def _parse_not_constraint(self) -> t.Optional[exp.Expression]: 4991 if self._match_text_seq("NULL"): 4992 return self.expression(exp.NotNullColumnConstraint) 4993 if self._match_text_seq("CASESPECIFIC"): 4994 return self.expression(exp.CaseSpecificColumnConstraint, not_=True) 4995 if self._match_text_seq("FOR", "REPLICATION"): 4996 return self.expression(exp.NotForReplicationColumnConstraint) 4997 return None 4998 4999 def _parse_column_constraint(self) -> t.Optional[exp.Expression]: 5000 if self._match(TokenType.CONSTRAINT): 5001 this = self._parse_id_var() 5002 else: 5003 this = None 5004 5005 if self._match_texts(self.CONSTRAINT_PARSERS): 5006 return self.expression( 5007 exp.ColumnConstraint, 5008 this=this, 5009 kind=self.CONSTRAINT_PARSERS[self._prev.text.upper()](self), 5010 ) 5011 5012 return this 5013 5014 def _parse_constraint(self) -> t.Optional[exp.Expression]: 5015 if not self._match(TokenType.CONSTRAINT): 5016 return self._parse_unnamed_constraint(constraints=self.SCHEMA_UNNAMED_CONSTRAINTS) 5017 5018 return self.expression( 5019 exp.Constraint, 5020 this=self._parse_id_var(), 5021 expressions=self._parse_unnamed_constraints(), 5022 ) 5023 5024 def _parse_unnamed_constraints(self) -> t.List[exp.Expression]: 5025 constraints = [] 5026 while True: 5027 constraint = self._parse_unnamed_constraint() or self._parse_function() 5028 if not constraint: 5029 break 5030 constraints.append(constraint) 5031 5032 return constraints 5033 5034 def _parse_unnamed_constraint( 5035 self, constraints: t.Optional[t.Collection[str]] = None 5036 ) -> t.Optional[exp.Expression]: 5037 if self._match(TokenType.IDENTIFIER, advance=False) or not self._match_texts( 5038 constraints or self.CONSTRAINT_PARSERS 5039 ): 5040 return None 5041 5042 constraint = self._prev.text.upper() 5043 if constraint not in self.CONSTRAINT_PARSERS: 5044 
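            # Illustrative note (not part of the library source): this branch is reached
            # when a name listed in `constraints` (e.g. a dialect's
            # SCHEMA_UNNAMED_CONSTRAINTS entry) has no corresponding callable registered
            # in CONSTRAINT_PARSERS.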
self.raise_error(f"No parser found for schema constraint {constraint}.") 5045 5046 return self.CONSTRAINT_PARSERS[constraint](self) 5047 5048 def _parse_unique(self) -> exp.UniqueColumnConstraint: 5049 self._match_text_seq("KEY") 5050 return self.expression( 5051 exp.UniqueColumnConstraint, 5052 this=self._parse_schema(self._parse_id_var(any_token=False)), 5053 index_type=self._match(TokenType.USING) and self._advance_any() and self._prev.text, 5054 on_conflict=self._parse_on_conflict(), 5055 ) 5056 5057 def _parse_key_constraint_options(self) -> t.List[str]: 5058 options = [] 5059 while True: 5060 if not self._curr: 5061 break 5062 5063 if self._match(TokenType.ON): 5064 action = None 5065 on = self._advance_any() and self._prev.text 5066 5067 if self._match_text_seq("NO", "ACTION"): 5068 action = "NO ACTION" 5069 elif self._match_text_seq("CASCADE"): 5070 action = "CASCADE" 5071 elif self._match_text_seq("RESTRICT"): 5072 action = "RESTRICT" 5073 elif self._match_pair(TokenType.SET, TokenType.NULL): 5074 action = "SET NULL" 5075 elif self._match_pair(TokenType.SET, TokenType.DEFAULT): 5076 action = "SET DEFAULT" 5077 else: 5078 self.raise_error("Invalid key constraint") 5079 5080 options.append(f"ON {on} {action}") 5081 elif self._match_text_seq("NOT", "ENFORCED"): 5082 options.append("NOT ENFORCED") 5083 elif self._match_text_seq("DEFERRABLE"): 5084 options.append("DEFERRABLE") 5085 elif self._match_text_seq("INITIALLY", "DEFERRED"): 5086 options.append("INITIALLY DEFERRED") 5087 elif self._match_text_seq("NORELY"): 5088 options.append("NORELY") 5089 elif self._match_text_seq("MATCH", "FULL"): 5090 options.append("MATCH FULL") 5091 else: 5092 break 5093 5094 return options 5095 5096 def _parse_references(self, match: bool = True) -> t.Optional[exp.Reference]: 5097 if match and not self._match(TokenType.REFERENCES): 5098 return None 5099 5100 expressions = None 5101 this = self._parse_table(schema=True) 5102 options = self._parse_key_constraint_options() 5103 return self.expression(exp.Reference, this=this, expressions=expressions, options=options) 5104 5105 def _parse_foreign_key(self) -> exp.ForeignKey: 5106 expressions = self._parse_wrapped_id_vars() 5107 reference = self._parse_references() 5108 options = {} 5109 5110 while self._match(TokenType.ON): 5111 if not self._match_set((TokenType.DELETE, TokenType.UPDATE)): 5112 self.raise_error("Expected DELETE or UPDATE") 5113 5114 kind = self._prev.text.lower() 5115 5116 if self._match_text_seq("NO", "ACTION"): 5117 action = "NO ACTION" 5118 elif self._match(TokenType.SET): 5119 self._match_set((TokenType.NULL, TokenType.DEFAULT)) 5120 action = "SET " + self._prev.text.upper() 5121 else: 5122 self._advance() 5123 action = self._prev.text.upper() 5124 5125 options[kind] = action 5126 5127 return self.expression( 5128 exp.ForeignKey, 5129 expressions=expressions, 5130 reference=reference, 5131 **options, # type: ignore 5132 ) 5133 5134 def _parse_primary_key_part(self) -> t.Optional[exp.Expression]: 5135 return self._parse_field() 5136 5137 def _parse_period_for_system_time(self) -> t.Optional[exp.PeriodForSystemTimeConstraint]: 5138 if not self._match(TokenType.TIMESTAMP_SNAPSHOT): 5139 self._retreat(self._index - 1) 5140 return None 5141 5142 id_vars = self._parse_wrapped_id_vars() 5143 return self.expression( 5144 exp.PeriodForSystemTimeConstraint, 5145 this=seq_get(id_vars, 0), 5146 expression=seq_get(id_vars, 1), 5147 ) 5148 5149 def _parse_primary_key( 5150 self, wrapped_optional: bool = False, in_props: bool = False 5151 ) -> 
exp.PrimaryKeyColumnConstraint | exp.PrimaryKey: 5152 desc = ( 5153 self._match_set((TokenType.ASC, TokenType.DESC)) 5154 and self._prev.token_type == TokenType.DESC 5155 ) 5156 5157 if not in_props and not self._match(TokenType.L_PAREN, advance=False): 5158 return self.expression(exp.PrimaryKeyColumnConstraint, desc=desc) 5159 5160 expressions = self._parse_wrapped_csv( 5161 self._parse_primary_key_part, optional=wrapped_optional 5162 ) 5163 options = self._parse_key_constraint_options() 5164 return self.expression(exp.PrimaryKey, expressions=expressions, options=options) 5165 5166 def _parse_bracket_key_value(self, is_map: bool = False) -> t.Optional[exp.Expression]: 5167 return self._parse_slice(self._parse_alias(self._parse_assignment(), explicit=True)) 5168 5169 def _parse_bracket(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 5170 if not self._match_set((TokenType.L_BRACKET, TokenType.L_BRACE)): 5171 return this 5172 5173 bracket_kind = self._prev.token_type 5174 expressions = self._parse_csv( 5175 lambda: self._parse_bracket_key_value(is_map=bracket_kind == TokenType.L_BRACE) 5176 ) 5177 5178 if bracket_kind == TokenType.L_BRACKET and not self._match(TokenType.R_BRACKET): 5179 self.raise_error("Expected ]") 5180 elif bracket_kind == TokenType.L_BRACE and not self._match(TokenType.R_BRACE): 5181 self.raise_error("Expected }") 5182 5183 # https://duckdb.org/docs/sql/data_types/struct.html#creating-structs 5184 if bracket_kind == TokenType.L_BRACE: 5185 this = self.expression(exp.Struct, expressions=self._kv_to_prop_eq(expressions)) 5186 elif not this or this.name.upper() == "ARRAY": 5187 this = self.expression(exp.Array, expressions=expressions) 5188 else: 5189 expressions = apply_index_offset(this, expressions, -self.dialect.INDEX_OFFSET) 5190 this = self.expression(exp.Bracket, this=this, expressions=expressions) 5191 5192 self._add_comments(this) 5193 return self._parse_bracket(this) 5194 5195 def _parse_slice(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 5196 if self._match(TokenType.COLON): 5197 return self.expression(exp.Slice, this=this, expression=self._parse_assignment()) 5198 return this 5199 5200 def _parse_case(self) -> t.Optional[exp.Expression]: 5201 ifs = [] 5202 default = None 5203 5204 comments = self._prev_comments 5205 expression = self._parse_assignment() 5206 5207 while self._match(TokenType.WHEN): 5208 this = self._parse_assignment() 5209 self._match(TokenType.THEN) 5210 then = self._parse_assignment() 5211 ifs.append(self.expression(exp.If, this=this, true=then)) 5212 5213 if self._match(TokenType.ELSE): 5214 default = self._parse_assignment() 5215 5216 if not self._match(TokenType.END): 5217 if isinstance(default, exp.Interval) and default.this.sql().upper() == "END": 5218 default = exp.column("interval") 5219 else: 5220 self.raise_error("Expected END after CASE", self._prev) 5221 5222 return self.expression( 5223 exp.Case, comments=comments, this=expression, ifs=ifs, default=default 5224 ) 5225 5226 def _parse_if(self) -> t.Optional[exp.Expression]: 5227 if self._match(TokenType.L_PAREN): 5228 args = self._parse_csv(self._parse_assignment) 5229 this = self.validate_expression(exp.If.from_arg_list(args), args) 5230 self._match_r_paren() 5231 else: 5232 index = self._index - 1 5233 5234 if self.NO_PAREN_IF_COMMANDS and index == 0: 5235 return self._parse_as_command(self._prev) 5236 5237 condition = self._parse_assignment() 5238 5239 if not condition: 5240 self._retreat(index) 5241 return None 5242 5243 
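            # Illustrative note (not part of the library source): at this point we are
            # parsing the parenthesis-free statement form, roughly
            #   IF <condition> THEN <expr> [ELSE <expr>] END
            # as opposed to the function form IF(<condition>, <true>, <false>) above.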
self._match(TokenType.THEN) 5244 true = self._parse_assignment() 5245 false = self._parse_assignment() if self._match(TokenType.ELSE) else None 5246 self._match(TokenType.END) 5247 this = self.expression(exp.If, this=condition, true=true, false=false) 5248 5249 return this 5250 5251 def _parse_next_value_for(self) -> t.Optional[exp.Expression]: 5252 if not self._match_text_seq("VALUE", "FOR"): 5253 self._retreat(self._index - 1) 5254 return None 5255 5256 return self.expression( 5257 exp.NextValueFor, 5258 this=self._parse_column(), 5259 order=self._match(TokenType.OVER) and self._parse_wrapped(self._parse_order), 5260 ) 5261 5262 def _parse_extract(self) -> exp.Extract: 5263 this = self._parse_function() or self._parse_var() or self._parse_type() 5264 5265 if self._match(TokenType.FROM): 5266 return self.expression(exp.Extract, this=this, expression=self._parse_bitwise()) 5267 5268 if not self._match(TokenType.COMMA): 5269 self.raise_error("Expected FROM or comma after EXTRACT", self._prev) 5270 5271 return self.expression(exp.Extract, this=this, expression=self._parse_bitwise()) 5272 5273 def _parse_gap_fill(self) -> exp.GapFill: 5274 self._match(TokenType.TABLE) 5275 this = self._parse_table() 5276 5277 self._match(TokenType.COMMA) 5278 args = [this, *self._parse_csv(lambda: self._parse_lambda())] 5279 5280 gap_fill = exp.GapFill.from_arg_list(args) 5281 return self.validate_expression(gap_fill, args) 5282 5283 def _parse_cast(self, strict: bool, safe: t.Optional[bool] = None) -> exp.Expression: 5284 this = self._parse_assignment() 5285 5286 if not self._match(TokenType.ALIAS): 5287 if self._match(TokenType.COMMA): 5288 return self.expression(exp.CastToStrType, this=this, to=self._parse_string()) 5289 5290 self.raise_error("Expected AS after CAST") 5291 5292 fmt = None 5293 to = self._parse_types() 5294 5295 if self._match(TokenType.FORMAT): 5296 fmt_string = self._parse_string() 5297 fmt = self._parse_at_time_zone(fmt_string) 5298 5299 if not to: 5300 to = exp.DataType.build(exp.DataType.Type.UNKNOWN) 5301 if to.this in exp.DataType.TEMPORAL_TYPES: 5302 this = self.expression( 5303 exp.StrToDate if to.this == exp.DataType.Type.DATE else exp.StrToTime, 5304 this=this, 5305 format=exp.Literal.string( 5306 format_time( 5307 fmt_string.this if fmt_string else "", 5308 self.dialect.FORMAT_MAPPING or self.dialect.TIME_MAPPING, 5309 self.dialect.FORMAT_TRIE or self.dialect.TIME_TRIE, 5310 ) 5311 ), 5312 ) 5313 5314 if isinstance(fmt, exp.AtTimeZone) and isinstance(this, exp.StrToTime): 5315 this.set("zone", fmt.args["zone"]) 5316 return this 5317 elif not to: 5318 self.raise_error("Expected TYPE after CAST") 5319 elif isinstance(to, exp.Identifier): 5320 to = exp.DataType.build(to.name, udt=True) 5321 elif to.this == exp.DataType.Type.CHAR: 5322 if self._match(TokenType.CHARACTER_SET): 5323 to = self.expression(exp.CharacterSet, this=self._parse_var_or_string()) 5324 5325 return self.expression( 5326 exp.Cast if strict else exp.TryCast, 5327 this=this, 5328 to=to, 5329 format=fmt, 5330 safe=safe, 5331 action=self._parse_var_from_options(self.CAST_ACTIONS, raise_unmatched=False), 5332 ) 5333 5334 def _parse_string_agg(self) -> exp.Expression: 5335 if self._match(TokenType.DISTINCT): 5336 args: t.List[t.Optional[exp.Expression]] = [ 5337 self.expression(exp.Distinct, expressions=[self._parse_assignment()]) 5338 ] 5339 if self._match(TokenType.COMMA): 5340 args.extend(self._parse_csv(self._parse_assignment)) 5341 else: 5342 args = self._parse_csv(self._parse_assignment) # type: ignore 5343 5344 
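        # Illustrative sketch (not part of the library source), assuming sqlglot's
        # public transpile API: because STRING_AGG is parsed into exp.GroupConcat here,
        #   transpile("SELECT STRING_AGG(x, ',') FROM t", read="postgres", write="mysql")
        # should come out roughly as SELECT GROUP_CONCAT(x SEPARATOR ',') FROM t.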
index = self._index 5345 if not self._match(TokenType.R_PAREN) and args: 5346 # postgres: STRING_AGG([DISTINCT] expression, separator [ORDER BY expression1 {ASC | DESC} [, ...]]) 5347 # bigquery: STRING_AGG([DISTINCT] expression [, separator] [ORDER BY key [{ASC | DESC}] [, ... ]] [LIMIT n]) 5348 args[-1] = self._parse_limit(this=self._parse_order(this=args[-1])) 5349 return self.expression(exp.GroupConcat, this=args[0], separator=seq_get(args, 1)) 5350 5351 # Checks if we can parse an order clause: WITHIN GROUP (ORDER BY <order_by_expression_list> [ASC | DESC]). 5352 # This is done "manually", instead of letting _parse_window parse it into an exp.WithinGroup node, so that 5353 # the STRING_AGG call is parsed like in MySQL / SQLite and can thus be transpiled more easily to them. 5354 if not self._match_text_seq("WITHIN", "GROUP"): 5355 self._retreat(index) 5356 return self.validate_expression(exp.GroupConcat.from_arg_list(args), args) 5357 5358 self._match_l_paren() # The corresponding match_r_paren will be called in parse_function (caller) 5359 order = self._parse_order(this=seq_get(args, 0)) 5360 return self.expression(exp.GroupConcat, this=order, separator=seq_get(args, 1)) 5361 5362 def _parse_convert( 5363 self, strict: bool, safe: t.Optional[bool] = None 5364 ) -> t.Optional[exp.Expression]: 5365 this = self._parse_bitwise() 5366 5367 if self._match(TokenType.USING): 5368 to: t.Optional[exp.Expression] = self.expression( 5369 exp.CharacterSet, this=self._parse_var() 5370 ) 5371 elif self._match(TokenType.COMMA): 5372 to = self._parse_types() 5373 else: 5374 to = None 5375 5376 return self.expression(exp.Cast if strict else exp.TryCast, this=this, to=to, safe=safe) 5377 5378 def _parse_decode(self) -> t.Optional[exp.Decode | exp.Case]: 5379 """ 5380 There are generally two variants of the DECODE function: 5381 5382 - DECODE(bin, charset) 5383 - DECODE(expression, search, result [, search, result] ... [, default]) 5384 5385 The second variant will always be parsed into a CASE expression. Note that NULL 5386 needs special treatment, since we need to explicitly check for it with `IS NULL`, 5387 instead of relying on pattern matching. 
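
        Example (illustrative): DECODE(x, 1, 'a', 'b') is parsed into roughly
        CASE WHEN x = 1 THEN 'a' ELSE 'b' END, and a NULL search value such as in
        DECODE(x, NULL, 'n') becomes CASE WHEN x IS NULL THEN 'n' END.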
5388 """ 5389 args = self._parse_csv(self._parse_assignment) 5390 5391 if len(args) < 3: 5392 return self.expression(exp.Decode, this=seq_get(args, 0), charset=seq_get(args, 1)) 5393 5394 expression, *expressions = args 5395 if not expression: 5396 return None 5397 5398 ifs = [] 5399 for search, result in zip(expressions[::2], expressions[1::2]): 5400 if not search or not result: 5401 return None 5402 5403 if isinstance(search, exp.Literal): 5404 ifs.append( 5405 exp.If(this=exp.EQ(this=expression.copy(), expression=search), true=result) 5406 ) 5407 elif isinstance(search, exp.Null): 5408 ifs.append( 5409 exp.If(this=exp.Is(this=expression.copy(), expression=exp.Null()), true=result) 5410 ) 5411 else: 5412 cond = exp.or_( 5413 exp.EQ(this=expression.copy(), expression=search), 5414 exp.and_( 5415 exp.Is(this=expression.copy(), expression=exp.Null()), 5416 exp.Is(this=search.copy(), expression=exp.Null()), 5417 copy=False, 5418 ), 5419 copy=False, 5420 ) 5421 ifs.append(exp.If(this=cond, true=result)) 5422 5423 return exp.Case(ifs=ifs, default=expressions[-1] if len(expressions) % 2 == 1 else None) 5424 5425 def _parse_json_key_value(self) -> t.Optional[exp.JSONKeyValue]: 5426 self._match_text_seq("KEY") 5427 key = self._parse_column() 5428 self._match_set(self.JSON_KEY_VALUE_SEPARATOR_TOKENS) 5429 self._match_text_seq("VALUE") 5430 value = self._parse_bitwise() 5431 5432 if not key and not value: 5433 return None 5434 return self.expression(exp.JSONKeyValue, this=key, expression=value) 5435 5436 def _parse_format_json(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 5437 if not this or not self._match_text_seq("FORMAT", "JSON"): 5438 return this 5439 5440 return self.expression(exp.FormatJson, this=this) 5441 5442 def _parse_on_handling(self, on: str, *values: str) -> t.Optional[str]: 5443 # Parses the "X ON Y" syntax, i.e. NULL ON NULL (Oracle, T-SQL) 5444 for value in values: 5445 if self._match_text_seq(value, "ON", on): 5446 return f"{value} ON {on}" 5447 5448 return None 5449 5450 @t.overload 5451 def _parse_json_object(self, agg: Lit[False]) -> exp.JSONObject: ... 5452 5453 @t.overload 5454 def _parse_json_object(self, agg: Lit[True]) -> exp.JSONObjectAgg: ... 
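    # Illustrative note (not part of the library source): for SQL along the lines of
    #   JSON_OBJECT('k' VALUE v ABSENT ON NULL WITH UNIQUE KEYS)
    # the implementation below is expected to set null_handling="ABSENT ON NULL" and
    # unique_keys=True on the resulting exp.JSONObject.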
5455 5456 def _parse_json_object(self, agg=False): 5457 star = self._parse_star() 5458 expressions = ( 5459 [star] 5460 if star 5461 else self._parse_csv(lambda: self._parse_format_json(self._parse_json_key_value())) 5462 ) 5463 null_handling = self._parse_on_handling("NULL", "NULL", "ABSENT") 5464 5465 unique_keys = None 5466 if self._match_text_seq("WITH", "UNIQUE"): 5467 unique_keys = True 5468 elif self._match_text_seq("WITHOUT", "UNIQUE"): 5469 unique_keys = False 5470 5471 self._match_text_seq("KEYS") 5472 5473 return_type = self._match_text_seq("RETURNING") and self._parse_format_json( 5474 self._parse_type() 5475 ) 5476 encoding = self._match_text_seq("ENCODING") and self._parse_var() 5477 5478 return self.expression( 5479 exp.JSONObjectAgg if agg else exp.JSONObject, 5480 expressions=expressions, 5481 null_handling=null_handling, 5482 unique_keys=unique_keys, 5483 return_type=return_type, 5484 encoding=encoding, 5485 ) 5486 5487 # Note: this is currently incomplete; it only implements the "JSON_value_column" part 5488 def _parse_json_column_def(self) -> exp.JSONColumnDef: 5489 if not self._match_text_seq("NESTED"): 5490 this = self._parse_id_var() 5491 kind = self._parse_types(allow_identifiers=False) 5492 nested = None 5493 else: 5494 this = None 5495 kind = None 5496 nested = True 5497 5498 path = self._match_text_seq("PATH") and self._parse_string() 5499 nested_schema = nested and self._parse_json_schema() 5500 5501 return self.expression( 5502 exp.JSONColumnDef, 5503 this=this, 5504 kind=kind, 5505 path=path, 5506 nested_schema=nested_schema, 5507 ) 5508 5509 def _parse_json_schema(self) -> exp.JSONSchema: 5510 self._match_text_seq("COLUMNS") 5511 return self.expression( 5512 exp.JSONSchema, 5513 expressions=self._parse_wrapped_csv(self._parse_json_column_def, optional=True), 5514 ) 5515 5516 def _parse_json_table(self) -> exp.JSONTable: 5517 this = self._parse_format_json(self._parse_bitwise()) 5518 path = self._match(TokenType.COMMA) and self._parse_string() 5519 error_handling = self._parse_on_handling("ERROR", "ERROR", "NULL") 5520 empty_handling = self._parse_on_handling("EMPTY", "ERROR", "NULL") 5521 schema = self._parse_json_schema() 5522 5523 return exp.JSONTable( 5524 this=this, 5525 schema=schema, 5526 path=path, 5527 error_handling=error_handling, 5528 empty_handling=empty_handling, 5529 ) 5530 5531 def _parse_match_against(self) -> exp.MatchAgainst: 5532 expressions = self._parse_csv(self._parse_column) 5533 5534 self._match_text_seq(")", "AGAINST", "(") 5535 5536 this = self._parse_string() 5537 5538 if self._match_text_seq("IN", "NATURAL", "LANGUAGE", "MODE"): 5539 modifier = "IN NATURAL LANGUAGE MODE" 5540 if self._match_text_seq("WITH", "QUERY", "EXPANSION"): 5541 modifier = f"{modifier} WITH QUERY EXPANSION" 5542 elif self._match_text_seq("IN", "BOOLEAN", "MODE"): 5543 modifier = "IN BOOLEAN MODE" 5544 elif self._match_text_seq("WITH", "QUERY", "EXPANSION"): 5545 modifier = "WITH QUERY EXPANSION" 5546 else: 5547 modifier = None 5548 5549 return self.expression( 5550 exp.MatchAgainst, this=this, expressions=expressions, modifier=modifier 5551 ) 5552 5553 # https://learn.microsoft.com/en-us/sql/t-sql/functions/openjson-transact-sql?view=sql-server-ver16 5554 def _parse_open_json(self) -> exp.OpenJSON: 5555 this = self._parse_bitwise() 5556 path = self._match(TokenType.COMMA) and self._parse_string() 5557 5558 def _parse_open_json_column_def() -> exp.OpenJSONColumnDef: 5559 this = self._parse_field(any_token=True) 5560 kind = self._parse_types() 5561 path = 
self._parse_string() 5562 as_json = self._match_pair(TokenType.ALIAS, TokenType.JSON) 5563 5564 return self.expression( 5565 exp.OpenJSONColumnDef, this=this, kind=kind, path=path, as_json=as_json 5566 ) 5567 5568 expressions = None 5569 if self._match_pair(TokenType.R_PAREN, TokenType.WITH): 5570 self._match_l_paren() 5571 expressions = self._parse_csv(_parse_open_json_column_def) 5572 5573 return self.expression(exp.OpenJSON, this=this, path=path, expressions=expressions) 5574 5575 def _parse_position(self, haystack_first: bool = False) -> exp.StrPosition: 5576 args = self._parse_csv(self._parse_bitwise) 5577 5578 if self._match(TokenType.IN): 5579 return self.expression( 5580 exp.StrPosition, this=self._parse_bitwise(), substr=seq_get(args, 0) 5581 ) 5582 5583 if haystack_first: 5584 haystack = seq_get(args, 0) 5585 needle = seq_get(args, 1) 5586 else: 5587 needle = seq_get(args, 0) 5588 haystack = seq_get(args, 1) 5589 5590 return self.expression( 5591 exp.StrPosition, this=haystack, substr=needle, position=seq_get(args, 2) 5592 ) 5593 5594 def _parse_predict(self) -> exp.Predict: 5595 self._match_text_seq("MODEL") 5596 this = self._parse_table() 5597 5598 self._match(TokenType.COMMA) 5599 self._match_text_seq("TABLE") 5600 5601 return self.expression( 5602 exp.Predict, 5603 this=this, 5604 expression=self._parse_table(), 5605 params_struct=self._match(TokenType.COMMA) and self._parse_bitwise(), 5606 ) 5607 5608 def _parse_join_hint(self, func_name: str) -> exp.JoinHint: 5609 args = self._parse_csv(self._parse_table) 5610 return exp.JoinHint(this=func_name.upper(), expressions=args) 5611 5612 def _parse_substring(self) -> exp.Substring: 5613 # Postgres supports the form: substring(string [from int] [for int]) 5614 # https://www.postgresql.org/docs/9.1/functions-string.html @ Table 9-6 5615 5616 args = t.cast(t.List[t.Optional[exp.Expression]], self._parse_csv(self._parse_bitwise)) 5617 5618 if self._match(TokenType.FROM): 5619 args.append(self._parse_bitwise()) 5620 if self._match(TokenType.FOR): 5621 if len(args) == 1: 5622 args.append(exp.Literal.number(1)) 5623 args.append(self._parse_bitwise()) 5624 5625 return self.validate_expression(exp.Substring.from_arg_list(args), args) 5626 5627 def _parse_trim(self) -> exp.Trim: 5628 # https://www.w3resource.com/sql/character-functions/trim.php 5629 # https://docs.oracle.com/javadb/10.8.3.0/ref/rreftrimfunc.html 5630 5631 position = None 5632 collation = None 5633 expression = None 5634 5635 if self._match_texts(self.TRIM_TYPES): 5636 position = self._prev.text.upper() 5637 5638 this = self._parse_bitwise() 5639 if self._match_set((TokenType.FROM, TokenType.COMMA)): 5640 invert_order = self._prev.token_type == TokenType.FROM or self.TRIM_PATTERN_FIRST 5641 expression = self._parse_bitwise() 5642 5643 if invert_order: 5644 this, expression = expression, this 5645 5646 if self._match(TokenType.COLLATE): 5647 collation = self._parse_bitwise() 5648 5649 return self.expression( 5650 exp.Trim, this=this, position=position, expression=expression, collation=collation 5651 ) 5652 5653 def _parse_window_clause(self) -> t.Optional[t.List[exp.Expression]]: 5654 return self._match(TokenType.WINDOW) and self._parse_csv(self._parse_named_window) 5655 5656 def _parse_named_window(self) -> t.Optional[exp.Expression]: 5657 return self._parse_window(self._parse_id_var(), alias=True) 5658 5659 def _parse_respect_or_ignore_nulls( 5660 self, this: t.Optional[exp.Expression] 5661 ) -> t.Optional[exp.Expression]: 5662 if self._match_text_seq("IGNORE", "NULLS"): 
5663 return self.expression(exp.IgnoreNulls, this=this) 5664 if self._match_text_seq("RESPECT", "NULLS"): 5665 return self.expression(exp.RespectNulls, this=this) 5666 return this 5667 5668 def _parse_having_max(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 5669 if self._match(TokenType.HAVING): 5670 self._match_texts(("MAX", "MIN")) 5671 max = self._prev.text.upper() != "MIN" 5672 return self.expression( 5673 exp.HavingMax, this=this, expression=self._parse_column(), max=max 5674 ) 5675 5676 return this 5677 5678 def _parse_window( 5679 self, this: t.Optional[exp.Expression], alias: bool = False 5680 ) -> t.Optional[exp.Expression]: 5681 func = this 5682 comments = func.comments if isinstance(func, exp.Expression) else None 5683 5684 if self._match_pair(TokenType.FILTER, TokenType.L_PAREN): 5685 self._match(TokenType.WHERE) 5686 this = self.expression( 5687 exp.Filter, this=this, expression=self._parse_where(skip_where_token=True) 5688 ) 5689 self._match_r_paren() 5690 5691 # T-SQL allows the OVER (...) syntax after WITHIN GROUP. 5692 # https://learn.microsoft.com/en-us/sql/t-sql/functions/percentile-disc-transact-sql?view=sql-server-ver16 5693 if self._match_text_seq("WITHIN", "GROUP"): 5694 order = self._parse_wrapped(self._parse_order) 5695 this = self.expression(exp.WithinGroup, this=this, expression=order) 5696 5697 # SQL spec defines an optional [ { IGNORE | RESPECT } NULLS ] OVER 5698 # Some dialects choose to implement and some do not. 5699 # https://dev.mysql.com/doc/refman/8.0/en/window-function-descriptions.html 5700 5701 # There is some code above in _parse_lambda that handles 5702 # SELECT FIRST_VALUE(TABLE.COLUMN IGNORE|RESPECT NULLS) OVER ... 5703 5704 # The below changes handle 5705 # SELECT FIRST_VALUE(TABLE.COLUMN) IGNORE|RESPECT NULLS OVER ... 5706 5707 # Oracle allows both formats 5708 # (https://docs.oracle.com/en/database/oracle/oracle-database/19/sqlrf/img_text/first_value.html) 5709 # and Snowflake chose to do the same for familiarity 5710 # https://docs.snowflake.com/en/sql-reference/functions/first_value.html#usage-notes 5711 if isinstance(this, exp.AggFunc): 5712 ignore_respect = this.find(exp.IgnoreNulls, exp.RespectNulls) 5713 5714 if ignore_respect and ignore_respect is not this: 5715 ignore_respect.replace(ignore_respect.this) 5716 this = self.expression(ignore_respect.__class__, this=this) 5717 5718 this = self._parse_respect_or_ignore_nulls(this) 5719 5720 # bigquery select from window x AS (partition by ...) 
5721 if alias: 5722 over = None 5723 self._match(TokenType.ALIAS) 5724 elif not self._match_set(self.WINDOW_BEFORE_PAREN_TOKENS): 5725 return this 5726 else: 5727 over = self._prev.text.upper() 5728 5729 if comments and isinstance(func, exp.Expression): 5730 func.pop_comments() 5731 5732 if not self._match(TokenType.L_PAREN): 5733 return self.expression( 5734 exp.Window, 5735 comments=comments, 5736 this=this, 5737 alias=self._parse_id_var(False), 5738 over=over, 5739 ) 5740 5741 window_alias = self._parse_id_var(any_token=False, tokens=self.WINDOW_ALIAS_TOKENS) 5742 5743 first = self._match(TokenType.FIRST) 5744 if self._match_text_seq("LAST"): 5745 first = False 5746 5747 partition, order = self._parse_partition_and_order() 5748 kind = self._match_set((TokenType.ROWS, TokenType.RANGE)) and self._prev.text 5749 5750 if kind: 5751 self._match(TokenType.BETWEEN) 5752 start = self._parse_window_spec() 5753 self._match(TokenType.AND) 5754 end = self._parse_window_spec() 5755 5756 spec = self.expression( 5757 exp.WindowSpec, 5758 kind=kind, 5759 start=start["value"], 5760 start_side=start["side"], 5761 end=end["value"], 5762 end_side=end["side"], 5763 ) 5764 else: 5765 spec = None 5766 5767 self._match_r_paren() 5768 5769 window = self.expression( 5770 exp.Window, 5771 comments=comments, 5772 this=this, 5773 partition_by=partition, 5774 order=order, 5775 spec=spec, 5776 alias=window_alias, 5777 over=over, 5778 first=first, 5779 ) 5780 5781 # This covers Oracle's FIRST/LAST syntax: aggregate KEEP (...) OVER (...) 5782 if self._match_set(self.WINDOW_BEFORE_PAREN_TOKENS, advance=False): 5783 return self._parse_window(window, alias=alias) 5784 5785 return window 5786 5787 def _parse_partition_and_order( 5788 self, 5789 ) -> t.Tuple[t.List[exp.Expression], t.Optional[exp.Expression]]: 5790 return self._parse_partition_by(), self._parse_order() 5791 5792 def _parse_window_spec(self) -> t.Dict[str, t.Optional[str | exp.Expression]]: 5793 self._match(TokenType.BETWEEN) 5794 5795 return { 5796 "value": ( 5797 (self._match_text_seq("UNBOUNDED") and "UNBOUNDED") 5798 or (self._match_text_seq("CURRENT", "ROW") and "CURRENT ROW") 5799 or self._parse_bitwise() 5800 ), 5801 "side": self._match_texts(self.WINDOW_SIDES) and self._prev.text, 5802 } 5803 5804 def _parse_alias( 5805 self, this: t.Optional[exp.Expression], explicit: bool = False 5806 ) -> t.Optional[exp.Expression]: 5807 any_token = self._match(TokenType.ALIAS) 5808 comments = self._prev_comments or [] 5809 5810 if explicit and not any_token: 5811 return this 5812 5813 if self._match(TokenType.L_PAREN): 5814 aliases = self.expression( 5815 exp.Aliases, 5816 comments=comments, 5817 this=this, 5818 expressions=self._parse_csv(lambda: self._parse_id_var(any_token)), 5819 ) 5820 self._match_r_paren(aliases) 5821 return aliases 5822 5823 alias = self._parse_id_var(any_token, tokens=self.ALIAS_TOKENS) or ( 5824 self.STRING_ALIASES and self._parse_string_as_identifier() 5825 ) 5826 5827 if alias: 5828 comments.extend(alias.pop_comments()) 5829 this = self.expression(exp.Alias, comments=comments, this=this, alias=alias) 5830 column = this.this 5831 5832 # Moves the comment next to the alias in `expr /* comment */ AS alias` 5833 if not this.comments and column and column.comments: 5834 this.comments = column.pop_comments() 5835 5836 return this 5837 5838 def _parse_id_var( 5839 self, 5840 any_token: bool = True, 5841 tokens: t.Optional[t.Collection[TokenType]] = None, 5842 ) -> t.Optional[exp.Expression]: 5843 expression = self._parse_identifier() 5844 if 
not expression and ( 5845 (any_token and self._advance_any()) or self._match_set(tokens or self.ID_VAR_TOKENS) 5846 ): 5847 quoted = self._prev.token_type == TokenType.STRING 5848 expression = self.expression(exp.Identifier, this=self._prev.text, quoted=quoted) 5849 5850 return expression 5851 5852 def _parse_string(self) -> t.Optional[exp.Expression]: 5853 if self._match_set(self.STRING_PARSERS): 5854 return self.STRING_PARSERS[self._prev.token_type](self, self._prev) 5855 return self._parse_placeholder() 5856 5857 def _parse_string_as_identifier(self) -> t.Optional[exp.Identifier]: 5858 return exp.to_identifier(self._match(TokenType.STRING) and self._prev.text, quoted=True) 5859 5860 def _parse_number(self) -> t.Optional[exp.Expression]: 5861 if self._match_set(self.NUMERIC_PARSERS): 5862 return self.NUMERIC_PARSERS[self._prev.token_type](self, self._prev) 5863 return self._parse_placeholder() 5864 5865 def _parse_identifier(self) -> t.Optional[exp.Expression]: 5866 if self._match(TokenType.IDENTIFIER): 5867 return self.expression(exp.Identifier, this=self._prev.text, quoted=True) 5868 return self._parse_placeholder() 5869 5870 def _parse_var( 5871 self, 5872 any_token: bool = False, 5873 tokens: t.Optional[t.Collection[TokenType]] = None, 5874 upper: bool = False, 5875 ) -> t.Optional[exp.Expression]: 5876 if ( 5877 (any_token and self._advance_any()) 5878 or self._match(TokenType.VAR) 5879 or (self._match_set(tokens) if tokens else False) 5880 ): 5881 return self.expression( 5882 exp.Var, this=self._prev.text.upper() if upper else self._prev.text 5883 ) 5884 return self._parse_placeholder() 5885 5886 def _advance_any(self, ignore_reserved: bool = False) -> t.Optional[Token]: 5887 if self._curr and (ignore_reserved or self._curr.token_type not in self.RESERVED_TOKENS): 5888 self._advance() 5889 return self._prev 5890 return None 5891 5892 def _parse_var_or_string(self) -> t.Optional[exp.Expression]: 5893 return self._parse_var() or self._parse_string() 5894 5895 def _parse_primary_or_var(self) -> t.Optional[exp.Expression]: 5896 return self._parse_primary() or self._parse_var(any_token=True) 5897 5898 def _parse_null(self) -> t.Optional[exp.Expression]: 5899 if self._match_set(self.NULL_TOKENS): 5900 return self.PRIMARY_PARSERS[TokenType.NULL](self, self._prev) 5901 return self._parse_placeholder() 5902 5903 def _parse_boolean(self) -> t.Optional[exp.Expression]: 5904 if self._match(TokenType.TRUE): 5905 return self.PRIMARY_PARSERS[TokenType.TRUE](self, self._prev) 5906 if self._match(TokenType.FALSE): 5907 return self.PRIMARY_PARSERS[TokenType.FALSE](self, self._prev) 5908 return self._parse_placeholder() 5909 5910 def _parse_star(self) -> t.Optional[exp.Expression]: 5911 if self._match(TokenType.STAR): 5912 return self.PRIMARY_PARSERS[TokenType.STAR](self, self._prev) 5913 return self._parse_placeholder() 5914 5915 def _parse_parameter(self) -> exp.Parameter: 5916 this = self._parse_identifier() or self._parse_primary_or_var() 5917 return self.expression(exp.Parameter, this=this) 5918 5919 def _parse_placeholder(self) -> t.Optional[exp.Expression]: 5920 if self._match_set(self.PLACEHOLDER_PARSERS): 5921 placeholder = self.PLACEHOLDER_PARSERS[self._prev.token_type](self) 5922 if placeholder: 5923 return placeholder 5924 self._advance(-1) 5925 return None 5926 5927 def _parse_star_op(self, *keywords: str) -> t.Optional[t.List[exp.Expression]]: 5928 if not self._match_texts(keywords): 5929 return None 5930 if self._match(TokenType.L_PAREN, advance=False): 5931 return 
self._parse_wrapped_csv(self._parse_expression) 5932 5933 expression = self._parse_expression() 5934 return [expression] if expression else None 5935 5936 def _parse_csv( 5937 self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA 5938 ) -> t.List[exp.Expression]: 5939 parse_result = parse_method() 5940 items = [parse_result] if parse_result is not None else [] 5941 5942 while self._match(sep): 5943 self._add_comments(parse_result) 5944 parse_result = parse_method() 5945 if parse_result is not None: 5946 items.append(parse_result) 5947 5948 return items 5949 5950 def _parse_tokens( 5951 self, parse_method: t.Callable, expressions: t.Dict 5952 ) -> t.Optional[exp.Expression]: 5953 this = parse_method() 5954 5955 while self._match_set(expressions): 5956 this = self.expression( 5957 expressions[self._prev.token_type], 5958 this=this, 5959 comments=self._prev_comments, 5960 expression=parse_method(), 5961 ) 5962 5963 return this 5964 5965 def _parse_wrapped_id_vars(self, optional: bool = False) -> t.List[exp.Expression]: 5966 return self._parse_wrapped_csv(self._parse_id_var, optional=optional) 5967 5968 def _parse_wrapped_csv( 5969 self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA, optional: bool = False 5970 ) -> t.List[exp.Expression]: 5971 return self._parse_wrapped( 5972 lambda: self._parse_csv(parse_method, sep=sep), optional=optional 5973 ) 5974 5975 def _parse_wrapped(self, parse_method: t.Callable, optional: bool = False) -> t.Any: 5976 wrapped = self._match(TokenType.L_PAREN) 5977 if not wrapped and not optional: 5978 self.raise_error("Expecting (") 5979 parse_result = parse_method() 5980 if wrapped: 5981 self._match_r_paren() 5982 return parse_result 5983 5984 def _parse_expressions(self) -> t.List[exp.Expression]: 5985 return self._parse_csv(self._parse_expression) 5986 5987 def _parse_select_or_expression(self, alias: bool = False) -> t.Optional[exp.Expression]: 5988 return self._parse_select() or self._parse_set_operations( 5989 self._parse_expression() if alias else self._parse_assignment() 5990 ) 5991 5992 def _parse_ddl_select(self) -> t.Optional[exp.Expression]: 5993 return self._parse_query_modifiers( 5994 self._parse_set_operations(self._parse_select(nested=True, parse_subquery_alias=False)) 5995 ) 5996 5997 def _parse_transaction(self) -> exp.Transaction | exp.Command: 5998 this = None 5999 if self._match_texts(self.TRANSACTION_KIND): 6000 this = self._prev.text 6001 6002 self._match_texts(("TRANSACTION", "WORK")) 6003 6004 modes = [] 6005 while True: 6006 mode = [] 6007 while self._match(TokenType.VAR): 6008 mode.append(self._prev.text) 6009 6010 if mode: 6011 modes.append(" ".join(mode)) 6012 if not self._match(TokenType.COMMA): 6013 break 6014 6015 return self.expression(exp.Transaction, this=this, modes=modes) 6016 6017 def _parse_commit_or_rollback(self) -> exp.Commit | exp.Rollback: 6018 chain = None 6019 savepoint = None 6020 is_rollback = self._prev.token_type == TokenType.ROLLBACK 6021 6022 self._match_texts(("TRANSACTION", "WORK")) 6023 6024 if self._match_text_seq("TO"): 6025 self._match_text_seq("SAVEPOINT") 6026 savepoint = self._parse_id_var() 6027 6028 if self._match(TokenType.AND): 6029 chain = not self._match_text_seq("NO") 6030 self._match_text_seq("CHAIN") 6031 6032 if is_rollback: 6033 return self.expression(exp.Rollback, savepoint=savepoint) 6034 6035 return self.expression(exp.Commit, chain=chain) 6036 6037 def _parse_refresh(self) -> exp.Refresh: 6038 self._match(TokenType.TABLE) 6039 return self.expression(exp.Refresh, 
this=self._parse_string() or self._parse_table()) 6040 6041 def _parse_add_column(self) -> t.Optional[exp.Expression]: 6042 if not self._match_text_seq("ADD"): 6043 return None 6044 6045 self._match(TokenType.COLUMN) 6046 exists_column = self._parse_exists(not_=True) 6047 expression = self._parse_field_def() 6048 6049 if expression: 6050 expression.set("exists", exists_column) 6051 6052 # https://docs.databricks.com/delta/update-schema.html#explicitly-update-schema-to-add-columns 6053 if self._match_texts(("FIRST", "AFTER")): 6054 position = self._prev.text 6055 column_position = self.expression( 6056 exp.ColumnPosition, this=self._parse_column(), position=position 6057 ) 6058 expression.set("position", column_position) 6059 6060 return expression 6061 6062 def _parse_drop_column(self) -> t.Optional[exp.Drop | exp.Command]: 6063 drop = self._match(TokenType.DROP) and self._parse_drop() 6064 if drop and not isinstance(drop, exp.Command): 6065 drop.set("kind", drop.args.get("kind", "COLUMN")) 6066 return drop 6067 6068 # https://docs.aws.amazon.com/athena/latest/ug/alter-table-drop-partition.html 6069 def _parse_drop_partition(self, exists: t.Optional[bool] = None) -> exp.DropPartition: 6070 return self.expression( 6071 exp.DropPartition, expressions=self._parse_csv(self._parse_partition), exists=exists 6072 ) 6073 6074 def _parse_alter_table_add(self) -> t.List[exp.Expression]: 6075 index = self._index - 1 6076 6077 if self._match_set(self.ADD_CONSTRAINT_TOKENS, advance=False): 6078 return self._parse_csv( 6079 lambda: self.expression( 6080 exp.AddConstraint, expressions=self._parse_csv(self._parse_constraint) 6081 ) 6082 ) 6083 6084 self._retreat(index) 6085 if not self.ALTER_TABLE_ADD_REQUIRED_FOR_EACH_COLUMN and self._match_text_seq("ADD"): 6086 return self._parse_wrapped_csv(self._parse_field_def, optional=True) 6087 return self._parse_wrapped_csv(self._parse_add_column, optional=True) 6088 6089 def _parse_alter_table_alter(self) -> t.Optional[exp.Expression]: 6090 if self._match_texts(self.ALTER_ALTER_PARSERS): 6091 return self.ALTER_ALTER_PARSERS[self._prev.text.upper()](self) 6092 6093 # Many dialects support the ALTER [COLUMN] syntax, so if there is no 6094 # keyword after ALTER we default to parsing this statement 6095 self._match(TokenType.COLUMN) 6096 column = self._parse_field(any_token=True) 6097 6098 if self._match_pair(TokenType.DROP, TokenType.DEFAULT): 6099 return self.expression(exp.AlterColumn, this=column, drop=True) 6100 if self._match_pair(TokenType.SET, TokenType.DEFAULT): 6101 return self.expression(exp.AlterColumn, this=column, default=self._parse_assignment()) 6102 if self._match(TokenType.COMMENT): 6103 return self.expression(exp.AlterColumn, this=column, comment=self._parse_string()) 6104 if self._match_text_seq("DROP", "NOT", "NULL"): 6105 return self.expression( 6106 exp.AlterColumn, 6107 this=column, 6108 drop=True, 6109 allow_null=True, 6110 ) 6111 if self._match_text_seq("SET", "NOT", "NULL"): 6112 return self.expression( 6113 exp.AlterColumn, 6114 this=column, 6115 allow_null=False, 6116 ) 6117 self._match_text_seq("SET", "DATA") 6118 self._match_text_seq("TYPE") 6119 return self.expression( 6120 exp.AlterColumn, 6121 this=column, 6122 dtype=self._parse_types(), 6123 collate=self._match(TokenType.COLLATE) and self._parse_term(), 6124 using=self._match(TokenType.USING) and self._parse_assignment(), 6125 ) 6126 6127 def _parse_alter_diststyle(self) -> exp.AlterDistStyle: 6128 if self._match_texts(("ALL", "EVEN", "AUTO")): 6129 return 
self.expression(exp.AlterDistStyle, this=exp.var(self._prev.text.upper())) 6130 6131 self._match_text_seq("KEY", "DISTKEY") 6132 return self.expression(exp.AlterDistStyle, this=self._parse_column()) 6133 6134 def _parse_alter_sortkey(self, compound: t.Optional[bool] = None) -> exp.AlterSortKey: 6135 if compound: 6136 self._match_text_seq("SORTKEY") 6137 6138 if self._match(TokenType.L_PAREN, advance=False): 6139 return self.expression( 6140 exp.AlterSortKey, expressions=self._parse_wrapped_id_vars(), compound=compound 6141 ) 6142 6143 self._match_texts(("AUTO", "NONE")) 6144 return self.expression( 6145 exp.AlterSortKey, this=exp.var(self._prev.text.upper()), compound=compound 6146 ) 6147 6148 def _parse_alter_table_drop(self) -> t.List[exp.Expression]: 6149 index = self._index - 1 6150 6151 partition_exists = self._parse_exists() 6152 if self._match(TokenType.PARTITION, advance=False): 6153 return self._parse_csv(lambda: self._parse_drop_partition(exists=partition_exists)) 6154 6155 self._retreat(index) 6156 return self._parse_csv(self._parse_drop_column) 6157 6158 def _parse_alter_table_rename(self) -> t.Optional[exp.RenameTable | exp.RenameColumn]: 6159 if self._match(TokenType.COLUMN): 6160 exists = self._parse_exists() 6161 old_column = self._parse_column() 6162 to = self._match_text_seq("TO") 6163 new_column = self._parse_column() 6164 6165 if old_column is None or to is None or new_column is None: 6166 return None 6167 6168 return self.expression(exp.RenameColumn, this=old_column, to=new_column, exists=exists) 6169 6170 self._match_text_seq("TO") 6171 return self.expression(exp.RenameTable, this=self._parse_table(schema=True)) 6172 6173 def _parse_alter_table_set(self) -> exp.AlterSet: 6174 alter_set = self.expression(exp.AlterSet) 6175 6176 if self._match(TokenType.L_PAREN, advance=False) or self._match_text_seq( 6177 "TABLE", "PROPERTIES" 6178 ): 6179 alter_set.set("expressions", self._parse_wrapped_csv(self._parse_assignment)) 6180 elif self._match_text_seq("FILESTREAM_ON", advance=False): 6181 alter_set.set("expressions", [self._parse_assignment()]) 6182 elif self._match_texts(("LOGGED", "UNLOGGED")): 6183 alter_set.set("option", exp.var(self._prev.text.upper())) 6184 elif self._match_text_seq("WITHOUT") and self._match_texts(("CLUSTER", "OIDS")): 6185 alter_set.set("option", exp.var(f"WITHOUT {self._prev.text.upper()}")) 6186 elif self._match_text_seq("LOCATION"): 6187 alter_set.set("location", self._parse_field()) 6188 elif self._match_text_seq("ACCESS", "METHOD"): 6189 alter_set.set("access_method", self._parse_field()) 6190 elif self._match_text_seq("TABLESPACE"): 6191 alter_set.set("tablespace", self._parse_field()) 6192 elif self._match_text_seq("FILE", "FORMAT") or self._match_text_seq("FILEFORMAT"): 6193 alter_set.set("file_format", [self._parse_field()]) 6194 elif self._match_text_seq("STAGE_FILE_FORMAT"): 6195 alter_set.set("file_format", self._parse_wrapped_options()) 6196 elif self._match_text_seq("STAGE_COPY_OPTIONS"): 6197 alter_set.set("copy_options", self._parse_wrapped_options()) 6198 elif self._match_text_seq("TAG") or self._match_text_seq("TAGS"): 6199 alter_set.set("tag", self._parse_csv(self._parse_assignment)) 6200 else: 6201 if self._match_text_seq("SERDE"): 6202 alter_set.set("serde", self._parse_field()) 6203 6204 alter_set.set("expressions", [self._parse_properties()]) 6205 6206 return alter_set 6207 6208 def _parse_alter(self) -> exp.AlterTable | exp.Command: 6209 start = self._prev 6210 6211 if not self._match(TokenType.TABLE): 6212 return 
self._parse_as_command(start) 6213 6214 exists = self._parse_exists() 6215 only = self._match_text_seq("ONLY") 6216 this = self._parse_table(schema=True) 6217 cluster = self._parse_on_property() if self._match(TokenType.ON) else None 6218 6219 if self._next: 6220 self._advance() 6221 6222 parser = self.ALTER_PARSERS.get(self._prev.text.upper()) if self._prev else None 6223 if parser: 6224 actions = ensure_list(parser(self)) 6225 options = self._parse_csv(self._parse_property) 6226 6227 if not self._curr and actions: 6228 return self.expression( 6229 exp.AlterTable, 6230 this=this, 6231 exists=exists, 6232 actions=actions, 6233 only=only, 6234 options=options, 6235 cluster=cluster, 6236 ) 6237 6238 return self._parse_as_command(start) 6239 6240 def _parse_merge(self) -> exp.Merge: 6241 self._match(TokenType.INTO) 6242 target = self._parse_table() 6243 6244 if target and self._match(TokenType.ALIAS, advance=False): 6245 target.set("alias", self._parse_table_alias()) 6246 6247 self._match(TokenType.USING) 6248 using = self._parse_table() 6249 6250 self._match(TokenType.ON) 6251 on = self._parse_assignment() 6252 6253 return self.expression( 6254 exp.Merge, 6255 this=target, 6256 using=using, 6257 on=on, 6258 expressions=self._parse_when_matched(), 6259 ) 6260 6261 def _parse_when_matched(self) -> t.List[exp.When]: 6262 whens = [] 6263 6264 while self._match(TokenType.WHEN): 6265 matched = not self._match(TokenType.NOT) 6266 self._match_text_seq("MATCHED") 6267 source = ( 6268 False 6269 if self._match_text_seq("BY", "TARGET") 6270 else self._match_text_seq("BY", "SOURCE") 6271 ) 6272 condition = self._parse_assignment() if self._match(TokenType.AND) else None 6273 6274 self._match(TokenType.THEN) 6275 6276 if self._match(TokenType.INSERT): 6277 _this = self._parse_star() 6278 if _this: 6279 then: t.Optional[exp.Expression] = self.expression(exp.Insert, this=_this) 6280 else: 6281 then = self.expression( 6282 exp.Insert, 6283 this=self._parse_value(), 6284 expression=self._match_text_seq("VALUES") and self._parse_value(), 6285 ) 6286 elif self._match(TokenType.UPDATE): 6287 expressions = self._parse_star() 6288 if expressions: 6289 then = self.expression(exp.Update, expressions=expressions) 6290 else: 6291 then = self.expression( 6292 exp.Update, 6293 expressions=self._match(TokenType.SET) 6294 and self._parse_csv(self._parse_equality), 6295 ) 6296 elif self._match(TokenType.DELETE): 6297 then = self.expression(exp.Var, this=self._prev.text) 6298 else: 6299 then = None 6300 6301 whens.append( 6302 self.expression( 6303 exp.When, 6304 matched=matched, 6305 source=source, 6306 condition=condition, 6307 then=then, 6308 ) 6309 ) 6310 return whens 6311 6312 def _parse_show(self) -> t.Optional[exp.Expression]: 6313 parser = self._find_parser(self.SHOW_PARSERS, self.SHOW_TRIE) 6314 if parser: 6315 return parser(self) 6316 return self._parse_as_command(self._prev) 6317 6318 def _parse_set_item_assignment( 6319 self, kind: t.Optional[str] = None 6320 ) -> t.Optional[exp.Expression]: 6321 index = self._index 6322 6323 if kind in ("GLOBAL", "SESSION") and self._match_text_seq("TRANSACTION"): 6324 return self._parse_set_transaction(global_=kind == "GLOBAL") 6325 6326 left = self._parse_primary() or self._parse_column() 6327 assignment_delimiter = self._match_texts(("=", "TO")) 6328 6329 if not left or (self.SET_REQUIRES_ASSIGNMENT_DELIMITER and not assignment_delimiter): 6330 self._retreat(index) 6331 return None 6332 6333 right = self._parse_statement() or self._parse_id_var() 6334 if isinstance(right, 
(exp.Column, exp.Identifier)): 6335 right = exp.var(right.name) 6336 6337 this = self.expression(exp.EQ, this=left, expression=right) 6338 return self.expression(exp.SetItem, this=this, kind=kind) 6339 6340 def _parse_set_transaction(self, global_: bool = False) -> exp.Expression: 6341 self._match_text_seq("TRANSACTION") 6342 characteristics = self._parse_csv( 6343 lambda: self._parse_var_from_options(self.TRANSACTION_CHARACTERISTICS) 6344 ) 6345 return self.expression( 6346 exp.SetItem, 6347 expressions=characteristics, 6348 kind="TRANSACTION", 6349 **{"global": global_}, # type: ignore 6350 ) 6351 6352 def _parse_set_item(self) -> t.Optional[exp.Expression]: 6353 parser = self._find_parser(self.SET_PARSERS, self.SET_TRIE) 6354 return parser(self) if parser else self._parse_set_item_assignment(kind=None) 6355 6356 def _parse_set(self, unset: bool = False, tag: bool = False) -> exp.Set | exp.Command: 6357 index = self._index 6358 set_ = self.expression( 6359 exp.Set, expressions=self._parse_csv(self._parse_set_item), unset=unset, tag=tag 6360 ) 6361 6362 if self._curr: 6363 self._retreat(index) 6364 return self._parse_as_command(self._prev) 6365 6366 return set_ 6367 6368 def _parse_var_from_options( 6369 self, options: OPTIONS_TYPE, raise_unmatched: bool = True 6370 ) -> t.Optional[exp.Var]: 6371 start = self._curr 6372 if not start: 6373 return None 6374 6375 option = start.text.upper() 6376 continuations = options.get(option) 6377 6378 index = self._index 6379 self._advance() 6380 for keywords in continuations or []: 6381 if isinstance(keywords, str): 6382 keywords = (keywords,) 6383 6384 if self._match_text_seq(*keywords): 6385 option = f"{option} {' '.join(keywords)}" 6386 break 6387 else: 6388 if continuations or continuations is None: 6389 if raise_unmatched: 6390 self.raise_error(f"Unknown option {option}") 6391 6392 self._retreat(index) 6393 return None 6394 6395 return exp.var(option) 6396 6397 def _parse_as_command(self, start: Token) -> exp.Command: 6398 while self._curr: 6399 self._advance() 6400 text = self._find_sql(start, self._prev) 6401 size = len(start.text) 6402 self._warn_unsupported() 6403 return exp.Command(this=text[:size], expression=text[size:]) 6404 6405 def _parse_dict_property(self, this: str) -> exp.DictProperty: 6406 settings = [] 6407 6408 self._match_l_paren() 6409 kind = self._parse_id_var() 6410 6411 if self._match(TokenType.L_PAREN): 6412 while True: 6413 key = self._parse_id_var() 6414 value = self._parse_primary() 6415 6416 if not key and value is None: 6417 break 6418 settings.append(self.expression(exp.DictSubProperty, this=key, value=value)) 6419 self._match(TokenType.R_PAREN) 6420 6421 self._match_r_paren() 6422 6423 return self.expression( 6424 exp.DictProperty, 6425 this=this, 6426 kind=kind.this if kind else None, 6427 settings=settings, 6428 ) 6429 6430 def _parse_dict_range(self, this: str) -> exp.DictRange: 6431 self._match_l_paren() 6432 has_min = self._match_text_seq("MIN") 6433 if has_min: 6434 min = self._parse_var() or self._parse_primary() 6435 self._match_text_seq("MAX") 6436 max = self._parse_var() or self._parse_primary() 6437 else: 6438 max = self._parse_var() or self._parse_primary() 6439 min = exp.Literal.number(0) 6440 self._match_r_paren() 6441 return self.expression(exp.DictRange, this=this, min=min, max=max) 6442 6443 def _parse_comprehension( 6444 self, this: t.Optional[exp.Expression] 6445 ) -> t.Optional[exp.Comprehension]: 6446 index = self._index 6447 expression = self._parse_column() 6448 if not 
self._match(TokenType.IN): 6449 self._retreat(index - 1) 6450 return None 6451 iterator = self._parse_column() 6452 condition = self._parse_assignment() if self._match_text_seq("IF") else None 6453 return self.expression( 6454 exp.Comprehension, 6455 this=this, 6456 expression=expression, 6457 iterator=iterator, 6458 condition=condition, 6459 ) 6460 6461 def _parse_heredoc(self) -> t.Optional[exp.Heredoc]: 6462 if self._match(TokenType.HEREDOC_STRING): 6463 return self.expression(exp.Heredoc, this=self._prev.text) 6464 6465 if not self._match_text_seq("$"): 6466 return None 6467 6468 tags = ["$"] 6469 tag_text = None 6470 6471 if self._is_connected(): 6472 self._advance() 6473 tags.append(self._prev.text.upper()) 6474 else: 6475 self.raise_error("No closing $ found") 6476 6477 if tags[-1] != "$": 6478 if self._is_connected() and self._match_text_seq("$"): 6479 tag_text = tags[-1] 6480 tags.append("$") 6481 else: 6482 self.raise_error("No closing $ found") 6483 6484 heredoc_start = self._curr 6485 6486 while self._curr: 6487 if self._match_text_seq(*tags, advance=False): 6488 this = self._find_sql(heredoc_start, self._prev) 6489 self._advance(len(tags)) 6490 return self.expression(exp.Heredoc, this=this, tag=tag_text) 6491 6492 self._advance() 6493 6494 self.raise_error(f"No closing {''.join(tags)} found") 6495 return None 6496 6497 def _find_parser( 6498 self, parsers: t.Dict[str, t.Callable], trie: t.Dict 6499 ) -> t.Optional[t.Callable]: 6500 if not self._curr: 6501 return None 6502 6503 index = self._index 6504 this = [] 6505 while True: 6506 # The current token might be multiple words 6507 curr = self._curr.text.upper() 6508 key = curr.split(" ") 6509 this.append(curr) 6510 6511 self._advance() 6512 result, trie = in_trie(trie, key) 6513 if result == TrieResult.FAILED: 6514 break 6515 6516 if result == TrieResult.EXISTS: 6517 subparser = parsers[" ".join(this)] 6518 return subparser 6519 6520 self._retreat(index) 6521 return None 6522 6523 def _match(self, token_type, advance=True, expression=None): 6524 if not self._curr: 6525 return None 6526 6527 if self._curr.token_type == token_type: 6528 if advance: 6529 self._advance() 6530 self._add_comments(expression) 6531 return True 6532 6533 return None 6534 6535 def _match_set(self, types, advance=True): 6536 if not self._curr: 6537 return None 6538 6539 if self._curr.token_type in types: 6540 if advance: 6541 self._advance() 6542 return True 6543 6544 return None 6545 6546 def _match_pair(self, token_type_a, token_type_b, advance=True): 6547 if not self._curr or not self._next: 6548 return None 6549 6550 if self._curr.token_type == token_type_a and self._next.token_type == token_type_b: 6551 if advance: 6552 self._advance(2) 6553 return True 6554 6555 return None 6556 6557 def _match_l_paren(self, expression: t.Optional[exp.Expression] = None) -> None: 6558 if not self._match(TokenType.L_PAREN, expression=expression): 6559 self.raise_error("Expecting (") 6560 6561 def _match_r_paren(self, expression: t.Optional[exp.Expression] = None) -> None: 6562 if not self._match(TokenType.R_PAREN, expression=expression): 6563 self.raise_error("Expecting )") 6564 6565 def _match_texts(self, texts, advance=True): 6566 if self._curr and self._curr.text.upper() in texts: 6567 if advance: 6568 self._advance() 6569 return True 6570 return None 6571 6572 def _match_text_seq(self, *texts, advance=True): 6573 index = self._index 6574 for text in texts: 6575 if self._curr and self._curr.text.upper() == text: 6576 self._advance() 6577 else: 6578 
self._retreat(index) 6579 return None 6580 6581 if not advance: 6582 self._retreat(index) 6583 6584 return True 6585 6586 def _replace_lambda( 6587 self, node: t.Optional[exp.Expression], expressions: t.List[exp.Expression] 6588 ) -> t.Optional[exp.Expression]: 6589 if not node: 6590 return node 6591 6592 lambda_types = {e.name: e.args.get("to") or False for e in expressions} 6593 6594 for column in node.find_all(exp.Column): 6595 typ = lambda_types.get(column.parts[0].name) 6596 if typ is not None: 6597 dot_or_id = column.to_dot() if column.table else column.this 6598 6599 if typ: 6600 dot_or_id = self.expression( 6601 exp.Cast, 6602 this=dot_or_id, 6603 to=typ, 6604 ) 6605 6606 parent = column.parent 6607 6608 while isinstance(parent, exp.Dot): 6609 if not isinstance(parent.parent, exp.Dot): 6610 parent.replace(dot_or_id) 6611 break 6612 parent = parent.parent 6613 else: 6614 if column is node: 6615 node = dot_or_id 6616 else: 6617 column.replace(dot_or_id) 6618 return node 6619 6620 def _parse_truncate_table(self) -> t.Optional[exp.TruncateTable] | exp.Expression: 6621 start = self._prev 6622 6623 # Not to be confused with TRUNCATE(number, decimals) function call 6624 if self._match(TokenType.L_PAREN): 6625 self._retreat(self._index - 2) 6626 return self._parse_function() 6627 6628 # Clickhouse supports TRUNCATE DATABASE as well 6629 is_database = self._match(TokenType.DATABASE) 6630 6631 self._match(TokenType.TABLE) 6632 6633 exists = self._parse_exists(not_=False) 6634 6635 expressions = self._parse_csv( 6636 lambda: self._parse_table(schema=True, is_db_reference=is_database) 6637 ) 6638 6639 cluster = self._parse_on_property() if self._match(TokenType.ON) else None 6640 6641 if self._match_text_seq("RESTART", "IDENTITY"): 6642 identity = "RESTART" 6643 elif self._match_text_seq("CONTINUE", "IDENTITY"): 6644 identity = "CONTINUE" 6645 else: 6646 identity = None 6647 6648 if self._match_text_seq("CASCADE") or self._match_text_seq("RESTRICT"): 6649 option = self._prev.text 6650 else: 6651 option = None 6652 6653 partition = self._parse_partition() 6654 6655 # Fallback case 6656 if self._curr: 6657 return self._parse_as_command(start) 6658 6659 return self.expression( 6660 exp.TruncateTable, 6661 expressions=expressions, 6662 is_database=is_database, 6663 exists=exists, 6664 cluster=cluster, 6665 identity=identity, 6666 option=option, 6667 partition=partition, 6668 ) 6669 6670 def _parse_with_operator(self) -> t.Optional[exp.Expression]: 6671 this = self._parse_ordered(self._parse_opclass) 6672 6673 if not self._match(TokenType.WITH): 6674 return this 6675 6676 op = self._parse_var(any_token=True) 6677 6678 return self.expression(exp.WithOperator, this=this, op=op) 6679 6680 def _parse_wrapped_options(self) -> t.List[t.Optional[exp.Expression]]: 6681 self._match(TokenType.EQ) 6682 self._match(TokenType.L_PAREN) 6683 6684 opts: t.List[t.Optional[exp.Expression]] = [] 6685 while self._curr and not self._match(TokenType.R_PAREN): 6686 if self._match_text_seq("FORMAT_NAME", "="): 6687 # The FORMAT_NAME can be set to an identifier for Snowflake and T-SQL, 6688 # so we parse it separately to use _parse_field() 6689 prop = self.expression( 6690 exp.Property, this=exp.var("FORMAT_NAME"), value=self._parse_field() 6691 ) 6692 opts.append(prop) 6693 else: 6694 opts.append(self._parse_property()) 6695 6696 self._match(TokenType.COMMA) 6697 6698 return opts 6699 6700 def _parse_copy_parameters(self) -> t.List[exp.CopyParameter]: 6701 sep = TokenType.COMMA if self.dialect.COPY_PARAMS_ARE_CSV else 
None 6702 6703 options = [] 6704 while self._curr and not self._match(TokenType.R_PAREN, advance=False): 6705 option = self._parse_var(any_token=True) 6706 prev = self._prev.text.upper() 6707 6708 # Different dialects might separate options and values by white space, "=" and "AS" 6709 self._match(TokenType.EQ) 6710 self._match(TokenType.ALIAS) 6711 6712 param = self.expression(exp.CopyParameter, this=option) 6713 6714 if prev in self.COPY_INTO_VARLEN_OPTIONS and self._match( 6715 TokenType.L_PAREN, advance=False 6716 ): 6717 # Snowflake FILE_FORMAT case, Databricks COPY & FORMAT options 6718 param.set("expressions", self._parse_wrapped_options()) 6719 elif prev == "FILE_FORMAT": 6720 # T-SQL's external file format case 6721 param.set("expression", self._parse_field()) 6722 else: 6723 param.set("expression", self._parse_unquoted_field()) 6724 6725 options.append(param) 6726 self._match(sep) 6727 6728 return options 6729 6730 def _parse_credentials(self) -> t.Optional[exp.Credentials]: 6731 expr = self.expression(exp.Credentials) 6732 6733 if self._match_text_seq("STORAGE_INTEGRATION", "="): 6734 expr.set("storage", self._parse_field()) 6735 if self._match_text_seq("CREDENTIALS"): 6736 # Snowflake case: CREDENTIALS = (...), Redshift case: CREDENTIALS <string> 6737 creds = ( 6738 self._parse_wrapped_options() if self._match(TokenType.EQ) else self._parse_field() 6739 ) 6740 expr.set("credentials", creds) 6741 if self._match_text_seq("ENCRYPTION"): 6742 expr.set("encryption", self._parse_wrapped_options()) 6743 if self._match_text_seq("IAM_ROLE"): 6744 expr.set("iam_role", self._parse_field()) 6745 if self._match_text_seq("REGION"): 6746 expr.set("region", self._parse_field()) 6747 6748 return expr 6749 6750 def _parse_file_location(self) -> t.Optional[exp.Expression]: 6751 return self._parse_field() 6752 6753 def _parse_copy(self) -> exp.Copy | exp.Command: 6754 start = self._prev 6755 6756 self._match(TokenType.INTO) 6757 6758 this = ( 6759 self._parse_select(nested=True, parse_subquery_alias=False) 6760 if self._match(TokenType.L_PAREN, advance=False) 6761 else self._parse_table(schema=True) 6762 ) 6763 6764 kind = self._match(TokenType.FROM) or not self._match_text_seq("TO") 6765 6766 files = self._parse_csv(self._parse_file_location) 6767 credentials = self._parse_credentials() 6768 6769 self._match_text_seq("WITH") 6770 6771 params = self._parse_wrapped(self._parse_copy_parameters, optional=True) 6772 6773 # Fallback case 6774 if self._curr: 6775 return self._parse_as_command(start) 6776 6777 return self.expression( 6778 exp.Copy, 6779 this=this, 6780 kind=kind, 6781 credentials=credentials, 6782 files=files, 6783 params=params, 6784 )
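The _parse_decode logic at the top of this listing lowers Oracle-style DECODE into an exp.Case tree, emitting a plain equality check for literal search values and a NULL-safe comparison otherwise. A minimal sketch of the end-to-end effect, assuming the bundled "oracle" dialect (the exact output formatting may vary across sqlglot versions):

    import sqlglot

    # DECODE's search/result pairs become WHEN branches; the trailing odd
    # argument becomes the CASE default.
    sql = "SELECT DECODE(x, 1, 'one', 2, 'two', 'other') FROM t"
    print(sqlglot.transpile(sql, read="oracle")[0])
    # SELECT CASE WHEN x = 1 THEN 'one' WHEN x = 2 THEN 'two' ELSE 'other' END FROM t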
Parser consumes a list of tokens produced by the Tokenizer and produces a parsed syntax tree.
Arguments:
- error_level: The desired error level. Default: ErrorLevel.IMMEDIATE
- error_message_context: The amount of context to capture from a query string when displaying the error message (in number of characters). Default: 100
- max_errors: Maximum number of error messages to include in a raised ParseError. This is only relevant if error_level is ErrorLevel.RAISE. Default: 3
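As an illustration of these options, a minimal sketch that drives the Parser by hand with the default dialect's Tokenizer (most callers would go through sqlglot.parse or sqlglot.parse_one instead):

    from sqlglot.errors import ErrorLevel
    from sqlglot.parser import Parser
    from sqlglot.tokens import Tokenizer

    sql = "SELECT a FROM t"
    tokens = Tokenizer().tokenize(sql)

    # Accumulate up to 5 errors and raise them together, instead of the
    # fail-fast behavior of the default ErrorLevel.IMMEDIATE.
    parser = Parser(error_level=ErrorLevel.RAISE, max_errors=5)
    expressions = parser.parse(tokens, sql)
    print(expressions[0].sql())  # SELECT a FROM t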
1218 def __init__( 1219 self, 1220 error_level: t.Optional[ErrorLevel] = None, 1221 error_message_context: int = 100, 1222 max_errors: int = 3, 1223 dialect: DialectType = None, 1224 ): 1225 from sqlglot.dialects import Dialect 1226 1227 self.error_level = error_level or ErrorLevel.IMMEDIATE 1228 self.error_message_context = error_message_context 1229 self.max_errors = max_errors 1230 self.dialect = Dialect.get_or_raise(dialect) 1231 self.reset()
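The dialect argument is resolved through Dialect.get_or_raise, which accepts a dialect name, a Dialect subclass, or an instance; an unknown name raises. A small sketch, using "duckdb" purely as an example of a bundled dialect:

    from sqlglot.dialects import Dialect
    from sqlglot.parser import Parser

    parser = Parser(dialect="duckdb")
    assert isinstance(parser.dialect, Dialect)

    # Parser(dialect="not-a-dialect")  # would raise from Dialect.get_or_raise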
1243 def parse( 1244 self, raw_tokens: t.List[Token], sql: t.Optional[str] = None 1245 ) -> t.List[t.Optional[exp.Expression]]: 1246 """ 1247 Parses a list of tokens and returns a list of syntax trees, one tree 1248 per parsed SQL statement. 1249 1250 Args: 1251 raw_tokens: The list of tokens. 1252 sql: The original SQL string, used to produce helpful debug messages. 1253 1254 Returns: 1255 The list of the produced syntax trees. 1256 """ 1257 return self._parse( 1258 parse_method=self.__class__._parse_statement, raw_tokens=raw_tokens, sql=sql 1259 )
Parses a list of tokens and returns a list of syntax trees, one tree per parsed SQL statement.
Arguments:
- raw_tokens: The list of tokens.
- sql: The original SQL string, used to produce helpful debug messages.
Returns:
The list of the produced syntax trees.
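Because parsing is statement-oriented, a multi-statement input comes back as one tree per semicolon-separated statement; a quick sketch:

    from sqlglot.parser import Parser
    from sqlglot.tokens import Tokenizer

    sql = "SELECT 1; SELECT 2"
    trees = Parser().parse(Tokenizer().tokenize(sql), sql)
    print(len(trees))      # 2
    print(trees[1].sql())  # SELECT 2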
1261 def parse_into( 1262 self, 1263 expression_types: exp.IntoType, 1264 raw_tokens: t.List[Token], 1265 sql: t.Optional[str] = None, 1266 ) -> t.List[t.Optional[exp.Expression]]: 1267 """ 1268 Parses a list of tokens into a given Expression type. If a collection of Expression 1269 types is given instead, this method will try to parse the token list into each one 1270 of them, stopping at the first for which the parsing succeeds. 1271 1272 Args: 1273 expression_types: The expression type(s) to try and parse the token list into. 1274 raw_tokens: The list of tokens. 1275 sql: The original SQL string, used to produce helpful debug messages. 1276 1277 Returns: 1278 The target Expression. 1279 """ 1280 errors = [] 1281 for expression_type in ensure_list(expression_types): 1282 parser = self.EXPRESSION_PARSERS.get(expression_type) 1283 if not parser: 1284 raise TypeError(f"No parser registered for {expression_type}") 1285 1286 try: 1287 return self._parse(parser, raw_tokens, sql) 1288 except ParseError as e: 1289 e.errors[0]["into_expression"] = expression_type 1290 errors.append(e) 1291 1292 raise ParseError( 1293 f"Failed to parse '{sql or raw_tokens}' into {expression_types}", 1294 errors=merge_errors(errors), 1295 ) from errors[-1]
Parses a list of tokens into a given Expression type. If a collection of Expression types is given instead, this method will try to parse the token list into each one of them, stopping at the first for which the parsing succeeds.
Arguments:
- expression_types: The expression type(s) to try and parse the token list into.
- raw_tokens: The list of tokens.
- sql: The original SQL string, used to produce helpful debug messages.
Returns:
The target Expression.
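For example, an identifier chain can be parsed straight into an exp.Table rather than a full statement, assuming exp.Table has an entry in this parser's EXPRESSION_PARSERS (it does in the base Parser):

    from sqlglot import exp
    from sqlglot.parser import Parser
    from sqlglot.tokens import Tokenizer

    sql = "catalog.db.tbl"
    table = Parser().parse_into(exp.Table, Tokenizer().tokenize(sql), sql)[0]
    print(type(table).__name__, table.sql())  # Table catalog.db.tbl

Passing a collection such as (exp.Select, exp.Table) tries each type in order, and the merged errors are raised only if every candidate fails.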
1335 def check_errors(self) -> None: 1336 """Logs or raises any found errors, depending on the chosen error level setting.""" 1337 if self.error_level == ErrorLevel.WARN: 1338 for error in self.errors: 1339 logger.error(str(error)) 1340 elif self.error_level == ErrorLevel.RAISE and self.errors: 1341 raise ParseError( 1342 concat_messages(self.errors, self.max_errors), 1343 errors=merge_errors(self.errors), 1344 )
Logs or raises any found errors, depending on the chosen error level setting.
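Under ErrorLevel.WARN, errors collected while parsing are routed here and emitted on the "sqlglot" logger instead of raised; a sketch:

    import logging
    from sqlglot.errors import ErrorLevel
    from sqlglot.parser import Parser
    from sqlglot.tokens import Tokenizer

    logging.basicConfig()
    sql = "SELECT (1"  # missing closing parenthesis
    Parser(error_level=ErrorLevel.WARN).parse(Tokenizer().tokenize(sql), sql)
    # the "Expecting )" error is logged via logger.error instead of raising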
1346 def raise_error(self, message: str, token: t.Optional[Token] = None) -> None: 1347 """ 1348 Appends an error in the list of recorded errors or raises it, depending on the chosen 1349 error level setting. 1350 """ 1351 token = token or self._curr or self._prev or Token.string("") 1352 start = token.start 1353 end = token.end + 1 1354 start_context = self.sql[max(start - self.error_message_context, 0) : start] 1355 highlight = self.sql[start:end] 1356 end_context = self.sql[end : end + self.error_message_context] 1357 1358 error = ParseError.new( 1359 f"{message}. Line {token.line}, Col: {token.col}.\n" 1360 f" {start_context}\033[4m{highlight}\033[0m{end_context}", 1361 description=message, 1362 line=token.line, 1363 col=token.col, 1364 start_context=start_context, 1365 highlight=highlight, 1366 end_context=end_context, 1367 ) 1368 1369 if self.error_level == ErrorLevel.IMMEDIATE: 1370 raise error 1371 1372 self.errors.append(error)
Appends an error to the list of recorded errors or raises it, depending on the chosen error level setting.
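The keyword arguments passed to ParseError.new are preserved on the exception's errors list, so callers can inspect a failure programmatically rather than parsing the rendered message. A sketch with the default ErrorLevel.IMMEDIATE:

    from sqlglot.errors import ParseError
    from sqlglot.parser import Parser
    from sqlglot.tokens import Tokenizer

    sql = "SELECT (1"
    try:
        Parser().parse(Tokenizer().tokenize(sql), sql)
    except ParseError as e:
        err = e.errors[0]
        print(err["description"], err["line"], err["col"], repr(err["highlight"]))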
1374 def expression( 1375 self, exp_class: t.Type[E], comments: t.Optional[t.List[str]] = None, **kwargs 1376 ) -> E: 1377 """ 1378 Creates a new, validated Expression. 1379 1380 Args: 1381 exp_class: The expression class to instantiate. 1382 comments: An optional list of comments to attach to the expression. 1383 kwargs: The arguments to set for the expression along with their respective values. 1384 1385 Returns: 1386 The target expression. 1387 """ 1388 instance = exp_class(**kwargs) 1389 instance.add_comments(comments) if comments else self._add_comments(instance) 1390 return self.validate_expression(instance)
Creates a new, validated Expression.
Arguments:
- exp_class: The expression class to instantiate.
- comments: An optional list of comments to attach to the expression.
- kwargs: The arguments to set for the expression along with their respective values.
Returns:
The target expression.
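Parser subclasses call this helper instead of instantiating expression classes directly, so comment attachment and validation happen in one place. Built by hand it looks like the following (illustrative only; exp.column and exp.Literal.number are public sqlglot helpers):

    from sqlglot import exp
    from sqlglot.parser import Parser

    parser = Parser()
    node = parser.expression(exp.EQ, this=exp.column("a"), expression=exp.Literal.number(1))
    print(node.sql())  # a = 1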
1397 def validate_expression(self, expression: E, args: t.Optional[t.List] = None) -> E: 1398 """ 1399 Validates an Expression, making sure that all its mandatory arguments are set. 1400 1401 Args: 1402 expression: The expression to validate. 1403 args: An optional list of items that was used to instantiate the expression, if it's a Func. 1404 1405 Returns: 1406 The validated expression. 1407 """ 1408 if self.error_level != ErrorLevel.IGNORE: 1409 for error_message in expression.error_messages(args): 1410 self.raise_error(error_message) 1411 1412 return expression
Validates an Expression, making sure that all its mandatory arguments are set.
Arguments:
- expression: The expression to validate.
- args: An optional list of items that was used to instantiate the expression, if it's a Func.
Returns:
The validated expression.
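Validation checks the expression's mandatory arguments (its arg_types); under the default ErrorLevel.IMMEDIATE a missing required argument surfaces as a ParseError. A sketch, assuming exp.EQ requires both this and expression:

    from sqlglot import exp
    from sqlglot.errors import ParseError
    from sqlglot.parser import Parser

    parser = Parser()  # ErrorLevel.IMMEDIATE by default
    try:
        parser.validate_expression(exp.EQ(this=exp.column("a")))  # 'expression' not set
    except ParseError as e:
        print(e.errors[0]["description"])  # Required keyword: 'expression' missing for ...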