# sqlglot.dialects.snowflake
1from __future__ import annotations 2 3import typing as t 4 5from sqlglot import exp, generator, parser, tokens, transforms 6from sqlglot.dialects.dialect import ( 7 Dialect, 8 NormalizationStrategy, 9 binary_from_function, 10 build_default_decimal_type, 11 build_timestamp_from_parts, 12 date_delta_sql, 13 date_trunc_to_time, 14 datestrtodate_sql, 15 build_formatted_time, 16 if_sql, 17 inline_array_sql, 18 max_or_greatest, 19 min_or_least, 20 rename_func, 21 timestamptrunc_sql, 22 timestrtotime_sql, 23 var_map_sql, 24 map_date_part, 25) 26from sqlglot.helper import flatten, is_float, is_int, seq_get 27from sqlglot.tokens import TokenType 28 29if t.TYPE_CHECKING: 30 from sqlglot._typing import E 31 32 33# from https://docs.snowflake.com/en/sql-reference/functions/to_timestamp.html 34def _build_datetime( 35 name: str, kind: exp.DataType.Type, safe: bool = False 36) -> t.Callable[[t.List], exp.Func]: 37 def _builder(args: t.List) -> exp.Func: 38 value = seq_get(args, 0) 39 int_value = value is not None and is_int(value.name) 40 41 if isinstance(value, exp.Literal): 42 # Converts calls like `TO_TIME('01:02:03')` into casts 43 if len(args) == 1 and value.is_string and not int_value: 44 return exp.cast(value, kind) 45 46 # Handles `TO_TIMESTAMP(str, fmt)` and `TO_TIMESTAMP(num, scale)` as special 47 # cases so we can transpile them, since they're relatively common 48 if kind == exp.DataType.Type.TIMESTAMP: 49 if int_value: 50 return exp.UnixToTime(this=value, scale=seq_get(args, 1)) 51 if not is_float(value.this): 52 return build_formatted_time(exp.StrToTime, "snowflake")(args) 53 54 if kind == exp.DataType.Type.DATE and not int_value: 55 formatted_exp = build_formatted_time(exp.TsOrDsToDate, "snowflake")(args) 56 formatted_exp.set("safe", safe) 57 return formatted_exp 58 59 return exp.Anonymous(this=name, expressions=args) 60 61 return _builder 62 63 64def _build_object_construct(args: t.List) -> t.Union[exp.StarMap, exp.Struct]: 65 expression = 
def _build_datediff(args: t.List) -> exp.DateDiff:
    """DATEDIFF(unit, start, end): Snowflake's argument order is reversed
    relative to the DateDiff node (this=end, expression=start)."""
    unit, start, end = seq_get(args, 0), seq_get(args, 1), seq_get(args, 2)
    return exp.DateDiff(this=end, expression=start, unit=map_date_part(unit))


def _build_date_time_add(expr_type: t.Type[E]) -> t.Callable[[t.List], E]:
    """Build a parser for DATEADD/TIMEADD/TIMESTAMPADD(unit, value, target)."""

    def _builder(args: t.List) -> E:
        unit, value, target = seq_get(args, 0), seq_get(args, 1), seq_get(args, 2)
        return expr_type(this=target, expression=value, unit=map_date_part(unit))

    return _builder


# https://docs.snowflake.com/en/sql-reference/functions/div0
def _build_if_from_div0(args: t.List) -> exp.If:
    """DIV0(a, b) -> IFF(b = 0, 0, a / b)."""
    dividend, divisor = seq_get(args, 0), seq_get(args, 1)
    is_zero = exp.EQ(this=divisor, expression=exp.Literal.number(0))
    quotient = exp.Div(this=dividend, expression=divisor)
    return exp.If(this=is_zero, true=exp.Literal.number(0), false=quotient)


# https://docs.snowflake.com/en/sql-reference/functions/zeroifnull
def _build_if_from_zeroifnull(args: t.List) -> exp.If:
    """ZEROIFNULL(x) -> IFF(x IS NULL, 0, x)."""
    value = seq_get(args, 0)
    is_null = exp.Is(this=value, expression=exp.Null())
    return exp.If(this=is_null, true=exp.Literal.number(0), false=value)


# https://docs.snowflake.com/en/sql-reference/functions/nullifzero
def _build_if_from_nullifzero(args: t.List) -> exp.If:
    """NULLIFZERO(x) -> IFF(x = 0, NULL, x)."""
    value = seq_get(args, 0)
    is_zero = exp.EQ(this=value, expression=exp.Literal.number(0))
    return exp.If(this=is_zero, true=exp.Null(), false=value)


def _regexpilike_sql(self: Snowflake.Generator, expression: exp.RegexpILike) -> str:
    """Render RegexpILike as REGEXP_LIKE, forcing the case-insensitive 'i' flag."""
    flag = expression.text("flag")

    if "i" not in flag:
        flag += "i"

    return self.func(
        "REGEXP_LIKE", expression.this, expression.expression, exp.Literal.string(flag)
    )


def _build_regexp_replace(args: t.List) -> exp.RegexpReplace:
    """Parse REGEXP_REPLACE, defaulting the replacement to the empty string."""
    node = exp.RegexpReplace.from_arg_list(args)

    if not node.args.get("replacement"):
        node.set("replacement", exp.Literal.string(""))

    return node
def _show_parser(*args: t.Any, **kwargs: t.Any) -> t.Callable[[Snowflake.Parser], exp.Show]:
    """Bind fixed arguments to Snowflake's SHOW statement parser."""

    def _parse(self: Snowflake.Parser) -> exp.Show:
        return self._parse_show_snowflake(*args, **kwargs)

    return _parse


def _date_trunc_to_time(args: t.List) -> exp.DateTrunc | exp.TimestampTrunc:
    """Parse DATE_TRUNC, normalizing its unit through map_date_part."""
    trunc = date_trunc_to_time(args)
    trunc.set("unit", map_date_part(trunc.args["unit"]))
    return trunc


def _unqualify_unpivot_columns(expression: exp.Expression) -> exp.Expression:
    """
    Snowflake doesn't allow columns referenced in UNPIVOT to be qualified,
    so we need to unqualify them.

    Example:
        >>> from sqlglot import parse_one
        >>> expr = parse_one("SELECT * FROM m_sales UNPIVOT(sales FOR month IN (m_sales.jan, feb, mar, april))")
        >>> print(_unqualify_unpivot_columns(expr).sql(dialect="snowflake"))
        SELECT * FROM m_sales UNPIVOT(sales FOR month IN (jan, feb, mar, april))
    """
    if isinstance(expression, exp.Pivot) and expression.unpivot:
        return transforms.unqualify_columns(expression)

    return expression


def _flatten_structured_types_unless_iceberg(expression: exp.Expression) -> exp.Expression:
    """Drop inner type parameters from nested column types in CREATE schemas,
    unless the table carries an Iceberg property."""
    assert isinstance(expression, exp.Create)

    def _strip_nested_params(dtype: exp.DataType) -> exp.DataType:
        # e.g. a parameterized nested type loses its parameters
        if dtype.this in exp.DataType.NESTED_TYPES:
            dtype.set("expressions", None)
        return dtype

    props = expression.args.get("properties")
    is_iceberg = bool(props and props.find(exp.IcebergProperty))

    if isinstance(expression.this, exp.Schema) and not is_iceberg:
        for schema_expression in expression.this.expressions:
            if isinstance(schema_expression, exp.ColumnDef):
                column_type = schema_expression.kind
                if isinstance(column_type, exp.DataType):
                    column_type.transform(_strip_nested_params, copy=False)

    return expression
def _unnest_generate_date_array(expression: exp.Expression) -> exp.Expression:
    """Rewrite UNNEST(GENERATE_DATE_ARRAY(start, end, step)) in FROM/JOIN
    clauses into ARRAY_GENERATE_RANGE + DATEADD, which Snowflake supports.

    Only sequences with an interval step of exactly 1 are rewritten; any
    other shape is left untouched.
    """
    if isinstance(expression, exp.Select):
        for unnest in expression.find_all(exp.Unnest):
            if (
                isinstance(unnest.parent, (exp.From, exp.Join))
                and len(unnest.expressions) == 1
                and isinstance(unnest.expressions[0], exp.GenerateDateArray)
            ):
                generate_date_array = unnest.expressions[0]
                start = generate_date_array.args.get("start")
                end = generate_date_array.args.get("end")
                step = generate_date_array.args.get("step")

                # Bail out unless the step is an interval literal of exactly "1"
                if not start or not end or not isinstance(step, exp.Interval) or step.name != "1":
                    continue

                unit = step.args.get("unit")

                # Reuse the caller's alias for the projected column when one exists
                unnest_alias = unnest.args.get("alias")
                if unnest_alias:
                    unnest_alias = unnest_alias.copy()
                    sequence_value_name = seq_get(unnest_alias.columns, 0) or "value"
                else:
                    sequence_value_name = "value"

                # We'll add the next sequence value to the starting date and project the result
                date_add = _build_date_time_add(exp.DateAdd)(
                    [unit, exp.cast(sequence_value_name, "int"), exp.cast(start, "date")]
                ).as_(sequence_value_name)

                # We use DATEDIFF to compute the number of sequence values needed
                number_sequence = Snowflake.Parser.FUNCTIONS["ARRAY_GENERATE_RANGE"](
                    [exp.Literal.number(0), _build_datediff([unit, start, end]) + 1]
                )

                # Swap the date array for the numeric range, then wrap the whole
                # unnest in a subquery that projects the computed dates
                unnest.set("expressions", [number_sequence])
                unnest.replace(exp.select(date_add).from_(unnest.copy()).subquery(unnest_alias))

    return expression


class Snowflake(Dialect):
    # https://docs.snowflake.com/en/sql-reference/identifiers-syntax
    NORMALIZATION_STRATEGY = NormalizationStrategy.UPPERCASE
    NULL_ORDERING = "nulls_are_large"
    TIME_FORMAT = "'YYYY-MM-DD HH24:MI:SS'"
    SUPPORTS_USER_DEFINED_TYPES = False
    SUPPORTS_SEMI_ANTI_JOIN = False
    PREFER_CTE_ALIAS_COLUMN = True
TABLESAMPLE_SIZE_IS_PERCENT = True 233 COPY_PARAMS_ARE_CSV = False 234 235 TIME_MAPPING = { 236 "YYYY": "%Y", 237 "yyyy": "%Y", 238 "YY": "%y", 239 "yy": "%y", 240 "MMMM": "%B", 241 "mmmm": "%B", 242 "MON": "%b", 243 "mon": "%b", 244 "MM": "%m", 245 "mm": "%m", 246 "DD": "%d", 247 "dd": "%-d", 248 "DY": "%a", 249 "dy": "%w", 250 "HH24": "%H", 251 "hh24": "%H", 252 "HH12": "%I", 253 "hh12": "%I", 254 "MI": "%M", 255 "mi": "%M", 256 "SS": "%S", 257 "ss": "%S", 258 "FF": "%f", 259 "ff": "%f", 260 "FF6": "%f", 261 "ff6": "%f", 262 } 263 264 def quote_identifier(self, expression: E, identify: bool = True) -> E: 265 # This disables quoting DUAL in SELECT ... FROM DUAL, because Snowflake treats an 266 # unquoted DUAL keyword in a special way and does not map it to a user-defined table 267 if ( 268 isinstance(expression, exp.Identifier) 269 and isinstance(expression.parent, exp.Table) 270 and expression.name.lower() == "dual" 271 ): 272 return expression # type: ignore 273 274 return super().quote_identifier(expression, identify=identify) 275 276 class Parser(parser.Parser): 277 IDENTIFY_PIVOT_STRINGS = True 278 DEFAULT_SAMPLING_METHOD = "BERNOULLI" 279 COLON_IS_VARIANT_EXTRACT = True 280 281 ID_VAR_TOKENS = { 282 *parser.Parser.ID_VAR_TOKENS, 283 TokenType.MATCH_CONDITION, 284 } 285 286 TABLE_ALIAS_TOKENS = parser.Parser.TABLE_ALIAS_TOKENS | {TokenType.WINDOW} 287 TABLE_ALIAS_TOKENS.discard(TokenType.MATCH_CONDITION) 288 289 FUNCTIONS = { 290 **parser.Parser.FUNCTIONS, 291 "APPROX_PERCENTILE": exp.ApproxQuantile.from_arg_list, 292 "ARRAYAGG": exp.ArrayAgg.from_arg_list, 293 "ARRAY_CONSTRUCT": lambda args: exp.Array(expressions=args), 294 "ARRAY_CONTAINS": lambda args: exp.ArrayContains( 295 this=seq_get(args, 1), expression=seq_get(args, 0) 296 ), 297 "ARRAY_GENERATE_RANGE": lambda args: exp.GenerateSeries( 298 # ARRAY_GENERATE_RANGE has an exlusive end; we normalize it to be inclusive 299 start=seq_get(args, 0), 300 end=exp.Sub(this=seq_get(args, 1), 
expression=exp.Literal.number(1)), 301 step=seq_get(args, 2), 302 ), 303 "BITXOR": binary_from_function(exp.BitwiseXor), 304 "BIT_XOR": binary_from_function(exp.BitwiseXor), 305 "BOOLXOR": binary_from_function(exp.Xor), 306 "DATE": _build_datetime("DATE", exp.DataType.Type.DATE), 307 "DATE_TRUNC": _date_trunc_to_time, 308 "DATEADD": _build_date_time_add(exp.DateAdd), 309 "DATEDIFF": _build_datediff, 310 "DIV0": _build_if_from_div0, 311 "FLATTEN": exp.Explode.from_arg_list, 312 "GET_PATH": lambda args, dialect: exp.JSONExtract( 313 this=seq_get(args, 0), expression=dialect.to_json_path(seq_get(args, 1)) 314 ), 315 "IFF": exp.If.from_arg_list, 316 "LAST_DAY": lambda args: exp.LastDay( 317 this=seq_get(args, 0), unit=map_date_part(seq_get(args, 1)) 318 ), 319 "LEN": lambda args: exp.Length(this=seq_get(args, 0), binary=True), 320 "LENGTH": lambda args: exp.Length(this=seq_get(args, 0), binary=True), 321 "LISTAGG": exp.GroupConcat.from_arg_list, 322 "MEDIAN": lambda args: exp.PercentileCont( 323 this=seq_get(args, 0), expression=exp.Literal.number(0.5) 324 ), 325 "NULLIFZERO": _build_if_from_nullifzero, 326 "OBJECT_CONSTRUCT": _build_object_construct, 327 "REGEXP_REPLACE": _build_regexp_replace, 328 "REGEXP_SUBSTR": exp.RegexpExtract.from_arg_list, 329 "RLIKE": exp.RegexpLike.from_arg_list, 330 "SQUARE": lambda args: exp.Pow(this=seq_get(args, 0), expression=exp.Literal.number(2)), 331 "TIMEADD": _build_date_time_add(exp.TimeAdd), 332 "TIMEDIFF": _build_datediff, 333 "TIMESTAMPADD": _build_date_time_add(exp.DateAdd), 334 "TIMESTAMPDIFF": _build_datediff, 335 "TIMESTAMPFROMPARTS": build_timestamp_from_parts, 336 "TIMESTAMP_FROM_PARTS": build_timestamp_from_parts, 337 "TRY_PARSE_JSON": lambda args: exp.ParseJSON(this=seq_get(args, 0), safe=True), 338 "TRY_TO_DATE": _build_datetime("TRY_TO_DATE", exp.DataType.Type.DATE, safe=True), 339 "TO_DATE": _build_datetime("TO_DATE", exp.DataType.Type.DATE), 340 "TO_NUMBER": lambda args: exp.ToNumber( 341 this=seq_get(args, 0), 342 
format=seq_get(args, 1), 343 precision=seq_get(args, 2), 344 scale=seq_get(args, 3), 345 ), 346 "TO_TIME": _build_datetime("TO_TIME", exp.DataType.Type.TIME), 347 "TO_TIMESTAMP": _build_datetime("TO_TIMESTAMP", exp.DataType.Type.TIMESTAMP), 348 "TO_TIMESTAMP_LTZ": _build_datetime("TO_TIMESTAMP_LTZ", exp.DataType.Type.TIMESTAMPLTZ), 349 "TO_TIMESTAMP_NTZ": _build_datetime("TO_TIMESTAMP_NTZ", exp.DataType.Type.TIMESTAMP), 350 "TO_TIMESTAMP_TZ": _build_datetime("TO_TIMESTAMP_TZ", exp.DataType.Type.TIMESTAMPTZ), 351 "TO_VARCHAR": exp.ToChar.from_arg_list, 352 "ZEROIFNULL": _build_if_from_zeroifnull, 353 } 354 355 FUNCTION_PARSERS = { 356 **parser.Parser.FUNCTION_PARSERS, 357 "DATE_PART": lambda self: self._parse_date_part(), 358 "OBJECT_CONSTRUCT_KEEP_NULL": lambda self: self._parse_json_object(), 359 } 360 FUNCTION_PARSERS.pop("TRIM") 361 362 TIMESTAMPS = parser.Parser.TIMESTAMPS - {TokenType.TIME} 363 364 RANGE_PARSERS = { 365 **parser.Parser.RANGE_PARSERS, 366 TokenType.LIKE_ANY: parser.binary_range_parser(exp.LikeAny), 367 TokenType.ILIKE_ANY: parser.binary_range_parser(exp.ILikeAny), 368 } 369 370 ALTER_PARSERS = { 371 **parser.Parser.ALTER_PARSERS, 372 "UNSET": lambda self: self.expression( 373 exp.Set, 374 tag=self._match_text_seq("TAG"), 375 expressions=self._parse_csv(self._parse_id_var), 376 unset=True, 377 ), 378 "SWAP": lambda self: self._parse_alter_table_swap(), 379 } 380 381 STATEMENT_PARSERS = { 382 **parser.Parser.STATEMENT_PARSERS, 383 TokenType.SHOW: lambda self: self._parse_show(), 384 } 385 386 PROPERTY_PARSERS = { 387 **parser.Parser.PROPERTY_PARSERS, 388 "LOCATION": lambda self: self._parse_location_property(), 389 } 390 391 TYPE_CONVERTERS = { 392 # https://docs.snowflake.com/en/sql-reference/data-types-numeric#number 393 exp.DataType.Type.DECIMAL: build_default_decimal_type(precision=38, scale=0), 394 } 395 396 SHOW_PARSERS = { 397 "SCHEMAS": _show_parser("SCHEMAS"), 398 "TERSE SCHEMAS": _show_parser("SCHEMAS"), 399 "OBJECTS": 
_show_parser("OBJECTS"), 400 "TERSE OBJECTS": _show_parser("OBJECTS"), 401 "TABLES": _show_parser("TABLES"), 402 "TERSE TABLES": _show_parser("TABLES"), 403 "VIEWS": _show_parser("VIEWS"), 404 "TERSE VIEWS": _show_parser("VIEWS"), 405 "PRIMARY KEYS": _show_parser("PRIMARY KEYS"), 406 "TERSE PRIMARY KEYS": _show_parser("PRIMARY KEYS"), 407 "IMPORTED KEYS": _show_parser("IMPORTED KEYS"), 408 "TERSE IMPORTED KEYS": _show_parser("IMPORTED KEYS"), 409 "UNIQUE KEYS": _show_parser("UNIQUE KEYS"), 410 "TERSE UNIQUE KEYS": _show_parser("UNIQUE KEYS"), 411 "SEQUENCES": _show_parser("SEQUENCES"), 412 "TERSE SEQUENCES": _show_parser("SEQUENCES"), 413 "COLUMNS": _show_parser("COLUMNS"), 414 "USERS": _show_parser("USERS"), 415 "TERSE USERS": _show_parser("USERS"), 416 } 417 418 CONSTRAINT_PARSERS = { 419 **parser.Parser.CONSTRAINT_PARSERS, 420 "WITH": lambda self: self._parse_with_constraint(), 421 "MASKING": lambda self: self._parse_with_constraint(), 422 "PROJECTION": lambda self: self._parse_with_constraint(), 423 "TAG": lambda self: self._parse_with_constraint(), 424 } 425 426 STAGED_FILE_SINGLE_TOKENS = { 427 TokenType.DOT, 428 TokenType.MOD, 429 TokenType.SLASH, 430 } 431 432 FLATTEN_COLUMNS = ["SEQ", "KEY", "PATH", "INDEX", "VALUE", "THIS"] 433 434 SCHEMA_KINDS = {"OBJECTS", "TABLES", "VIEWS", "SEQUENCES", "UNIQUE KEYS", "IMPORTED KEYS"} 435 436 NON_TABLE_CREATABLES = {"STORAGE INTEGRATION", "TAG", "WAREHOUSE", "STREAMLIT"} 437 438 LAMBDAS = { 439 **parser.Parser.LAMBDAS, 440 TokenType.ARROW: lambda self, expressions: self.expression( 441 exp.Lambda, 442 this=self._replace_lambda( 443 self._parse_assignment(), 444 expressions, 445 ), 446 expressions=[e.this if isinstance(e, exp.Cast) else e for e in expressions], 447 ), 448 } 449 450 def _negate_range( 451 self, this: t.Optional[exp.Expression] = None 452 ) -> t.Optional[exp.Expression]: 453 if not this: 454 return this 455 456 query = this.args.get("query") 457 if isinstance(this, exp.In) and isinstance(query, 
exp.Query): 458 # Snowflake treats `value NOT IN (subquery)` as `VALUE <> ALL (subquery)`, so 459 # we do this conversion here to avoid parsing it into `NOT value IN (subquery)` 460 # which can produce different results (most likely a SnowFlake bug). 461 # 462 # https://docs.snowflake.com/en/sql-reference/functions/in 463 # Context: https://github.com/tobymao/sqlglot/issues/3890 464 return self.expression( 465 exp.NEQ, this=this.this, expression=exp.All(this=query.unnest()) 466 ) 467 468 return self.expression(exp.Not, this=this) 469 470 def _parse_with_constraint(self) -> t.Optional[exp.Expression]: 471 if self._prev.token_type != TokenType.WITH: 472 self._retreat(self._index - 1) 473 474 if self._match_text_seq("MASKING", "POLICY"): 475 policy = self._parse_column() 476 return self.expression( 477 exp.MaskingPolicyColumnConstraint, 478 this=policy.to_dot() if isinstance(policy, exp.Column) else policy, 479 expressions=self._match(TokenType.USING) 480 and self._parse_wrapped_csv(self._parse_id_var), 481 ) 482 if self._match_text_seq("PROJECTION", "POLICY"): 483 policy = self._parse_column() 484 return self.expression( 485 exp.ProjectionPolicyColumnConstraint, 486 this=policy.to_dot() if isinstance(policy, exp.Column) else policy, 487 ) 488 if self._match(TokenType.TAG): 489 return self.expression( 490 exp.TagColumnConstraint, 491 expressions=self._parse_wrapped_csv(self._parse_property), 492 ) 493 494 return None 495 496 def _parse_create(self) -> exp.Create | exp.Command: 497 expression = super()._parse_create() 498 if isinstance(expression, exp.Create) and expression.kind in self.NON_TABLE_CREATABLES: 499 # Replace the Table node with the enclosed Identifier 500 expression.this.replace(expression.this.this) 501 502 return expression 503 504 # https://docs.snowflake.com/en/sql-reference/functions/date_part.html 505 # https://docs.snowflake.com/en/sql-reference/functions-date-time.html#label-supported-date-time-parts 506 def _parse_date_part(self: 
Snowflake.Parser) -> t.Optional[exp.Expression]: 507 this = self._parse_var() or self._parse_type() 508 509 if not this: 510 return None 511 512 self._match(TokenType.COMMA) 513 expression = self._parse_bitwise() 514 this = map_date_part(this) 515 name = this.name.upper() 516 517 if name.startswith("EPOCH"): 518 if name == "EPOCH_MILLISECOND": 519 scale = 10**3 520 elif name == "EPOCH_MICROSECOND": 521 scale = 10**6 522 elif name == "EPOCH_NANOSECOND": 523 scale = 10**9 524 else: 525 scale = None 526 527 ts = self.expression(exp.Cast, this=expression, to=exp.DataType.build("TIMESTAMP")) 528 to_unix: exp.Expression = self.expression(exp.TimeToUnix, this=ts) 529 530 if scale: 531 to_unix = exp.Mul(this=to_unix, expression=exp.Literal.number(scale)) 532 533 return to_unix 534 535 return self.expression(exp.Extract, this=this, expression=expression) 536 537 def _parse_bracket_key_value(self, is_map: bool = False) -> t.Optional[exp.Expression]: 538 if is_map: 539 # Keys are strings in Snowflake's objects, see also: 540 # - https://docs.snowflake.com/en/sql-reference/data-types-semistructured 541 # - https://docs.snowflake.com/en/sql-reference/functions/object_construct 542 return self._parse_slice(self._parse_string()) 543 544 return self._parse_slice(self._parse_alias(self._parse_assignment(), explicit=True)) 545 546 def _parse_lateral(self) -> t.Optional[exp.Lateral]: 547 lateral = super()._parse_lateral() 548 if not lateral: 549 return lateral 550 551 if isinstance(lateral.this, exp.Explode): 552 table_alias = lateral.args.get("alias") 553 columns = [exp.to_identifier(col) for col in self.FLATTEN_COLUMNS] 554 if table_alias and not table_alias.args.get("columns"): 555 table_alias.set("columns", columns) 556 elif not table_alias: 557 exp.alias_(lateral, "_flattened", table=columns, copy=False) 558 559 return lateral 560 561 def _parse_table_parts( 562 self, schema: bool = False, is_db_reference: bool = False, wildcard: bool = False 563 ) -> exp.Table: 564 # 
https://docs.snowflake.com/en/user-guide/querying-stage 565 if self._match(TokenType.STRING, advance=False): 566 table = self._parse_string() 567 elif self._match_text_seq("@", advance=False): 568 table = self._parse_location_path() 569 else: 570 table = None 571 572 if table: 573 file_format = None 574 pattern = None 575 576 wrapped = self._match(TokenType.L_PAREN) 577 while self._curr and wrapped and not self._match(TokenType.R_PAREN): 578 if self._match_text_seq("FILE_FORMAT", "=>"): 579 file_format = self._parse_string() or super()._parse_table_parts( 580 is_db_reference=is_db_reference 581 ) 582 elif self._match_text_seq("PATTERN", "=>"): 583 pattern = self._parse_string() 584 else: 585 break 586 587 self._match(TokenType.COMMA) 588 589 table = self.expression(exp.Table, this=table, format=file_format, pattern=pattern) 590 else: 591 table = super()._parse_table_parts(schema=schema, is_db_reference=is_db_reference) 592 593 return table 594 595 def _parse_id_var( 596 self, 597 any_token: bool = True, 598 tokens: t.Optional[t.Collection[TokenType]] = None, 599 ) -> t.Optional[exp.Expression]: 600 if self._match_text_seq("IDENTIFIER", "("): 601 identifier = ( 602 super()._parse_id_var(any_token=any_token, tokens=tokens) 603 or self._parse_string() 604 ) 605 self._match_r_paren() 606 return self.expression(exp.Anonymous, this="IDENTIFIER", expressions=[identifier]) 607 608 return super()._parse_id_var(any_token=any_token, tokens=tokens) 609 610 def _parse_show_snowflake(self, this: str) -> exp.Show: 611 scope = None 612 scope_kind = None 613 614 # will identity SHOW TERSE SCHEMAS but not SHOW TERSE PRIMARY KEYS 615 # which is syntactically valid but has no effect on the output 616 terse = self._tokens[self._index - 2].text.upper() == "TERSE" 617 618 history = self._match_text_seq("HISTORY") 619 620 like = self._parse_string() if self._match(TokenType.LIKE) else None 621 622 if self._match(TokenType.IN): 623 if self._match_text_seq("ACCOUNT"): 624 scope_kind = 
"ACCOUNT" 625 elif self._match_set(self.DB_CREATABLES): 626 scope_kind = self._prev.text.upper() 627 if self._curr: 628 scope = self._parse_table_parts() 629 elif self._curr: 630 scope_kind = "SCHEMA" if this in self.SCHEMA_KINDS else "TABLE" 631 scope = self._parse_table_parts() 632 633 return self.expression( 634 exp.Show, 635 **{ 636 "terse": terse, 637 "this": this, 638 "history": history, 639 "like": like, 640 "scope": scope, 641 "scope_kind": scope_kind, 642 "starts_with": self._match_text_seq("STARTS", "WITH") and self._parse_string(), 643 "limit": self._parse_limit(), 644 "from": self._parse_string() if self._match(TokenType.FROM) else None, 645 }, 646 ) 647 648 def _parse_alter_table_swap(self) -> exp.SwapTable: 649 self._match_text_seq("WITH") 650 return self.expression(exp.SwapTable, this=self._parse_table(schema=True)) 651 652 def _parse_location_property(self) -> exp.LocationProperty: 653 self._match(TokenType.EQ) 654 return self.expression(exp.LocationProperty, this=self._parse_location_path()) 655 656 def _parse_file_location(self) -> t.Optional[exp.Expression]: 657 # Parse either a subquery or a staged file 658 return ( 659 self._parse_select(table=True, parse_subquery_alias=False) 660 if self._match(TokenType.L_PAREN, advance=False) 661 else self._parse_table_parts() 662 ) 663 664 def _parse_location_path(self) -> exp.Var: 665 parts = [self._advance_any(ignore_reserved=True)] 666 667 # We avoid consuming a comma token because external tables like @foo and @bar 668 # can be joined in a query with a comma separator, as well as closing paren 669 # in case of subqueries 670 while self._is_connected() and not self._match_set( 671 (TokenType.COMMA, TokenType.L_PAREN, TokenType.R_PAREN), advance=False 672 ): 673 parts.append(self._advance_any(ignore_reserved=True)) 674 675 return exp.var("".join(part.text for part in parts if part)) 676 677 def _parse_lambda_arg(self) -> t.Optional[exp.Expression]: 678 this = super()._parse_lambda_arg() 679 680 if not 
this: 681 return this 682 683 typ = self._parse_types() 684 685 if typ: 686 return self.expression(exp.Cast, this=this, to=typ) 687 688 return this 689 690 class Tokenizer(tokens.Tokenizer): 691 STRING_ESCAPES = ["\\", "'"] 692 HEX_STRINGS = [("x'", "'"), ("X'", "'")] 693 RAW_STRINGS = ["$$"] 694 COMMENTS = ["--", "//", ("/*", "*/")] 695 696 KEYWORDS = { 697 **tokens.Tokenizer.KEYWORDS, 698 "BYTEINT": TokenType.INT, 699 "CHAR VARYING": TokenType.VARCHAR, 700 "CHARACTER VARYING": TokenType.VARCHAR, 701 "EXCLUDE": TokenType.EXCEPT, 702 "ILIKE ANY": TokenType.ILIKE_ANY, 703 "LIKE ANY": TokenType.LIKE_ANY, 704 "MATCH_CONDITION": TokenType.MATCH_CONDITION, 705 "MATCH_RECOGNIZE": TokenType.MATCH_RECOGNIZE, 706 "MINUS": TokenType.EXCEPT, 707 "NCHAR VARYING": TokenType.VARCHAR, 708 "PUT": TokenType.COMMAND, 709 "REMOVE": TokenType.COMMAND, 710 "RM": TokenType.COMMAND, 711 "SAMPLE": TokenType.TABLE_SAMPLE, 712 "SQL_DOUBLE": TokenType.DOUBLE, 713 "SQL_VARCHAR": TokenType.VARCHAR, 714 "STORAGE INTEGRATION": TokenType.STORAGE_INTEGRATION, 715 "TAG": TokenType.TAG, 716 "TIMESTAMP_TZ": TokenType.TIMESTAMPTZ, 717 "TOP": TokenType.TOP, 718 "WAREHOUSE": TokenType.WAREHOUSE, 719 "STREAMLIT": TokenType.STREAMLIT, 720 } 721 KEYWORDS.pop("/*+") 722 723 SINGLE_TOKENS = { 724 **tokens.Tokenizer.SINGLE_TOKENS, 725 "$": TokenType.PARAMETER, 726 } 727 728 VAR_SINGLE_TOKENS = {"$"} 729 730 COMMANDS = tokens.Tokenizer.COMMANDS - {TokenType.SHOW} 731 732 class Generator(generator.Generator): 733 PARAMETER_TOKEN = "$" 734 MATCHED_BY_SOURCE = False 735 SINGLE_STRING_INTERVAL = True 736 JOIN_HINTS = False 737 TABLE_HINTS = False 738 QUERY_HINTS = False 739 AGGREGATE_FILTER_SUPPORTED = False 740 SUPPORTS_TABLE_COPY = False 741 COLLATE_IS_FUNC = True 742 LIMIT_ONLY_LITERALS = True 743 JSON_KEY_VALUE_PAIR_SEP = "," 744 INSERT_OVERWRITE = " OVERWRITE INTO" 745 STRUCT_DELIMITER = ("(", ")") 746 COPY_PARAMS_ARE_WRAPPED = False 747 COPY_PARAMS_EQ_REQUIRED = True 748 STAR_EXCEPT = "EXCLUDE" 749 
SUPPORTS_EXPLODING_PROJECTIONS = False 750 ARRAY_CONCAT_IS_VAR_LEN = False 751 SUPPORTS_CONVERT_TIMEZONE = True 752 753 TRANSFORMS = { 754 **generator.Generator.TRANSFORMS, 755 exp.ApproxDistinct: rename_func("APPROX_COUNT_DISTINCT"), 756 exp.ArgMax: rename_func("MAX_BY"), 757 exp.ArgMin: rename_func("MIN_BY"), 758 exp.Array: inline_array_sql, 759 exp.ArrayConcat: lambda self, e: self.arrayconcat_sql(e, name="ARRAY_CAT"), 760 exp.ArrayContains: lambda self, e: self.func("ARRAY_CONTAINS", e.expression, e.this), 761 exp.AtTimeZone: lambda self, e: self.func( 762 "CONVERT_TIMEZONE", e.args.get("zone"), e.this 763 ), 764 exp.BitwiseXor: rename_func("BITXOR"), 765 exp.Create: transforms.preprocess([_flatten_structured_types_unless_iceberg]), 766 exp.DateAdd: date_delta_sql("DATEADD"), 767 exp.DateDiff: date_delta_sql("DATEDIFF"), 768 exp.DateStrToDate: datestrtodate_sql, 769 exp.DayOfMonth: rename_func("DAYOFMONTH"), 770 exp.DayOfWeek: rename_func("DAYOFWEEK"), 771 exp.DayOfYear: rename_func("DAYOFYEAR"), 772 exp.Explode: rename_func("FLATTEN"), 773 exp.Extract: rename_func("DATE_PART"), 774 exp.FromTimeZone: lambda self, e: self.func( 775 "CONVERT_TIMEZONE", e.args.get("zone"), "'UTC'", e.this 776 ), 777 exp.GenerateSeries: lambda self, e: self.func( 778 "ARRAY_GENERATE_RANGE", e.args["start"], e.args["end"] + 1, e.args.get("step") 779 ), 780 exp.GroupConcat: rename_func("LISTAGG"), 781 exp.If: if_sql(name="IFF", false_value="NULL"), 782 exp.JSONExtract: lambda self, e: self.func("GET_PATH", e.this, e.expression), 783 exp.JSONExtractScalar: lambda self, e: self.func( 784 "JSON_EXTRACT_PATH_TEXT", e.this, e.expression 785 ), 786 exp.JSONObject: lambda self, e: self.func("OBJECT_CONSTRUCT_KEEP_NULL", *e.expressions), 787 exp.JSONPathRoot: lambda *_: "", 788 exp.LogicalAnd: rename_func("BOOLAND_AGG"), 789 exp.LogicalOr: rename_func("BOOLOR_AGG"), 790 exp.Map: lambda self, e: var_map_sql(self, e, "OBJECT_CONSTRUCT"), 791 exp.Max: max_or_greatest, 792 exp.Min: min_or_least, 
793 exp.ParseJSON: lambda self, e: self.func( 794 "TRY_PARSE_JSON" if e.args.get("safe") else "PARSE_JSON", e.this 795 ), 796 exp.PartitionedByProperty: lambda self, e: f"PARTITION BY {self.sql(e, 'this')}", 797 exp.PercentileCont: transforms.preprocess( 798 [transforms.add_within_group_for_percentiles] 799 ), 800 exp.PercentileDisc: transforms.preprocess( 801 [transforms.add_within_group_for_percentiles] 802 ), 803 exp.Pivot: transforms.preprocess([_unqualify_unpivot_columns]), 804 exp.RegexpILike: _regexpilike_sql, 805 exp.Rand: rename_func("RANDOM"), 806 exp.Select: transforms.preprocess( 807 [ 808 transforms.eliminate_distinct_on, 809 transforms.explode_to_unnest(), 810 transforms.eliminate_semi_and_anti_joins, 811 _unnest_generate_date_array, 812 ] 813 ), 814 exp.SHA: rename_func("SHA1"), 815 exp.StarMap: rename_func("OBJECT_CONSTRUCT"), 816 exp.StartsWith: rename_func("STARTSWITH"), 817 exp.StrPosition: lambda self, e: self.func( 818 "POSITION", e.args.get("substr"), e.this, e.args.get("position") 819 ), 820 exp.StrToTime: lambda self, e: self.func("TO_TIMESTAMP", e.this, self.format_time(e)), 821 exp.Stuff: rename_func("INSERT"), 822 exp.TimeAdd: date_delta_sql("TIMEADD"), 823 exp.TimestampDiff: lambda self, e: self.func( 824 "TIMESTAMPDIFF", e.unit, e.expression, e.this 825 ), 826 exp.TimestampTrunc: timestamptrunc_sql(), 827 exp.TimeStrToTime: timestrtotime_sql, 828 exp.TimeToStr: lambda self, e: self.func( 829 "TO_CHAR", exp.cast(e.this, exp.DataType.Type.TIMESTAMP), self.format_time(e) 830 ), 831 exp.TimeToUnix: lambda self, e: f"EXTRACT(epoch_second FROM {self.sql(e, 'this')})", 832 exp.ToArray: rename_func("TO_ARRAY"), 833 exp.ToChar: lambda self, e: self.function_fallback_sql(e), 834 exp.Trim: lambda self, e: self.func("TRIM", e.this, e.expression), 835 exp.TsOrDsAdd: date_delta_sql("DATEADD", cast=True), 836 exp.TsOrDsDiff: date_delta_sql("DATEDIFF"), 837 exp.TsOrDsToDate: lambda self, e: self.func( 838 "TRY_TO_DATE" if e.args.get("safe") else 
"TO_DATE", e.this, self.format_time(e) 839 ), 840 exp.UnixToTime: rename_func("TO_TIMESTAMP"), 841 exp.VarMap: lambda self, e: var_map_sql(self, e, "OBJECT_CONSTRUCT"), 842 exp.WeekOfYear: rename_func("WEEKOFYEAR"), 843 exp.Xor: rename_func("BOOLXOR"), 844 } 845 846 SUPPORTED_JSON_PATH_PARTS = { 847 exp.JSONPathKey, 848 exp.JSONPathRoot, 849 exp.JSONPathSubscript, 850 } 851 852 TYPE_MAPPING = { 853 **generator.Generator.TYPE_MAPPING, 854 exp.DataType.Type.NESTED: "OBJECT", 855 exp.DataType.Type.STRUCT: "OBJECT", 856 } 857 858 PROPERTIES_LOCATION = { 859 **generator.Generator.PROPERTIES_LOCATION, 860 exp.SetProperty: exp.Properties.Location.UNSUPPORTED, 861 exp.VolatileProperty: exp.Properties.Location.UNSUPPORTED, 862 } 863 864 UNSUPPORTED_VALUES_EXPRESSIONS = { 865 exp.Map, 866 exp.StarMap, 867 exp.Struct, 868 exp.VarMap, 869 } 870 871 def with_properties(self, properties: exp.Properties) -> str: 872 return self.properties(properties, wrapped=False, prefix=self.sep(""), sep=" ") 873 874 def values_sql(self, expression: exp.Values, values_as_table: bool = True) -> str: 875 if expression.find(*self.UNSUPPORTED_VALUES_EXPRESSIONS): 876 values_as_table = False 877 878 return super().values_sql(expression, values_as_table=values_as_table) 879 880 def datatype_sql(self, expression: exp.DataType) -> str: 881 expressions = expression.expressions 882 if ( 883 expressions 884 and expression.is_type(*exp.DataType.STRUCT_TYPES) 885 and any(isinstance(field_type, exp.DataType) for field_type in expressions) 886 ): 887 # The correct syntax is OBJECT [ (<key> <value_type [NOT NULL] [, ...]) ] 888 return "OBJECT" 889 890 return super().datatype_sql(expression) 891 892 def tonumber_sql(self, expression: exp.ToNumber) -> str: 893 return self.func( 894 "TO_NUMBER", 895 expression.this, 896 expression.args.get("format"), 897 expression.args.get("precision"), 898 expression.args.get("scale"), 899 ) 900 901 def timestampfromparts_sql(self, expression: exp.TimestampFromParts) -> str: 
            milli = expression.args.get("milli")
            if milli is not None:
                # TIMESTAMP_FROM_PARTS takes a nanosecond component, so convert
                # the milliseconds argument (x 1,000,000) and store it as "nano"
                milli_to_nano = milli.pop() * exp.Literal.number(1000000)
                expression.set("nano", milli_to_nano)

            return rename_func("TIMESTAMP_FROM_PARTS")(self, expression)

        def trycast_sql(self, expression: exp.TryCast) -> str:
            value = expression.this

            if value.type is None:
                from sqlglot.optimizer.annotate_types import annotate_types

                value = annotate_types(value)

            if value.is_type(*exp.DataType.TEXT_TYPES, exp.DataType.Type.UNKNOWN):
                return super().trycast_sql(expression)

            # TRY_CAST only works for string values in Snowflake
            return self.cast_sql(expression)

        def log_sql(self, expression: exp.Log) -> str:
            # Single-argument LOG is the natural logarithm in Snowflake
            if not expression.expression:
                return self.func("LN", expression.this)

            return super().log_sql(expression)

        def unnest_sql(self, expression: exp.Unnest) -> str:
            unnest_alias = expression.args.get("alias")
            offset = expression.args.get("offset")

            # Mirror the column layout produced by Snowflake's FLATTEN:
            # (SEQ, KEY, PATH, INDEX, VALUE, THIS)
            columns = [
                exp.to_identifier("seq"),
                exp.to_identifier("key"),
                exp.to_identifier("path"),
                offset.pop() if isinstance(offset, exp.Expression) else exp.to_identifier("index"),
                seq_get(unnest_alias.columns if unnest_alias else [], 0)
                or exp.to_identifier("value"),
                exp.to_identifier("this"),
            ]

            if unnest_alias:
                unnest_alias.set("columns", columns)
            else:
                unnest_alias = exp.TableAlias(this="_u", columns=columns)

            explode = f"TABLE(FLATTEN(INPUT => {self.sql(expression.expressions[0])}))"
            alias = self.sql(unnest_alias)
            alias = f" AS {alias}" if alias else ""
            return f"{explode}{alias}"

        def show_sql(self, expression: exp.Show) -> str:
            terse = "TERSE " if expression.args.get("terse") else ""
            history = " HISTORY" if expression.args.get("history") else ""
            like = self.sql(expression, "like")
            like = f" LIKE {like}" if like else ""

            scope = self.sql(expression, "scope")
            scope = f" {scope}" if scope else ""

            scope_kind = self.sql(expression, "scope_kind")
            if scope_kind:
                scope_kind = f" IN {scope_kind}"

            starts_with = self.sql(expression, "starts_with")
            if starts_with:
                starts_with = f" STARTS WITH {starts_with}"

            limit = self.sql(expression, "limit")

            from_ = self.sql(expression, "from")
            if from_:
                from_ = f" FROM {from_}"

            return f"SHOW {terse}{expression.name}{history}{like}{scope_kind}{scope}{starts_with}{limit}{from_}"

        def regexpextract_sql(self, expression: exp.RegexpExtract) -> str:
            # Other dialects don't support all of the following parameters, so we need to
            # generate default values as necessary to ensure the transpilation is correct
            group = expression.args.get("group")
            parameters = expression.args.get("parameters") or (group and exp.Literal.string("c"))
            occurrence = expression.args.get("occurrence") or (parameters and exp.Literal.number(1))
            position = expression.args.get("position") or (occurrence and exp.Literal.number(1))

            return self.func(
                "REGEXP_SUBSTR",
                expression.this,
                expression.expression,
                position,
                occurrence,
                parameters,
                group,
            )

        def except_op(self, expression: exp.Except) -> str:
            if not expression.args.get("distinct"):
                self.unsupported("EXCEPT with All is not supported in Snowflake")
            return super().except_op(expression)

        def intersect_op(self, expression: exp.Intersect) -> str:
            if not expression.args.get("distinct"):
                self.unsupported("INTERSECT with All is not supported in Snowflake")
            return super().intersect_op(expression)

        def describe_sql(self, expression: exp.Describe) -> str:
            # Default to table if kind is unknown
            kind_value = expression.args.get("kind") or "TABLE"
            kind = f" {kind_value}" if kind_value else ""
            this = f" {self.sql(expression, 'this')}"
            expressions = self.expressions(expression, flat=True)
            expressions = f" {expressions}" if expressions else ""
            return f"DESCRIBE{kind}{this}{expressions}"

        def generatedasidentitycolumnconstraint_sql(
            self, expression: exp.GeneratedAsIdentityColumnConstraint
        ) -> str:
            start = expression.args.get("start")
            start = f" START {start}" if start else ""
            increment = expression.args.get("increment")
            increment = f" INCREMENT {increment}" if increment else ""
            return f"AUTOINCREMENT{start}{increment}"

        def swaptable_sql(self, expression: exp.SwapTable) -> str:
            this = self.sql(expression, "this")
            return f"SWAP WITH {this}"

        def cluster_sql(self, expression: exp.Cluster) -> str:
            return f"CLUSTER BY ({self.expressions(expression, flat=True)})"

        def struct_sql(self, expression: exp.Struct) -> str:
            keys = []
            values = []

            # Named struct fields keep their key; positional fields get "_<index>" keys
            for i, e in enumerate(expression.expressions):
                if isinstance(e, exp.PropertyEQ):
                    keys.append(
                        exp.Literal.string(e.name) if isinstance(e.this, exp.Identifier) else e.this
                    )
                    values.append(e.expression)
                else:
                    keys.append(exp.Literal.string(f"_{i}"))
                    values.append(e)

            return self.func("OBJECT_CONSTRUCT", *flatten(zip(keys, values)))

        def approxquantile_sql(self, expression: exp.ApproxQuantile) -> str:
            if expression.args.get("weight") or expression.args.get("accuracy"):
                self.unsupported(
                    "APPROX_PERCENTILE with weight and/or accuracy arguments are not supported in Snowflake"
                )

            return self.func("APPROX_PERCENTILE", expression.this, expression.args.get("quantile"))

        def alterset_sql(self, expression: exp.AlterSet) -> str:
            exprs = self.expressions(expression, flat=True)
            exprs = f" {exprs}" if exprs else ""
            file_format = self.expressions(expression, key="file_format", flat=True, sep=" ")
            file_format = f" STAGE_FILE_FORMAT = ({file_format})" if file_format else ""
            copy_options = self.expressions(expression, key="copy_options", flat=True, sep=" ")
            copy_options = f" STAGE_COPY_OPTIONS = ({copy_options})" if copy_options else ""
            tag = self.expressions(expression, key="tag", flat=True)
            tag = f" TAG {tag}" if tag else ""

            return f"SET{exprs}{file_format}{copy_options}{tag}"
class Snowflake(Dialect):
    # https://docs.snowflake.com/en/sql-reference/identifiers-syntax
    NORMALIZATION_STRATEGY = NormalizationStrategy.UPPERCASE
    NULL_ORDERING = "nulls_are_large"
    TIME_FORMAT = "'YYYY-MM-DD HH24:MI:SS'"
    SUPPORTS_USER_DEFINED_TYPES = False
    SUPPORTS_SEMI_ANTI_JOIN = False
    PREFER_CTE_ALIAS_COLUMN = True
    TABLESAMPLE_SIZE_IS_PERCENT = True
    COPY_PARAMS_ARE_CSV = False

    # Snowflake datetime format elements mapped to their strftime-style equivalents
    TIME_MAPPING = {
        "YYYY": "%Y",
        "yyyy": "%Y",
        "YY": "%y",
        "yy": "%y",
        "MMMM": "%B",
        "mmmm": "%B",
        "MON": "%b",
        "mon": "%b",
        "MM": "%m",
        "mm": "%m",
        "DD": "%d",
        "dd": "%-d",
        "DY": "%a",
        "dy": "%w",
        "HH24": "%H",
        "hh24": "%H",
        "HH12": "%I",
        "hh12": "%I",
        "MI": "%M",
        "mi": "%M",
        "SS": "%S",
        "ss": "%S",
        "FF": "%f",
        "ff": "%f",
        "FF6": "%f",
        "ff6": "%f",
    }

    def quote_identifier(self, expression: E, identify: bool = True) -> E:
        # This disables quoting DUAL in SELECT ... FROM DUAL, because Snowflake treats an
        # unquoted DUAL keyword in a special way and does not map it to a user-defined table
        if (
            isinstance(expression, exp.Identifier)
            and isinstance(expression.parent, exp.Table)
            and expression.name.lower() == "dual"
        ):
            return expression  # type: ignore

        return super().quote_identifier(expression, identify=identify)

    class Parser(parser.Parser):
        IDENTIFY_PIVOT_STRINGS = True
        DEFAULT_SAMPLING_METHOD = "BERNOULLI"
        COLON_IS_VARIANT_EXTRACT = True

        ID_VAR_TOKENS = {
            *parser.Parser.ID_VAR_TOKENS,
            TokenType.MATCH_CONDITION,
        }

        TABLE_ALIAS_TOKENS = parser.Parser.TABLE_ALIAS_TOKENS | {TokenType.WINDOW}
        TABLE_ALIAS_TOKENS.discard(TokenType.MATCH_CONDITION)

        FUNCTIONS = {
            **parser.Parser.FUNCTIONS,
            "APPROX_PERCENTILE": exp.ApproxQuantile.from_arg_list,
            "ARRAYAGG": exp.ArrayAgg.from_arg_list,
            "ARRAY_CONSTRUCT": lambda args: exp.Array(expressions=args),
            # NOTE: Snowflake's argument order is (value, array), so the args are swapped
            "ARRAY_CONTAINS": lambda args: exp.ArrayContains(
                this=seq_get(args, 1), expression=seq_get(args, 0)
            ),
            "ARRAY_GENERATE_RANGE": lambda args: exp.GenerateSeries(
                # ARRAY_GENERATE_RANGE has an exclusive end; we normalize it to be inclusive
                start=seq_get(args, 0),
                end=exp.Sub(this=seq_get(args, 1), expression=exp.Literal.number(1)),
                step=seq_get(args, 2),
            ),
            "BITXOR": binary_from_function(exp.BitwiseXor),
            "BIT_XOR": binary_from_function(exp.BitwiseXor),
            "BOOLXOR": binary_from_function(exp.Xor),
            "DATE": _build_datetime("DATE", exp.DataType.Type.DATE),
            "DATE_TRUNC": _date_trunc_to_time,
            "DATEADD": _build_date_time_add(exp.DateAdd),
            "DATEDIFF": _build_datediff,
            "DIV0": _build_if_from_div0,
            "FLATTEN": exp.Explode.from_arg_list,
            "GET_PATH": lambda args, dialect: exp.JSONExtract(
                this=seq_get(args, 0), expression=dialect.to_json_path(seq_get(args, 1))
            ),
            "IFF": exp.If.from_arg_list,
            "LAST_DAY": lambda args: exp.LastDay(
                this=seq_get(args, 0), unit=map_date_part(seq_get(args, 1))
            ),
            "LEN": lambda args: exp.Length(this=seq_get(args, 0), binary=True),
            "LENGTH": lambda args: exp.Length(this=seq_get(args, 0), binary=True),
            "LISTAGG": exp.GroupConcat.from_arg_list,
            "MEDIAN": lambda args: exp.PercentileCont(
                this=seq_get(args, 0), expression=exp.Literal.number(0.5)
            ),
            "NULLIFZERO": _build_if_from_nullifzero,
            "OBJECT_CONSTRUCT": _build_object_construct,
            "REGEXP_REPLACE": _build_regexp_replace,
            "REGEXP_SUBSTR": exp.RegexpExtract.from_arg_list,
            "RLIKE": exp.RegexpLike.from_arg_list,
            "SQUARE": lambda args: exp.Pow(this=seq_get(args, 0), expression=exp.Literal.number(2)),
            "TIMEADD": _build_date_time_add(exp.TimeAdd),
            "TIMEDIFF": _build_datediff,
            "TIMESTAMPADD": _build_date_time_add(exp.DateAdd),
            "TIMESTAMPDIFF": _build_datediff,
            "TIMESTAMPFROMPARTS": build_timestamp_from_parts,
            "TIMESTAMP_FROM_PARTS": build_timestamp_from_parts,
            "TRY_PARSE_JSON": lambda args: exp.ParseJSON(this=seq_get(args, 0), safe=True),
            "TRY_TO_DATE": _build_datetime("TRY_TO_DATE", exp.DataType.Type.DATE, safe=True),
            "TO_DATE": _build_datetime("TO_DATE", exp.DataType.Type.DATE),
            "TO_NUMBER": lambda args: exp.ToNumber(
                this=seq_get(args, 0),
                format=seq_get(args, 1),
                precision=seq_get(args, 2),
                scale=seq_get(args, 3),
            ),
            "TO_TIME": _build_datetime("TO_TIME", exp.DataType.Type.TIME),
            "TO_TIMESTAMP": _build_datetime("TO_TIMESTAMP", exp.DataType.Type.TIMESTAMP),
            "TO_TIMESTAMP_LTZ": _build_datetime("TO_TIMESTAMP_LTZ", exp.DataType.Type.TIMESTAMPLTZ),
            "TO_TIMESTAMP_NTZ": _build_datetime("TO_TIMESTAMP_NTZ", exp.DataType.Type.TIMESTAMP),
            "TO_TIMESTAMP_TZ": _build_datetime("TO_TIMESTAMP_TZ", exp.DataType.Type.TIMESTAMPTZ),
            "TO_VARCHAR": exp.ToChar.from_arg_list,
            "ZEROIFNULL": _build_if_from_zeroifnull,
        }

        FUNCTION_PARSERS = {
            **parser.Parser.FUNCTION_PARSERS,
            "DATE_PART": lambda self: self._parse_date_part(),
            "OBJECT_CONSTRUCT_KEEP_NULL": lambda self: self._parse_json_object(),
        }
        FUNCTION_PARSERS.pop("TRIM")

        TIMESTAMPS = parser.Parser.TIMESTAMPS - {TokenType.TIME}

        RANGE_PARSERS = {
            **parser.Parser.RANGE_PARSERS,
            TokenType.LIKE_ANY: parser.binary_range_parser(exp.LikeAny),
            TokenType.ILIKE_ANY: parser.binary_range_parser(exp.ILikeAny),
        }

        ALTER_PARSERS = {
            **parser.Parser.ALTER_PARSERS,
            "UNSET": lambda self: self.expression(
                exp.Set,
                tag=self._match_text_seq("TAG"),
                expressions=self._parse_csv(self._parse_id_var),
                unset=True,
            ),
            "SWAP": lambda self: self._parse_alter_table_swap(),
        }

        STATEMENT_PARSERS = {
            **parser.Parser.STATEMENT_PARSERS,
            TokenType.SHOW: lambda self: self._parse_show(),
        }

        PROPERTY_PARSERS = {
            **parser.Parser.PROPERTY_PARSERS,
            "LOCATION": lambda self: self._parse_location_property(),
        }

        TYPE_CONVERTERS = {
            # https://docs.snowflake.com/en/sql-reference/data-types-numeric#number
            exp.DataType.Type.DECIMAL: build_default_decimal_type(precision=38, scale=0),
        }

        SHOW_PARSERS = {
            "SCHEMAS": _show_parser("SCHEMAS"),
            "TERSE SCHEMAS": _show_parser("SCHEMAS"),
            "OBJECTS": _show_parser("OBJECTS"),
            "TERSE OBJECTS": _show_parser("OBJECTS"),
            "TABLES": _show_parser("TABLES"),
            "TERSE TABLES": _show_parser("TABLES"),
            "VIEWS": _show_parser("VIEWS"),
            "TERSE VIEWS": _show_parser("VIEWS"),
            "PRIMARY KEYS": _show_parser("PRIMARY KEYS"),
            "TERSE PRIMARY KEYS": _show_parser("PRIMARY KEYS"),
            "IMPORTED KEYS": _show_parser("IMPORTED KEYS"),
            "TERSE IMPORTED KEYS": _show_parser("IMPORTED KEYS"),
            "UNIQUE KEYS": _show_parser("UNIQUE KEYS"),
            "TERSE UNIQUE KEYS": _show_parser("UNIQUE KEYS"),
            "SEQUENCES": _show_parser("SEQUENCES"),
            "TERSE SEQUENCES": _show_parser("SEQUENCES"),
            "COLUMNS": _show_parser("COLUMNS"),
            "USERS": _show_parser("USERS"),
            "TERSE USERS": _show_parser("USERS"),
        }

        CONSTRAINT_PARSERS = {
            **parser.Parser.CONSTRAINT_PARSERS,
            "WITH": lambda self: self._parse_with_constraint(),
            "MASKING": lambda self: self._parse_with_constraint(),
            "PROJECTION": lambda self: self._parse_with_constraint(),
            "TAG": lambda self: self._parse_with_constraint(),
        }

        STAGED_FILE_SINGLE_TOKENS = {
            TokenType.DOT,
            TokenType.MOD,
            TokenType.SLASH,
        }

        FLATTEN_COLUMNS = ["SEQ", "KEY", "PATH", "INDEX", "VALUE", "THIS"]

        SCHEMA_KINDS = {"OBJECTS", "TABLES", "VIEWS", "SEQUENCES", "UNIQUE KEYS", "IMPORTED KEYS"}

        NON_TABLE_CREATABLES = {"STORAGE INTEGRATION", "TAG", "WAREHOUSE", "STREAMLIT"}

        LAMBDAS = {
            **parser.Parser.LAMBDAS,
            TokenType.ARROW: lambda self, expressions: self.expression(
                exp.Lambda,
                this=self._replace_lambda(
                    self._parse_assignment(),
                    expressions,
                ),
                # Strip any casts from lambda parameters; the bare identifiers are kept
                expressions=[e.this if isinstance(e, exp.Cast) else e for e in expressions],
            ),
        }

        def _negate_range(
            self, this: t.Optional[exp.Expression] = None
        ) -> t.Optional[exp.Expression]:
            if not this:
                return this

            query = this.args.get("query")
            if isinstance(this, exp.In) and isinstance(query, exp.Query):
                # Snowflake treats `value NOT IN (subquery)` as `VALUE <> ALL (subquery)`, so
                # we do this conversion here to avoid parsing it into `NOT value IN (subquery)`
                # which can produce different results (most likely a Snowflake bug).
                #
                # https://docs.snowflake.com/en/sql-reference/functions/in
                # Context: https://github.com/tobymao/sqlglot/issues/3890
                return self.expression(
                    exp.NEQ, this=this.this, expression=exp.All(this=query.unnest())
                )

            return self.expression(exp.Not, this=this)

        def _parse_with_constraint(self) -> t.Optional[exp.Expression]:
            # The WITH keyword is optional before these constraints; back up if it was absent
            if self._prev.token_type != TokenType.WITH:
                self._retreat(self._index - 1)

            if self._match_text_seq("MASKING", "POLICY"):
                policy = self._parse_column()
                return self.expression(
                    exp.MaskingPolicyColumnConstraint,
                    this=policy.to_dot() if isinstance(policy, exp.Column) else policy,
                    expressions=self._match(TokenType.USING)
                    and self._parse_wrapped_csv(self._parse_id_var),
                )
            if self._match_text_seq("PROJECTION", "POLICY"):
                policy = self._parse_column()
                return self.expression(
                    exp.ProjectionPolicyColumnConstraint,
                    this=policy.to_dot() if isinstance(policy, exp.Column) else policy,
                )
            if self._match(TokenType.TAG):
                return self.expression(
                    exp.TagColumnConstraint,
                    expressions=self._parse_wrapped_csv(self._parse_property),
                )

            return None

        def _parse_create(self) -> exp.Create | exp.Command:
            expression = super()._parse_create()
            if isinstance(expression, exp.Create) and expression.kind in self.NON_TABLE_CREATABLES:
                # Replace the Table node with the enclosed Identifier
                expression.this.replace(expression.this.this)

            return expression

        # https://docs.snowflake.com/en/sql-reference/functions/date_part.html
        # https://docs.snowflake.com/en/sql-reference/functions-date-time.html#label-supported-date-time-parts
        def _parse_date_part(self: Snowflake.Parser) -> t.Optional[exp.Expression]:
            this = self._parse_var() or self._parse_type()

            if not this:
                return None

            self._match(TokenType.COMMA)
            expression = self._parse_bitwise()
            this = map_date_part(this)
            name = this.name.upper()

            # EPOCH* parts are rewritten as a UNIX-time extraction, scaled to the
            # requested resolution (seconds get no scale factor)
            if name.startswith("EPOCH"):
                if name == "EPOCH_MILLISECOND":
                    scale = 10**3
                elif name == "EPOCH_MICROSECOND":
                    scale = 10**6
                elif name == "EPOCH_NANOSECOND":
                    scale = 10**9
                else:
                    scale = None

                ts = self.expression(exp.Cast, this=expression, to=exp.DataType.build("TIMESTAMP"))
                to_unix: exp.Expression = self.expression(exp.TimeToUnix, this=ts)

                if scale:
                    to_unix = exp.Mul(this=to_unix, expression=exp.Literal.number(scale))

                return to_unix

            return self.expression(exp.Extract, this=this, expression=expression)

        def _parse_bracket_key_value(self, is_map: bool = False) -> t.Optional[exp.Expression]:
            if is_map:
                # Keys are strings in Snowflake's objects, see also:
                # - https://docs.snowflake.com/en/sql-reference/data-types-semistructured
                # - https://docs.snowflake.com/en/sql-reference/functions/object_construct
                return self._parse_slice(self._parse_string())

            return self._parse_slice(self._parse_alias(self._parse_assignment(), explicit=True))

        def _parse_lateral(self) -> t.Optional[exp.Lateral]:
            lateral = super()._parse_lateral()
            if not lateral:
                return lateral

            # Attach FLATTEN's implicit output columns when the user didn't alias them
            if isinstance(lateral.this, exp.Explode):
                table_alias = lateral.args.get("alias")
                columns = [exp.to_identifier(col) for col in self.FLATTEN_COLUMNS]
                if table_alias and not table_alias.args.get("columns"):
                    table_alias.set("columns", columns)
                elif not table_alias:
                    exp.alias_(lateral, "_flattened", table=columns, copy=False)

            return lateral

        def _parse_table_parts(
            self, schema: bool = False, is_db_reference: bool = False, wildcard: bool = False
        ) -> exp.Table:
            # https://docs.snowflake.com/en/user-guide/querying-stage
            if self._match(TokenType.STRING, advance=False):
                table = self._parse_string()
            elif self._match_text_seq("@", advance=False):
                table = self._parse_location_path()
            else:
                table = None

            if table:
                file_format = None
                pattern = None

                # Staged files may carry (FILE_FORMAT => ..., PATTERN => ...) options
                wrapped = self._match(TokenType.L_PAREN)
                while self._curr and wrapped and not self._match(TokenType.R_PAREN):
                    if self._match_text_seq("FILE_FORMAT", "=>"):
                        file_format = self._parse_string() or super()._parse_table_parts(
                            is_db_reference=is_db_reference
                        )
                    elif self._match_text_seq("PATTERN", "=>"):
                        pattern = self._parse_string()
                    else:
                        break

                    self._match(TokenType.COMMA)

                table = self.expression(exp.Table, this=table, format=file_format, pattern=pattern)
            else:
                table = super()._parse_table_parts(schema=schema, is_db_reference=is_db_reference)

            return table

        def _parse_id_var(
            self,
            any_token: bool = True,
            tokens: t.Optional[t.Collection[TokenType]] = None,
        ) -> t.Optional[exp.Expression]:
            # IDENTIFIER(<name-or-string>) resolves an object name dynamically
            if self._match_text_seq("IDENTIFIER", "("):
                identifier = (
                    super()._parse_id_var(any_token=any_token, tokens=tokens)
                    or self._parse_string()
                )
                self._match_r_paren()
                return self.expression(exp.Anonymous, this="IDENTIFIER", expressions=[identifier])

            return super()._parse_id_var(any_token=any_token, tokens=tokens)

        def _parse_show_snowflake(self, this: str) -> exp.Show:
            scope = None
            scope_kind = None

            # will identify SHOW TERSE SCHEMAS but not SHOW TERSE PRIMARY KEYS
            # which is syntactically valid but has no effect on the output
            terse = self._tokens[self._index - 2].text.upper() == "TERSE"

            history = self._match_text_seq("HISTORY")

            like = self._parse_string() if self._match(TokenType.LIKE) else None

            if self._match(TokenType.IN):
                if self._match_text_seq("ACCOUNT"):
                    scope_kind = "ACCOUNT"
                elif self._match_set(self.DB_CREATABLES):
                    scope_kind = self._prev.text.upper()
                    if self._curr:
                        scope = self._parse_table_parts()
                elif self._curr:
                    scope_kind = "SCHEMA" if this in self.SCHEMA_KINDS else "TABLE"
                    scope = self._parse_table_parts()

            return self.expression(
                exp.Show,
                **{
                    "terse": terse,
                    "this": this,
                    "history": history,
                    "like": like,
                    "scope": scope,
                    "scope_kind": scope_kind,
                    "starts_with": self._match_text_seq("STARTS", "WITH") and self._parse_string(),
                    "limit": self._parse_limit(),
                    "from": self._parse_string() if self._match(TokenType.FROM) else None,
                },
            )

        def _parse_alter_table_swap(self) -> exp.SwapTable:
            self._match_text_seq("WITH")
            return self.expression(exp.SwapTable, this=self._parse_table(schema=True))

        def _parse_location_property(self) -> exp.LocationProperty:
            self._match(TokenType.EQ)
            return self.expression(exp.LocationProperty, this=self._parse_location_path())

        def _parse_file_location(self) -> t.Optional[exp.Expression]:
            # Parse either a subquery or a staged file
            return (
                self._parse_select(table=True, parse_subquery_alias=False)
                if self._match(TokenType.L_PAREN, advance=False)
                else self._parse_table_parts()
            )

        def _parse_location_path(self) -> exp.Var:
            parts = [self._advance_any(ignore_reserved=True)]

            # We avoid consuming a comma token because external tables like @foo and @bar
            # can be joined in a query with a comma separator, as well as closing paren
            # in case of subqueries
            while self._is_connected() and not self._match_set(
                (TokenType.COMMA, TokenType.L_PAREN, TokenType.R_PAREN), advance=False
            ):
                parts.append(self._advance_any(ignore_reserved=True))

            return exp.var("".join(part.text for part in parts if part))

        def _parse_lambda_arg(self) -> t.Optional[exp.Expression]:
            this = super()._parse_lambda_arg()

            if not this:
                return this

            # A lambda parameter may carry an optional type, e.g. `x INT -> ...`
            typ = self._parse_types()

            if typ:
                return self.expression(exp.Cast, this=this, to=typ)

            return this

    class Tokenizer(tokens.Tokenizer):
        STRING_ESCAPES = ["\\", "'"]
        HEX_STRINGS = [("x'", "'"),
        ("X'", "'")]
        RAW_STRINGS = ["$$"]
        COMMENTS = ["--", "//", ("/*", "*/")]

        KEYWORDS = {
            **tokens.Tokenizer.KEYWORDS,
            "BYTEINT": TokenType.INT,
            "CHAR VARYING": TokenType.VARCHAR,
            "CHARACTER VARYING": TokenType.VARCHAR,
            "EXCLUDE": TokenType.EXCEPT,
            "ILIKE ANY": TokenType.ILIKE_ANY,
            "LIKE ANY": TokenType.LIKE_ANY,
            "MATCH_CONDITION": TokenType.MATCH_CONDITION,
            "MATCH_RECOGNIZE": TokenType.MATCH_RECOGNIZE,
            "MINUS": TokenType.EXCEPT,
            "NCHAR VARYING": TokenType.VARCHAR,
            "PUT": TokenType.COMMAND,
            "REMOVE": TokenType.COMMAND,
            "RM": TokenType.COMMAND,
            "SAMPLE": TokenType.TABLE_SAMPLE,
            "SQL_DOUBLE": TokenType.DOUBLE,
            "SQL_VARCHAR": TokenType.VARCHAR,
            "STORAGE INTEGRATION": TokenType.STORAGE_INTEGRATION,
            "TAG": TokenType.TAG,
            "TIMESTAMP_TZ": TokenType.TIMESTAMPTZ,
            "TOP": TokenType.TOP,
            "WAREHOUSE": TokenType.WAREHOUSE,
            "STREAMLIT": TokenType.STREAMLIT,
        }
        # NOTE(review): drops the base tokenizer's "/*+" entry — presumably because
        # Snowflake doesn't use hint comments; confirm against the base KEYWORDS
        KEYWORDS.pop("/*+")

        SINGLE_TOKENS = {
            **tokens.Tokenizer.SINGLE_TOKENS,
            "$": TokenType.PARAMETER,
        }

        VAR_SINGLE_TOKENS = {"$"}

        # SHOW is parsed as a real statement here, not passed through as a raw command
        COMMANDS = tokens.Tokenizer.COMMANDS - {TokenType.SHOW}

    class Generator(generator.Generator):
        PARAMETER_TOKEN = "$"
        MATCHED_BY_SOURCE = False
        SINGLE_STRING_INTERVAL = True
        JOIN_HINTS = False
        TABLE_HINTS = False
        QUERY_HINTS = False
        AGGREGATE_FILTER_SUPPORTED = False
        SUPPORTS_TABLE_COPY = False
        COLLATE_IS_FUNC = True
        LIMIT_ONLY_LITERALS = True
        JSON_KEY_VALUE_PAIR_SEP = ","
        INSERT_OVERWRITE = " OVERWRITE INTO"
        STRUCT_DELIMITER = ("(", ")")
        COPY_PARAMS_ARE_WRAPPED = False
        COPY_PARAMS_EQ_REQUIRED = True
        STAR_EXCEPT = "EXCLUDE"
        SUPPORTS_EXPLODING_PROJECTIONS = False
        ARRAY_CONCAT_IS_VAR_LEN = False
        SUPPORTS_CONVERT_TIMEZONE = True

        TRANSFORMS = {
            **generator.Generator.TRANSFORMS,
            exp.ApproxDistinct: rename_func("APPROX_COUNT_DISTINCT"),
            exp.ArgMax: rename_func("MAX_BY"),
            exp.ArgMin: rename_func("MIN_BY"),
            exp.Array: inline_array_sql,
            exp.ArrayConcat: lambda self, e: self.arrayconcat_sql(e, name="ARRAY_CAT"),
            # ARRAY_CONTAINS takes (value, array) in Snowflake, hence the swapped args
            exp.ArrayContains: lambda self, e: self.func("ARRAY_CONTAINS", e.expression, e.this),
            exp.AtTimeZone: lambda self, e: self.func(
                "CONVERT_TIMEZONE", e.args.get("zone"), e.this
            ),
            exp.BitwiseXor: rename_func("BITXOR"),
            exp.Create: transforms.preprocess([_flatten_structured_types_unless_iceberg]),
            exp.DateAdd: date_delta_sql("DATEADD"),
            exp.DateDiff: date_delta_sql("DATEDIFF"),
            exp.DateStrToDate: datestrtodate_sql,
            exp.DayOfMonth: rename_func("DAYOFMONTH"),
            exp.DayOfWeek: rename_func("DAYOFWEEK"),
            exp.DayOfYear: rename_func("DAYOFYEAR"),
            exp.Explode: rename_func("FLATTEN"),
            exp.Extract: rename_func("DATE_PART"),
            exp.FromTimeZone: lambda self, e: self.func(
                "CONVERT_TIMEZONE", e.args.get("zone"), "'UTC'", e.this
            ),
            exp.GenerateSeries: lambda self, e: self.func(
                "ARRAY_GENERATE_RANGE", e.args["start"], e.args["end"] + 1, e.args.get("step")
            ),
            exp.GroupConcat: rename_func("LISTAGG"),
            exp.If: if_sql(name="IFF", false_value="NULL"),
            exp.JSONExtract: lambda self, e: self.func("GET_PATH", e.this, e.expression),
            exp.JSONExtractScalar: lambda self, e: self.func(
                "JSON_EXTRACT_PATH_TEXT", e.this, e.expression
            ),
            exp.JSONObject: lambda self, e: self.func("OBJECT_CONSTRUCT_KEEP_NULL", *e.expressions),
            exp.JSONPathRoot: lambda *_: "",
            exp.LogicalAnd: rename_func("BOOLAND_AGG"),
            exp.LogicalOr: rename_func("BOOLOR_AGG"),
            exp.Map: lambda self, e: var_map_sql(self, e, "OBJECT_CONSTRUCT"),
            exp.Max: max_or_greatest,
            exp.Min: min_or_least,
            exp.ParseJSON: lambda self, e: self.func(
                "TRY_PARSE_JSON" if e.args.get("safe") else "PARSE_JSON", e.this
            ),
            exp.PartitionedByProperty: lambda self, e: f"PARTITION BY {self.sql(e, 'this')}",
            exp.PercentileCont: transforms.preprocess(
                [transforms.add_within_group_for_percentiles]
            ),
            exp.PercentileDisc: transforms.preprocess(
                [transforms.add_within_group_for_percentiles]
            ),
            exp.Pivot: transforms.preprocess([_unqualify_unpivot_columns]),
            exp.RegexpILike: _regexpilike_sql,
            exp.Rand: rename_func("RANDOM"),
            exp.Select: transforms.preprocess(
                [
                    transforms.eliminate_distinct_on,
                    transforms.explode_to_unnest(),
                    transforms.eliminate_semi_and_anti_joins,
                    _unnest_generate_date_array,
                ]
            ),
            exp.SHA: rename_func("SHA1"),
            exp.StarMap: rename_func("OBJECT_CONSTRUCT"),
            exp.StartsWith: rename_func("STARTSWITH"),
            exp.StrPosition: lambda self, e: self.func(
                "POSITION", e.args.get("substr"), e.this, e.args.get("position")
            ),
            exp.StrToTime: lambda self, e: self.func("TO_TIMESTAMP", e.this, self.format_time(e)),
            exp.Stuff: rename_func("INSERT"),
            exp.TimeAdd: date_delta_sql("TIMEADD"),
            exp.TimestampDiff: lambda self, e: self.func(
                "TIMESTAMPDIFF", e.unit, e.expression, e.this
            ),
            exp.TimestampTrunc: timestamptrunc_sql(),
            exp.TimeStrToTime: timestrtotime_sql,
            exp.TimeToStr: lambda self, e: self.func(
                "TO_CHAR", exp.cast(e.this, exp.DataType.Type.TIMESTAMP), self.format_time(e)
            ),
            exp.TimeToUnix: lambda self, e: f"EXTRACT(epoch_second FROM {self.sql(e, 'this')})",
            exp.ToArray: rename_func("TO_ARRAY"),
            exp.ToChar: lambda self, e: self.function_fallback_sql(e),
            exp.Trim: lambda self, e: self.func("TRIM", e.this, e.expression),
            exp.TsOrDsAdd: date_delta_sql("DATEADD", cast=True),
            exp.TsOrDsDiff: date_delta_sql("DATEDIFF"),
            exp.TsOrDsToDate: lambda self, e: self.func(
                "TRY_TO_DATE" if e.args.get("safe") else "TO_DATE", e.this, self.format_time(e)
            ),
            exp.UnixToTime: rename_func("TO_TIMESTAMP"),
            exp.VarMap: lambda self, e: var_map_sql(self, e, "OBJECT_CONSTRUCT"),
            exp.WeekOfYear: rename_func("WEEKOFYEAR"),
            exp.Xor:
            rename_func("BOOLXOR"),
        }

        SUPPORTED_JSON_PATH_PARTS = {
            exp.JSONPathKey,
            exp.JSONPathRoot,
            exp.JSONPathSubscript,
        }

        TYPE_MAPPING = {
            **generator.Generator.TYPE_MAPPING,
            exp.DataType.Type.NESTED: "OBJECT",
            exp.DataType.Type.STRUCT: "OBJECT",
        }

        PROPERTIES_LOCATION = {
            **generator.Generator.PROPERTIES_LOCATION,
            exp.SetProperty: exp.Properties.Location.UNSUPPORTED,
            exp.VolatileProperty: exp.Properties.Location.UNSUPPORTED,
        }

        UNSUPPORTED_VALUES_EXPRESSIONS = {
            exp.Map,
            exp.StarMap,
            exp.Struct,
            exp.VarMap,
        }

        def with_properties(self, properties: exp.Properties) -> str:
            return self.properties(properties, wrapped=False, prefix=self.sep(""), sep=" ")

        def values_sql(self, expression: exp.Values, values_as_table: bool = True) -> str:
            # Fall back to non-table VALUES when a row contains constructs from
            # UNSUPPORTED_VALUES_EXPRESSIONS
            if expression.find(*self.UNSUPPORTED_VALUES_EXPRESSIONS):
                values_as_table = False

            return super().values_sql(expression, values_as_table=values_as_table)

        def datatype_sql(self, expression: exp.DataType) -> str:
            expressions = expression.expressions
            if (
                expressions
                and expression.is_type(*exp.DataType.STRUCT_TYPES)
                and any(isinstance(field_type, exp.DataType) for field_type in expressions)
            ):
                # The correct syntax is OBJECT [ (<key> <value_type [NOT NULL] [, ...]) ]
                return "OBJECT"

            return super().datatype_sql(expression)

        def tonumber_sql(self, expression: exp.ToNumber) -> str:
            return self.func(
                "TO_NUMBER",
                expression.this,
                expression.args.get("format"),
                expression.args.get("precision"),
                expression.args.get("scale"),
            )

        def timestampfromparts_sql(self, expression: exp.TimestampFromParts) -> str:
            milli = expression.args.get("milli")
            if milli is not None:
                # TIMESTAMP_FROM_PARTS takes nanoseconds, so scale milli by 1,000,000
                milli_to_nano = milli.pop() * exp.Literal.number(1000000)
                expression.set("nano", milli_to_nano)

            return rename_func("TIMESTAMP_FROM_PARTS")(self, expression)

        def trycast_sql(self, expression: exp.TryCast) -> str:
            value = expression.this

            if value.type is None:
                from sqlglot.optimizer.annotate_types import annotate_types

                value = annotate_types(value)

            if value.is_type(*exp.DataType.TEXT_TYPES, exp.DataType.Type.UNKNOWN):
                return super().trycast_sql(expression)

            # TRY_CAST only works for string values in Snowflake
            return self.cast_sql(expression)

        def log_sql(self, expression: exp.Log) -> str:
            if not expression.expression:
                return self.func("LN", expression.this)

            return super().log_sql(expression)

        def unnest_sql(self, expression: exp.Unnest) -> str:
            unnest_alias = expression.args.get("alias")
            offset = expression.args.get("offset")

            columns = [
                exp.to_identifier("seq"),
                exp.to_identifier("key"),
                exp.to_identifier("path"),
                offset.pop() if isinstance(offset, exp.Expression) else exp.to_identifier("index"),
                seq_get(unnest_alias.columns if unnest_alias else [], 0)
                or exp.to_identifier("value"),
                exp.to_identifier("this"),
            ]

            if unnest_alias:
                unnest_alias.set("columns", columns)
            else:
                unnest_alias = exp.TableAlias(this="_u", columns=columns)

            explode = f"TABLE(FLATTEN(INPUT => {self.sql(expression.expressions[0])}))"
            alias = self.sql(unnest_alias)
            alias = f" AS {alias}" if alias else ""
            return f"{explode}{alias}"

        def show_sql(self, expression: exp.Show) -> str:
            terse = "TERSE " if expression.args.get("terse") else ""
            history = " HISTORY" if expression.args.get("history") else ""
            like = self.sql(expression, "like")
            like = f" LIKE {like}" if like else ""

            scope = self.sql(expression, "scope")
            scope = f" {scope}" if scope else ""

            scope_kind = self.sql(expression, "scope_kind")
            if scope_kind:
                scope_kind = f" IN {scope_kind}"

            starts_with = self.sql(expression, "starts_with")
            if starts_with:
                starts_with = f" STARTS WITH {starts_with}"

            limit = self.sql(expression, "limit")

            from_ = self.sql(expression, "from")
            if from_:
                from_ = f" FROM {from_}"

            return f"SHOW {terse}{expression.name}{history}{like}{scope_kind}{scope}{starts_with}{limit}{from_}"

        def regexpextract_sql(self, expression: exp.RegexpExtract) -> str:
            # Other dialects don't support all of the following parameters, so we need to
            # generate default values as necessary to ensure the transpilation is correct
            group = expression.args.get("group")
            parameters = expression.args.get("parameters") or (group and exp.Literal.string("c"))
            occurrence = expression.args.get("occurrence") or (parameters and exp.Literal.number(1))
            position = expression.args.get("position") or (occurrence and exp.Literal.number(1))

            return self.func(
                "REGEXP_SUBSTR",
                expression.this,
                expression.expression,
                position,
                occurrence,
                parameters,
                group,
            )

        def except_op(self, expression: exp.Except) -> str:
            if not expression.args.get("distinct"):
                self.unsupported("EXCEPT with All is not supported in Snowflake")
            return super().except_op(expression)

        def intersect_op(self, expression: exp.Intersect) -> str:
            if not expression.args.get("distinct"):
                self.unsupported("INTERSECT with All is not supported in Snowflake")
            return super().intersect_op(expression)

        def describe_sql(self, expression: exp.Describe) -> str:
            # Default to table if kind is unknown
            kind_value = expression.args.get("kind") or "TABLE"
            kind = f" {kind_value}" if kind_value else ""
            this = f" {self.sql(expression, 'this')}"
            expressions = self.expressions(expression, flat=True)
            expressions = f" {expressions}" if expressions else ""
            return f"DESCRIBE{kind}{this}{expressions}"

        def generatedasidentitycolumnconstraint_sql(
            self, expression: exp.GeneratedAsIdentityColumnConstraint
        ) -> str:
            start = expression.args.get("start")
            start = f" START {start}" if start else ""
            increment = expression.args.get("increment")
            increment = f" INCREMENT {increment}" if increment else ""
            return f"AUTOINCREMENT{start}{increment}"

        def swaptable_sql(self, expression: exp.SwapTable) -> str:
            this = self.sql(expression, "this")
            return f"SWAP WITH {this}"

        def cluster_sql(self, expression: exp.Cluster) -> str:
            return f"CLUSTER BY ({self.expressions(expression, flat=True)})"

        def struct_sql(self, expression: exp.Struct) -> str:
            keys = []
            values = []

            for i, e in enumerate(expression.expressions):
                if isinstance(e, exp.PropertyEQ):
                    keys.append(
                        exp.Literal.string(e.name) if isinstance(e.this, exp.Identifier) else e.this
                    )
                    values.append(e.expression)
                else:
                    keys.append(exp.Literal.string(f"_{i}"))
                    values.append(e)

            return self.func("OBJECT_CONSTRUCT", *flatten(zip(keys, values)))

        def approxquantile_sql(self, expression: exp.ApproxQuantile) -> str:
            if expression.args.get("weight") or expression.args.get("accuracy"):
                self.unsupported(
                    "APPROX_PERCENTILE with weight and/or accuracy arguments are not supported in Snowflake"
                )

            return self.func("APPROX_PERCENTILE", expression.this, expression.args.get("quantile"))

        def alterset_sql(self, expression: exp.AlterSet) -> str:
            exprs = self.expressions(expression, flat=True)
            exprs = f" {exprs}" if exprs else ""
            file_format = self.expressions(expression, key="file_format", flat=True, sep=" ")
            file_format = f" STAGE_FILE_FORMAT = ({file_format})" if file_format else ""
            copy_options = self.expressions(expression, key="copy_options", flat=True, sep=" ")
            copy_options = f" STAGE_COPY_OPTIONS = ({copy_options})" if copy_options else ""
            tag = self.expressions(expression, key="tag", flat=True)
            tag = f" TAG {tag}" if
tag else "" 1065 1066 return f"SET{exprs}{file_format}{copy_options}{tag}"
Specifies the strategy according to which identifiers should be normalized.
Default NULL ordering method to use if not explicitly set. Possible values: "nulls_are_small", "nulls_are_large", "nulls_are_last".
Some dialects, such as Snowflake, allow you to reference a CTE column alias in the HAVING clause of the CTE. This flag will cause the CTE alias columns to override any projection aliases in the subquery.
For example, WITH y(c) AS ( SELECT SUM(a) FROM (SELECT 1 a) AS x HAVING c > 0 ) SELECT c FROM y;
will be rewritten as
WITH y(c) AS (
SELECT SUM(a) AS c FROM (SELECT 1 AS a) AS x HAVING c > 0
) SELECT c FROM y;
Associates this dialect's time formats with their equivalent Python strftime
formats.
265 def quote_identifier(self, expression: E, identify: bool = True) -> E: 266 # This disables quoting DUAL in SELECT ... FROM DUAL, because Snowflake treats an 267 # unquoted DUAL keyword in a special way and does not map it to a user-defined table 268 if ( 269 isinstance(expression, exp.Identifier) 270 and isinstance(expression.parent, exp.Table) 271 and expression.name.lower() == "dual" 272 ): 273 return expression # type: ignore 274 275 return super().quote_identifier(expression, identify=identify)
Adds quotes to a given identifier.
Arguments:
- expression: The expression of interest. If it's not an Identifier, this method is a no-op.
- identify: If set to False, the quotes will only be added if the identifier is deemed "unsafe", with respect to its characters and this dialect's normalization strategy.
Mapping of an escaped sequence (e.g. \n) to its unescaped version (e.g. the newline character).
Inherited Members
- sqlglot.dialects.dialect.Dialect
- Dialect
- INDEX_OFFSET
- WEEK_OFFSET
- UNNEST_COLUMN_ONLY
- ALIAS_POST_TABLESAMPLE
- IDENTIFIERS_CAN_START_WITH_DIGIT
- DPIPE_IS_STRING_CONCAT
- STRICT_STRING_CONCAT
- NORMALIZE_FUNCTIONS
- LOG_BASE_FIRST
- TYPED_DIVISION
- SAFE_DIVISION
- CONCAT_COALESCE
- HEX_LOWERCASE
- DATE_FORMAT
- DATEINT_FORMAT
- FORMAT_MAPPING
- PSEUDOCOLUMNS
- FORCE_EARLY_ALIAS_REF_EXPANSION
- EXPAND_ALIAS_REFS_EARLY_ONLY_IN_GROUP_BY
- SUPPORTS_ORDER_BY_ALL
- HAS_DISTINCT_ARRAY_CONSTRUCTORS
- SUPPORTS_FIXED_SIZE_ARRAYS
- DATE_PART_MAPPING
- TYPE_TO_EXPRESSIONS
- ANNOTATORS
- get_or_raise
- format_time
- settings
- normalize_identifier
- case_sensitive
- can_identify
- to_json_path
- parse
- parse_into
- generate
- transpile
- tokenize
- tokenizer
- jsonpath_tokenizer
- parser
- generator
class Parser(parser.Parser):
    """Snowflake-specific parser.

    Overrides the declarative token/function tables of the base parser and a
    handful of `_parse_*` hooks to cover Snowflake syntax: staged files
    (@stage paths), IDENTIFIER(...) references, SHOW commands, FLATTEN
    laterals, MASKING/PROJECTION/TAG column constraints and ALTER ... SWAP.
    """

    IDENTIFY_PIVOT_STRINGS = True
    DEFAULT_SAMPLING_METHOD = "BERNOULLI"
    COLON_IS_VARIANT_EXTRACT = True

    ID_VAR_TOKENS = {
        *parser.Parser.ID_VAR_TOKENS,
        TokenType.MATCH_CONDITION,
    }

    TABLE_ALIAS_TOKENS = parser.Parser.TABLE_ALIAS_TOKENS | {TokenType.WINDOW}
    TABLE_ALIAS_TOKENS.discard(TokenType.MATCH_CONDITION)

    # Snowflake function names mapped to sqlglot AST builders
    FUNCTIONS = {
        **parser.Parser.FUNCTIONS,
        "APPROX_PERCENTILE": exp.ApproxQuantile.from_arg_list,
        "ARRAYAGG": exp.ArrayAgg.from_arg_list,
        "ARRAY_CONSTRUCT": lambda args: exp.Array(expressions=args),
        # Snowflake's argument order is (value, array); exp.ArrayContains is (array, value)
        "ARRAY_CONTAINS": lambda args: exp.ArrayContains(
            this=seq_get(args, 1), expression=seq_get(args, 0)
        ),
        "ARRAY_GENERATE_RANGE": lambda args: exp.GenerateSeries(
            # ARRAY_GENERATE_RANGE has an exclusive end; we normalize it to be inclusive
            start=seq_get(args, 0),
            end=exp.Sub(this=seq_get(args, 1), expression=exp.Literal.number(1)),
            step=seq_get(args, 2),
        ),
        "BITXOR": binary_from_function(exp.BitwiseXor),
        "BIT_XOR": binary_from_function(exp.BitwiseXor),
        "BOOLXOR": binary_from_function(exp.Xor),
        "DATE": _build_datetime("DATE", exp.DataType.Type.DATE),
        "DATE_TRUNC": _date_trunc_to_time,
        "DATEADD": _build_date_time_add(exp.DateAdd),
        "DATEDIFF": _build_datediff,
        "DIV0": _build_if_from_div0,
        "FLATTEN": exp.Explode.from_arg_list,
        "GET_PATH": lambda args, dialect: exp.JSONExtract(
            this=seq_get(args, 0), expression=dialect.to_json_path(seq_get(args, 1))
        ),
        "IFF": exp.If.from_arg_list,
        "LAST_DAY": lambda args: exp.LastDay(
            this=seq_get(args, 0), unit=map_date_part(seq_get(args, 1))
        ),
        "LEN": lambda args: exp.Length(this=seq_get(args, 0), binary=True),
        "LENGTH": lambda args: exp.Length(this=seq_get(args, 0), binary=True),
        "LISTAGG": exp.GroupConcat.from_arg_list,
        # MEDIAN(x) is the 0.5 continuous percentile
        "MEDIAN": lambda args: exp.PercentileCont(
            this=seq_get(args, 0), expression=exp.Literal.number(0.5)
        ),
        "NULLIFZERO": _build_if_from_nullifzero,
        "OBJECT_CONSTRUCT": _build_object_construct,
        "REGEXP_REPLACE": _build_regexp_replace,
        "REGEXP_SUBSTR": exp.RegexpExtract.from_arg_list,
        "RLIKE": exp.RegexpLike.from_arg_list,
        "SQUARE": lambda args: exp.Pow(this=seq_get(args, 0), expression=exp.Literal.number(2)),
        "TIMEADD": _build_date_time_add(exp.TimeAdd),
        "TIMEDIFF": _build_datediff,
        "TIMESTAMPADD": _build_date_time_add(exp.DateAdd),
        "TIMESTAMPDIFF": _build_datediff,
        "TIMESTAMPFROMPARTS": build_timestamp_from_parts,
        "TIMESTAMP_FROM_PARTS": build_timestamp_from_parts,
        "TRY_PARSE_JSON": lambda args: exp.ParseJSON(this=seq_get(args, 0), safe=True),
        "TRY_TO_DATE": _build_datetime("TRY_TO_DATE", exp.DataType.Type.DATE, safe=True),
        "TO_DATE": _build_datetime("TO_DATE", exp.DataType.Type.DATE),
        "TO_NUMBER": lambda args: exp.ToNumber(
            this=seq_get(args, 0),
            format=seq_get(args, 1),
            precision=seq_get(args, 2),
            scale=seq_get(args, 3),
        ),
        "TO_TIME": _build_datetime("TO_TIME", exp.DataType.Type.TIME),
        "TO_TIMESTAMP": _build_datetime("TO_TIMESTAMP", exp.DataType.Type.TIMESTAMP),
        "TO_TIMESTAMP_LTZ": _build_datetime("TO_TIMESTAMP_LTZ", exp.DataType.Type.TIMESTAMPLTZ),
        "TO_TIMESTAMP_NTZ": _build_datetime("TO_TIMESTAMP_NTZ", exp.DataType.Type.TIMESTAMP),
        "TO_TIMESTAMP_TZ": _build_datetime("TO_TIMESTAMP_TZ", exp.DataType.Type.TIMESTAMPTZ),
        "TO_VARCHAR": exp.ToChar.from_arg_list,
        "ZEROIFNULL": _build_if_from_zeroifnull,
    }

    FUNCTION_PARSERS = {
        **parser.Parser.FUNCTION_PARSERS,
        "DATE_PART": lambda self: self._parse_date_part(),
        "OBJECT_CONSTRUCT_KEEP_NULL": lambda self: self._parse_json_object(),
    }
    # TRIM is parsed as a regular function in Snowflake
    FUNCTION_PARSERS.pop("TRIM")

    TIMESTAMPS = parser.Parser.TIMESTAMPS - {TokenType.TIME}

    RANGE_PARSERS = {
        **parser.Parser.RANGE_PARSERS,
        TokenType.LIKE_ANY: parser.binary_range_parser(exp.LikeAny),
        TokenType.ILIKE_ANY: parser.binary_range_parser(exp.ILikeAny),
    }

    ALTER_PARSERS = {
        **parser.Parser.ALTER_PARSERS,
        "UNSET": lambda self: self.expression(
            exp.Set,
            tag=self._match_text_seq("TAG"),
            expressions=self._parse_csv(self._parse_id_var),
            unset=True,
        ),
        "SWAP": lambda self: self._parse_alter_table_swap(),
    }

    STATEMENT_PARSERS = {
        **parser.Parser.STATEMENT_PARSERS,
        TokenType.SHOW: lambda self: self._parse_show(),
    }

    PROPERTY_PARSERS = {
        **parser.Parser.PROPERTY_PARSERS,
        "LOCATION": lambda self: self._parse_location_property(),
    }

    TYPE_CONVERTERS = {
        # https://docs.snowflake.com/en/sql-reference/data-types-numeric#number
        exp.DataType.Type.DECIMAL: build_default_decimal_type(precision=38, scale=0),
    }

    # SHOW <kind> variants; TERSE entries reuse the same parser
    SHOW_PARSERS = {
        "SCHEMAS": _show_parser("SCHEMAS"),
        "TERSE SCHEMAS": _show_parser("SCHEMAS"),
        "OBJECTS": _show_parser("OBJECTS"),
        "TERSE OBJECTS": _show_parser("OBJECTS"),
        "TABLES": _show_parser("TABLES"),
        "TERSE TABLES": _show_parser("TABLES"),
        "VIEWS": _show_parser("VIEWS"),
        "TERSE VIEWS": _show_parser("VIEWS"),
        "PRIMARY KEYS": _show_parser("PRIMARY KEYS"),
        "TERSE PRIMARY KEYS": _show_parser("PRIMARY KEYS"),
        "IMPORTED KEYS": _show_parser("IMPORTED KEYS"),
        "TERSE IMPORTED KEYS": _show_parser("IMPORTED KEYS"),
        "UNIQUE KEYS": _show_parser("UNIQUE KEYS"),
        "TERSE UNIQUE KEYS": _show_parser("UNIQUE KEYS"),
        "SEQUENCES": _show_parser("SEQUENCES"),
        "TERSE SEQUENCES": _show_parser("SEQUENCES"),
        "COLUMNS": _show_parser("COLUMNS"),
        "USERS": _show_parser("USERS"),
        "TERSE USERS": _show_parser("USERS"),
    }

    CONSTRAINT_PARSERS = {
        **parser.Parser.CONSTRAINT_PARSERS,
        "WITH": lambda self: self._parse_with_constraint(),
        "MASKING": lambda self: self._parse_with_constraint(),
        "PROJECTION": lambda self: self._parse_with_constraint(),
        "TAG": lambda self: self._parse_with_constraint(),
    }

    STAGED_FILE_SINGLE_TOKENS = {
        TokenType.DOT,
        TokenType.MOD,
        TokenType.SLASH,
    }

    # Column names produced by the FLATTEN table function, in output order
    FLATTEN_COLUMNS = ["SEQ", "KEY", "PATH", "INDEX", "VALUE", "THIS"]

    SCHEMA_KINDS = {"OBJECTS", "TABLES", "VIEWS", "SEQUENCES", "UNIQUE KEYS", "IMPORTED KEYS"}

    NON_TABLE_CREATABLES = {"STORAGE INTEGRATION", "TAG", "WAREHOUSE", "STREAMLIT"}

    LAMBDAS = {
        **parser.Parser.LAMBDAS,
        TokenType.ARROW: lambda self, expressions: self.expression(
            exp.Lambda,
            this=self._replace_lambda(
                self._parse_assignment(),
                expressions,
            ),
            # Strip the optional type annotation from typed lambda parameters
            expressions=[e.this if isinstance(e, exp.Cast) else e for e in expressions],
        ),
    }

    def _negate_range(
        self, this: t.Optional[exp.Expression] = None
    ) -> t.Optional[exp.Expression]:
        """Negate a range predicate, special-casing NOT IN (subquery)."""
        if not this:
            return this

        query = this.args.get("query")
        if isinstance(this, exp.In) and isinstance(query, exp.Query):
            # Snowflake treats `value NOT IN (subquery)` as `VALUE <> ALL (subquery)`, so
            # we do this conversion here to avoid parsing it into `NOT value IN (subquery)`
            # which can produce different results (most likely a SnowFlake bug).
            #
            # https://docs.snowflake.com/en/sql-reference/functions/in
            # Context: https://github.com/tobymao/sqlglot/issues/3890
            return self.expression(
                exp.NEQ, this=this.this, expression=exp.All(this=query.unnest())
            )

        return self.expression(exp.Not, this=this)

    def _parse_with_constraint(self) -> t.Optional[exp.Expression]:
        """Parse [WITH] MASKING POLICY / PROJECTION POLICY / TAG column constraints."""
        if self._prev.token_type != TokenType.WITH:
            # WITH is optional; step back so the keyword itself is re-examined
            self._retreat(self._index - 1)

        if self._match_text_seq("MASKING", "POLICY"):
            policy = self._parse_column()
            return self.expression(
                exp.MaskingPolicyColumnConstraint,
                this=policy.to_dot() if isinstance(policy, exp.Column) else policy,
                expressions=self._match(TokenType.USING)
                and self._parse_wrapped_csv(self._parse_id_var),
            )
        if self._match_text_seq("PROJECTION", "POLICY"):
            policy = self._parse_column()
            return self.expression(
                exp.ProjectionPolicyColumnConstraint,
                this=policy.to_dot() if isinstance(policy, exp.Column) else policy,
            )
        if self._match(TokenType.TAG):
            return self.expression(
                exp.TagColumnConstraint,
                expressions=self._parse_wrapped_csv(self._parse_property),
            )

        return None

    def _parse_create(self) -> exp.Create | exp.Command:
        expression = super()._parse_create()
        if isinstance(expression, exp.Create) and expression.kind in self.NON_TABLE_CREATABLES:
            # Replace the Table node with the enclosed Identifier
            expression.this.replace(expression.this.this)

        return expression

    # https://docs.snowflake.com/en/sql-reference/functions/date_part.html
    # https://docs.snowflake.com/en/sql-reference/functions-date-time.html#label-supported-date-time-parts
    def _parse_date_part(self: Snowflake.Parser) -> t.Optional[exp.Expression]:
        """Parse DATE_PART(part, expr); EPOCH* parts become unix-time expressions."""
        this = self._parse_var() or self._parse_type()

        if not this:
            return None

        self._match(TokenType.COMMA)
        expression = self._parse_bitwise()
        this = map_date_part(this)
        name = this.name.upper()

        if name.startswith("EPOCH"):
            if name == "EPOCH_MILLISECOND":
                scale = 10**3
            elif name == "EPOCH_MICROSECOND":
                scale = 10**6
            elif name == "EPOCH_NANOSECOND":
                scale = 10**9
            else:
                scale = None

            ts = self.expression(exp.Cast, this=expression, to=exp.DataType.build("TIMESTAMP"))
            to_unix: exp.Expression = self.expression(exp.TimeToUnix, this=ts)

            if scale:
                to_unix = exp.Mul(this=to_unix, expression=exp.Literal.number(scale))

            return to_unix

        return self.expression(exp.Extract, this=this, expression=expression)

    def _parse_bracket_key_value(self, is_map: bool = False) -> t.Optional[exp.Expression]:
        if is_map:
            # Keys are strings in Snowflake's objects, see also:
            # - https://docs.snowflake.com/en/sql-reference/data-types-semistructured
            # - https://docs.snowflake.com/en/sql-reference/functions/object_construct
            return self._parse_slice(self._parse_string())

        return self._parse_slice(self._parse_alias(self._parse_assignment(), explicit=True))

    def _parse_lateral(self) -> t.Optional[exp.Lateral]:
        """Attach FLATTEN's implicit column names to LATERAL FLATTEN aliases."""
        lateral = super()._parse_lateral()
        if not lateral:
            return lateral

        if isinstance(lateral.this, exp.Explode):
            table_alias = lateral.args.get("alias")
            columns = [exp.to_identifier(col) for col in self.FLATTEN_COLUMNS]
            if table_alias and not table_alias.args.get("columns"):
                table_alias.set("columns", columns)
            elif not table_alias:
                exp.alias_(lateral, "_flattened", table=columns, copy=False)

        return lateral

    def _parse_table_parts(
        self, schema: bool = False, is_db_reference: bool = False, wildcard: bool = False
    ) -> exp.Table:
        """Parse a table reference, including staged-file references.

        https://docs.snowflake.com/en/user-guide/querying-stage
        """
        if self._match(TokenType.STRING, advance=False):
            table = self._parse_string()
        elif self._match_text_seq("@", advance=False):
            table = self._parse_location_path()
        else:
            table = None

        if table:
            file_format = None
            pattern = None

            # Optionally parse "(FILE_FORMAT => ..., PATTERN => ...)" after the stage
            wrapped = self._match(TokenType.L_PAREN)
            while self._curr and wrapped and not self._match(TokenType.R_PAREN):
                if self._match_text_seq("FILE_FORMAT", "=>"):
                    file_format = self._parse_string() or super()._parse_table_parts(
                        is_db_reference=is_db_reference
                    )
                elif self._match_text_seq("PATTERN", "=>"):
                    pattern = self._parse_string()
                else:
                    break

                self._match(TokenType.COMMA)

            table = self.expression(exp.Table, this=table, format=file_format, pattern=pattern)
        else:
            table = super()._parse_table_parts(schema=schema, is_db_reference=is_db_reference)

        return table

    def _parse_id_var(
        self,
        any_token: bool = True,
        tokens: t.Optional[t.Collection[TokenType]] = None,
    ) -> t.Optional[exp.Expression]:
        """Parse an identifier, supporting Snowflake's IDENTIFIER(...) wrapper."""
        if self._match_text_seq("IDENTIFIER", "("):
            identifier = (
                super()._parse_id_var(any_token=any_token, tokens=tokens)
                or self._parse_string()
            )
            self._match_r_paren()
            return self.expression(exp.Anonymous, this="IDENTIFIER", expressions=[identifier])

        return super()._parse_id_var(any_token=any_token, tokens=tokens)

    def _parse_show_snowflake(self, this: str) -> exp.Show:
        """Parse the tail of a SHOW <kind> command into an exp.Show node."""
        scope = None
        scope_kind = None

        # will identify SHOW TERSE SCHEMAS but not SHOW TERSE PRIMARY KEYS
        # which is syntactically valid but has no effect on the output
        terse = self._tokens[self._index - 2].text.upper() == "TERSE"

        history = self._match_text_seq("HISTORY")

        like = self._parse_string() if self._match(TokenType.LIKE) else None

        if self._match(TokenType.IN):
            if self._match_text_seq("ACCOUNT"):
                scope_kind = "ACCOUNT"
            elif self._match_set(self.DB_CREATABLES):
                scope_kind = self._prev.text.upper()
                if self._curr:
                    scope = self._parse_table_parts()
            elif self._curr:
                scope_kind = "SCHEMA" if this in self.SCHEMA_KINDS else "TABLE"
                scope = self._parse_table_parts()

        return self.expression(
            exp.Show,
            **{
                "terse": terse,
                "this": this,
                "history": history,
                "like": like,
                "scope": scope,
                "scope_kind": scope_kind,
                "starts_with": self._match_text_seq("STARTS", "WITH") and self._parse_string(),
                "limit": self._parse_limit(),
                "from": self._parse_string() if self._match(TokenType.FROM) else None,
            },
        )

    def _parse_alter_table_swap(self) -> exp.SwapTable:
        """Parse ALTER TABLE ... SWAP [WITH] <table>."""
        self._match_text_seq("WITH")
        return self.expression(exp.SwapTable, this=self._parse_table(schema=True))

    def _parse_location_property(self) -> exp.LocationProperty:
        self._match(TokenType.EQ)
        return self.expression(exp.LocationProperty, this=self._parse_location_path())

    def _parse_file_location(self) -> t.Optional[exp.Expression]:
        # Parse either a subquery or a staged file
        return (
            self._parse_select(table=True, parse_subquery_alias=False)
            if self._match(TokenType.L_PAREN, advance=False)
            else self._parse_table_parts()
        )

    def _parse_location_path(self) -> exp.Var:
        """Greedily consume a stage path (e.g. @stage/dir/file) into a Var."""
        parts = [self._advance_any(ignore_reserved=True)]

        # We avoid consuming a comma token because external tables like @foo and @bar
        # can be joined in a query with a comma separator, as well as closing paren
        # in case of subqueries
        while self._is_connected() and not self._match_set(
            (TokenType.COMMA, TokenType.L_PAREN, TokenType.R_PAREN), advance=False
        ):
            parts.append(self._advance_any(ignore_reserved=True))

        return exp.var("".join(part.text for part in parts if part))

    def _parse_lambda_arg(self) -> t.Optional[exp.Expression]:
        """Parse a lambda parameter, wrapping a trailing type annotation in a Cast."""
        this = super()._parse_lambda_arg()

        if not this:
            return this

        typ = self._parse_types()

        if typ:
            return self.expression(exp.Cast, this=this, to=typ)

        return this
Parser consumes a list of tokens produced by the Tokenizer and produces a parsed syntax tree.
Arguments:
- error_level: The desired error level. Default: ErrorLevel.IMMEDIATE
- error_message_context: The amount of context to capture from a query string when displaying the error message (in number of characters). Default: 100
- max_errors: Maximum number of error messages to include in a raised ParseError. This is only relevant if error_level is ErrorLevel.RAISE. Default: 3
Inherited Members
- sqlglot.parser.Parser
- Parser
- NO_PAREN_FUNCTIONS
- STRUCT_TYPE_TOKENS
- NESTED_TYPE_TOKENS
- ENUM_TYPE_TOKENS
- AGGREGATE_TYPE_TOKENS
- TYPE_TOKENS
- SIGNED_TO_UNSIGNED_TYPE_TOKEN
- SUBQUERY_PREDICATES
- RESERVED_TOKENS
- DB_CREATABLES
- CREATABLES
- ALTERABLES
- INTERVAL_VARS
- ALIAS_TOKENS
- ARRAY_CONSTRUCTORS
- COMMENT_TABLE_ALIAS_TOKENS
- UPDATE_ALIAS_TOKENS
- TRIM_TYPES
- FUNC_TOKENS
- CONJUNCTION
- ASSIGNMENT
- DISJUNCTION
- EQUALITY
- COMPARISON
- BITWISE
- TERM
- FACTOR
- EXPONENT
- TIMES
- SET_OPERATIONS
- JOIN_METHODS
- JOIN_SIDES
- JOIN_KINDS
- JOIN_HINTS
- COLUMN_OPERATORS
- EXPRESSION_PARSERS
- UNARY_PARSERS
- STRING_PARSERS
- NUMERIC_PARSERS
- PRIMARY_PARSERS
- PLACEHOLDER_PARSERS
- ALTER_ALTER_PARSERS
- SCHEMA_UNNAMED_CONSTRAINTS
- NO_PAREN_FUNCTION_PARSERS
- INVALID_FUNC_NAME_TOKENS
- FUNCTIONS_WITH_ALIASED_ARGS
- KEY_VALUE_DEFINITIONS
- QUERY_MODIFIER_PARSERS
- SET_PARSERS
- TYPE_LITERAL_PARSERS
- DDL_SELECT_TOKENS
- PRE_VOLATILE_TOKENS
- TRANSACTION_KIND
- TRANSACTION_CHARACTERISTICS
- CONFLICT_ACTIONS
- CREATE_SEQUENCE
- ISOLATED_LOADING_OPTIONS
- USABLES
- CAST_ACTIONS
- SCHEMA_BINDING_OPTIONS
- KEY_CONSTRAINT_OPTIONS
- INSERT_ALTERNATIVES
- CLONE_KEYWORDS
- HISTORICAL_DATA_PREFIX
- HISTORICAL_DATA_KIND
- OPCLASS_FOLLOW_KEYWORDS
- OPTYPE_FOLLOW_TOKENS
- TABLE_INDEX_HINT_TOKENS
- VIEW_ATTRIBUTES
- WINDOW_ALIAS_TOKENS
- WINDOW_BEFORE_PAREN_TOKENS
- WINDOW_SIDES
- JSON_KEY_VALUE_SEPARATOR_TOKENS
- FETCH_TOKENS
- ADD_CONSTRAINT_TOKENS
- DISTINCT_TOKENS
- NULL_TOKENS
- UNNEST_OFFSET_ALIAS_TOKENS
- SELECT_START_TOKENS
- COPY_INTO_VARLEN_OPTIONS
- STRICT_CAST
- PREFIXED_PIVOT_COLUMNS
- LOG_DEFAULTS_TO_LN
- ALTER_TABLE_ADD_REQUIRED_FOR_EACH_COLUMN
- TABLESAMPLE_CSV
- SET_REQUIRES_ASSIGNMENT_DELIMITER
- TRIM_PATTERN_FIRST
- STRING_ALIASES
- MODIFIERS_ATTACHED_TO_SET_OP
- SET_OP_MODIFIERS
- NO_PAREN_IF_COMMANDS
- JSON_ARROWS_REQUIRE_JSON_TYPE
- VALUES_FOLLOWED_BY_PAREN
- SUPPORTS_IMPLICIT_UNNEST
- INTERVAL_SPANS
- SUPPORTS_PARTITION_SELECTION
- error_level
- error_message_context
- max_errors
- dialect
- reset
- parse
- parse_into
- check_errors
- raise_error
- expression
- validate_expression
- errors
- sql
691 class Tokenizer(tokens.Tokenizer): 692 STRING_ESCAPES = ["\\", "'"] 693 HEX_STRINGS = [("x'", "'"), ("X'", "'")] 694 RAW_STRINGS = ["$$"] 695 COMMENTS = ["--", "//", ("/*", "*/")] 696 697 KEYWORDS = { 698 **tokens.Tokenizer.KEYWORDS, 699 "BYTEINT": TokenType.INT, 700 "CHAR VARYING": TokenType.VARCHAR, 701 "CHARACTER VARYING": TokenType.VARCHAR, 702 "EXCLUDE": TokenType.EXCEPT, 703 "ILIKE ANY": TokenType.ILIKE_ANY, 704 "LIKE ANY": TokenType.LIKE_ANY, 705 "MATCH_CONDITION": TokenType.MATCH_CONDITION, 706 "MATCH_RECOGNIZE": TokenType.MATCH_RECOGNIZE, 707 "MINUS": TokenType.EXCEPT, 708 "NCHAR VARYING": TokenType.VARCHAR, 709 "PUT": TokenType.COMMAND, 710 "REMOVE": TokenType.COMMAND, 711 "RM": TokenType.COMMAND, 712 "SAMPLE": TokenType.TABLE_SAMPLE, 713 "SQL_DOUBLE": TokenType.DOUBLE, 714 "SQL_VARCHAR": TokenType.VARCHAR, 715 "STORAGE INTEGRATION": TokenType.STORAGE_INTEGRATION, 716 "TAG": TokenType.TAG, 717 "TIMESTAMP_TZ": TokenType.TIMESTAMPTZ, 718 "TOP": TokenType.TOP, 719 "WAREHOUSE": TokenType.WAREHOUSE, 720 "STREAMLIT": TokenType.STREAMLIT, 721 } 722 KEYWORDS.pop("/*+") 723 724 SINGLE_TOKENS = { 725 **tokens.Tokenizer.SINGLE_TOKENS, 726 "$": TokenType.PARAMETER, 727 } 728 729 VAR_SINGLE_TOKENS = {"$"} 730 731 COMMANDS = tokens.Tokenizer.COMMANDS - {TokenType.SHOW}
Inherited Members
- sqlglot.tokens.Tokenizer
- Tokenizer
- BIT_STRINGS
- BYTE_STRINGS
- HEREDOC_STRINGS
- UNICODE_STRINGS
- IDENTIFIERS
- IDENTIFIER_ESCAPES
- QUOTES
- HEREDOC_TAG_IS_IDENTIFIER
- HEREDOC_STRING_ALTERNATIVE
- STRING_ESCAPES_ALLOWED_IN_RAW_STRINGS
- WHITE_SPACE
- COMMAND_PREFIX_TOKENS
- NUMERIC_LITERALS
- dialect
- reset
- tokenize
- tokenize_rs
- size
- sql
- tokens
733 class Generator(generator.Generator): 734 PARAMETER_TOKEN = "$" 735 MATCHED_BY_SOURCE = False 736 SINGLE_STRING_INTERVAL = True 737 JOIN_HINTS = False 738 TABLE_HINTS = False 739 QUERY_HINTS = False 740 AGGREGATE_FILTER_SUPPORTED = False 741 SUPPORTS_TABLE_COPY = False 742 COLLATE_IS_FUNC = True 743 LIMIT_ONLY_LITERALS = True 744 JSON_KEY_VALUE_PAIR_SEP = "," 745 INSERT_OVERWRITE = " OVERWRITE INTO" 746 STRUCT_DELIMITER = ("(", ")") 747 COPY_PARAMS_ARE_WRAPPED = False 748 COPY_PARAMS_EQ_REQUIRED = True 749 STAR_EXCEPT = "EXCLUDE" 750 SUPPORTS_EXPLODING_PROJECTIONS = False 751 ARRAY_CONCAT_IS_VAR_LEN = False 752 SUPPORTS_CONVERT_TIMEZONE = True 753 754 TRANSFORMS = { 755 **generator.Generator.TRANSFORMS, 756 exp.ApproxDistinct: rename_func("APPROX_COUNT_DISTINCT"), 757 exp.ArgMax: rename_func("MAX_BY"), 758 exp.ArgMin: rename_func("MIN_BY"), 759 exp.Array: inline_array_sql, 760 exp.ArrayConcat: lambda self, e: self.arrayconcat_sql(e, name="ARRAY_CAT"), 761 exp.ArrayContains: lambda self, e: self.func("ARRAY_CONTAINS", e.expression, e.this), 762 exp.AtTimeZone: lambda self, e: self.func( 763 "CONVERT_TIMEZONE", e.args.get("zone"), e.this 764 ), 765 exp.BitwiseXor: rename_func("BITXOR"), 766 exp.Create: transforms.preprocess([_flatten_structured_types_unless_iceberg]), 767 exp.DateAdd: date_delta_sql("DATEADD"), 768 exp.DateDiff: date_delta_sql("DATEDIFF"), 769 exp.DateStrToDate: datestrtodate_sql, 770 exp.DayOfMonth: rename_func("DAYOFMONTH"), 771 exp.DayOfWeek: rename_func("DAYOFWEEK"), 772 exp.DayOfYear: rename_func("DAYOFYEAR"), 773 exp.Explode: rename_func("FLATTEN"), 774 exp.Extract: rename_func("DATE_PART"), 775 exp.FromTimeZone: lambda self, e: self.func( 776 "CONVERT_TIMEZONE", e.args.get("zone"), "'UTC'", e.this 777 ), 778 exp.GenerateSeries: lambda self, e: self.func( 779 "ARRAY_GENERATE_RANGE", e.args["start"], e.args["end"] + 1, e.args.get("step") 780 ), 781 exp.GroupConcat: rename_func("LISTAGG"), 782 exp.If: if_sql(name="IFF", false_value="NULL"), 
783 exp.JSONExtract: lambda self, e: self.func("GET_PATH", e.this, e.expression), 784 exp.JSONExtractScalar: lambda self, e: self.func( 785 "JSON_EXTRACT_PATH_TEXT", e.this, e.expression 786 ), 787 exp.JSONObject: lambda self, e: self.func("OBJECT_CONSTRUCT_KEEP_NULL", *e.expressions), 788 exp.JSONPathRoot: lambda *_: "", 789 exp.LogicalAnd: rename_func("BOOLAND_AGG"), 790 exp.LogicalOr: rename_func("BOOLOR_AGG"), 791 exp.Map: lambda self, e: var_map_sql(self, e, "OBJECT_CONSTRUCT"), 792 exp.Max: max_or_greatest, 793 exp.Min: min_or_least, 794 exp.ParseJSON: lambda self, e: self.func( 795 "TRY_PARSE_JSON" if e.args.get("safe") else "PARSE_JSON", e.this 796 ), 797 exp.PartitionedByProperty: lambda self, e: f"PARTITION BY {self.sql(e, 'this')}", 798 exp.PercentileCont: transforms.preprocess( 799 [transforms.add_within_group_for_percentiles] 800 ), 801 exp.PercentileDisc: transforms.preprocess( 802 [transforms.add_within_group_for_percentiles] 803 ), 804 exp.Pivot: transforms.preprocess([_unqualify_unpivot_columns]), 805 exp.RegexpILike: _regexpilike_sql, 806 exp.Rand: rename_func("RANDOM"), 807 exp.Select: transforms.preprocess( 808 [ 809 transforms.eliminate_distinct_on, 810 transforms.explode_to_unnest(), 811 transforms.eliminate_semi_and_anti_joins, 812 _unnest_generate_date_array, 813 ] 814 ), 815 exp.SHA: rename_func("SHA1"), 816 exp.StarMap: rename_func("OBJECT_CONSTRUCT"), 817 exp.StartsWith: rename_func("STARTSWITH"), 818 exp.StrPosition: lambda self, e: self.func( 819 "POSITION", e.args.get("substr"), e.this, e.args.get("position") 820 ), 821 exp.StrToTime: lambda self, e: self.func("TO_TIMESTAMP", e.this, self.format_time(e)), 822 exp.Stuff: rename_func("INSERT"), 823 exp.TimeAdd: date_delta_sql("TIMEADD"), 824 exp.TimestampDiff: lambda self, e: self.func( 825 "TIMESTAMPDIFF", e.unit, e.expression, e.this 826 ), 827 exp.TimestampTrunc: timestamptrunc_sql(), 828 exp.TimeStrToTime: timestrtotime_sql, 829 exp.TimeToStr: lambda self, e: self.func( 830 
"TO_CHAR", exp.cast(e.this, exp.DataType.Type.TIMESTAMP), self.format_time(e) 831 ), 832 exp.TimeToUnix: lambda self, e: f"EXTRACT(epoch_second FROM {self.sql(e, 'this')})", 833 exp.ToArray: rename_func("TO_ARRAY"), 834 exp.ToChar: lambda self, e: self.function_fallback_sql(e), 835 exp.Trim: lambda self, e: self.func("TRIM", e.this, e.expression), 836 exp.TsOrDsAdd: date_delta_sql("DATEADD", cast=True), 837 exp.TsOrDsDiff: date_delta_sql("DATEDIFF"), 838 exp.TsOrDsToDate: lambda self, e: self.func( 839 "TRY_TO_DATE" if e.args.get("safe") else "TO_DATE", e.this, self.format_time(e) 840 ), 841 exp.UnixToTime: rename_func("TO_TIMESTAMP"), 842 exp.VarMap: lambda self, e: var_map_sql(self, e, "OBJECT_CONSTRUCT"), 843 exp.WeekOfYear: rename_func("WEEKOFYEAR"), 844 exp.Xor: rename_func("BOOLXOR"), 845 } 846 847 SUPPORTED_JSON_PATH_PARTS = { 848 exp.JSONPathKey, 849 exp.JSONPathRoot, 850 exp.JSONPathSubscript, 851 } 852 853 TYPE_MAPPING = { 854 **generator.Generator.TYPE_MAPPING, 855 exp.DataType.Type.NESTED: "OBJECT", 856 exp.DataType.Type.STRUCT: "OBJECT", 857 } 858 859 PROPERTIES_LOCATION = { 860 **generator.Generator.PROPERTIES_LOCATION, 861 exp.SetProperty: exp.Properties.Location.UNSUPPORTED, 862 exp.VolatileProperty: exp.Properties.Location.UNSUPPORTED, 863 } 864 865 UNSUPPORTED_VALUES_EXPRESSIONS = { 866 exp.Map, 867 exp.StarMap, 868 exp.Struct, 869 exp.VarMap, 870 } 871 872 def with_properties(self, properties: exp.Properties) -> str: 873 return self.properties(properties, wrapped=False, prefix=self.sep(""), sep=" ") 874 875 def values_sql(self, expression: exp.Values, values_as_table: bool = True) -> str: 876 if expression.find(*self.UNSUPPORTED_VALUES_EXPRESSIONS): 877 values_as_table = False 878 879 return super().values_sql(expression, values_as_table=values_as_table) 880 881 def datatype_sql(self, expression: exp.DataType) -> str: 882 expressions = expression.expressions 883 if ( 884 expressions 885 and expression.is_type(*exp.DataType.STRUCT_TYPES) 886 and 
any(isinstance(field_type, exp.DataType) for field_type in expressions) 887 ): 888 # The correct syntax is OBJECT [ (<key> <value_type [NOT NULL] [, ...]) ] 889 return "OBJECT" 890 891 return super().datatype_sql(expression) 892 893 def tonumber_sql(self, expression: exp.ToNumber) -> str: 894 return self.func( 895 "TO_NUMBER", 896 expression.this, 897 expression.args.get("format"), 898 expression.args.get("precision"), 899 expression.args.get("scale"), 900 ) 901 902 def timestampfromparts_sql(self, expression: exp.TimestampFromParts) -> str: 903 milli = expression.args.get("milli") 904 if milli is not None: 905 milli_to_nano = milli.pop() * exp.Literal.number(1000000) 906 expression.set("nano", milli_to_nano) 907 908 return rename_func("TIMESTAMP_FROM_PARTS")(self, expression) 909 910 def trycast_sql(self, expression: exp.TryCast) -> str: 911 value = expression.this 912 913 if value.type is None: 914 from sqlglot.optimizer.annotate_types import annotate_types 915 916 value = annotate_types(value) 917 918 if value.is_type(*exp.DataType.TEXT_TYPES, exp.DataType.Type.UNKNOWN): 919 return super().trycast_sql(expression) 920 921 # TRY_CAST only works for string values in Snowflake 922 return self.cast_sql(expression) 923 924 def log_sql(self, expression: exp.Log) -> str: 925 if not expression.expression: 926 return self.func("LN", expression.this) 927 928 return super().log_sql(expression) 929 930 def unnest_sql(self, expression: exp.Unnest) -> str: 931 unnest_alias = expression.args.get("alias") 932 offset = expression.args.get("offset") 933 934 columns = [ 935 exp.to_identifier("seq"), 936 exp.to_identifier("key"), 937 exp.to_identifier("path"), 938 offset.pop() if isinstance(offset, exp.Expression) else exp.to_identifier("index"), 939 seq_get(unnest_alias.columns if unnest_alias else [], 0) 940 or exp.to_identifier("value"), 941 exp.to_identifier("this"), 942 ] 943 944 if unnest_alias: 945 unnest_alias.set("columns", columns) 946 else: 947 unnest_alias = 
exp.TableAlias(this="_u", columns=columns) 948 949 explode = f"TABLE(FLATTEN(INPUT => {self.sql(expression.expressions[0])}))" 950 alias = self.sql(unnest_alias) 951 alias = f" AS {alias}" if alias else "" 952 return f"{explode}{alias}" 953 954 def show_sql(self, expression: exp.Show) -> str: 955 terse = "TERSE " if expression.args.get("terse") else "" 956 history = " HISTORY" if expression.args.get("history") else "" 957 like = self.sql(expression, "like") 958 like = f" LIKE {like}" if like else "" 959 960 scope = self.sql(expression, "scope") 961 scope = f" {scope}" if scope else "" 962 963 scope_kind = self.sql(expression, "scope_kind") 964 if scope_kind: 965 scope_kind = f" IN {scope_kind}" 966 967 starts_with = self.sql(expression, "starts_with") 968 if starts_with: 969 starts_with = f" STARTS WITH {starts_with}" 970 971 limit = self.sql(expression, "limit") 972 973 from_ = self.sql(expression, "from") 974 if from_: 975 from_ = f" FROM {from_}" 976 977 return f"SHOW {terse}{expression.name}{history}{like}{scope_kind}{scope}{starts_with}{limit}{from_}" 978 979 def regexpextract_sql(self, expression: exp.RegexpExtract) -> str: 980 # Other dialects don't support all of the following parameters, so we need to 981 # generate default values as necessary to ensure the transpilation is correct 982 group = expression.args.get("group") 983 parameters = expression.args.get("parameters") or (group and exp.Literal.string("c")) 984 occurrence = expression.args.get("occurrence") or (parameters and exp.Literal.number(1)) 985 position = expression.args.get("position") or (occurrence and exp.Literal.number(1)) 986 987 return self.func( 988 "REGEXP_SUBSTR", 989 expression.this, 990 expression.expression, 991 position, 992 occurrence, 993 parameters, 994 group, 995 ) 996 997 def except_op(self, expression: exp.Except) -> str: 998 if not expression.args.get("distinct"): 999 self.unsupported("EXCEPT with All is not supported in Snowflake") 1000 return super().except_op(expression) 
def intersect_op(self, expression: exp.Intersect) -> str:
    """INTERSECT ALL is not available in Snowflake; warn and emit the default form."""
    if not expression.args.get("distinct"):
        self.unsupported("INTERSECT with All is not supported in Snowflake")
    return super().intersect_op(expression)

def describe_sql(self, expression: exp.Describe) -> str:
    """Render DESCRIBE, defaulting the object kind to TABLE when unknown."""
    kind = expression.args.get("kind") or "TABLE"
    target = f" {self.sql(expression, 'this')}"
    extra = self.expressions(expression, flat=True)
    suffix = f" {extra}" if extra else ""
    return f"DESCRIBE {kind}{target}{suffix}"

def generatedasidentitycolumnconstraint_sql(
    self, expression: exp.GeneratedAsIdentityColumnConstraint
) -> str:
    """Identity columns are spelled AUTOINCREMENT [START n] [INCREMENT n] in Snowflake."""
    pieces = ["AUTOINCREMENT"]
    start = expression.args.get("start")
    if start:
        pieces.append(f" START {start}")
    step = expression.args.get("increment")
    if step:
        pieces.append(f" INCREMENT {step}")
    return "".join(pieces)

def swaptable_sql(self, expression: exp.SwapTable) -> str:
    """Render the SWAP WITH <table> clause of ALTER TABLE."""
    return f"SWAP WITH {self.sql(expression, 'this')}"

def cluster_sql(self, expression: exp.Cluster) -> str:
    """Snowflake requires clustering keys to be parenthesized."""
    return f"CLUSTER BY ({self.expressions(expression, flat=True)})"

def struct_sql(self, expression: exp.Struct) -> str:
    """Transpile STRUCT literals into OBJECT_CONSTRUCT(key, value, ...)."""
    flat_args = []
    for index, field in enumerate(expression.expressions):
        if isinstance(field, exp.PropertyEQ):
            if isinstance(field.this, exp.Identifier):
                flat_args.append(exp.Literal.string(field.name))
            else:
                flat_args.append(field.this)
            flat_args.append(field.expression)
        else:
            # Unnamed fields get synthetic positional "_<i>" keys.
            flat_args.append(exp.Literal.string(f"_{index}"))
            flat_args.append(field)
    return self.func("OBJECT_CONSTRUCT", *flat_args)

def approxquantile_sql(self, expression: exp.ApproxQuantile) -> str:
    """APPROX_PERCENTILE in Snowflake takes only the value and the quantile."""
    if expression.args.get("weight") or expression.args.get("accuracy"):
        self.unsupported(
            "APPROX_PERCENTILE with weight and/or accuracy arguments are not supported in Snowflake"
        )
    return self.func("APPROX_PERCENTILE", expression.this, expression.args.get("quantile"))

def alterset_sql(self, expression: exp.AlterSet) -> str:
    """Render ALTER ... SET with optional stage file format, copy options and tags."""
    sql = "SET"
    exprs = self.expressions(expression, flat=True)
    if exprs:
        sql += f" {exprs}"
    file_format = self.expressions(expression, key="file_format", flat=True, sep=" ")
    if file_format:
        sql += f" STAGE_FILE_FORMAT = ({file_format})"
    copy_options = self.expressions(expression, key="copy_options", flat=True, sep=" ")
    if copy_options:
        sql += f" STAGE_COPY_OPTIONS = ({copy_options})"
    tag = self.expressions(expression, key="tag", flat=True)
    if tag:
        sql += f" TAG {tag}"
    return sql
Generator converts a given syntax tree to the corresponding SQL string.
Arguments:
- pretty: Whether to format the produced SQL string. Default: False.
- identify: Determines when an identifier should be quoted. Possible values are: False (default): Never quote, except in cases where it's mandatory by the dialect. True or 'always': Always quote. 'safe': Only quote identifiers that are case insensitive.
- normalize: Whether to normalize identifiers to lowercase. Default: False.
- pad: The pad size in a formatted string. For example, this affects the indentation of a projection in a query, relative to its nesting level. Default: 2.
- indent: The indentation size in a formatted string. For example, this affects the indentation of subqueries and filters under a WHERE clause. Default: 2.
- normalize_functions: How to normalize function names. Possible values are: "upper" or True (default): Convert names to uppercase. "lower": Convert names to lowercase. False: Disables function name normalization.
- unsupported_level: Determines the generator's behavior when it encounters unsupported expressions. Default: ErrorLevel.WARN.
- max_unsupported: Maximum number of unsupported messages to include in a raised UnsupportedError. This is only relevant if unsupported_level is ErrorLevel.RAISE. Default: 3.
- leading_comma: Whether the comma is leading or trailing in select expressions. This is only relevant when generating in pretty mode. Default: False
- max_text_width: The max number of characters in a segment before creating new lines in pretty mode. The default is on the smaller end because the length only represents a segment and not the true line length. Default: 80
- comments: Whether to preserve comments in the output SQL code. Default: True
def datatype_sql(self, expression: exp.DataType) -> str:
    """Collapse struct types with typed fields to a bare OBJECT.

    The correct Snowflake syntax is OBJECT [ (<key> <value_type [NOT NULL] [, ...]) ],
    so any struct whose fields carry explicit DataType nodes is emitted as OBJECT.
    """
    fields = expression.expressions
    is_typed_struct = (
        bool(fields)
        and expression.is_type(*exp.DataType.STRUCT_TYPES)
        and any(isinstance(field, exp.DataType) for field in fields)
    )
    if is_typed_struct:
        return "OBJECT"
    return super().datatype_sql(expression)
def timestampfromparts_sql(self, expression: exp.TimestampFromParts) -> str:
    """Render TIMESTAMP_FROM_PARTS, folding any milliseconds into nanoseconds.

    Snowflake's TIMESTAMP_FROM_PARTS has no milliseconds slot, so a `milli`
    argument is converted (x * 1e6) and moved into the `nano` argument.
    """
    milli = expression.args.get("milli")
    if milli is not None:
        expression.set("nano", milli.pop() * exp.Literal.number(1000000))
    return rename_func("TIMESTAMP_FROM_PARTS")(self, expression)
def trycast_sql(self, expression: exp.TryCast) -> str:
    """Render TRY_CAST, downgrading to CAST for non-string operands.

    Snowflake's TRY_CAST only accepts string inputs, so the operand's type is
    annotated if unknown and anything non-textual falls back to a plain CAST.
    """
    operand = expression.this

    if operand.type is None:
        from sqlglot.optimizer.annotate_types import annotate_types

        operand = annotate_types(operand)

    if not operand.is_type(*exp.DataType.TEXT_TYPES, exp.DataType.Type.UNKNOWN):
        # TRY_CAST only works for string values in Snowflake
        return self.cast_sql(expression)

    return super().trycast_sql(expression)
def unnest_sql(self, expression: exp.Unnest) -> str:
    """Transpile UNNEST to Snowflake's TABLE(FLATTEN(INPUT => ...)) construct.

    FLATTEN exposes a fixed set of output columns (seq, key, path, index,
    value, this), so a table alias with explicit column names is always
    attached to keep downstream column references resolvable.
    """
    unnest_alias = expression.args.get("alias")
    offset = expression.args.get("offset")

    columns = [
        exp.to_identifier("seq"),
        exp.to_identifier("key"),
        exp.to_identifier("path"),
        # A WITH OFFSET alias (popped so it isn't rendered twice) maps onto
        # FLATTEN's index column; otherwise the default name "index" is used.
        offset.pop() if isinstance(offset, exp.Expression) else exp.to_identifier("index"),
        # The first user-supplied alias column, if any, names the value column.
        seq_get(unnest_alias.columns if unnest_alias else [], 0)
        or exp.to_identifier("value"),
        exp.to_identifier("this"),
    ]

    if unnest_alias:
        unnest_alias.set("columns", columns)
    else:
        # No alias supplied: synthesize one ("_u") carrying the FLATTEN columns.
        unnest_alias = exp.TableAlias(this="_u", columns=columns)

    explode = f"TABLE(FLATTEN(INPUT => {self.sql(expression.expressions[0])}))"
    alias = self.sql(unnest_alias)
    alias = f" AS {alias}" if alias else ""
    return f"{explode}{alias}"
def show_sql(self, expression: exp.Show) -> str:
    """Render a Snowflake SHOW command with its optional clauses."""
    terse = "TERSE " if expression.args.get("terse") else ""
    history = " HISTORY" if expression.args.get("history") else ""

    like_sql = self.sql(expression, "like")
    like = f" LIKE {like_sql}" if like_sql else ""

    scope_sql = self.sql(expression, "scope")
    scope = f" {scope_sql}" if scope_sql else ""

    kind_sql = self.sql(expression, "scope_kind")
    scope_kind = f" IN {kind_sql}" if kind_sql else ""

    prefix_sql = self.sql(expression, "starts_with")
    starts_with = f" STARTS WITH {prefix_sql}" if prefix_sql else ""

    limit = self.sql(expression, "limit")

    from_sql = self.sql(expression, "from")
    from_ = f" FROM {from_sql}" if from_sql else ""

    # NOTE: the IN <scope_kind> clause precedes the bare scope in the output.
    return f"SHOW {terse}{expression.name}{history}{like}{scope_kind}{scope}{starts_with}{limit}{from_}"
def regexpextract_sql(self, expression: exp.RegexpExtract) -> str:
    """Render REGEXP_SUBSTR, backfilling Snowflake's default argument values.

    Snowflake's argument order is (subject, pattern, position, occurrence,
    parameters, group). Other dialects don't support all of these, so each
    missing argument that precedes a supplied one is filled with its default
    ('c' for parameters, 1 for occurrence/position) to keep the positional
    arguments aligned; trailing missing arguments stay None and are dropped.
    """
    group = expression.args.get("group")
    parameters = expression.args.get("parameters") or (group and exp.Literal.string("c"))
    occurrence = expression.args.get("occurrence") or (parameters and exp.Literal.number(1))
    position = expression.args.get("position") or (occurrence and exp.Literal.number(1))

    return self.func(
        "REGEXP_SUBSTR",
        expression.this,
        expression.expression,
        position,
        occurrence,
        parameters,
        group,
    )
def describe_sql(self, expression: exp.Describe) -> str:
    """Render DESCRIBE, defaulting the object kind to TABLE when unknown.

    The original guarded the kind with `if kind_value else ""`, but the
    `or "TABLE"` fallback makes it always truthy, so that branch was
    unreachable and has been removed.
    """
    kind = expression.args.get("kind") or "TABLE"
    this = f" {self.sql(expression, 'this')}"
    expressions = self.expressions(expression, flat=True)
    expressions = f" {expressions}" if expressions else ""
    return f"DESCRIBE {kind}{this}{expressions}"
def generatedasidentitycolumnconstraint_sql(
    self, expression: exp.GeneratedAsIdentityColumnConstraint
) -> str:
    """Identity columns are spelled AUTOINCREMENT [START n] [INCREMENT n]."""
    parts = ["AUTOINCREMENT"]
    start = expression.args.get("start")
    if start:
        parts.append(f" START {start}")
    increment = expression.args.get("increment")
    if increment:
        parts.append(f" INCREMENT {increment}")
    return "".join(parts)
def struct_sql(self, expression: exp.Struct) -> str:
    """Transpile STRUCT literals into OBJECT_CONSTRUCT(key, value, ...).

    Named fields (PropertyEQ) contribute their name as a string key;
    unnamed fields get synthetic positional "_<i>" keys.
    """
    interleaved = []
    for index, field in enumerate(expression.expressions):
        if isinstance(field, exp.PropertyEQ):
            if isinstance(field.this, exp.Identifier):
                interleaved.append(exp.Literal.string(field.name))
            else:
                interleaved.append(field.this)
            interleaved.append(field.expression)
        else:
            interleaved.append(exp.Literal.string(f"_{index}"))
            interleaved.append(field)

    return self.func("OBJECT_CONSTRUCT", *interleaved)
def approxquantile_sql(self, expression: exp.ApproxQuantile) -> str:
    """Render APPROX_PERCENTILE; Snowflake takes only the value and the quantile."""
    if expression.args.get("weight") or expression.args.get("accuracy"):
        self.unsupported(
            "APPROX_PERCENTILE with weight and/or accuracy arguments are not supported in Snowflake"
        )

    quantile = expression.args.get("quantile")
    return self.func("APPROX_PERCENTILE", expression.this, quantile)
def alterset_sql(self, expression: exp.AlterSet) -> str:
    """Render ALTER ... SET with optional stage file format, copy options and tags."""
    sql = "SET"

    exprs = self.expressions(expression, flat=True)
    if exprs:
        sql += f" {exprs}"

    file_format = self.expressions(expression, key="file_format", flat=True, sep=" ")
    if file_format:
        sql += f" STAGE_FILE_FORMAT = ({file_format})"

    copy_options = self.expressions(expression, key="copy_options", flat=True, sep=" ")
    if copy_options:
        sql += f" STAGE_COPY_OPTIONS = ({copy_options})"

    tag = self.expressions(expression, key="tag", flat=True)
    if tag:
        sql += f" TAG {tag}"

    return sql
Inherited Members
- sqlglot.generator.Generator
- Generator
- NULL_ORDERING_SUPPORTED
- IGNORE_NULLS_IN_FUNC
- LOCKING_READS_SUPPORTED
- EXPLICIT_SET_OP
- WRAP_DERIVED_VALUES
- CREATE_FUNCTION_RETURN_AS
- INTERVAL_ALLOWS_PLURAL_FORM
- LIMIT_FETCH
- RENAME_TABLE_WITH_DB
- GROUPINGS_SEP
- INDEX_ON
- QUERY_HINT_SEP
- IS_BOOL_ALLOWED
- DUPLICATE_KEY_UPDATE_WITH_SET
- LIMIT_IS_TOP
- RETURNING_END
- EXTRACT_ALLOWS_QUOTES
- TZ_TO_WITH_TIME_ZONE
- NVL2_SUPPORTED
- VALUES_AS_TABLE
- ALTER_TABLE_INCLUDE_COLUMN_KEYWORD
- UNNEST_WITH_ORDINALITY
- SEMI_ANTI_JOIN_WITH_SIDE
- COMPUTED_COLUMN_WITH_TYPE
- TABLESAMPLE_REQUIRES_PARENS
- TABLESAMPLE_SIZE_IS_ROWS
- TABLESAMPLE_KEYWORDS
- TABLESAMPLE_WITH_METHOD
- TABLESAMPLE_SEED_KEYWORD
- DATA_TYPE_SPECIFIERS_ALLOWED
- ENSURE_BOOLS
- CTE_RECURSIVE_KEYWORD_REQUIRED
- SUPPORTS_SINGLE_ARG_CONCAT
- LAST_DAY_SUPPORTS_DATE_PART
- SUPPORTS_TABLE_ALIAS_COLUMNS
- UNPIVOT_ALIASES_ARE_IDENTIFIERS
- SUPPORTS_SELECT_INTO
- SUPPORTS_UNLOGGED_TABLES
- SUPPORTS_CREATE_TABLE_LIKE
- LIKE_PROPERTY_INSIDE_SCHEMA
- MULTI_ARG_DISTINCT
- JSON_TYPE_REQUIRED_FOR_EXTRACTION
- JSON_PATH_BRACKETED_KEY_SUPPORTED
- JSON_PATH_SINGLE_QUOTE_ESCAPE
- CAN_IMPLEMENT_ARRAY_ANY
- SUPPORTS_TO_NUMBER
- SET_OP_MODIFIERS
- COPY_HAS_INTO_KEYWORD
- HEX_FUNC
- WITH_PROPERTIES_PREFIX
- QUOTE_JSON_PATH
- PAD_FILL_PATTERN_IS_REQUIRED
- PARSE_JSON_NAME
- TIME_PART_SINGULARS
- TOKEN_MAPPING
- NAMED_PLACEHOLDER_TOKEN
- RESERVED_KEYWORDS
- WITH_SEPARATED_COMMENTS
- EXCLUDE_COMMENTS
- UNWRAPPED_INTERVAL_VALUES
- PARAMETERIZABLE_TEXT_TYPES
- EXPRESSIONS_WITHOUT_NESTED_CTES
- SENTINEL_LINE_BREAK
- pretty
- identify
- normalize
- pad
- unsupported_level
- max_unsupported
- leading_comma
- max_text_width
- comments
- dialect
- normalize_functions
- unsupported_messages
- generate
- preprocess
- unsupported
- sep
- seg
- pad_comment
- maybe_comment
- wrap
- no_identify
- normalize_func
- indent
- sql
- uncache_sql
- cache_sql
- characterset_sql
- column_parts
- column_sql
- columnposition_sql
- columndef_sql
- columnconstraint_sql
- computedcolumnconstraint_sql
- autoincrementcolumnconstraint_sql
- compresscolumnconstraint_sql
- generatedasrowcolumnconstraint_sql
- periodforsystemtimeconstraint_sql
- notnullcolumnconstraint_sql
- transformcolumnconstraint_sql
- primarykeycolumnconstraint_sql
- uniquecolumnconstraint_sql
- createable_sql
- create_sql
- sequenceproperties_sql
- clone_sql
- heredoc_sql
- prepend_ctes
- with_sql
- cte_sql
- tablealias_sql
- bitstring_sql
- hexstring_sql
- bytestring_sql
- unicodestring_sql
- rawstring_sql
- datatypeparam_sql
- directory_sql
- delete_sql
- drop_sql
- except_sql
- fetch_sql
- filter_sql
- hint_sql
- indexparameters_sql
- index_sql
- identifier_sql
- hex_sql
- lowerhex_sql
- inputoutputformat_sql
- national_sql
- partition_sql
- properties_sql
- root_properties
- properties
- locate_properties
- property_name
- property_sql
- likeproperty_sql
- fallbackproperty_sql
- journalproperty_sql
- freespaceproperty_sql
- checksumproperty_sql
- mergeblockratioproperty_sql
- datablocksizeproperty_sql
- blockcompressionproperty_sql
- isolatedloadingproperty_sql
- partitionboundspec_sql
- partitionedofproperty_sql
- lockingproperty_sql
- withdataproperty_sql
- withsystemversioningproperty_sql
- insert_sql
- intersect_sql
- introducer_sql
- kill_sql
- pseudotype_sql
- objectidentifier_sql
- onconflict_sql
- returning_sql
- rowformatdelimitedproperty_sql
- withtablehint_sql
- indextablehint_sql
- historicaldata_sql
- table_parts
- table_sql
- tablesample_sql
- pivot_sql
- version_sql
- tuple_sql
- update_sql
- var_sql
- into_sql
- from_sql
- group_sql
- having_sql
- connect_sql
- prior_sql
- join_sql
- lambda_sql
- lateral_op
- lateral_sql
- limit_sql
- offset_sql
- setitem_sql
- set_sql
- pragma_sql
- lock_sql
- literal_sql
- escape_str
- loaddata_sql
- null_sql
- boolean_sql
- order_sql
- withfill_sql
- distribute_sql
- sort_sql
- ordered_sql
- matchrecognizemeasure_sql
- matchrecognize_sql
- query_modifiers
- options_modifier
- queryoption_sql
- offset_limit_modifiers
- after_limit_modifiers
- select_sql
- schema_sql
- schema_columns_sql
- star_sql
- parameter_sql
- sessionparameter_sql
- placeholder_sql
- subquery_sql
- qualify_sql
- set_operations
- union_sql
- union_op
- prewhere_sql
- where_sql
- window_sql
- partition_by_sql
- windowspec_sql
- withingroup_sql
- between_sql
- bracket_offset_expressions
- bracket_sql
- all_sql
- any_sql
- exists_sql
- case_sql
- constraint_sql
- nextvaluefor_sql
- extract_sql
- trim_sql
- convert_concat_args
- concat_sql
- concatws_sql
- check_sql
- foreignkey_sql
- primarykey_sql
- if_sql
- matchagainst_sql
- jsonkeyvalue_sql
- jsonpath_sql
- json_path_part
- formatjson_sql
- jsonobject_sql
- jsonobjectagg_sql
- jsonarray_sql
- jsonarrayagg_sql
- jsoncolumndef_sql
- jsonschema_sql
- jsontable_sql
- openjsoncolumndef_sql
- openjson_sql
- in_sql
- in_unnest_op
- interval_sql
- return_sql
- reference_sql
- anonymous_sql
- paren_sql
- neg_sql
- not_sql
- alias_sql
- pivotalias_sql
- aliases_sql
- atindex_sql
- attimezone_sql
- fromtimezone_sql
- add_sql
- and_sql
- or_sql
- xor_sql
- connector_sql
- bitwiseand_sql
- bitwiseleftshift_sql
- bitwisenot_sql
- bitwiseor_sql
- bitwiserightshift_sql
- bitwisexor_sql
- cast_sql
- currentdate_sql
- collate_sql
- command_sql
- comment_sql
- mergetreettlaction_sql
- mergetreettl_sql
- transaction_sql
- commit_sql
- rollback_sql
- altercolumn_sql
- alterdiststyle_sql
- altersortkey_sql
- renametable_sql
- renamecolumn_sql
- alter_sql
- add_column_sql
- droppartition_sql
- addconstraint_sql
- distinct_sql
- ignorenulls_sql
- respectnulls_sql
- havingmax_sql
- intdiv_sql
- dpipe_sql
- div_sql
- overlaps_sql
- distance_sql
- dot_sql
- eq_sql
- propertyeq_sql
- escape_sql
- glob_sql
- gt_sql
- gte_sql
- ilike_sql
- ilikeany_sql
- is_sql
- like_sql
- likeany_sql
- similarto_sql
- lt_sql
- lte_sql
- mod_sql
- mul_sql
- neq_sql
- nullsafeeq_sql
- nullsafeneq_sql
- slice_sql
- sub_sql
- try_sql
- use_sql
- binary
- function_fallback_sql
- func
- format_args
- too_wide
- format_time
- expressions
- op_expressions
- naked_property
- tag_sql
- token_sql
- userdefinedfunction_sql
- joinhint_sql
- kwarg_sql
- when_sql
- merge_sql
- tochar_sql
- dictproperty_sql
- dictrange_sql
- dictsubproperty_sql
- oncluster_sql
- clusteredbyproperty_sql
- anyvalue_sql
- querytransform_sql
- indexconstraintoption_sql
- checkcolumnconstraint_sql
- indexcolumnconstraint_sql
- nvl2_sql
- comprehension_sql
- columnprefix_sql
- opclass_sql
- predict_sql
- forin_sql
- refresh_sql
- operator_sql
- toarray_sql
- tsordstotime_sql
- tsordstotimestamp_sql
- tsordstodate_sql
- unixdate_sql
- lastday_sql
- dateadd_sql
- arrayany_sql
- partitionrange_sql
- truncatetable_sql
- convert_sql
- copyparameter_sql
- credentials_sql
- copy_sql
- semicolon_sql
- datadeletionproperty_sql
- maskingpolicycolumnconstraint_sql
- gapfill_sql
- scope_resolution
- scoperesolution_sql
- parsejson_sql
- rand_sql
- changes_sql
- pad_sql
- summarize_sql
- explodinggenerateseries_sql
- arrayconcat_sql
- converttimezone_sql