sqlglot.dialects.snowflake
from __future__ import annotations

import typing as t

from sqlglot import exp, generator, jsonpath, parser, tokens, transforms
from sqlglot.dialects.dialect import (
    Dialect,
    NormalizationStrategy,
    annotate_with_type_lambda,
    build_timetostr_or_tochar,
    binary_from_function,
    build_default_decimal_type,
    build_replace_with_optional_replacement,
    build_timestamp_from_parts,
    date_delta_sql,
    date_trunc_to_time,
    datestrtodate_sql,
    build_formatted_time,
    if_sql,
    inline_array_sql,
    max_or_greatest,
    min_or_least,
    rename_func,
    timestamptrunc_sql,
    timestrtotime_sql,
    var_map_sql,
    map_date_part,
    no_timestamp_sql,
    strposition_sql,
    timestampdiff_sql,
    no_make_interval_sql,
    groupconcat_sql,
)
from sqlglot.generator import unsupported_args
from sqlglot.helper import find_new_name, flatten, is_float, is_int, seq_get
from sqlglot.optimizer.annotate_types import TypeAnnotator
from sqlglot.optimizer.scope import build_scope, find_all_in_scope
from sqlglot.tokens import TokenType

if t.TYPE_CHECKING:
    from sqlglot._typing import E, B


# from https://docs.snowflake.com/en/sql-reference/functions/to_timestamp.html
def _build_datetime(
    name: str, kind: exp.DataType.Type, safe: bool = False
) -> t.Callable[[t.List], exp.Func]:
    def _builder(args: t.List) -> exp.Func:
        value = seq_get(args, 0)
        scale_or_fmt = seq_get(args, 1)

        int_value = value is not None and is_int(value.name)
        int_scale_or_fmt = scale_or_fmt is not None and scale_or_fmt.is_int

        if isinstance(value, exp.Literal) or (value and scale_or_fmt):
            # Converts calls like `TO_TIME('01:02:03')` into casts
            if len(args) == 1 and value.is_string and not int_value:
                return (
                    exp.TryCast(this=value, to=exp.DataType.build(kind), requires_string=True)
                    if safe
                    else exp.cast(value, kind)
                )

            # Handles `TO_TIMESTAMP(str, fmt)` and `TO_TIMESTAMP(num, scale)` as special
            # cases so we can transpile them, since they're relatively common
            if kind == exp.DataType.Type.TIMESTAMP:
                if not safe and (int_value or int_scale_or_fmt):
                    # TRY_TO_TIMESTAMP('integer') is not parsed into exp.UnixToTime as
                    # it's not easily transpilable
                    return exp.UnixToTime(this=value, scale=scale_or_fmt)
                if not int_scale_or_fmt and not is_float(value.name):
                    expr = build_formatted_time(exp.StrToTime, "snowflake")(args)
                    expr.set("safe", safe)
                    return expr

            if kind in (exp.DataType.Type.DATE, exp.DataType.Type.TIME) and not int_value:
                klass = exp.TsOrDsToDate if kind == exp.DataType.Type.DATE else exp.TsOrDsToTime
                formatted_exp = build_formatted_time(klass, "snowflake")(args)
                formatted_exp.set("safe", safe)
                return formatted_exp

        return exp.Anonymous(this=name, expressions=args)

    return _builder


def _build_object_construct(args: t.List) -> t.Union[exp.StarMap, exp.Struct]:
    expression = parser.build_var_map(args)

    if isinstance(expression, exp.StarMap):
        return expression

    return exp.Struct(
        expressions=[
            exp.PropertyEQ(this=k, expression=v) for k, v in zip(expression.keys, expression.values)
        ]
    )


def _build_datediff(args: t.List) -> exp.DateDiff:
    return exp.DateDiff(
        this=seq_get(args, 2), expression=seq_get(args, 1), unit=map_date_part(seq_get(args, 0))
    )


def _build_date_time_add(expr_type: t.Type[E]) -> t.Callable[[t.List], E]:
    def _builder(args: t.List) -> E:
        return expr_type(
            this=seq_get(args, 2),
            expression=seq_get(args, 1),
            unit=map_date_part(seq_get(args, 0)),
        )

    return _builder
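

# For illustration (not part of the original source): Snowflake's date/time arithmetic puts
# the unit first, e.g. DATEADD(DAY, 3, col) and DATEDIFF(DAY, start_ts, end_ts), which is why
# the builders above read the unit from args[0] and the base expression from args[2].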
def _build_bitwise(expr_type: t.Type[B], name: str) -> t.Callable[[t.List], B | exp.Anonymous]:
    def _builder(args: t.List) -> B | exp.Anonymous:
        if len(args) == 3:
            return exp.Anonymous(this=name, expressions=args)

        return binary_from_function(expr_type)(args)

    return _builder


# https://docs.snowflake.com/en/sql-reference/functions/div0
def _build_if_from_div0(args: t.List) -> exp.If:
    lhs = exp._wrap(seq_get(args, 0), exp.Binary)
    rhs = exp._wrap(seq_get(args, 1), exp.Binary)

    cond = exp.EQ(this=rhs, expression=exp.Literal.number(0)).and_(
        exp.Is(this=lhs, expression=exp.null()).not_()
    )
    true = exp.Literal.number(0)
    false = exp.Div(this=lhs, expression=rhs)
    return exp.If(this=cond, true=true, false=false)
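

# For illustration (not part of the original source): the builder above expands, e.g.,
# DIV0(a, b) into IF(b = 0 AND NOT a IS NULL, 0, a / b), which Snowflake's generator then
# renders with IFF, preserving NULL propagation and divide-by-zero semantics.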
# https://docs.snowflake.com/en/sql-reference/functions/zeroifnull
def _build_if_from_zeroifnull(args: t.List) -> exp.If:
    cond = exp.Is(this=seq_get(args, 0), expression=exp.Null())
    return exp.If(this=cond, true=exp.Literal.number(0), false=seq_get(args, 0))


# https://docs.snowflake.com/en/sql-reference/functions/nullifzero
def _build_if_from_nullifzero(args: t.List) -> exp.If:
    cond = exp.EQ(this=seq_get(args, 0), expression=exp.Literal.number(0))
    return exp.If(this=cond, true=exp.Null(), false=seq_get(args, 0))


def _regexpilike_sql(self: Snowflake.Generator, expression: exp.RegexpILike) -> str:
    flag = expression.text("flag")

    if "i" not in flag:
        flag += "i"

    return self.func(
        "REGEXP_LIKE", expression.this, expression.expression, exp.Literal.string(flag)
    )


def _build_regexp_replace(args: t.List) -> exp.RegexpReplace:
    regexp_replace = exp.RegexpReplace.from_arg_list(args)

    if not regexp_replace.args.get("replacement"):
        regexp_replace.set("replacement", exp.Literal.string(""))

    return regexp_replace


def _show_parser(*args: t.Any, **kwargs: t.Any) -> t.Callable[[Snowflake.Parser], exp.Show]:
    def _parse(self: Snowflake.Parser) -> exp.Show:
        return self._parse_show_snowflake(*args, **kwargs)

    return _parse


def _date_trunc_to_time(args: t.List) -> exp.DateTrunc | exp.TimestampTrunc:
    trunc = date_trunc_to_time(args)
    trunc.set("unit", map_date_part(trunc.args["unit"]))
    return trunc


def _unqualify_pivot_columns(expression: exp.Expression) -> exp.Expression:
    """
    Snowflake doesn't allow columns referenced in UNPIVOT to be qualified,
    so we need to unqualify them. Same goes for ANY ORDER BY <column>.

    Example:
        >>> from sqlglot import parse_one
        >>> expr = parse_one("SELECT * FROM m_sales UNPIVOT(sales FOR month IN (m_sales.jan, feb, mar, april))")
        >>> print(_unqualify_pivot_columns(expr).sql(dialect="snowflake"))
        SELECT * FROM m_sales UNPIVOT(sales FOR month IN (jan, feb, mar, april))
    """
    if isinstance(expression, exp.Pivot):
        if expression.unpivot:
            expression = transforms.unqualify_columns(expression)
        else:
            for field in expression.fields:
                field_expr = seq_get(field.expressions if field else [], 0)

                if isinstance(field_expr, exp.PivotAny):
                    unqualified_field_expr = transforms.unqualify_columns(field_expr)
                    t.cast(exp.Expression, field).set("expressions", unqualified_field_expr, 0)

    return expression


def _flatten_structured_types_unless_iceberg(expression: exp.Expression) -> exp.Expression:
    assert isinstance(expression, exp.Create)

    def _flatten_structured_type(expression: exp.DataType) -> exp.DataType:
        if expression.this in exp.DataType.NESTED_TYPES:
            expression.set("expressions", None)
        return expression

    props = expression.args.get("properties")
    if isinstance(expression.this, exp.Schema) and not (props and props.find(exp.IcebergProperty)):
        for schema_expression in expression.this.expressions:
            if isinstance(schema_expression, exp.ColumnDef):
                column_type = schema_expression.kind
                if isinstance(column_type, exp.DataType):
                    column_type.transform(_flatten_structured_type, copy=False)

    return expression


def _unnest_generate_date_array(unnest: exp.Unnest) -> None:
    generate_date_array = unnest.expressions[0]
    start = generate_date_array.args.get("start")
    end = generate_date_array.args.get("end")
    step = generate_date_array.args.get("step")

    if not start or not end or not isinstance(step, exp.Interval) or step.name != "1":
        return

    unit = step.args.get("unit")

    unnest_alias = unnest.args.get("alias")
    if unnest_alias:
        unnest_alias = unnest_alias.copy()
        sequence_value_name = seq_get(unnest_alias.columns, 0) or "value"
    else:
        sequence_value_name = "value"

    # We'll add the next sequence value to the starting date and project the result
    date_add = _build_date_time_add(exp.DateAdd)(
        [unit, exp.cast(sequence_value_name, "int"), exp.cast(start, "date")]
    )

    # We use DATEDIFF to compute the number of sequence values needed
    number_sequence = Snowflake.Parser.FUNCTIONS["ARRAY_GENERATE_RANGE"](
        [exp.Literal.number(0), _build_datediff([unit, start, end]) + 1]
    )

    unnest.set("expressions", [number_sequence])

    unnest_parent = unnest.parent
    if isinstance(unnest_parent, exp.Join):
        select = unnest_parent.parent
        if isinstance(select, exp.Select):
            replace_column_name = (
                sequence_value_name
                if isinstance(sequence_value_name, str)
                else sequence_value_name.name
            )

            scope = build_scope(select)
            if scope:
                for column in scope.columns:
                    if column.name.lower() == replace_column_name.lower():
                        column.replace(
                            date_add.as_(replace_column_name)
                            if isinstance(column.parent, exp.Select)
                            else date_add
                        )

        lateral = exp.Lateral(this=unnest_parent.this.pop())
        unnest_parent.replace(exp.Join(this=lateral))
    else:
        unnest.replace(
            exp.select(date_add.as_(sequence_value_name))
            .from_(unnest.copy())
            .subquery(unnest_alias)
        )
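

# For illustration (not part of the original source): together, the helpers above let a
# BigQuery-style UNNEST(GENERATE_DATE_ARRAY(start, end, INTERVAL 1 DAY)) be rewritten for
# Snowflake as, roughly, unnesting ARRAY_GENERATE_RANGE(0, DATEDIFF(DAY, start, end) + 1)
# and projecting DATEADD(DAY, value, CAST(start AS DATE)).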
def _transform_generate_date_array(expression: exp.Expression) -> exp.Expression:
    if isinstance(expression, exp.Select):
        for generate_date_array in expression.find_all(exp.GenerateDateArray):
            parent = generate_date_array.parent

            # If GENERATE_DATE_ARRAY is used directly as an array (e.g. passed into ARRAY_LENGTH), the transformed Snowflake
            # query is the following (it'll be unnested properly on the next iteration due to copy):
            # SELECT ref(GENERATE_DATE_ARRAY(...)) -> SELECT ref((SELECT ARRAY_AGG(*) FROM UNNEST(GENERATE_DATE_ARRAY(...))))
            if not isinstance(parent, exp.Unnest):
                unnest = exp.Unnest(expressions=[generate_date_array.copy()])
                generate_date_array.replace(
                    exp.select(exp.ArrayAgg(this=exp.Star())).from_(unnest).subquery()
                )

            if (
                isinstance(parent, exp.Unnest)
                and isinstance(parent.parent, (exp.From, exp.Join))
                and len(parent.expressions) == 1
            ):
                _unnest_generate_date_array(parent)

    return expression


def _build_regexp_extract(expr_type: t.Type[E]) -> t.Callable[[t.List], E]:
    def _builder(args: t.List) -> E:
        return expr_type(
            this=seq_get(args, 0),
            expression=seq_get(args, 1),
            position=seq_get(args, 2),
            occurrence=seq_get(args, 3),
            parameters=seq_get(args, 4),
            group=seq_get(args, 5) or exp.Literal.number(0),
        )

    return _builder


def _regexpextract_sql(self, expression: exp.RegexpExtract | exp.RegexpExtractAll) -> str:
    # Other dialects don't support all of the following parameters, so we need to
    # generate default values as necessary to ensure the transpilation is correct
    group = expression.args.get("group")

    # To avoid generating all these default values, we set group to None if
    # it's 0 (also the default value), which doesn't trigger the following chain
    if group and group.name == "0":
        group = None

    parameters = expression.args.get("parameters") or (group and exp.Literal.string("c"))
    occurrence = expression.args.get("occurrence") or (parameters and exp.Literal.number(1))
    position = expression.args.get("position") or (occurrence and exp.Literal.number(1))

    return self.func(
        "REGEXP_SUBSTR" if isinstance(expression, exp.RegexpExtract) else "REGEXP_EXTRACT_ALL",
        expression.this,
        expression.expression,
        position,
        occurrence,
        parameters,
        group,
    )


def _json_extract_value_array_sql(
    self: Snowflake.Generator, expression: exp.JSONValueArray | exp.JSONExtractArray
) -> str:
    json_extract = exp.JSONExtract(this=expression.this, expression=expression.expression)
    ident = exp.to_identifier("x")

    if isinstance(expression, exp.JSONValueArray):
        this: exp.Expression = exp.cast(ident, to=exp.DataType.Type.VARCHAR)
    else:
        this = exp.ParseJSON(this=f"TO_JSON({ident})")

    transform_lambda = exp.Lambda(expressions=[ident], this=this)

    return self.func("TRANSFORM", json_extract, transform_lambda)
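

# For illustration (not part of the original source): the helper above generates, e.g.,
# TRANSFORM(GET_PATH(col, 'path'), x -> CAST(x AS VARCHAR)) for exp.JSONValueArray, and a
# PARSE_JSON/TO_JSON round trip inside the lambda for exp.JSONExtractArray.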
def _qualify_unnested_columns(expression: exp.Expression) -> exp.Expression:
    if isinstance(expression, exp.Select):
        scope = build_scope(expression)
        if not scope:
            return expression

        unnests = list(scope.find_all(exp.Unnest))

        if not unnests:
            return expression

        taken_source_names = set(scope.sources)
        column_source: t.Dict[str, exp.Identifier] = {}
        unnest_to_identifier: t.Dict[exp.Unnest, exp.Identifier] = {}

        unnest_identifier: t.Optional[exp.Identifier] = None
        orig_expression = expression.copy()

        for unnest in unnests:
            if not isinstance(unnest.parent, (exp.From, exp.Join)):
                continue

            # Try to infer column names produced by an unnest operator. This is only possible
            # when we can peek into the (statically known) contents of the unnested value.
            unnest_columns: t.Set[str] = set()
            for unnest_expr in unnest.expressions:
                if not isinstance(unnest_expr, exp.Array):
                    continue

                for array_expr in unnest_expr.expressions:
                    if not (
                        isinstance(array_expr, exp.Struct)
                        and array_expr.expressions
                        and all(
                            isinstance(struct_expr, exp.PropertyEQ)
                            for struct_expr in array_expr.expressions
                        )
                    ):
                        continue

                    unnest_columns.update(
                        struct_expr.this.name.lower() for struct_expr in array_expr.expressions
                    )
                    break

                if unnest_columns:
                    break

            unnest_alias = unnest.args.get("alias")
            if not unnest_alias:
                alias_name = find_new_name(taken_source_names, "value")
                taken_source_names.add(alias_name)

                # Produce a `TableAlias` AST similar to what is produced for BigQuery. This
                # will be corrected later, when we generate SQL for the `Unnest` AST node.
                aliased_unnest = exp.alias_(unnest, None, table=[alias_name])
                scope.replace(unnest, aliased_unnest)

                unnest_identifier = aliased_unnest.args["alias"].columns[0]
            else:
                alias_columns = getattr(unnest_alias, "columns", [])
                unnest_identifier = unnest_alias.this or seq_get(alias_columns, 0)

            if not isinstance(unnest_identifier, exp.Identifier):
                return orig_expression

            unnest_to_identifier[unnest] = unnest_identifier
            column_source.update({c.lower(): unnest_identifier for c in unnest_columns})

        for column in scope.columns:
            if column.table:
                continue

            table = column_source.get(column.name.lower())
            if (
                unnest_identifier
                and not table
                and len(scope.sources) == 1
                and column.name.lower() != unnest_identifier.name.lower()
            ):
                unnest_ancestor = column.find_ancestor(exp.Unnest, exp.Select)
                ancestor_identifier = unnest_to_identifier.get(unnest_ancestor)
                if (
                    isinstance(unnest_ancestor, exp.Unnest)
                    and ancestor_identifier
                    and ancestor_identifier.name.lower() == unnest_identifier.name.lower()
                ):
                    continue

                table = unnest_identifier

            column.set("table", table and table.copy())

    return expression
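

# For illustration (not part of the original source): the transform below rewrites columns that
# reference a BigQuery column-only UNNEST alias, e.g. SELECT x.a FROM UNNEST(...) AS x becomes
# SELECT x['a'] ..., since the flattened value is a VARIANT that needs subscript access.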
def _eliminate_dot_variant_lookup(expression: exp.Expression) -> exp.Expression:
    if isinstance(expression, exp.Select):
        # This transformation is used to facilitate transpilation of BigQuery `UNNEST` operations
        # to Snowflake. It should not affect roundtrip because `Unnest` nodes cannot be produced
        # by Snowflake's parser.
        #
        # Additionally, at the time of writing this, BigQuery is the only dialect that produces a
        # `TableAlias` node that only fills `columns` and not `this`, due to `UNNEST_COLUMN_ONLY`.
        unnest_aliases = set()
        for unnest in find_all_in_scope(expression, exp.Unnest):
            unnest_alias = unnest.args.get("alias")
            if (
                isinstance(unnest_alias, exp.TableAlias)
                and not unnest_alias.this
                and len(unnest_alias.columns) == 1
            ):
                unnest_aliases.add(unnest_alias.columns[0].name)

        if unnest_aliases:
            for c in find_all_in_scope(expression, exp.Column):
                if c.table in unnest_aliases:
                    bracket_lhs = c.args["table"]
                    bracket_rhs = exp.Literal.string(c.name)
                    bracket = exp.Bracket(this=bracket_lhs, expressions=[bracket_rhs])

                    if c.parent is expression:
                        # Retain column projection names by using aliases
                        c.replace(exp.alias_(bracket, c.this.copy()))
                    else:
                        c.replace(bracket)

    return expression


def _annotate_reverse(self: TypeAnnotator, expression: exp.Reverse) -> exp.Reverse:
    expression = self._annotate_by_args(expression, "this")
    if expression.is_type(exp.DataType.Type.NULL):
        # Snowflake treats REVERSE(NULL) as a VARCHAR
        self._set_type(expression, exp.DataType.Type.VARCHAR)

    return expression


class Snowflake(Dialect):
    # https://docs.snowflake.com/en/sql-reference/identifiers-syntax
    NORMALIZATION_STRATEGY = NormalizationStrategy.UPPERCASE
    NULL_ORDERING = "nulls_are_large"
    TIME_FORMAT = "'YYYY-MM-DD HH24:MI:SS'"
    SUPPORTS_USER_DEFINED_TYPES = False
    SUPPORTS_SEMI_ANTI_JOIN = False
    PREFER_CTE_ALIAS_COLUMN = True
    TABLESAMPLE_SIZE_IS_PERCENT = True
    COPY_PARAMS_ARE_CSV = False
    ARRAY_AGG_INCLUDES_NULLS = None
    ALTER_TABLE_ADD_REQUIRED_FOR_EACH_COLUMN = False
    TRY_CAST_REQUIRES_STRING = True

    TYPE_TO_EXPRESSIONS = {
        **Dialect.TYPE_TO_EXPRESSIONS,
        exp.DataType.Type.INT: {
            *Dialect.TYPE_TO_EXPRESSIONS[exp.DataType.Type.INT],
            exp.Length,
        },
        exp.DataType.Type.VARCHAR: {
            *Dialect.TYPE_TO_EXPRESSIONS[exp.DataType.Type.VARCHAR],
            exp.MD5,
            exp.AIAgg,
            exp.AISummarizeAgg,
            exp.RegexpExtract,
            exp.RegexpReplace,
            exp.Repeat,
            exp.Replace,
            exp.SHA,
            exp.SHA2,
            exp.Space,
            exp.Uuid,
        },
        exp.DataType.Type.BINARY: {
            *Dialect.TYPE_TO_EXPRESSIONS[exp.DataType.Type.BINARY],
            exp.MD5Digest,
            exp.SHA1Digest,
            exp.SHA2Digest,
        },
        exp.DataType.Type.BIGINT: {
            *Dialect.TYPE_TO_EXPRESSIONS[exp.DataType.Type.BIGINT],
            exp.MD5NumberLower64,
            exp.MD5NumberUpper64,
        },
        exp.DataType.Type.ARRAY: {
            exp.Split,
        },
    }

    ANNOTATORS = {
        **Dialect.ANNOTATORS,
        **{
            expr_type: annotate_with_type_lambda(data_type)
            for data_type, expressions in TYPE_TO_EXPRESSIONS.items()
            for expr_type in expressions
        },
        **{
            expr_type: lambda self, e: self._annotate_by_args(e, "this")
            for expr_type in (
                exp.Left,
                exp.Right,
                exp.Substring,
            )
        },
        exp.ConcatWs: lambda self, e: self._annotate_by_args(e, "expressions"),
        exp.Reverse: _annotate_reverse,
    }

    TIME_MAPPING = {
        "YYYY": "%Y",
        "yyyy": "%Y",
        "YY": "%y",
        "yy": "%y",
        "MMMM": "%B",
        "mmmm": "%B",
        "MON": "%b",
        "mon": "%b",
        "MM": "%m",
        "mm": "%m",
        "DD": "%d",
        "dd": "%-d",
        "DY": "%a",
        "dy": "%w",
        "HH24": "%H",
        "hh24": "%H",
        "HH12": "%I",
        "hh12": "%I",
        "MI": "%M",
        "mi": "%M",
        "SS": "%S",
        "ss": "%S",
        "FF6": "%f",
        "ff6": "%f",
    }

    DATE_PART_MAPPING = {
        **Dialect.DATE_PART_MAPPING,
        "ISOWEEK": "WEEKISO",
    }
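
    # For illustration (not part of the original source): TIME_MAPPING translates Snowflake
    # format tokens into strftime-style ones, so e.g. 'YYYY-MM-DD HH24:MI:SS' corresponds to
    # '%Y-%m-%d %H:%M:%S' when formats are transpiled across dialects.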
    def quote_identifier(self, expression: E, identify: bool = True) -> E:
        # This disables quoting DUAL in SELECT ... FROM DUAL, because Snowflake treats an
        # unquoted DUAL keyword in a special way and does not map it to a user-defined table
        if (
            isinstance(expression, exp.Identifier)
            and isinstance(expression.parent, exp.Table)
            and expression.name.lower() == "dual"
        ):
            return expression  # type: ignore

        return super().quote_identifier(expression, identify=identify)

    class JSONPathTokenizer(jsonpath.JSONPathTokenizer):
        SINGLE_TOKENS = jsonpath.JSONPathTokenizer.SINGLE_TOKENS.copy()
        SINGLE_TOKENS.pop("$")

    class Parser(parser.Parser):
        IDENTIFY_PIVOT_STRINGS = True
        DEFAULT_SAMPLING_METHOD = "BERNOULLI"
        COLON_IS_VARIANT_EXTRACT = True
        JSON_EXTRACT_REQUIRES_JSON_EXPRESSION = True

        ID_VAR_TOKENS = {
            *parser.Parser.ID_VAR_TOKENS,
            TokenType.EXCEPT,
            TokenType.MATCH_CONDITION,
        }

        TABLE_ALIAS_TOKENS = parser.Parser.TABLE_ALIAS_TOKENS | {TokenType.WINDOW}
        TABLE_ALIAS_TOKENS.discard(TokenType.MATCH_CONDITION)

        COLON_PLACEHOLDER_TOKENS = ID_VAR_TOKENS | {TokenType.NUMBER}
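
        # For illustration (not part of the original source): the FUNCTIONS overrides below
        # normalize Snowflake-specific argument orders, e.g. ARRAY_CONTAINS(value, array) is
        # parsed with its operands swapped so exp.ArrayContains keeps sqlglot's canonical shape.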
        FUNCTIONS = {
            **parser.Parser.FUNCTIONS,
            "APPROX_PERCENTILE": exp.ApproxQuantile.from_arg_list,
            "ARRAY_CONSTRUCT": lambda args: exp.Array(expressions=args),
            "ARRAY_CONTAINS": lambda args: exp.ArrayContains(
                this=seq_get(args, 1), expression=seq_get(args, 0)
            ),
            "ARRAY_GENERATE_RANGE": lambda args: exp.GenerateSeries(
                # ARRAY_GENERATE_RANGE has an exclusive end; we normalize it to be inclusive
                start=seq_get(args, 0),
                end=exp.Sub(this=seq_get(args, 1), expression=exp.Literal.number(1)),
                step=seq_get(args, 2),
            ),
            "ARRAY_SORT": exp.SortArray.from_arg_list,
            "BITXOR": _build_bitwise(exp.BitwiseXor, "BITXOR"),
            "BIT_XOR": _build_bitwise(exp.BitwiseXor, "BITXOR"),
            "BITOR": _build_bitwise(exp.BitwiseOr, "BITOR"),
            "BIT_OR": _build_bitwise(exp.BitwiseOr, "BITOR"),
            "BITSHIFTLEFT": _build_bitwise(exp.BitwiseLeftShift, "BITSHIFTLEFT"),
            "BIT_SHIFTLEFT": _build_bitwise(exp.BitwiseLeftShift, "BIT_SHIFTLEFT"),
            "BITSHIFTRIGHT": _build_bitwise(exp.BitwiseRightShift, "BITSHIFTRIGHT"),
            "BIT_SHIFTRIGHT": _build_bitwise(exp.BitwiseRightShift, "BIT_SHIFTRIGHT"),
            "BITANDAGG": exp.BitwiseAndAgg.from_arg_list,
            "BITAND_AGG": exp.BitwiseAndAgg.from_arg_list,
            "BIT_AND_AGG": exp.BitwiseAndAgg.from_arg_list,
            "BIT_ANDAGG": exp.BitwiseAndAgg.from_arg_list,
            "BITORAGG": exp.BitwiseOrAgg.from_arg_list,
            "BITOR_AGG": exp.BitwiseOrAgg.from_arg_list,
            "BIT_OR_AGG": exp.BitwiseOrAgg.from_arg_list,
            "BIT_ORAGG": exp.BitwiseOrAgg.from_arg_list,
            "BITXORAGG": exp.BitwiseXorAgg.from_arg_list,
            "BITXOR_AGG": exp.BitwiseXorAgg.from_arg_list,
            "BIT_XOR_AGG": exp.BitwiseXorAgg.from_arg_list,
            "BIT_XORAGG": exp.BitwiseXorAgg.from_arg_list,
            "BOOLXOR": _build_bitwise(exp.Xor, "BOOLXOR"),
            "DATE": _build_datetime("DATE", exp.DataType.Type.DATE),
            "DATE_TRUNC": _date_trunc_to_time,
            "DATEADD": _build_date_time_add(exp.DateAdd),
            "DATEDIFF": _build_datediff,
            "DIV0": _build_if_from_div0,
            "EDITDISTANCE": lambda args: exp.Levenshtein(
                this=seq_get(args, 0), expression=seq_get(args, 1), max_dist=seq_get(args, 2)
            ),
            "FLATTEN": exp.Explode.from_arg_list,
            "GET": exp.GetExtract.from_arg_list,
            "GET_PATH": lambda args, dialect: exp.JSONExtract(
                this=seq_get(args, 0),
                expression=dialect.to_json_path(seq_get(args, 1)),
                requires_json=True,
            ),
            "HEX_DECODE_BINARY": exp.Unhex.from_arg_list,
            "IFF": exp.If.from_arg_list,
            "MD5_HEX": exp.MD5.from_arg_list,
            "MD5_BINARY": exp.MD5Digest.from_arg_list,
            "MD5_NUMBER_LOWER64": exp.MD5NumberLower64.from_arg_list,
            "MD5_NUMBER_UPPER64": exp.MD5NumberUpper64.from_arg_list,
            "LAST_DAY": lambda args: exp.LastDay(
                this=seq_get(args, 0), unit=map_date_part(seq_get(args, 1))
            ),
            "LEN": lambda args: exp.Length(this=seq_get(args, 0), binary=True),
            "LENGTH": lambda args: exp.Length(this=seq_get(args, 0), binary=True),
            "NULLIFZERO": _build_if_from_nullifzero,
            "OBJECT_CONSTRUCT": _build_object_construct,
            "OCTET_LENGTH": exp.ByteLength.from_arg_list,
            "REGEXP_EXTRACT_ALL": _build_regexp_extract(exp.RegexpExtractAll),
            "REGEXP_REPLACE": _build_regexp_replace,
            "REGEXP_SUBSTR": _build_regexp_extract(exp.RegexpExtract),
            "REGEXP_SUBSTR_ALL": _build_regexp_extract(exp.RegexpExtractAll),
            "REPLACE": build_replace_with_optional_replacement,
            "RLIKE": exp.RegexpLike.from_arg_list,
            "SHA1_BINARY": exp.SHA1Digest.from_arg_list,
            "SHA1_HEX": exp.SHA.from_arg_list,
            "SHA2_BINARY": exp.SHA2Digest.from_arg_list,
            "SHA2_HEX": exp.SHA2.from_arg_list,
            "SQUARE": lambda args: exp.Pow(this=seq_get(args, 0), expression=exp.Literal.number(2)),
            "TABLE": lambda args: exp.TableFromRows(this=seq_get(args, 0)),
            "TIMEADD": _build_date_time_add(exp.TimeAdd),
            "TIMEDIFF": _build_datediff,
            "TIMESTAMPADD": _build_date_time_add(exp.DateAdd),
            "TIMESTAMPDIFF": _build_datediff,
            "TIMESTAMPFROMPARTS": build_timestamp_from_parts,
            "TIMESTAMP_FROM_PARTS": build_timestamp_from_parts,
            "TIMESTAMPNTZFROMPARTS": build_timestamp_from_parts,
            "TIMESTAMP_NTZ_FROM_PARTS": build_timestamp_from_parts,
            "TRY_PARSE_JSON": lambda args: exp.ParseJSON(this=seq_get(args, 0), safe=True),
            "TRY_TO_DATE": _build_datetime("TRY_TO_DATE", exp.DataType.Type.DATE, safe=True),
            "TRY_TO_TIME": _build_datetime("TRY_TO_TIME", exp.DataType.Type.TIME, safe=True),
            "TRY_TO_TIMESTAMP": _build_datetime(
                "TRY_TO_TIMESTAMP", exp.DataType.Type.TIMESTAMP, safe=True
            ),
            "TO_CHAR": build_timetostr_or_tochar,
            "TO_DATE": _build_datetime("TO_DATE", exp.DataType.Type.DATE),
            "TO_NUMBER": lambda args: exp.ToNumber(
                this=seq_get(args, 0),
                format=seq_get(args, 1),
                precision=seq_get(args, 2),
                scale=seq_get(args, 3),
            ),
            "TO_TIME": _build_datetime("TO_TIME", exp.DataType.Type.TIME),
            "TO_TIMESTAMP": _build_datetime("TO_TIMESTAMP", exp.DataType.Type.TIMESTAMP),
            "TO_TIMESTAMP_LTZ": _build_datetime("TO_TIMESTAMP_LTZ", exp.DataType.Type.TIMESTAMPLTZ),
            "TO_TIMESTAMP_NTZ": _build_datetime("TO_TIMESTAMP_NTZ", exp.DataType.Type.TIMESTAMP),
            "TO_TIMESTAMP_TZ": _build_datetime("TO_TIMESTAMP_TZ", exp.DataType.Type.TIMESTAMPTZ),
            "TO_VARCHAR": build_timetostr_or_tochar,
            "TO_JSON": exp.JSONFormat.from_arg_list,
            "VECTOR_L2_DISTANCE": exp.EuclideanDistance.from_arg_list,
            "ZEROIFNULL": _build_if_from_zeroifnull,
        }
        FUNCTIONS.pop("PREDICT")

        FUNCTION_PARSERS = {
            **parser.Parser.FUNCTION_PARSERS,
            "DATE_PART": lambda self: self._parse_date_part(),
            "OBJECT_CONSTRUCT_KEEP_NULL": lambda self: self._parse_json_object(),
            "LISTAGG": lambda self: self._parse_string_agg(),
            "SEMANTIC_VIEW": lambda self: self._parse_semantic_view(),
        }
        FUNCTION_PARSERS.pop("TRIM")

        TIMESTAMPS = parser.Parser.TIMESTAMPS - {TokenType.TIME}

        ALTER_PARSERS = {
            **parser.Parser.ALTER_PARSERS,
            "SESSION": lambda self: self._parse_alter_session(),
            "UNSET": lambda self: self.expression(
                exp.Set,
                tag=self._match_text_seq("TAG"),
                expressions=self._parse_csv(self._parse_id_var),
                unset=True,
            ),
        }

        STATEMENT_PARSERS = {
            **parser.Parser.STATEMENT_PARSERS,
            TokenType.GET: lambda self: self._parse_get(),
            TokenType.PUT: lambda self: self._parse_put(),
            TokenType.SHOW: lambda self: self._parse_show(),
        }

        PROPERTY_PARSERS = {
            **parser.Parser.PROPERTY_PARSERS,
            "CREDENTIALS": lambda self: self._parse_credentials_property(),
            "FILE_FORMAT": lambda self: self._parse_file_format_property(),
            "LOCATION": lambda self: self._parse_location_property(),
            "TAG": lambda self: self._parse_tag(),
            "USING": lambda self: self._match_text_seq("TEMPLATE")
            and self.expression(exp.UsingTemplateProperty, this=self._parse_statement()),
        }

        TYPE_CONVERTERS = {
            # https://docs.snowflake.com/en/sql-reference/data-types-numeric#number
            exp.DataType.Type.DECIMAL: build_default_decimal_type(precision=38, scale=0),
        }

        SHOW_PARSERS = {
            "DATABASES": _show_parser("DATABASES"),
            "TERSE DATABASES": _show_parser("DATABASES"),
            "SCHEMAS": _show_parser("SCHEMAS"),
            "TERSE SCHEMAS": _show_parser("SCHEMAS"),
            "OBJECTS": _show_parser("OBJECTS"),
            "TERSE OBJECTS": _show_parser("OBJECTS"),
            "TABLES": _show_parser("TABLES"),
            "TERSE TABLES": _show_parser("TABLES"),
            "VIEWS": _show_parser("VIEWS"),
            "TERSE VIEWS": _show_parser("VIEWS"),
            "PRIMARY KEYS": _show_parser("PRIMARY KEYS"),
            "TERSE PRIMARY KEYS": _show_parser("PRIMARY KEYS"),
            "IMPORTED KEYS": _show_parser("IMPORTED KEYS"),
            "TERSE IMPORTED KEYS": _show_parser("IMPORTED KEYS"),
            "UNIQUE KEYS": _show_parser("UNIQUE KEYS"),
            "TERSE UNIQUE KEYS": _show_parser("UNIQUE KEYS"),
            "SEQUENCES": _show_parser("SEQUENCES"),
            "TERSE SEQUENCES": _show_parser("SEQUENCES"),
            "STAGES": _show_parser("STAGES"),
            "COLUMNS": _show_parser("COLUMNS"),
            "USERS": _show_parser("USERS"),
            "TERSE USERS": _show_parser("USERS"),
            "FILE FORMATS": _show_parser("FILE FORMATS"),
            "FUNCTIONS": _show_parser("FUNCTIONS"),
            "PROCEDURES": _show_parser("PROCEDURES"),
            "WAREHOUSES": _show_parser("WAREHOUSES"),
        }

        CONSTRAINT_PARSERS = {
            **parser.Parser.CONSTRAINT_PARSERS,
            "WITH": lambda self: self._parse_with_constraint(),
            "MASKING": lambda self: self._parse_with_constraint(),
            "PROJECTION": lambda self: self._parse_with_constraint(),
            "TAG": lambda self: self._parse_with_constraint(),
        }

        STAGED_FILE_SINGLE_TOKENS = {
            TokenType.DOT,
            TokenType.MOD,
            TokenType.SLASH,
        }

        FLATTEN_COLUMNS = ["SEQ", "KEY", "PATH", "INDEX", "VALUE", "THIS"]

        SCHEMA_KINDS = {"OBJECTS", "TABLES", "VIEWS", "SEQUENCES", "UNIQUE KEYS", "IMPORTED KEYS"}

        NON_TABLE_CREATABLES = {"STORAGE INTEGRATION", "TAG", "WAREHOUSE", "STREAMLIT"}

        LAMBDAS = {
            **parser.Parser.LAMBDAS,
            TokenType.ARROW: lambda self, expressions: self.expression(
                exp.Lambda,
                this=self._replace_lambda(
                    self._parse_assignment(),
                    expressions,
                ),
                expressions=[e.this if isinstance(e, exp.Cast) else e for e in expressions],
            ),
        }

        COLUMN_OPERATORS = {
            **parser.Parser.COLUMN_OPERATORS,
            TokenType.EXCLAMATION: lambda self, this, attr: self.expression(
                exp.ModelAttribute, this=this, expression=attr
            ),
        }
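
        # For illustration (not part of the original source): Snowflake supports statements such
        # as USE SECONDARY ROLES ALL | NONE | role_a, role_b; the override below parses them into
        # an exp.Use node with kind="SECONDARY ROLES".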
        def _parse_use(self) -> exp.Use:
            if self._match_text_seq("SECONDARY", "ROLES"):
                this = self._match_texts(("ALL", "NONE")) and exp.var(self._prev.text.upper())
                roles = None if this else self._parse_csv(lambda: self._parse_table(schema=False))
                return self.expression(
                    exp.Use, kind="SECONDARY ROLES", this=this, expressions=roles
                )

            return super()._parse_use()

        def _negate_range(
            self, this: t.Optional[exp.Expression] = None
        ) -> t.Optional[exp.Expression]:
            if not this:
                return this

            query = this.args.get("query")
            if isinstance(this, exp.In) and isinstance(query, exp.Query):
                # Snowflake treats `value NOT IN (subquery)` as `value <> ALL (subquery)`, so
                # we do this conversion here to avoid parsing it into `NOT value IN (subquery)`
                # which can produce different results (most likely a Snowflake bug).
                #
                # https://docs.snowflake.com/en/sql-reference/functions/in
                # Context: https://github.com/tobymao/sqlglot/issues/3890
                return self.expression(
                    exp.NEQ, this=this.this, expression=exp.All(this=query.unnest())
                )

            return self.expression(exp.Not, this=this)

        def _parse_tag(self) -> exp.Tags:
            return self.expression(
                exp.Tags,
                expressions=self._parse_wrapped_csv(self._parse_property),
            )

        def _parse_with_constraint(self) -> t.Optional[exp.Expression]:
            if self._prev.token_type != TokenType.WITH:
                self._retreat(self._index - 1)

            if self._match_text_seq("MASKING", "POLICY"):
                policy = self._parse_column()
                return self.expression(
                    exp.MaskingPolicyColumnConstraint,
                    this=policy.to_dot() if isinstance(policy, exp.Column) else policy,
                    expressions=self._match(TokenType.USING)
                    and self._parse_wrapped_csv(self._parse_id_var),
                )
            if self._match_text_seq("PROJECTION", "POLICY"):
                policy = self._parse_column()
                return self.expression(
                    exp.ProjectionPolicyColumnConstraint,
                    this=policy.to_dot() if isinstance(policy, exp.Column) else policy,
                )
            if self._match(TokenType.TAG):
                return self._parse_tag()

            return None

        def _parse_with_property(self) -> t.Optional[exp.Expression] | t.List[exp.Expression]:
            if self._match(TokenType.TAG):
                return self._parse_tag()

            return super()._parse_with_property()

        def _parse_create(self) -> exp.Create | exp.Command:
            expression = super()._parse_create()
            if isinstance(expression, exp.Create) and expression.kind in self.NON_TABLE_CREATABLES:
                # Replace the Table node with the enclosed Identifier
                expression.this.replace(expression.this.this)

            return expression
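
        # For illustration (not part of the original source): in the method below,
        # DATE_PART('epoch_millisecond', ts) becomes roughly
        # EXTRACT(epoch_second FROM CAST(ts AS TIMESTAMP)) * 1000 in sqlglot's AST.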
        # https://docs.snowflake.com/en/sql-reference/functions/date_part.html
        # https://docs.snowflake.com/en/sql-reference/functions-date-time.html#label-supported-date-time-parts
        def _parse_date_part(self: Snowflake.Parser) -> t.Optional[exp.Expression]:
            this = self._parse_var() or self._parse_type()

            if not this:
                return None

            self._match(TokenType.COMMA)
            expression = self._parse_bitwise()
            this = map_date_part(this)
            name = this.name.upper()

            if name.startswith("EPOCH"):
                if name == "EPOCH_MILLISECOND":
                    scale = 10**3
                elif name == "EPOCH_MICROSECOND":
                    scale = 10**6
                elif name == "EPOCH_NANOSECOND":
                    scale = 10**9
                else:
                    scale = None

                ts = self.expression(exp.Cast, this=expression, to=exp.DataType.build("TIMESTAMP"))
                to_unix: exp.Expression = self.expression(exp.TimeToUnix, this=ts)

                if scale:
                    to_unix = exp.Mul(this=to_unix, expression=exp.Literal.number(scale))

                return to_unix

            return self.expression(exp.Extract, this=this, expression=expression)

        def _parse_bracket_key_value(self, is_map: bool = False) -> t.Optional[exp.Expression]:
            if is_map:
                # Keys are strings in Snowflake's objects, see also:
                # - https://docs.snowflake.com/en/sql-reference/data-types-semistructured
                # - https://docs.snowflake.com/en/sql-reference/functions/object_construct
                return self._parse_slice(self._parse_string()) or self._parse_assignment()

            return self._parse_slice(self._parse_alias(self._parse_assignment(), explicit=True))

        def _parse_lateral(self) -> t.Optional[exp.Lateral]:
            lateral = super()._parse_lateral()
            if not lateral:
                return lateral

            if isinstance(lateral.this, exp.Explode):
                table_alias = lateral.args.get("alias")
                columns = [exp.to_identifier(col) for col in self.FLATTEN_COLUMNS]
                if table_alias and not table_alias.args.get("columns"):
                    table_alias.set("columns", columns)
                elif not table_alias:
                    exp.alias_(lateral, "_flattened", table=columns, copy=False)

            return lateral

        def _parse_table_parts(
            self, schema: bool = False, is_db_reference: bool = False, wildcard: bool = False
        ) -> exp.Table:
            # https://docs.snowflake.com/en/user-guide/querying-stage
            if self._match(TokenType.STRING, advance=False):
                table = self._parse_string()
            elif self._match_text_seq("@", advance=False):
                table = self._parse_location_path()
            else:
                table = None

            if table:
                file_format = None
                pattern = None

                wrapped = self._match(TokenType.L_PAREN)
                while self._curr and wrapped and not self._match(TokenType.R_PAREN):
                    if self._match_text_seq("FILE_FORMAT", "=>"):
                        file_format = self._parse_string() or super()._parse_table_parts(
                            is_db_reference=is_db_reference
                        )
                    elif self._match_text_seq("PATTERN", "=>"):
                        pattern = self._parse_string()
                    else:
                        break

                    self._match(TokenType.COMMA)

                table = self.expression(exp.Table, this=table, format=file_format, pattern=pattern)
            else:
                table = super()._parse_table_parts(schema=schema, is_db_reference=is_db_reference)

            return table

        def _parse_table(
            self,
            schema: bool = False,
            joins: bool = False,
            alias_tokens: t.Optional[t.Collection[TokenType]] = None,
            parse_bracket: bool = False,
            is_db_reference: bool = False,
            parse_partition: bool = False,
            consume_pipe: bool = False,
        ) -> t.Optional[exp.Expression]:
            table = super()._parse_table(
                schema=schema,
                joins=joins,
                alias_tokens=alias_tokens,
                parse_bracket=parse_bracket,
                is_db_reference=is_db_reference,
                parse_partition=parse_partition,
            )
            if isinstance(table, exp.Table) and isinstance(table.this, exp.TableFromRows):
                table_from_rows = table.this
                for arg in exp.TableFromRows.arg_types:
                    if arg != "this":
                        table_from_rows.set(arg, table.args.get(arg))

                table = table_from_rows

            return table
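
        # For illustration (not part of the original source): the override below keeps, e.g.,
        # IDENTIFIER('my_table') as an anonymous IDENTIFIER(...) call so the indirection
        # round-trips instead of collapsing into a plain identifier.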
        def _parse_id_var(
            self,
            any_token: bool = True,
            tokens: t.Optional[t.Collection[TokenType]] = None,
        ) -> t.Optional[exp.Expression]:
            if self._match_text_seq("IDENTIFIER", "("):
                identifier = (
                    super()._parse_id_var(any_token=any_token, tokens=tokens)
                    or self._parse_string()
                )
                self._match_r_paren()
                return self.expression(exp.Anonymous, this="IDENTIFIER", expressions=[identifier])

            return super()._parse_id_var(any_token=any_token, tokens=tokens)

        def _parse_show_snowflake(self, this: str) -> exp.Show:
            scope = None
            scope_kind = None

            # will identify SHOW TERSE SCHEMAS but not SHOW TERSE PRIMARY KEYS
            # which is syntactically valid but has no effect on the output
            terse = self._tokens[self._index - 2].text.upper() == "TERSE"

            history = self._match_text_seq("HISTORY")

            like = self._parse_string() if self._match(TokenType.LIKE) else None

            if self._match(TokenType.IN):
                if self._match_text_seq("ACCOUNT"):
                    scope_kind = "ACCOUNT"
                elif self._match_text_seq("CLASS"):
                    scope_kind = "CLASS"
                    scope = self._parse_table_parts()
                elif self._match_text_seq("APPLICATION"):
                    scope_kind = "APPLICATION"
                    if self._match_text_seq("PACKAGE"):
                        scope_kind += " PACKAGE"
                    scope = self._parse_table_parts()
                elif self._match_set(self.DB_CREATABLES):
                    scope_kind = self._prev.text.upper()
                    if self._curr:
                        scope = self._parse_table_parts()
                elif self._curr:
                    scope_kind = "SCHEMA" if this in self.SCHEMA_KINDS else "TABLE"
                    scope = self._parse_table_parts()

            return self.expression(
                exp.Show,
                **{
                    "terse": terse,
                    "this": this,
                    "history": history,
                    "like": like,
                    "scope": scope,
                    "scope_kind": scope_kind,
                    "starts_with": self._match_text_seq("STARTS", "WITH") and self._parse_string(),
                    "limit": self._parse_limit(),
                    "from": self._parse_string() if self._match(TokenType.FROM) else None,
                    "privileges": self._match_text_seq("WITH", "PRIVILEGES")
                    and self._parse_csv(lambda: self._parse_var(any_token=True, upper=True)),
                },
            )

        def _parse_put(self) -> exp.Put | exp.Command:
            if self._curr.token_type != TokenType.STRING:
                return self._parse_as_command(self._prev)

            return self.expression(
                exp.Put,
                this=self._parse_string(),
                target=self._parse_location_path(),
                properties=self._parse_properties(),
            )

        def _parse_get(self) -> t.Optional[exp.Expression]:
            start = self._prev

            # If we detect GET( then we need to parse a function, not a statement
            if self._match(TokenType.L_PAREN):
                self._retreat(self._index - 2)
                return self._parse_expression()

            target = self._parse_location_path()

            # Parse as command if unquoted file path
            if self._curr.token_type == TokenType.URI_START:
                return self._parse_as_command(start)

            return self.expression(
                exp.Get,
                this=self._parse_string(),
                target=target,
                properties=self._parse_properties(),
            )

        def _parse_location_property(self) -> exp.LocationProperty:
            self._match(TokenType.EQ)
            return self.expression(exp.LocationProperty, this=self._parse_location_path())

        def _parse_file_location(self) -> t.Optional[exp.Expression]:
            # Parse either a subquery or a staged file
            return (
                self._parse_select(table=True, parse_subquery_alias=False)
                if self._match(TokenType.L_PAREN, advance=False)
                else self._parse_table_parts()
            )
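
        # For illustration (not part of the original source): location paths cover staged-file
        # references such as @my_stage/some/path, which the method below scans token by token
        # until a comma or parenthesis ends the path.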
        def _parse_location_path(self) -> exp.Var:
            start = self._curr
            self._advance_any(ignore_reserved=True)

            # We avoid consuming a comma token because external tables like @foo and @bar
            # can be joined in a query with a comma separator, as well as closing paren
            # in case of subqueries
            while self._is_connected() and not self._match_set(
                (TokenType.COMMA, TokenType.L_PAREN, TokenType.R_PAREN), advance=False
            ):
                self._advance_any(ignore_reserved=True)

            return exp.var(self._find_sql(start, self._prev))

        def _parse_lambda_arg(self) -> t.Optional[exp.Expression]:
            this = super()._parse_lambda_arg()

            if not this:
                return this

            typ = self._parse_types()

            if typ:
                return self.expression(exp.Cast, this=this, to=typ)

            return this

        def _parse_foreign_key(self) -> exp.ForeignKey:
            # inline FK: the REFERENCES columns are implied
            if self._match(TokenType.REFERENCES, advance=False):
                return self.expression(exp.ForeignKey)

            # out-of-line FK: explicitly names the columns
            return super()._parse_foreign_key()

        def _parse_file_format_property(self) -> exp.FileFormatProperty:
            self._match(TokenType.EQ)
            if self._match(TokenType.L_PAREN, advance=False):
                expressions = self._parse_wrapped_options()
            else:
                expressions = [self._parse_format_name()]

            return self.expression(
                exp.FileFormatProperty,
                expressions=expressions,
            )

        def _parse_credentials_property(self) -> exp.CredentialsProperty:
            return self.expression(
                exp.CredentialsProperty,
                expressions=self._parse_wrapped_options(),
            )

        def _parse_semantic_view(self) -> exp.SemanticView:
            kwargs: t.Dict[str, t.Any] = {"this": self._parse_table_parts()}

            while self._curr and not self._match(TokenType.R_PAREN, advance=False):
                if self._match_text_seq("DIMENSIONS"):
                    kwargs["dimensions"] = self._parse_csv(self._parse_disjunction)
                if self._match_text_seq("METRICS"):
                    kwargs["metrics"] = self._parse_csv(self._parse_disjunction)
                if self._match_text_seq("WHERE"):
                    kwargs["where"] = self._parse_expression()

            return self.expression(exp.SemanticView, **kwargs)

    class Tokenizer(tokens.Tokenizer):
        STRING_ESCAPES = ["\\", "'"]
        HEX_STRINGS = [("x'", "'"), ("X'", "'")]
        RAW_STRINGS = ["$$"]
        COMMENTS = ["--", "//", ("/*", "*/")]
        NESTED_COMMENTS = False

        KEYWORDS = {
            **tokens.Tokenizer.KEYWORDS,
            "BYTEINT": TokenType.INT,
            "FILE://": TokenType.URI_START,
            "FILE FORMAT": TokenType.FILE_FORMAT,
            "GET": TokenType.GET,
            "MATCH_CONDITION": TokenType.MATCH_CONDITION,
            "MATCH_RECOGNIZE": TokenType.MATCH_RECOGNIZE,
            "MINUS": TokenType.EXCEPT,
            "NCHAR VARYING": TokenType.VARCHAR,
            "PUT": TokenType.PUT,
            "REMOVE": TokenType.COMMAND,
            "RM": TokenType.COMMAND,
            "SAMPLE": TokenType.TABLE_SAMPLE,
            "SEMANTIC VIEW": TokenType.SEMANTIC_VIEW,
            "SQL_DOUBLE": TokenType.DOUBLE,
            "SQL_VARCHAR": TokenType.VARCHAR,
            "STAGE": TokenType.STAGE,
            "STORAGE INTEGRATION": TokenType.STORAGE_INTEGRATION,
            "STREAMLIT": TokenType.STREAMLIT,
            "TAG": TokenType.TAG,
            "TIMESTAMP_TZ": TokenType.TIMESTAMPTZ,
            "TOP": TokenType.TOP,
            "WAREHOUSE": TokenType.WAREHOUSE,
        }
        KEYWORDS.pop("/*+")

        SINGLE_TOKENS = {
            **tokens.Tokenizer.SINGLE_TOKENS,
            "$": TokenType.PARAMETER,
            "!": TokenType.EXCLAMATION,
        }

        VAR_SINGLE_TOKENS = {"$"}

        COMMANDS = tokens.Tokenizer.COMMANDS - {TokenType.SHOW}

    class Generator(generator.Generator):
        PARAMETER_TOKEN = "$"
        MATCHED_BY_SOURCE = False
        SINGLE_STRING_INTERVAL = True
        JOIN_HINTS = False
        TABLE_HINTS = False
        QUERY_HINTS = False
        AGGREGATE_FILTER_SUPPORTED = False
        SUPPORTS_TABLE_COPY = False
        COLLATE_IS_FUNC = True
        LIMIT_ONLY_LITERALS = True
        JSON_KEY_VALUE_PAIR_SEP = ","
        INSERT_OVERWRITE = " OVERWRITE INTO"
        STRUCT_DELIMITER = ("(", ")")
        COPY_PARAMS_ARE_WRAPPED = False
        COPY_PARAMS_EQ_REQUIRED = True
        STAR_EXCEPT = "EXCLUDE"
        SUPPORTS_EXPLODING_PROJECTIONS = False
        ARRAY_CONCAT_IS_VAR_LEN = False
        SUPPORTS_CONVERT_TIMEZONE = True
        EXCEPT_INTERSECT_SUPPORT_ALL_CLAUSE = False
        SUPPORTS_MEDIAN = True
        ARRAY_SIZE_NAME = "ARRAY_SIZE"
        SUPPORTS_DECODE_CASE = True
        IS_BOOL_ALLOWED = False
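
        # For illustration (not part of the original source): TRANSFORMS below maps canonical
        # sqlglot expressions onto Snowflake spellings, e.g. exp.If is emitted as IFF(...) and
        # exp.StarMap as OBJECT_CONSTRUCT(...).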
        TRANSFORMS = {
            **generator.Generator.TRANSFORMS,
            exp.ApproxDistinct: rename_func("APPROX_COUNT_DISTINCT"),
            exp.ArgMax: rename_func("MAX_BY"),
            exp.ArgMin: rename_func("MIN_BY"),
            exp.ArrayConcat: lambda self, e: self.arrayconcat_sql(e, name="ARRAY_CAT"),
            exp.ArrayContains: lambda self, e: self.func("ARRAY_CONTAINS", e.expression, e.this),
            exp.ArrayIntersect: rename_func("ARRAY_INTERSECTION"),
            exp.AtTimeZone: lambda self, e: self.func(
                "CONVERT_TIMEZONE", e.args.get("zone"), e.this
            ),
            exp.BitwiseOr: rename_func("BITOR"),
            exp.BitwiseXor: rename_func("BITXOR"),
            exp.BitwiseAnd: rename_func("BITAND"),
            exp.BitwiseAndAgg: rename_func("BITANDAGG"),
            exp.BitwiseOrAgg: rename_func("BITORAGG"),
            exp.BitwiseXorAgg: rename_func("BITXORAGG"),
            exp.BitwiseNot: rename_func("BITNOT"),
            exp.BitwiseLeftShift: rename_func("BITSHIFTLEFT"),
            exp.BitwiseRightShift: rename_func("BITSHIFTRIGHT"),
            exp.Create: transforms.preprocess([_flatten_structured_types_unless_iceberg]),
            exp.DateAdd: date_delta_sql("DATEADD"),
            exp.DateDiff: date_delta_sql("DATEDIFF"),
            exp.DatetimeAdd: date_delta_sql("TIMESTAMPADD"),
            exp.DatetimeDiff: timestampdiff_sql,
            exp.DateStrToDate: datestrtodate_sql,
            exp.DayOfMonth: rename_func("DAYOFMONTH"),
            exp.DayOfWeek: rename_func("DAYOFWEEK"),
            exp.DayOfWeekIso: rename_func("DAYOFWEEKISO"),
            exp.DayOfYear: rename_func("DAYOFYEAR"),
            exp.Explode: rename_func("FLATTEN"),
            exp.Extract: lambda self, e: self.func(
                "DATE_PART", map_date_part(e.this, self.dialect), e.expression
            ),
            exp.EuclideanDistance: rename_func("VECTOR_L2_DISTANCE"),
            exp.FileFormatProperty: lambda self,
            e: f"FILE_FORMAT=({self.expressions(e, 'expressions', sep=' ')})",
            exp.FromTimeZone: lambda self, e: self.func(
                "CONVERT_TIMEZONE", e.args.get("zone"), "'UTC'", e.this
            ),
            exp.GenerateSeries: lambda self, e: self.func(
                "ARRAY_GENERATE_RANGE", e.args["start"], e.args["end"] + 1, e.args.get("step")
            ),
            exp.GetExtract: rename_func("GET"),
            exp.GroupConcat: lambda self, e: groupconcat_sql(self, e, sep=""),
            exp.If: if_sql(name="IFF", false_value="NULL"),
            exp.JSONExtractArray: _json_extract_value_array_sql,
            exp.JSONExtractScalar: lambda self, e: self.func(
                "JSON_EXTRACT_PATH_TEXT", e.this, e.expression
            ),
            exp.JSONObject: lambda self, e: self.func("OBJECT_CONSTRUCT_KEEP_NULL", *e.expressions),
            exp.JSONPathRoot: lambda *_: "",
            exp.JSONValueArray: _json_extract_value_array_sql,
            exp.Levenshtein: unsupported_args("ins_cost", "del_cost", "sub_cost")(
                rename_func("EDITDISTANCE")
            ),
            exp.LocationProperty: lambda self, e: f"LOCATION={self.sql(e, 'this')}",
            exp.LogicalAnd: rename_func("BOOLAND_AGG"),
            exp.LogicalOr: rename_func("BOOLOR_AGG"),
            exp.Map: lambda self, e: var_map_sql(self, e, "OBJECT_CONSTRUCT"),
            exp.MakeInterval: no_make_interval_sql,
            exp.Max: max_or_greatest,
            exp.Min: min_or_least,
            exp.ParseJSON: lambda self, e: self.func(
                "TRY_PARSE_JSON" if e.args.get("safe") else "PARSE_JSON", e.this
            ),
            exp.JSONFormat: rename_func("TO_JSON"),
            exp.PartitionedByProperty: lambda self, e: f"PARTITION BY {self.sql(e, 'this')}",
            exp.PercentileCont: transforms.preprocess(
                [transforms.add_within_group_for_percentiles]
            ),
            exp.PercentileDisc: transforms.preprocess(
                [transforms.add_within_group_for_percentiles]
            ),
            exp.Pivot: transforms.preprocess([_unqualify_pivot_columns]),
            exp.RegexpExtract: _regexpextract_sql,
            exp.RegexpExtractAll: _regexpextract_sql,
            exp.RegexpILike: _regexpilike_sql,
            exp.Rand: rename_func("RANDOM"),
            exp.Select: transforms.preprocess(
                [
                    transforms.eliminate_window_clause,
                    transforms.eliminate_distinct_on,
                    transforms.explode_projection_to_unnest(),
                    transforms.eliminate_semi_and_anti_joins,
                    _transform_generate_date_array,
                    _qualify_unnested_columns,
                    _eliminate_dot_variant_lookup,
                ]
            ),
            exp.SHA: rename_func("SHA1"),
            exp.MD5Digest: rename_func("MD5_BINARY"),
            exp.MD5NumberLower64: rename_func("MD5_NUMBER_LOWER64"),
            exp.MD5NumberUpper64: rename_func("MD5_NUMBER_UPPER64"),
            exp.LowerHex: rename_func("TO_CHAR"),
            exp.SortArray: rename_func("ARRAY_SORT"),
            exp.StarMap: rename_func("OBJECT_CONSTRUCT"),
            exp.StartsWith: rename_func("STARTSWITH"),
            exp.EndsWith: rename_func("ENDSWITH"),
            exp.StrPosition: lambda self, e: strposition_sql(
                self, e, func_name="CHARINDEX", supports_position=True
            ),
            exp.StrToDate: lambda self, e: self.func("DATE", e.this, self.format_time(e)),
            exp.StringToArray: rename_func("STRTOK_TO_ARRAY"),
            exp.Stuff: rename_func("INSERT"),
            exp.StPoint: rename_func("ST_MAKEPOINT"),
            exp.TimeAdd: date_delta_sql("TIMEADD"),
            exp.Timestamp: no_timestamp_sql,
            exp.TimestampAdd: date_delta_sql("TIMESTAMPADD"),
            exp.TimestampDiff: lambda self, e: self.func(
                "TIMESTAMPDIFF", e.unit, e.expression, e.this
            ),
            exp.TimestampTrunc: timestamptrunc_sql(),
            exp.TimeStrToTime: timestrtotime_sql,
            exp.TimeToUnix: lambda self, e: f"EXTRACT(epoch_second FROM {self.sql(e, 'this')})",
            exp.ToArray: rename_func("TO_ARRAY"),
            exp.ToChar: lambda self, e: self.function_fallback_sql(e),
            exp.ToDouble: rename_func("TO_DOUBLE"),
            exp.TsOrDsAdd: date_delta_sql("DATEADD", cast=True),
            exp.TsOrDsDiff: date_delta_sql("DATEDIFF"),
            exp.TsOrDsToDate: lambda self, e: self.func(
                "TRY_TO_DATE" if e.args.get("safe") else "TO_DATE", e.this, self.format_time(e)
            ),
            exp.TsOrDsToTime: lambda self, e: self.func(
                "TRY_TO_TIME" if e.args.get("safe") else "TO_TIME", e.this, self.format_time(e)
            ),
            exp.Unhex: rename_func("HEX_DECODE_BINARY"),
            exp.UnixToTime: rename_func("TO_TIMESTAMP"),
            exp.Uuid: rename_func("UUID_STRING"),
            exp.VarMap: lambda self, e: var_map_sql(self, e, "OBJECT_CONSTRUCT"),
            exp.WeekOfYear: rename_func("WEEKOFYEAR"),
            exp.Xor: rename_func("BOOLXOR"),
            exp.ByteLength: rename_func("OCTET_LENGTH"),
        }

        SUPPORTED_JSON_PATH_PARTS = {
            exp.JSONPathKey,
            exp.JSONPathRoot,
            exp.JSONPathSubscript,
        }
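
        # For illustration (not part of the original source): Snowflake has no separate
        # STRUCT/NESTED column types, so e.g. STRUCT<a INT> surfaces as OBJECT via the
        # type mapping below.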
        TYPE_MAPPING = {
            **generator.Generator.TYPE_MAPPING,
            exp.DataType.Type.BIGDECIMAL: "DOUBLE",
            exp.DataType.Type.NESTED: "OBJECT",
            exp.DataType.Type.STRUCT: "OBJECT",
            exp.DataType.Type.TEXT: "VARCHAR",
        }

        TOKEN_MAPPING = {
            TokenType.AUTO_INCREMENT: "AUTOINCREMENT",
        }

        PROPERTIES_LOCATION = {
            **generator.Generator.PROPERTIES_LOCATION,
            exp.CredentialsProperty: exp.Properties.Location.POST_WITH,
            exp.LocationProperty: exp.Properties.Location.POST_WITH,
            exp.PartitionedByProperty: exp.Properties.Location.POST_SCHEMA,
            exp.SetProperty: exp.Properties.Location.UNSUPPORTED,
            exp.VolatileProperty: exp.Properties.Location.UNSUPPORTED,
        }

        UNSUPPORTED_VALUES_EXPRESSIONS = {
            exp.Map,
            exp.StarMap,
            exp.Struct,
            exp.VarMap,
        }

        RESPECT_IGNORE_NULLS_UNSUPPORTED_EXPRESSIONS = (exp.ArrayAgg,)

        def with_properties(self, properties: exp.Properties) -> str:
            return self.properties(properties, wrapped=False, prefix=self.sep(""), sep=" ")

        def values_sql(self, expression: exp.Values, values_as_table: bool = True) -> str:
            if expression.find(*self.UNSUPPORTED_VALUES_EXPRESSIONS):
                values_as_table = False

            return super().values_sql(expression, values_as_table=values_as_table)

        def datatype_sql(self, expression: exp.DataType) -> str:
            expressions = expression.expressions
            if (
                expressions
                and expression.is_type(*exp.DataType.STRUCT_TYPES)
                and any(isinstance(field_type, exp.DataType) for field_type in expressions)
            ):
                # The correct syntax is OBJECT [ (<key> <value_type> [NOT NULL] [, ...]) ]
                return "OBJECT"

            return super().datatype_sql(expression)

        def tonumber_sql(self, expression: exp.ToNumber) -> str:
            return self.func(
                "TO_NUMBER",
                expression.this,
                expression.args.get("format"),
                expression.args.get("precision"),
                expression.args.get("scale"),
            )

        def timestampfromparts_sql(self, expression: exp.TimestampFromParts) -> str:
            milli = expression.args.get("milli")
            if milli is not None:
                milli_to_nano = milli.pop() * exp.Literal.number(1000000)
                expression.set("nano", milli_to_nano)

            return rename_func("TIMESTAMP_FROM_PARTS")(self, expression)

        def cast_sql(self, expression: exp.Cast, safe_prefix: t.Optional[str] = None) -> str:
            if expression.is_type(exp.DataType.Type.GEOGRAPHY):
                return self.func("TO_GEOGRAPHY", expression.this)
            if expression.is_type(exp.DataType.Type.GEOMETRY):
                return self.func("TO_GEOMETRY", expression.this)

            return super().cast_sql(expression, safe_prefix=safe_prefix)
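
        # For illustration (not part of the original source): Snowflake's TRY_CAST only accepts
        # string inputs, so in the override below e.g. TRY_CAST(int_col AS TEXT) falls back to a
        # plain CAST, while TRY_CAST(str_col AS INT) is preserved.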
        def trycast_sql(self, expression: exp.TryCast) -> str:
            value = expression.this

            if value.type is None:
                from sqlglot.optimizer.annotate_types import annotate_types

                value = annotate_types(value, dialect=self.dialect)

            # Snowflake requires that TRY_CAST's value be a string
            # If TRY_CAST is being roundtripped (since Snowflake is the only dialect that sets "requires_string") or
            # if we can deduce that the value is a string, then we can generate TRY_CAST
            if expression.args.get("requires_string") or value.is_type(*exp.DataType.TEXT_TYPES):
                return super().trycast_sql(expression)

            return self.cast_sql(expression)

        def log_sql(self, expression: exp.Log) -> str:
            if not expression.expression:
                return self.func("LN", expression.this)

            return super().log_sql(expression)

        def unnest_sql(self, expression: exp.Unnest) -> str:
            unnest_alias = expression.args.get("alias")
            offset = expression.args.get("offset")

            unnest_alias_columns = unnest_alias.columns if unnest_alias else []
            value = seq_get(unnest_alias_columns, 0) or exp.to_identifier("value")

            columns = [
                exp.to_identifier("seq"),
                exp.to_identifier("key"),
                exp.to_identifier("path"),
                offset.pop() if isinstance(offset, exp.Expression) else exp.to_identifier("index"),
                value,
                exp.to_identifier("this"),
            ]

            if unnest_alias:
                unnest_alias.set("columns", columns)
            else:
                unnest_alias = exp.TableAlias(this="_u", columns=columns)

            table_input = self.sql(expression.expressions[0])
            if not table_input.startswith("INPUT =>"):
                table_input = f"INPUT => {table_input}"

            expression_parent = expression.parent

            explode = (
                f"FLATTEN({table_input})"
                if isinstance(expression_parent, exp.Lateral)
                else f"TABLE(FLATTEN({table_input}))"
            )
            alias = self.sql(unnest_alias)
            alias = f" AS {alias}" if alias else ""
            value = (
                ""
                if isinstance(expression_parent, (exp.From, exp.Join, exp.Lateral))
                else f"{value} FROM "
            )

            return f"{value}{explode}{alias}"

        def show_sql(self, expression: exp.Show) -> str:
            terse = "TERSE " if expression.args.get("terse") else ""
            history = " HISTORY" if expression.args.get("history") else ""
            like = self.sql(expression, "like")
            like = f" LIKE {like}" if like else ""

            scope = self.sql(expression, "scope")
            scope = f" {scope}" if scope else ""

            scope_kind = self.sql(expression, "scope_kind")
            if scope_kind:
                scope_kind = f" IN {scope_kind}"

            starts_with = self.sql(expression, "starts_with")
            if starts_with:
                starts_with = f" STARTS WITH {starts_with}"

            limit = self.sql(expression, "limit")

            from_ = self.sql(expression, "from")
            if from_:
                from_ = f" FROM {from_}"

            privileges = self.expressions(expression, key="privileges", flat=True)
            privileges = f" WITH PRIVILEGES {privileges}" if privileges else ""

            return f"SHOW {terse}{expression.name}{history}{like}{scope_kind}{scope}{starts_with}{limit}{from_}{privileges}"

        def describe_sql(self, expression: exp.Describe) -> str:
            # Default to table if kind is unknown
            kind_value = expression.args.get("kind") or "TABLE"
            kind = f" {kind_value}" if kind_value else ""
            this = f" {self.sql(expression, 'this')}"
            expressions = self.expressions(expression, flat=True)
            expressions = f" {expressions}" if expressions else ""
            return f"DESCRIBE{kind}{this}{expressions}"

        def generatedasidentitycolumnconstraint_sql(
            self, expression: exp.GeneratedAsIdentityColumnConstraint
        ) -> str:
            start = expression.args.get("start")
            start = f" START {start}" if start else ""
            increment = expression.args.get("increment")
            increment = f" INCREMENT {increment}" if increment else ""

            order = expression.args.get("order")
            if order is not None:
                order_clause = " ORDER" if order else " NOORDER"
            else:
                order_clause = ""

            return f"AUTOINCREMENT{start}{increment}{order_clause}"

        def cluster_sql(self, expression: exp.Cluster) -> str:
            return f"CLUSTER BY ({self.expressions(expression, flat=True)})"
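
        # For illustration (not part of the original source): the method below renders, e.g.,
        # STRUCT(1 AS a) as OBJECT_CONSTRUCT('a', 1), and unnamed fields positionally as
        # OBJECT_CONSTRUCT('_0', <value>), OBJECT_CONSTRUCT('_1', <value>), and so on.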
expression: exp.Struct) -> str: 1636 if len(expression.expressions) == 1: 1637 arg = expression.expressions[0] 1638 if arg.is_star or (isinstance(arg, exp.ILike) and arg.left.is_star): 1639 # Wildcard syntax: https://docs.snowflake.com/en/sql-reference/data-types-semistructured#object 1640 return f"{{{self.sql(expression.expressions[0])}}}" 1641 1642 keys = [] 1643 values = [] 1644 1645 for i, e in enumerate(expression.expressions): 1646 if isinstance(e, exp.PropertyEQ): 1647 keys.append( 1648 exp.Literal.string(e.name) if isinstance(e.this, exp.Identifier) else e.this 1649 ) 1650 values.append(e.expression) 1651 else: 1652 keys.append(exp.Literal.string(f"_{i}")) 1653 values.append(e) 1654 1655 return self.func("OBJECT_CONSTRUCT", *flatten(zip(keys, values))) 1656 1657 @unsupported_args("weight", "accuracy") 1658 def approxquantile_sql(self, expression: exp.ApproxQuantile) -> str: 1659 return self.func("APPROX_PERCENTILE", expression.this, expression.args.get("quantile")) 1660 1661 def alterset_sql(self, expression: exp.AlterSet) -> str: 1662 exprs = self.expressions(expression, flat=True) 1663 exprs = f" {exprs}" if exprs else "" 1664 file_format = self.expressions(expression, key="file_format", flat=True, sep=" ") 1665 file_format = f" STAGE_FILE_FORMAT = ({file_format})" if file_format else "" 1666 copy_options = self.expressions(expression, key="copy_options", flat=True, sep=" ") 1667 copy_options = f" STAGE_COPY_OPTIONS = ({copy_options})" if copy_options else "" 1668 tag = self.expressions(expression, key="tag", flat=True) 1669 tag = f" TAG {tag}" if tag else "" 1670 1671 return f"SET{exprs}{file_format}{copy_options}{tag}" 1672 1673 def strtotime_sql(self, expression: exp.StrToTime): 1674 safe_prefix = "TRY_" if expression.args.get("safe") else "" 1675 return self.func( 1676 f"{safe_prefix}TO_TIMESTAMP", expression.this, self.format_time(expression) 1677 ) 1678 1679 def timestampsub_sql(self, expression: exp.TimestampSub): 1680 return self.sql( 1681 exp.TimestampAdd( 1682 this=expression.this, 1683 expression=expression.expression * -1, 1684 unit=expression.unit, 1685 ) 1686 ) 1687 1688 def jsonextract_sql(self, expression: exp.JSONExtract): 1689 this = expression.this 1690 1691 # JSON strings are valid coming from other dialects such as BQ so 1692 # for these cases we PARSE_JSON preemptively 1693 if not isinstance(this, (exp.ParseJSON, exp.JSONExtract)) and not expression.args.get( 1694 "requires_json" 1695 ): 1696 this = exp.ParseJSON(this=this) 1697 1698 return self.func( 1699 "GET_PATH", 1700 this, 1701 expression.expression, 1702 ) 1703 1704 def timetostr_sql(self, expression: exp.TimeToStr) -> str: 1705 this = expression.this 1706 if this.is_string: 1707 this = exp.cast(this, exp.DataType.Type.TIMESTAMP) 1708 1709 return self.func("TO_CHAR", this, self.format_time(expression)) 1710 1711 def datesub_sql(self, expression: exp.DateSub) -> str: 1712 value = expression.expression 1713 if value: 1714 value.replace(value * (-1)) 1715 else: 1716 self.unsupported("DateSub cannot be transpiled if the subtracted count is unknown") 1717 1718 return date_delta_sql("DATEADD")(self, expression) 1719 1720 def select_sql(self, expression: exp.Select) -> str: 1721 limit = expression.args.get("limit") 1722 offset = expression.args.get("offset") 1723 if offset and not limit: 1724 expression.limit(exp.Null(), copy=False) 1725 return super().select_sql(expression) 1726 1727 def createable_sql(self, expression: exp.Create, locations: t.DefaultDict) -> str: 1728 is_materialized = 
expression.find(exp.MaterializedProperty) 1729 copy_grants_property = expression.find(exp.CopyGrantsProperty) 1730 1731 if expression.kind == "VIEW" and is_materialized and copy_grants_property: 1732 # For materialized views, COPY GRANTS is located *before* the columns list 1733 # This is in contrast to normal views where COPY GRANTS is located *after* the columns list 1734 # We default CopyGrantsProperty to POST_SCHEMA which means we need to output it POST_NAME if a materialized view is detected 1735 # ref: https://docs.snowflake.com/en/sql-reference/sql/create-materialized-view#syntax 1736 # ref: https://docs.snowflake.com/en/sql-reference/sql/create-view#syntax 1737 post_schema_properties = locations[exp.Properties.Location.POST_SCHEMA] 1738 post_schema_properties.pop(post_schema_properties.index(copy_grants_property)) 1739 1740 this_name = self.sql(expression.this, "this") 1741 copy_grants = self.sql(copy_grants_property) 1742 this_schema = self.schema_columns_sql(expression.this) 1743 this_schema = f"{self.sep()}{this_schema}" if this_schema else "" 1744 1745 return f"{this_name}{self.sep()}{copy_grants}{this_schema}" 1746 1747 return super().createable_sql(expression, locations) 1748 1749 def arrayagg_sql(self, expression: exp.ArrayAgg) -> str: 1750 this = expression.this 1751 1752 # If an ORDER BY clause is present, we need to remove it from ARRAY_AGG 1753 # and add it later as part of the WITHIN GROUP clause 1754 order = this if isinstance(this, exp.Order) else None 1755 if order: 1756 expression.set("this", order.this.pop()) 1757 1758 expr_sql = super().arrayagg_sql(expression) 1759 1760 if order: 1761 expr_sql = self.sql(exp.WithinGroup(this=expr_sql, expression=order)) 1762 1763 return expr_sql 1764 1765 def array_sql(self, expression: exp.Array) -> str: 1766 expressions = expression.expressions 1767 1768 first_expr = seq_get(expressions, 0) 1769 if isinstance(first_expr, exp.Select): 1770 # SELECT AS STRUCT foo AS alias_foo -> ARRAY_AGG(OBJECT_CONSTRUCT('alias_foo', foo)) 1771 if first_expr.text("kind").upper() == "STRUCT": 1772 object_construct_args = [] 1773 for expr in first_expr.expressions: 1774 # Alias case: SELECT AS STRUCT foo AS alias_foo -> OBJECT_CONSTRUCT('alias_foo', foo) 1775 # Column case: SELECT AS STRUCT foo -> OBJECT_CONSTRUCT('foo', foo) 1776 name = expr.this if isinstance(expr, exp.Alias) else expr 1777 1778 object_construct_args.extend([exp.Literal.string(expr.alias_or_name), name]) 1779 1780 array_agg = exp.ArrayAgg( 1781 this=_build_object_construct(args=object_construct_args) 1782 ) 1783 1784 first_expr.set("kind", None) 1785 first_expr.set("expressions", [array_agg]) 1786 1787 return self.sql(first_expr.subquery()) 1788 1789 return inline_array_sql(self, expression) 1790 1791 def currentdate_sql(self, expression: exp.CurrentDate) -> str: 1792 zone = self.sql(expression, "this") 1793 if not zone: 1794 return super().currentdate_sql(expression) 1795 1796 expr = exp.Cast( 1797 this=exp.ConvertTimezone(target_tz=zone, timestamp=exp.CurrentTimestamp()), 1798 to=exp.DataType(this=exp.DataType.Type.DATE), 1799 ) 1800 return self.sql(expr) 1801 1802 def dot_sql(self, expression: exp.Dot) -> str: 1803 this = expression.this 1804 1805 if not this.type: 1806 from sqlglot.optimizer.annotate_types import annotate_types 1807 1808 this = annotate_types(this, dialect=self.dialect) 1809 1810 if not isinstance(this, exp.Dot) and this.is_type(exp.DataType.Type.STRUCT): 1811 # Generate colon notation for the top level STRUCT 1812 return 
f"{self.sql(this)}:{self.sql(expression, 'expression')}" 1813 1814 return super().dot_sql(expression) 1815 1816 def modelattribute_sql(self, expression: exp.ModelAttribute) -> str: 1817 return f"{self.sql(expression, 'this')}!{self.sql(expression, 'expression')}"
508class Snowflake(Dialect): 509 # https://docs.snowflake.com/en/sql-reference/identifiers-syntax 510 NORMALIZATION_STRATEGY = NormalizationStrategy.UPPERCASE 511 NULL_ORDERING = "nulls_are_large" 512 TIME_FORMAT = "'YYYY-MM-DD HH24:MI:SS'" 513 SUPPORTS_USER_DEFINED_TYPES = False 514 SUPPORTS_SEMI_ANTI_JOIN = False 515 PREFER_CTE_ALIAS_COLUMN = True 516 TABLESAMPLE_SIZE_IS_PERCENT = True 517 COPY_PARAMS_ARE_CSV = False 518 ARRAY_AGG_INCLUDES_NULLS = None 519 ALTER_TABLE_ADD_REQUIRED_FOR_EACH_COLUMN = False 520 TRY_CAST_REQUIRES_STRING = True 521 522 TYPE_TO_EXPRESSIONS = { 523 **Dialect.TYPE_TO_EXPRESSIONS, 524 exp.DataType.Type.INT: { 525 *Dialect.TYPE_TO_EXPRESSIONS[exp.DataType.Type.INT], 526 exp.Length, 527 }, 528 exp.DataType.Type.VARCHAR: { 529 *Dialect.TYPE_TO_EXPRESSIONS[exp.DataType.Type.VARCHAR], 530 exp.MD5, 531 exp.AIAgg, 532 exp.AISummarizeAgg, 533 exp.RegexpExtract, 534 exp.RegexpReplace, 535 exp.Repeat, 536 exp.Replace, 537 exp.SHA, 538 exp.SHA2, 539 exp.Space, 540 exp.Uuid, 541 }, 542 exp.DataType.Type.BINARY: { 543 *Dialect.TYPE_TO_EXPRESSIONS[exp.DataType.Type.BINARY], 544 exp.MD5Digest, 545 exp.SHA1Digest, 546 exp.SHA2Digest, 547 }, 548 exp.DataType.Type.BIGINT: { 549 *Dialect.TYPE_TO_EXPRESSIONS[exp.DataType.Type.BIGINT], 550 exp.MD5NumberLower64, 551 exp.MD5NumberUpper64, 552 }, 553 exp.DataType.Type.ARRAY: { 554 exp.Split, 555 }, 556 } 557 558 ANNOTATORS = { 559 **Dialect.ANNOTATORS, 560 **{ 561 expr_type: annotate_with_type_lambda(data_type) 562 for data_type, expressions in TYPE_TO_EXPRESSIONS.items() 563 for expr_type in expressions 564 }, 565 **{ 566 expr_type: lambda self, e: self._annotate_by_args(e, "this") 567 for expr_type in ( 568 exp.Left, 569 exp.Right, 570 exp.Substring, 571 ) 572 }, 573 exp.ConcatWs: lambda self, e: self._annotate_by_args(e, "expressions"), 574 exp.Reverse: _annotate_reverse, 575 } 576 577 TIME_MAPPING = { 578 "YYYY": "%Y", 579 "yyyy": "%Y", 580 "YY": "%y", 581 "yy": "%y", 582 "MMMM": "%B", 583 "mmmm": "%B", 584 "MON": "%b", 585 "mon": "%b", 586 "MM": "%m", 587 "mm": "%m", 588 "DD": "%d", 589 "dd": "%-d", 590 "DY": "%a", 591 "dy": "%w", 592 "HH24": "%H", 593 "hh24": "%H", 594 "HH12": "%I", 595 "hh12": "%I", 596 "MI": "%M", 597 "mi": "%M", 598 "SS": "%S", 599 "ss": "%S", 600 "FF6": "%f", 601 "ff6": "%f", 602 } 603 604 DATE_PART_MAPPING = { 605 **Dialect.DATE_PART_MAPPING, 606 "ISOWEEK": "WEEKISO", 607 } 608 609 def quote_identifier(self, expression: E, identify: bool = True) -> E: 610 # This disables quoting DUAL in SELECT ... 
FROM DUAL, because Snowflake treats an 611 # unquoted DUAL keyword in a special way and does not map it to a user-defined table 612 if ( 613 isinstance(expression, exp.Identifier) 614 and isinstance(expression.parent, exp.Table) 615 and expression.name.lower() == "dual" 616 ): 617 return expression # type: ignore 618 619 return super().quote_identifier(expression, identify=identify) 620 621 class JSONPathTokenizer(jsonpath.JSONPathTokenizer): 622 SINGLE_TOKENS = jsonpath.JSONPathTokenizer.SINGLE_TOKENS.copy() 623 SINGLE_TOKENS.pop("$") 624 625 class Parser(parser.Parser): 626 IDENTIFY_PIVOT_STRINGS = True 627 DEFAULT_SAMPLING_METHOD = "BERNOULLI" 628 COLON_IS_VARIANT_EXTRACT = True 629 JSON_EXTRACT_REQUIRES_JSON_EXPRESSION = True 630 631 ID_VAR_TOKENS = { 632 *parser.Parser.ID_VAR_TOKENS, 633 TokenType.EXCEPT, 634 TokenType.MATCH_CONDITION, 635 } 636 637 TABLE_ALIAS_TOKENS = parser.Parser.TABLE_ALIAS_TOKENS | {TokenType.WINDOW} 638 TABLE_ALIAS_TOKENS.discard(TokenType.MATCH_CONDITION) 639 640 COLON_PLACEHOLDER_TOKENS = ID_VAR_TOKENS | {TokenType.NUMBER} 641 642 FUNCTIONS = { 643 **parser.Parser.FUNCTIONS, 644 "APPROX_PERCENTILE": exp.ApproxQuantile.from_arg_list, 645 "ARRAY_CONSTRUCT": lambda args: exp.Array(expressions=args), 646 "ARRAY_CONTAINS": lambda args: exp.ArrayContains( 647 this=seq_get(args, 1), expression=seq_get(args, 0) 648 ), 649 "ARRAY_GENERATE_RANGE": lambda args: exp.GenerateSeries( 650 # ARRAY_GENERATE_RANGE has an exclusive end; we normalize it to be inclusive 651 start=seq_get(args, 0), 652 end=exp.Sub(this=seq_get(args, 1), expression=exp.Literal.number(1)), 653 step=seq_get(args, 2), 654 ), 655 "ARRAY_SORT": exp.SortArray.from_arg_list, 656 "BITXOR": _build_bitwise(exp.BitwiseXor, "BITXOR"), 657 "BIT_XOR": _build_bitwise(exp.BitwiseXor, "BITXOR"), 658 "BITOR": _build_bitwise(exp.BitwiseOr, "BITOR"), 659 "BIT_OR": _build_bitwise(exp.BitwiseOr, "BITOR"), 660 "BITSHIFTLEFT": _build_bitwise(exp.BitwiseLeftShift, "BITSHIFTLEFT"), 661 "BIT_SHIFTLEFT": _build_bitwise(exp.BitwiseLeftShift, "BIT_SHIFTLEFT"), 662 "BITSHIFTRIGHT": _build_bitwise(exp.BitwiseRightShift, "BITSHIFTRIGHT"), 663 "BIT_SHIFTRIGHT": _build_bitwise(exp.BitwiseRightShift, "BIT_SHIFTRIGHT"), 664 "BITANDAGG": exp.BitwiseAndAgg.from_arg_list, 665 "BITAND_AGG": exp.BitwiseAndAgg.from_arg_list, 666 "BIT_AND_AGG": exp.BitwiseAndAgg.from_arg_list, 667 "BIT_ANDAGG": exp.BitwiseAndAgg.from_arg_list, 668 "BITORAGG": exp.BitwiseOrAgg.from_arg_list, 669 "BITOR_AGG": exp.BitwiseOrAgg.from_arg_list, 670 "BIT_OR_AGG": exp.BitwiseOrAgg.from_arg_list, 671 "BIT_ORAGG": exp.BitwiseOrAgg.from_arg_list, 672 "BITXORAGG": exp.BitwiseXorAgg.from_arg_list, 673 "BITXOR_AGG": exp.BitwiseXorAgg.from_arg_list, 674 "BIT_XOR_AGG": exp.BitwiseXorAgg.from_arg_list, 675 "BIT_XORAGG": exp.BitwiseXorAgg.from_arg_list, 676 "BOOLXOR": _build_bitwise(exp.Xor, "BOOLXOR"), 677 "DATE": _build_datetime("DATE", exp.DataType.Type.DATE), 678 "DATE_TRUNC": _date_trunc_to_time, 679 "DATEADD": _build_date_time_add(exp.DateAdd), 680 "DATEDIFF": _build_datediff, 681 "DIV0": _build_if_from_div0, 682 "EDITDISTANCE": lambda args: exp.Levenshtein( 683 this=seq_get(args, 0), expression=seq_get(args, 1), max_dist=seq_get(args, 2) 684 ), 685 "FLATTEN": exp.Explode.from_arg_list, 686 "GET": exp.GetExtract.from_arg_list, 687 "GET_PATH": lambda args, dialect: exp.JSONExtract( 688 this=seq_get(args, 0), 689 expression=dialect.to_json_path(seq_get(args, 1)), 690 requires_json=True, 691 ), 692 "HEX_DECODE_BINARY": exp.Unhex.from_arg_list, 693 "IFF":
exp.If.from_arg_list, 694 "MD5_HEX": exp.MD5.from_arg_list, 695 "MD5_BINARY": exp.MD5Digest.from_arg_list, 696 "MD5_NUMBER_LOWER64": exp.MD5NumberLower64.from_arg_list, 697 "MD5_NUMBER_UPPER64": exp.MD5NumberUpper64.from_arg_list, 698 "LAST_DAY": lambda args: exp.LastDay( 699 this=seq_get(args, 0), unit=map_date_part(seq_get(args, 1)) 700 ), 701 "LEN": lambda args: exp.Length(this=seq_get(args, 0), binary=True), 702 "LENGTH": lambda args: exp.Length(this=seq_get(args, 0), binary=True), 703 "NULLIFZERO": _build_if_from_nullifzero, 704 "OBJECT_CONSTRUCT": _build_object_construct, 705 "OCTET_LENGTH": exp.ByteLength.from_arg_list, 706 "REGEXP_EXTRACT_ALL": _build_regexp_extract(exp.RegexpExtractAll), 707 "REGEXP_REPLACE": _build_regexp_replace, 708 "REGEXP_SUBSTR": _build_regexp_extract(exp.RegexpExtract), 709 "REGEXP_SUBSTR_ALL": _build_regexp_extract(exp.RegexpExtractAll), 710 "REPLACE": build_replace_with_optional_replacement, 711 "RLIKE": exp.RegexpLike.from_arg_list, 712 "SHA1_BINARY": exp.SHA1Digest.from_arg_list, 713 "SHA1_HEX": exp.SHA.from_arg_list, 714 "SHA2_BINARY": exp.SHA2Digest.from_arg_list, 715 "SHA2_HEX": exp.SHA2.from_arg_list, 716 "SQUARE": lambda args: exp.Pow(this=seq_get(args, 0), expression=exp.Literal.number(2)), 717 "TABLE": lambda args: exp.TableFromRows(this=seq_get(args, 0)), 718 "TIMEADD": _build_date_time_add(exp.TimeAdd), 719 "TIMEDIFF": _build_datediff, 720 "TIMESTAMPADD": _build_date_time_add(exp.DateAdd), 721 "TIMESTAMPDIFF": _build_datediff, 722 "TIMESTAMPFROMPARTS": build_timestamp_from_parts, 723 "TIMESTAMP_FROM_PARTS": build_timestamp_from_parts, 724 "TIMESTAMPNTZFROMPARTS": build_timestamp_from_parts, 725 "TIMESTAMP_NTZ_FROM_PARTS": build_timestamp_from_parts, 726 "TRY_PARSE_JSON": lambda args: exp.ParseJSON(this=seq_get(args, 0), safe=True), 727 "TRY_TO_DATE": _build_datetime("TRY_TO_DATE", exp.DataType.Type.DATE, safe=True), 728 "TRY_TO_TIME": _build_datetime("TRY_TO_TIME", exp.DataType.Type.TIME, safe=True), 729 "TRY_TO_TIMESTAMP": _build_datetime( 730 "TRY_TO_TIMESTAMP", exp.DataType.Type.TIMESTAMP, safe=True 731 ), 732 "TO_CHAR": build_timetostr_or_tochar, 733 "TO_DATE": _build_datetime("TO_DATE", exp.DataType.Type.DATE), 734 "TO_NUMBER": lambda args: exp.ToNumber( 735 this=seq_get(args, 0), 736 format=seq_get(args, 1), 737 precision=seq_get(args, 2), 738 scale=seq_get(args, 3), 739 ), 740 "TO_TIME": _build_datetime("TO_TIME", exp.DataType.Type.TIME), 741 "TO_TIMESTAMP": _build_datetime("TO_TIMESTAMP", exp.DataType.Type.TIMESTAMP), 742 "TO_TIMESTAMP_LTZ": _build_datetime("TO_TIMESTAMP_LTZ", exp.DataType.Type.TIMESTAMPLTZ), 743 "TO_TIMESTAMP_NTZ": _build_datetime("TO_TIMESTAMP_NTZ", exp.DataType.Type.TIMESTAMP), 744 "TO_TIMESTAMP_TZ": _build_datetime("TO_TIMESTAMP_TZ", exp.DataType.Type.TIMESTAMPTZ), 745 "TO_VARCHAR": build_timetostr_or_tochar, 746 "TO_JSON": exp.JSONFormat.from_arg_list, 747 "VECTOR_L2_DISTANCE": exp.EuclideanDistance.from_arg_list, 748 "ZEROIFNULL": _build_if_from_zeroifnull, 749 } 750 FUNCTIONS.pop("PREDICT") 751 752 FUNCTION_PARSERS = { 753 **parser.Parser.FUNCTION_PARSERS, 754 "DATE_PART": lambda self: self._parse_date_part(), 755 "OBJECT_CONSTRUCT_KEEP_NULL": lambda self: self._parse_json_object(), 756 "LISTAGG": lambda self: self._parse_string_agg(), 757 "SEMANTIC_VIEW": lambda self: self._parse_semantic_view(), 758 } 759 FUNCTION_PARSERS.pop("TRIM") 760 761 TIMESTAMPS = parser.Parser.TIMESTAMPS - {TokenType.TIME} 762 763 ALTER_PARSERS = { 764 **parser.Parser.ALTER_PARSERS, 765 "SESSION": lambda self: 
self._parse_alter_session(), 766 "UNSET": lambda self: self.expression( 767 exp.Set, 768 tag=self._match_text_seq("TAG"), 769 expressions=self._parse_csv(self._parse_id_var), 770 unset=True, 771 ), 772 } 773 774 STATEMENT_PARSERS = { 775 **parser.Parser.STATEMENT_PARSERS, 776 TokenType.GET: lambda self: self._parse_get(), 777 TokenType.PUT: lambda self: self._parse_put(), 778 TokenType.SHOW: lambda self: self._parse_show(), 779 } 780 781 PROPERTY_PARSERS = { 782 **parser.Parser.PROPERTY_PARSERS, 783 "CREDENTIALS": lambda self: self._parse_credentials_property(), 784 "FILE_FORMAT": lambda self: self._parse_file_format_property(), 785 "LOCATION": lambda self: self._parse_location_property(), 786 "TAG": lambda self: self._parse_tag(), 787 "USING": lambda self: self._match_text_seq("TEMPLATE") 788 and self.expression(exp.UsingTemplateProperty, this=self._parse_statement()), 789 } 790 791 TYPE_CONVERTERS = { 792 # https://docs.snowflake.com/en/sql-reference/data-types-numeric#number 793 exp.DataType.Type.DECIMAL: build_default_decimal_type(precision=38, scale=0), 794 } 795 796 SHOW_PARSERS = { 797 "DATABASES": _show_parser("DATABASES"), 798 "TERSE DATABASES": _show_parser("DATABASES"), 799 "SCHEMAS": _show_parser("SCHEMAS"), 800 "TERSE SCHEMAS": _show_parser("SCHEMAS"), 801 "OBJECTS": _show_parser("OBJECTS"), 802 "TERSE OBJECTS": _show_parser("OBJECTS"), 803 "TABLES": _show_parser("TABLES"), 804 "TERSE TABLES": _show_parser("TABLES"), 805 "VIEWS": _show_parser("VIEWS"), 806 "TERSE VIEWS": _show_parser("VIEWS"), 807 "PRIMARY KEYS": _show_parser("PRIMARY KEYS"), 808 "TERSE PRIMARY KEYS": _show_parser("PRIMARY KEYS"), 809 "IMPORTED KEYS": _show_parser("IMPORTED KEYS"), 810 "TERSE IMPORTED KEYS": _show_parser("IMPORTED KEYS"), 811 "UNIQUE KEYS": _show_parser("UNIQUE KEYS"), 812 "TERSE UNIQUE KEYS": _show_parser("UNIQUE KEYS"), 813 "SEQUENCES": _show_parser("SEQUENCES"), 814 "TERSE SEQUENCES": _show_parser("SEQUENCES"), 815 "STAGES": _show_parser("STAGES"), 816 "COLUMNS": _show_parser("COLUMNS"), 817 "USERS": _show_parser("USERS"), 818 "TERSE USERS": _show_parser("USERS"), 819 "FILE FORMATS": _show_parser("FILE FORMATS"), 820 "FUNCTIONS": _show_parser("FUNCTIONS"), 821 "PROCEDURES": _show_parser("PROCEDURES"), 822 "WAREHOUSES": _show_parser("WAREHOUSES"), 823 } 824 825 CONSTRAINT_PARSERS = { 826 **parser.Parser.CONSTRAINT_PARSERS, 827 "WITH": lambda self: self._parse_with_constraint(), 828 "MASKING": lambda self: self._parse_with_constraint(), 829 "PROJECTION": lambda self: self._parse_with_constraint(), 830 "TAG": lambda self: self._parse_with_constraint(), 831 } 832 833 STAGED_FILE_SINGLE_TOKENS = { 834 TokenType.DOT, 835 TokenType.MOD, 836 TokenType.SLASH, 837 } 838 839 FLATTEN_COLUMNS = ["SEQ", "KEY", "PATH", "INDEX", "VALUE", "THIS"] 840 841 SCHEMA_KINDS = {"OBJECTS", "TABLES", "VIEWS", "SEQUENCES", "UNIQUE KEYS", "IMPORTED KEYS"} 842 843 NON_TABLE_CREATABLES = {"STORAGE INTEGRATION", "TAG", "WAREHOUSE", "STREAMLIT"} 844 845 LAMBDAS = { 846 **parser.Parser.LAMBDAS, 847 TokenType.ARROW: lambda self, expressions: self.expression( 848 exp.Lambda, 849 this=self._replace_lambda( 850 self._parse_assignment(), 851 expressions, 852 ), 853 expressions=[e.this if isinstance(e, exp.Cast) else e for e in expressions], 854 ), 855 } 856 857 COLUMN_OPERATORS = { 858 **parser.Parser.COLUMN_OPERATORS, 859 TokenType.EXCLAMATION: lambda self, this, attr: self.expression( 860 exp.ModelAttribute, this=this, expression=attr 861 ), 862 } 863 864 def _parse_use(self) -> exp.Use: 865 if 
self._match_text_seq("SECONDARY", "ROLES"): 866 this = self._match_texts(("ALL", "NONE")) and exp.var(self._prev.text.upper()) 867 roles = None if this else self._parse_csv(lambda: self._parse_table(schema=False)) 868 return self.expression( 869 exp.Use, kind="SECONDARY ROLES", this=this, expressions=roles 870 ) 871 872 return super()._parse_use() 873 874 def _negate_range( 875 self, this: t.Optional[exp.Expression] = None 876 ) -> t.Optional[exp.Expression]: 877 if not this: 878 return this 879 880 query = this.args.get("query") 881 if isinstance(this, exp.In) and isinstance(query, exp.Query): 882 # Snowflake treats `value NOT IN (subquery)` as `VALUE <> ALL (subquery)`, so 883 # we do this conversion here to avoid parsing it into `NOT value IN (subquery)` 884 # which can produce different results (most likely a SnowFlake bug). 885 # 886 # https://docs.snowflake.com/en/sql-reference/functions/in 887 # Context: https://github.com/tobymao/sqlglot/issues/3890 888 return self.expression( 889 exp.NEQ, this=this.this, expression=exp.All(this=query.unnest()) 890 ) 891 892 return self.expression(exp.Not, this=this) 893 894 def _parse_tag(self) -> exp.Tags: 895 return self.expression( 896 exp.Tags, 897 expressions=self._parse_wrapped_csv(self._parse_property), 898 ) 899 900 def _parse_with_constraint(self) -> t.Optional[exp.Expression]: 901 if self._prev.token_type != TokenType.WITH: 902 self._retreat(self._index - 1) 903 904 if self._match_text_seq("MASKING", "POLICY"): 905 policy = self._parse_column() 906 return self.expression( 907 exp.MaskingPolicyColumnConstraint, 908 this=policy.to_dot() if isinstance(policy, exp.Column) else policy, 909 expressions=self._match(TokenType.USING) 910 and self._parse_wrapped_csv(self._parse_id_var), 911 ) 912 if self._match_text_seq("PROJECTION", "POLICY"): 913 policy = self._parse_column() 914 return self.expression( 915 exp.ProjectionPolicyColumnConstraint, 916 this=policy.to_dot() if isinstance(policy, exp.Column) else policy, 917 ) 918 if self._match(TokenType.TAG): 919 return self._parse_tag() 920 921 return None 922 923 def _parse_with_property(self) -> t.Optional[exp.Expression] | t.List[exp.Expression]: 924 if self._match(TokenType.TAG): 925 return self._parse_tag() 926 927 return super()._parse_with_property() 928 929 def _parse_create(self) -> exp.Create | exp.Command: 930 expression = super()._parse_create() 931 if isinstance(expression, exp.Create) and expression.kind in self.NON_TABLE_CREATABLES: 932 # Replace the Table node with the enclosed Identifier 933 expression.this.replace(expression.this.this) 934 935 return expression 936 937 # https://docs.snowflake.com/en/sql-reference/functions/date_part.html 938 # https://docs.snowflake.com/en/sql-reference/functions-date-time.html#label-supported-date-time-parts 939 def _parse_date_part(self: Snowflake.Parser) -> t.Optional[exp.Expression]: 940 this = self._parse_var() or self._parse_type() 941 942 if not this: 943 return None 944 945 self._match(TokenType.COMMA) 946 expression = self._parse_bitwise() 947 this = map_date_part(this) 948 name = this.name.upper() 949 950 if name.startswith("EPOCH"): 951 if name == "EPOCH_MILLISECOND": 952 scale = 10**3 953 elif name == "EPOCH_MICROSECOND": 954 scale = 10**6 955 elif name == "EPOCH_NANOSECOND": 956 scale = 10**9 957 else: 958 scale = None 959 960 ts = self.expression(exp.Cast, this=expression, to=exp.DataType.build("TIMESTAMP")) 961 to_unix: exp.Expression = self.expression(exp.TimeToUnix, this=ts) 962 963 if scale: 964 to_unix = exp.Mul(this=to_unix, 
expression=exp.Literal.number(scale)) 965 966 return to_unix 967 968 return self.expression(exp.Extract, this=this, expression=expression) 969 970 def _parse_bracket_key_value(self, is_map: bool = False) -> t.Optional[exp.Expression]: 971 if is_map: 972 # Keys are strings in Snowflake's objects, see also: 973 # - https://docs.snowflake.com/en/sql-reference/data-types-semistructured 974 # - https://docs.snowflake.com/en/sql-reference/functions/object_construct 975 return self._parse_slice(self._parse_string()) or self._parse_assignment() 976 977 return self._parse_slice(self._parse_alias(self._parse_assignment(), explicit=True)) 978 979 def _parse_lateral(self) -> t.Optional[exp.Lateral]: 980 lateral = super()._parse_lateral() 981 if not lateral: 982 return lateral 983 984 if isinstance(lateral.this, exp.Explode): 985 table_alias = lateral.args.get("alias") 986 columns = [exp.to_identifier(col) for col in self.FLATTEN_COLUMNS] 987 if table_alias and not table_alias.args.get("columns"): 988 table_alias.set("columns", columns) 989 elif not table_alias: 990 exp.alias_(lateral, "_flattened", table=columns, copy=False) 991 992 return lateral 993 994 def _parse_table_parts( 995 self, schema: bool = False, is_db_reference: bool = False, wildcard: bool = False 996 ) -> exp.Table: 997 # https://docs.snowflake.com/en/user-guide/querying-stage 998 if self._match(TokenType.STRING, advance=False): 999 table = self._parse_string() 1000 elif self._match_text_seq("@", advance=False): 1001 table = self._parse_location_path() 1002 else: 1003 table = None 1004 1005 if table: 1006 file_format = None 1007 pattern = None 1008 1009 wrapped = self._match(TokenType.L_PAREN) 1010 while self._curr and wrapped and not self._match(TokenType.R_PAREN): 1011 if self._match_text_seq("FILE_FORMAT", "=>"): 1012 file_format = self._parse_string() or super()._parse_table_parts( 1013 is_db_reference=is_db_reference 1014 ) 1015 elif self._match_text_seq("PATTERN", "=>"): 1016 pattern = self._parse_string() 1017 else: 1018 break 1019 1020 self._match(TokenType.COMMA) 1021 1022 table = self.expression(exp.Table, this=table, format=file_format, pattern=pattern) 1023 else: 1024 table = super()._parse_table_parts(schema=schema, is_db_reference=is_db_reference) 1025 1026 return table 1027 1028 def _parse_table( 1029 self, 1030 schema: bool = False, 1031 joins: bool = False, 1032 alias_tokens: t.Optional[t.Collection[TokenType]] = None, 1033 parse_bracket: bool = False, 1034 is_db_reference: bool = False, 1035 parse_partition: bool = False, 1036 consume_pipe: bool = False, 1037 ) -> t.Optional[exp.Expression]: 1038 table = super()._parse_table( 1039 schema=schema, 1040 joins=joins, 1041 alias_tokens=alias_tokens, 1042 parse_bracket=parse_bracket, 1043 is_db_reference=is_db_reference, 1044 parse_partition=parse_partition, 1045 ) 1046 if isinstance(table, exp.Table) and isinstance(table.this, exp.TableFromRows): 1047 table_from_rows = table.this 1048 for arg in exp.TableFromRows.arg_types: 1049 if arg != "this": 1050 table_from_rows.set(arg, table.args.get(arg)) 1051 1052 table = table_from_rows 1053 1054 return table 1055 1056 def _parse_id_var( 1057 self, 1058 any_token: bool = True, 1059 tokens: t.Optional[t.Collection[TokenType]] = None, 1060 ) -> t.Optional[exp.Expression]: 1061 if self._match_text_seq("IDENTIFIER", "("): 1062 identifier = ( 1063 super()._parse_id_var(any_token=any_token, tokens=tokens) 1064 or self._parse_string() 1065 ) 1066 self._match_r_paren() 1067 return self.expression(exp.Anonymous, this="IDENTIFIER", 
expressions=[identifier]) 1068 1069 return super()._parse_id_var(any_token=any_token, tokens=tokens) 1070 1071 def _parse_show_snowflake(self, this: str) -> exp.Show: 1072 scope = None 1073 scope_kind = None 1074 1075 # will identify SHOW TERSE SCHEMAS but not SHOW TERSE PRIMARY KEYS 1076 # which is syntactically valid but has no effect on the output 1077 terse = self._tokens[self._index - 2].text.upper() == "TERSE" 1078 1079 history = self._match_text_seq("HISTORY") 1080 1081 like = self._parse_string() if self._match(TokenType.LIKE) else None 1082 1083 if self._match(TokenType.IN): 1084 if self._match_text_seq("ACCOUNT"): 1085 scope_kind = "ACCOUNT" 1086 elif self._match_text_seq("CLASS"): 1087 scope_kind = "CLASS" 1088 scope = self._parse_table_parts() 1089 elif self._match_text_seq("APPLICATION"): 1090 scope_kind = "APPLICATION" 1091 if self._match_text_seq("PACKAGE"): 1092 scope_kind += " PACKAGE" 1093 scope = self._parse_table_parts() 1094 elif self._match_set(self.DB_CREATABLES): 1095 scope_kind = self._prev.text.upper() 1096 if self._curr: 1097 scope = self._parse_table_parts() 1098 elif self._curr: 1099 scope_kind = "SCHEMA" if this in self.SCHEMA_KINDS else "TABLE" 1100 scope = self._parse_table_parts() 1101 1102 return self.expression( 1103 exp.Show, 1104 **{ 1105 "terse": terse, 1106 "this": this, 1107 "history": history, 1108 "like": like, 1109 "scope": scope, 1110 "scope_kind": scope_kind, 1111 "starts_with": self._match_text_seq("STARTS", "WITH") and self._parse_string(), 1112 "limit": self._parse_limit(), 1113 "from": self._parse_string() if self._match(TokenType.FROM) else None, 1114 "privileges": self._match_text_seq("WITH", "PRIVILEGES") 1115 and self._parse_csv(lambda: self._parse_var(any_token=True, upper=True)), 1116 }, 1117 ) 1118 1119 def _parse_put(self) -> exp.Put | exp.Command: 1120 if self._curr.token_type != TokenType.STRING: 1121 return self._parse_as_command(self._prev) 1122 1123 return self.expression( 1124 exp.Put, 1125 this=self._parse_string(), 1126 target=self._parse_location_path(), 1127 properties=self._parse_properties(), 1128 ) 1129 1130 def _parse_get(self) -> t.Optional[exp.Expression]: 1131 start = self._prev 1132 1133 # If we detect GET( then we need to parse a function, not a statement 1134 if self._match(TokenType.L_PAREN): 1135 self._retreat(self._index - 2) 1136 return self._parse_expression() 1137 1138 target = self._parse_location_path() 1139 1140 # Parse as command if unquoted file path 1141 if self._curr.token_type == TokenType.URI_START: 1142 return self._parse_as_command(start) 1143 1144 return self.expression( 1145 exp.Get, 1146 this=self._parse_string(), 1147 target=target, 1148 properties=self._parse_properties(), 1149 ) 1150 1151 def _parse_location_property(self) -> exp.LocationProperty: 1152 self._match(TokenType.EQ) 1153 return self.expression(exp.LocationProperty, this=self._parse_location_path()) 1154 1155 def _parse_file_location(self) -> t.Optional[exp.Expression]: 1156 # Parse either a subquery or a staged file 1157 return ( 1158 self._parse_select(table=True, parse_subquery_alias=False) 1159 if self._match(TokenType.L_PAREN, advance=False) 1160 else self._parse_table_parts() 1161 ) 1162 1163 def _parse_location_path(self) -> exp.Var: 1164 start = self._curr 1165 self._advance_any(ignore_reserved=True) 1166 1167 # We avoid consuming a comma token because external tables like @foo and @bar 1168 # can be joined in a query with a comma separator, as well as closing paren 1169 # in case of subqueries 1170 while
self._is_connected() and not self._match_set( 1171 (TokenType.COMMA, TokenType.L_PAREN, TokenType.R_PAREN), advance=False 1172 ): 1173 self._advance_any(ignore_reserved=True) 1174 1175 return exp.var(self._find_sql(start, self._prev)) 1176 1177 def _parse_lambda_arg(self) -> t.Optional[exp.Expression]: 1178 this = super()._parse_lambda_arg() 1179 1180 if not this: 1181 return this 1182 1183 typ = self._parse_types() 1184 1185 if typ: 1186 return self.expression(exp.Cast, this=this, to=typ) 1187 1188 return this 1189 1190 def _parse_foreign_key(self) -> exp.ForeignKey: 1191 # inlineFK, the REFERENCES columns are implied 1192 if self._match(TokenType.REFERENCES, advance=False): 1193 return self.expression(exp.ForeignKey) 1194 1195 # outoflineFK, explicitly names the columns 1196 return super()._parse_foreign_key() 1197 1198 def _parse_file_format_property(self) -> exp.FileFormatProperty: 1199 self._match(TokenType.EQ) 1200 if self._match(TokenType.L_PAREN, advance=False): 1201 expressions = self._parse_wrapped_options() 1202 else: 1203 expressions = [self._parse_format_name()] 1204 1205 return self.expression( 1206 exp.FileFormatProperty, 1207 expressions=expressions, 1208 ) 1209 1210 def _parse_credentials_property(self) -> exp.CredentialsProperty: 1211 return self.expression( 1212 exp.CredentialsProperty, 1213 expressions=self._parse_wrapped_options(), 1214 ) 1215 1216 def _parse_semantic_view(self) -> exp.SemanticView: 1217 kwargs: t.Dict[str, t.Any] = {"this": self._parse_table_parts()} 1218 1219 while self._curr and not self._match(TokenType.R_PAREN, advance=False): 1220 if self._match_text_seq("DIMENSIONS"): 1221 kwargs["dimensions"] = self._parse_csv(self._parse_disjunction) 1222 if self._match_text_seq("METRICS"): 1223 kwargs["metrics"] = self._parse_csv(self._parse_disjunction) 1224 if self._match_text_seq("WHERE"): 1225 kwargs["where"] = self._parse_expression() 1226 1227 return self.expression(exp.SemanticView, **kwargs) 1228 1229 class Tokenizer(tokens.Tokenizer): 1230 STRING_ESCAPES = ["\\", "'"] 1231 HEX_STRINGS = [("x'", "'"), ("X'", "'")] 1232 RAW_STRINGS = ["$$"] 1233 COMMENTS = ["--", "//", ("/*", "*/")] 1234 NESTED_COMMENTS = False 1235 1236 KEYWORDS = { 1237 **tokens.Tokenizer.KEYWORDS, 1238 "BYTEINT": TokenType.INT, 1239 "FILE://": TokenType.URI_START, 1240 "FILE FORMAT": TokenType.FILE_FORMAT, 1241 "GET": TokenType.GET, 1242 "MATCH_CONDITION": TokenType.MATCH_CONDITION, 1243 "MATCH_RECOGNIZE": TokenType.MATCH_RECOGNIZE, 1244 "MINUS": TokenType.EXCEPT, 1245 "NCHAR VARYING": TokenType.VARCHAR, 1246 "PUT": TokenType.PUT, 1247 "REMOVE": TokenType.COMMAND, 1248 "RM": TokenType.COMMAND, 1249 "SAMPLE": TokenType.TABLE_SAMPLE, 1250 "SEMANTIC VIEW": TokenType.SEMANTIC_VIEW, 1251 "SQL_DOUBLE": TokenType.DOUBLE, 1252 "SQL_VARCHAR": TokenType.VARCHAR, 1253 "STAGE": TokenType.STAGE, 1254 "STORAGE INTEGRATION": TokenType.STORAGE_INTEGRATION, 1255 "STREAMLIT": TokenType.STREAMLIT, 1256 "TAG": TokenType.TAG, 1257 "TIMESTAMP_TZ": TokenType.TIMESTAMPTZ, 1258 "TOP": TokenType.TOP, 1259 "WAREHOUSE": TokenType.WAREHOUSE, 1260 } 1261 KEYWORDS.pop("/*+") 1262 1263 SINGLE_TOKENS = { 1264 **tokens.Tokenizer.SINGLE_TOKENS, 1265 "$": TokenType.PARAMETER, 1266 "!": TokenType.EXCLAMATION, 1267 } 1268 1269 VAR_SINGLE_TOKENS = {"$"} 1270 1271 COMMANDS = tokens.Tokenizer.COMMANDS - {TokenType.SHOW} 1272 1273 class Generator(generator.Generator): 1274 PARAMETER_TOKEN = "$" 1275 MATCHED_BY_SOURCE = False 1276 SINGLE_STRING_INTERVAL = True 1277 JOIN_HINTS = False 1278 TABLE_HINTS = False 1279 QUERY_HINTS 
= False 1280 AGGREGATE_FILTER_SUPPORTED = False 1281 SUPPORTS_TABLE_COPY = False 1282 COLLATE_IS_FUNC = True 1283 LIMIT_ONLY_LITERALS = True 1284 JSON_KEY_VALUE_PAIR_SEP = "," 1285 INSERT_OVERWRITE = " OVERWRITE INTO" 1286 STRUCT_DELIMITER = ("(", ")") 1287 COPY_PARAMS_ARE_WRAPPED = False 1288 COPY_PARAMS_EQ_REQUIRED = True 1289 STAR_EXCEPT = "EXCLUDE" 1290 SUPPORTS_EXPLODING_PROJECTIONS = False 1291 ARRAY_CONCAT_IS_VAR_LEN = False 1292 SUPPORTS_CONVERT_TIMEZONE = True 1293 EXCEPT_INTERSECT_SUPPORT_ALL_CLAUSE = False 1294 SUPPORTS_MEDIAN = True 1295 ARRAY_SIZE_NAME = "ARRAY_SIZE" 1296 SUPPORTS_DECODE_CASE = True 1297 IS_BOOL_ALLOWED = False 1298 1299 TRANSFORMS = { 1300 **generator.Generator.TRANSFORMS, 1301 exp.ApproxDistinct: rename_func("APPROX_COUNT_DISTINCT"), 1302 exp.ArgMax: rename_func("MAX_BY"), 1303 exp.ArgMin: rename_func("MIN_BY"), 1304 exp.ArrayConcat: lambda self, e: self.arrayconcat_sql(e, name="ARRAY_CAT"), 1305 exp.ArrayContains: lambda self, e: self.func("ARRAY_CONTAINS", e.expression, e.this), 1306 exp.ArrayIntersect: rename_func("ARRAY_INTERSECTION"), 1307 exp.AtTimeZone: lambda self, e: self.func( 1308 "CONVERT_TIMEZONE", e.args.get("zone"), e.this 1309 ), 1310 exp.BitwiseOr: rename_func("BITOR"), 1311 exp.BitwiseXor: rename_func("BITXOR"), 1312 exp.BitwiseAnd: rename_func("BITAND"), 1313 exp.BitwiseAndAgg: rename_func("BITANDAGG"), 1314 exp.BitwiseOrAgg: rename_func("BITORAGG"), 1315 exp.BitwiseXorAgg: rename_func("BITXORAGG"), 1316 exp.BitwiseNot: rename_func("BITNOT"), 1317 exp.BitwiseLeftShift: rename_func("BITSHIFTLEFT"), 1318 exp.BitwiseRightShift: rename_func("BITSHIFTRIGHT"), 1319 exp.Create: transforms.preprocess([_flatten_structured_types_unless_iceberg]), 1320 exp.DateAdd: date_delta_sql("DATEADD"), 1321 exp.DateDiff: date_delta_sql("DATEDIFF"), 1322 exp.DatetimeAdd: date_delta_sql("TIMESTAMPADD"), 1323 exp.DatetimeDiff: timestampdiff_sql, 1324 exp.DateStrToDate: datestrtodate_sql, 1325 exp.DayOfMonth: rename_func("DAYOFMONTH"), 1326 exp.DayOfWeek: rename_func("DAYOFWEEK"), 1327 exp.DayOfWeekIso: rename_func("DAYOFWEEKISO"), 1328 exp.DayOfYear: rename_func("DAYOFYEAR"), 1329 exp.Explode: rename_func("FLATTEN"), 1330 exp.Extract: lambda self, e: self.func( 1331 "DATE_PART", map_date_part(e.this, self.dialect), e.expression 1332 ), 1333 exp.EuclideanDistance: rename_func("VECTOR_L2_DISTANCE"), 1334 exp.FileFormatProperty: lambda self, 1335 e: f"FILE_FORMAT=({self.expressions(e, 'expressions', sep=' ')})", 1336 exp.FromTimeZone: lambda self, e: self.func( 1337 "CONVERT_TIMEZONE", e.args.get("zone"), "'UTC'", e.this 1338 ), 1339 exp.GenerateSeries: lambda self, e: self.func( 1340 "ARRAY_GENERATE_RANGE", e.args["start"], e.args["end"] + 1, e.args.get("step") 1341 ), 1342 exp.GetExtract: rename_func("GET"), 1343 exp.GroupConcat: lambda self, e: groupconcat_sql(self, e, sep=""), 1344 exp.If: if_sql(name="IFF", false_value="NULL"), 1345 exp.JSONExtractArray: _json_extract_value_array_sql, 1346 exp.JSONExtractScalar: lambda self, e: self.func( 1347 "JSON_EXTRACT_PATH_TEXT", e.this, e.expression 1348 ), 1349 exp.JSONObject: lambda self, e: self.func("OBJECT_CONSTRUCT_KEEP_NULL", *e.expressions), 1350 exp.JSONPathRoot: lambda *_: "", 1351 exp.JSONValueArray: _json_extract_value_array_sql, 1352 exp.Levenshtein: unsupported_args("ins_cost", "del_cost", "sub_cost")( 1353 rename_func("EDITDISTANCE") 1354 ), 1355 exp.LocationProperty: lambda self, e: f"LOCATION={self.sql(e, 'this')}", 1356 exp.LogicalAnd: rename_func("BOOLAND_AGG"), 1357 exp.LogicalOr: 
rename_func("BOOLOR_AGG"), 1358 exp.Map: lambda self, e: var_map_sql(self, e, "OBJECT_CONSTRUCT"), 1359 exp.MakeInterval: no_make_interval_sql, 1360 exp.Max: max_or_greatest, 1361 exp.Min: min_or_least, 1362 exp.ParseJSON: lambda self, e: self.func( 1363 "TRY_PARSE_JSON" if e.args.get("safe") else "PARSE_JSON", e.this 1364 ), 1365 exp.JSONFormat: rename_func("TO_JSON"), 1366 exp.PartitionedByProperty: lambda self, e: f"PARTITION BY {self.sql(e, 'this')}", 1367 exp.PercentileCont: transforms.preprocess( 1368 [transforms.add_within_group_for_percentiles] 1369 ), 1370 exp.PercentileDisc: transforms.preprocess( 1371 [transforms.add_within_group_for_percentiles] 1372 ), 1373 exp.Pivot: transforms.preprocess([_unqualify_pivot_columns]), 1374 exp.RegexpExtract: _regexpextract_sql, 1375 exp.RegexpExtractAll: _regexpextract_sql, 1376 exp.RegexpILike: _regexpilike_sql, 1377 exp.Rand: rename_func("RANDOM"), 1378 exp.Select: transforms.preprocess( 1379 [ 1380 transforms.eliminate_window_clause, 1381 transforms.eliminate_distinct_on, 1382 transforms.explode_projection_to_unnest(), 1383 transforms.eliminate_semi_and_anti_joins, 1384 _transform_generate_date_array, 1385 _qualify_unnested_columns, 1386 _eliminate_dot_variant_lookup, 1387 ] 1388 ), 1389 exp.SHA: rename_func("SHA1"), 1390 exp.MD5Digest: rename_func("MD5_BINARY"), 1391 exp.MD5NumberLower64: rename_func("MD5_NUMBER_LOWER64"), 1392 exp.MD5NumberUpper64: rename_func("MD5_NUMBER_UPPER64"), 1393 exp.LowerHex: rename_func("TO_CHAR"), 1394 exp.SortArray: rename_func("ARRAY_SORT"), 1395 exp.StarMap: rename_func("OBJECT_CONSTRUCT"), 1396 exp.StartsWith: rename_func("STARTSWITH"), 1397 exp.EndsWith: rename_func("ENDSWITH"), 1398 exp.StrPosition: lambda self, e: strposition_sql( 1399 self, e, func_name="CHARINDEX", supports_position=True 1400 ), 1401 exp.StrToDate: lambda self, e: self.func("DATE", e.this, self.format_time(e)), 1402 exp.StringToArray: rename_func("STRTOK_TO_ARRAY"), 1403 exp.Stuff: rename_func("INSERT"), 1404 exp.StPoint: rename_func("ST_MAKEPOINT"), 1405 exp.TimeAdd: date_delta_sql("TIMEADD"), 1406 exp.Timestamp: no_timestamp_sql, 1407 exp.TimestampAdd: date_delta_sql("TIMESTAMPADD"), 1408 exp.TimestampDiff: lambda self, e: self.func( 1409 "TIMESTAMPDIFF", e.unit, e.expression, e.this 1410 ), 1411 exp.TimestampTrunc: timestamptrunc_sql(), 1412 exp.TimeStrToTime: timestrtotime_sql, 1413 exp.TimeToUnix: lambda self, e: f"EXTRACT(epoch_second FROM {self.sql(e, 'this')})", 1414 exp.ToArray: rename_func("TO_ARRAY"), 1415 exp.ToChar: lambda self, e: self.function_fallback_sql(e), 1416 exp.ToDouble: rename_func("TO_DOUBLE"), 1417 exp.TsOrDsAdd: date_delta_sql("DATEADD", cast=True), 1418 exp.TsOrDsDiff: date_delta_sql("DATEDIFF"), 1419 exp.TsOrDsToDate: lambda self, e: self.func( 1420 "TRY_TO_DATE" if e.args.get("safe") else "TO_DATE", e.this, self.format_time(e) 1421 ), 1422 exp.TsOrDsToTime: lambda self, e: self.func( 1423 "TRY_TO_TIME" if e.args.get("safe") else "TO_TIME", e.this, self.format_time(e) 1424 ), 1425 exp.Unhex: rename_func("HEX_DECODE_BINARY"), 1426 exp.UnixToTime: rename_func("TO_TIMESTAMP"), 1427 exp.Uuid: rename_func("UUID_STRING"), 1428 exp.VarMap: lambda self, e: var_map_sql(self, e, "OBJECT_CONSTRUCT"), 1429 exp.WeekOfYear: rename_func("WEEKOFYEAR"), 1430 exp.Xor: rename_func("BOOLXOR"), 1431 exp.ByteLength: rename_func("OCTET_LENGTH"), 1432 } 1433 1434 SUPPORTED_JSON_PATH_PARTS = { 1435 exp.JSONPathKey, 1436 exp.JSONPathRoot, 1437 exp.JSONPathSubscript, 1438 } 1439 1440 TYPE_MAPPING = { 1441 
**generator.Generator.TYPE_MAPPING, 1442 exp.DataType.Type.BIGDECIMAL: "DOUBLE", 1443 exp.DataType.Type.NESTED: "OBJECT", 1444 exp.DataType.Type.STRUCT: "OBJECT", 1445 exp.DataType.Type.TEXT: "VARCHAR", 1446 } 1447 1448 TOKEN_MAPPING = { 1449 TokenType.AUTO_INCREMENT: "AUTOINCREMENT", 1450 } 1451 1452 PROPERTIES_LOCATION = { 1453 **generator.Generator.PROPERTIES_LOCATION, 1454 exp.CredentialsProperty: exp.Properties.Location.POST_WITH, 1455 exp.LocationProperty: exp.Properties.Location.POST_WITH, 1456 exp.PartitionedByProperty: exp.Properties.Location.POST_SCHEMA, 1457 exp.SetProperty: exp.Properties.Location.UNSUPPORTED, 1458 exp.VolatileProperty: exp.Properties.Location.UNSUPPORTED, 1459 } 1460 1461 UNSUPPORTED_VALUES_EXPRESSIONS = { 1462 exp.Map, 1463 exp.StarMap, 1464 exp.Struct, 1465 exp.VarMap, 1466 } 1467 1468 RESPECT_IGNORE_NULLS_UNSUPPORTED_EXPRESSIONS = (exp.ArrayAgg,) 1469 1470 def with_properties(self, properties: exp.Properties) -> str: 1471 return self.properties(properties, wrapped=False, prefix=self.sep(""), sep=" ") 1472 1473 def values_sql(self, expression: exp.Values, values_as_table: bool = True) -> str: 1474 if expression.find(*self.UNSUPPORTED_VALUES_EXPRESSIONS): 1475 values_as_table = False 1476 1477 return super().values_sql(expression, values_as_table=values_as_table) 1478 1479 def datatype_sql(self, expression: exp.DataType) -> str: 1480 expressions = expression.expressions 1481 if ( 1482 expressions 1483 and expression.is_type(*exp.DataType.STRUCT_TYPES) 1484 and any(isinstance(field_type, exp.DataType) for field_type in expressions) 1485 ): 1486 # The correct syntax is OBJECT [ (<key> <value_type [NOT NULL] [, ...]) ] 1487 return "OBJECT" 1488 1489 return super().datatype_sql(expression) 1490 1491 def tonumber_sql(self, expression: exp.ToNumber) -> str: 1492 return self.func( 1493 "TO_NUMBER", 1494 expression.this, 1495 expression.args.get("format"), 1496 expression.args.get("precision"), 1497 expression.args.get("scale"), 1498 ) 1499 1500 def timestampfromparts_sql(self, expression: exp.TimestampFromParts) -> str: 1501 milli = expression.args.get("milli") 1502 if milli is not None: 1503 milli_to_nano = milli.pop() * exp.Literal.number(1000000) 1504 expression.set("nano", milli_to_nano) 1505 1506 return rename_func("TIMESTAMP_FROM_PARTS")(self, expression) 1507 1508 def cast_sql(self, expression: exp.Cast, safe_prefix: t.Optional[str] = None) -> str: 1509 if expression.is_type(exp.DataType.Type.GEOGRAPHY): 1510 return self.func("TO_GEOGRAPHY", expression.this) 1511 if expression.is_type(exp.DataType.Type.GEOMETRY): 1512 return self.func("TO_GEOMETRY", expression.this) 1513 1514 return super().cast_sql(expression, safe_prefix=safe_prefix) 1515 1516 def trycast_sql(self, expression: exp.TryCast) -> str: 1517 value = expression.this 1518 1519 if value.type is None: 1520 from sqlglot.optimizer.annotate_types import annotate_types 1521 1522 value = annotate_types(value, dialect=self.dialect) 1523 1524 # Snowflake requires that TRY_CAST's value be a string 1525 # If TRY_CAST is being roundtripped (since Snowflake is the only dialect that sets "requires_string") or 1526 # if we can deduce that the value is a string, then we can generate TRY_CAST 1527 if expression.args.get("requires_string") or value.is_type(*exp.DataType.TEXT_TYPES): 1528 return super().trycast_sql(expression) 1529 1530 return self.cast_sql(expression) 1531 1532 def log_sql(self, expression: exp.Log) -> str: 1533 if not expression.expression: 1534 return self.func("LN", expression.this) 1535 1536 
return super().log_sql(expression) 1537 1538 def unnest_sql(self, expression: exp.Unnest) -> str: 1539 unnest_alias = expression.args.get("alias") 1540 offset = expression.args.get("offset") 1541 1542 unnest_alias_columns = unnest_alias.columns if unnest_alias else [] 1543 value = seq_get(unnest_alias_columns, 0) or exp.to_identifier("value") 1544 1545 columns = [ 1546 exp.to_identifier("seq"), 1547 exp.to_identifier("key"), 1548 exp.to_identifier("path"), 1549 offset.pop() if isinstance(offset, exp.Expression) else exp.to_identifier("index"), 1550 value, 1551 exp.to_identifier("this"), 1552 ] 1553 1554 if unnest_alias: 1555 unnest_alias.set("columns", columns) 1556 else: 1557 unnest_alias = exp.TableAlias(this="_u", columns=columns) 1558 1559 table_input = self.sql(expression.expressions[0]) 1560 if not table_input.startswith("INPUT =>"): 1561 table_input = f"INPUT => {table_input}" 1562 1563 expression_parent = expression.parent 1564 1565 explode = ( 1566 f"FLATTEN({table_input})" 1567 if isinstance(expression_parent, exp.Lateral) 1568 else f"TABLE(FLATTEN({table_input}))" 1569 ) 1570 alias = self.sql(unnest_alias) 1571 alias = f" AS {alias}" if alias else "" 1572 value = ( 1573 "" 1574 if isinstance(expression_parent, (exp.From, exp.Join, exp.Lateral)) 1575 else f"{value} FROM " 1576 ) 1577 1578 return f"{value}{explode}{alias}" 1579 1580 def show_sql(self, expression: exp.Show) -> str: 1581 terse = "TERSE " if expression.args.get("terse") else "" 1582 history = " HISTORY" if expression.args.get("history") else "" 1583 like = self.sql(expression, "like") 1584 like = f" LIKE {like}" if like else "" 1585 1586 scope = self.sql(expression, "scope") 1587 scope = f" {scope}" if scope else "" 1588 1589 scope_kind = self.sql(expression, "scope_kind") 1590 if scope_kind: 1591 scope_kind = f" IN {scope_kind}" 1592 1593 starts_with = self.sql(expression, "starts_with") 1594 if starts_with: 1595 starts_with = f" STARTS WITH {starts_with}" 1596 1597 limit = self.sql(expression, "limit") 1598 1599 from_ = self.sql(expression, "from") 1600 if from_: 1601 from_ = f" FROM {from_}" 1602 1603 privileges = self.expressions(expression, key="privileges", flat=True) 1604 privileges = f" WITH PRIVILEGES {privileges}" if privileges else "" 1605 1606 return f"SHOW {terse}{expression.name}{history}{like}{scope_kind}{scope}{starts_with}{limit}{from_}{privileges}" 1607 1608 def describe_sql(self, expression: exp.Describe) -> str: 1609 # Default to table if kind is unknown 1610 kind_value = expression.args.get("kind") or "TABLE" 1611 kind = f" {kind_value}" if kind_value else "" 1612 this = f" {self.sql(expression, 'this')}" 1613 expressions = self.expressions(expression, flat=True) 1614 expressions = f" {expressions}" if expressions else "" 1615 return f"DESCRIBE{kind}{this}{expressions}" 1616 1617 def generatedasidentitycolumnconstraint_sql( 1618 self, expression: exp.GeneratedAsIdentityColumnConstraint 1619 ) -> str: 1620 start = expression.args.get("start") 1621 start = f" START {start}" if start else "" 1622 increment = expression.args.get("increment") 1623 increment = f" INCREMENT {increment}" if increment else "" 1624 1625 order = expression.args.get("order") 1626 if order is not None: 1627 order_clause = " ORDER" if order else " NOORDER" 1628 else: 1629 order_clause = "" 1630 1631 return f"AUTOINCREMENT{start}{increment}{order_clause}" 1632 1633 def cluster_sql(self, expression: exp.Cluster) -> str: 1634 return f"CLUSTER BY ({self.expressions(expression, flat=True)})" 1635 1636 def struct_sql(self, 
expression: exp.Struct) -> str: 1637 if len(expression.expressions) == 1: 1638 arg = expression.expressions[0] 1639 if arg.is_star or (isinstance(arg, exp.ILike) and arg.left.is_star): 1640 # Wildcard syntax: https://docs.snowflake.com/en/sql-reference/data-types-semistructured#object 1641 return f"{{{self.sql(expression.expressions[0])}}}" 1642 1643 keys = [] 1644 values = [] 1645 1646 for i, e in enumerate(expression.expressions): 1647 if isinstance(e, exp.PropertyEQ): 1648 keys.append( 1649 exp.Literal.string(e.name) if isinstance(e.this, exp.Identifier) else e.this 1650 ) 1651 values.append(e.expression) 1652 else: 1653 keys.append(exp.Literal.string(f"_{i}")) 1654 values.append(e) 1655 1656 return self.func("OBJECT_CONSTRUCT", *flatten(zip(keys, values))) 1657 1658 @unsupported_args("weight", "accuracy") 1659 def approxquantile_sql(self, expression: exp.ApproxQuantile) -> str: 1660 return self.func("APPROX_PERCENTILE", expression.this, expression.args.get("quantile")) 1661 1662 def alterset_sql(self, expression: exp.AlterSet) -> str: 1663 exprs = self.expressions(expression, flat=True) 1664 exprs = f" {exprs}" if exprs else "" 1665 file_format = self.expressions(expression, key="file_format", flat=True, sep=" ") 1666 file_format = f" STAGE_FILE_FORMAT = ({file_format})" if file_format else "" 1667 copy_options = self.expressions(expression, key="copy_options", flat=True, sep=" ") 1668 copy_options = f" STAGE_COPY_OPTIONS = ({copy_options})" if copy_options else "" 1669 tag = self.expressions(expression, key="tag", flat=True) 1670 tag = f" TAG {tag}" if tag else "" 1671 1672 return f"SET{exprs}{file_format}{copy_options}{tag}" 1673 1674 def strtotime_sql(self, expression: exp.StrToTime): 1675 safe_prefix = "TRY_" if expression.args.get("safe") else "" 1676 return self.func( 1677 f"{safe_prefix}TO_TIMESTAMP", expression.this, self.format_time(expression) 1678 ) 1679 1680 def timestampsub_sql(self, expression: exp.TimestampSub): 1681 return self.sql( 1682 exp.TimestampAdd( 1683 this=expression.this, 1684 expression=expression.expression * -1, 1685 unit=expression.unit, 1686 ) 1687 ) 1688 1689 def jsonextract_sql(self, expression: exp.JSONExtract): 1690 this = expression.this 1691 1692 # JSON strings are valid coming from other dialects such as BQ so 1693 # for these cases we PARSE_JSON preemptively 1694 if not isinstance(this, (exp.ParseJSON, exp.JSONExtract)) and not expression.args.get( 1695 "requires_json" 1696 ): 1697 this = exp.ParseJSON(this=this) 1698 1699 return self.func( 1700 "GET_PATH", 1701 this, 1702 expression.expression, 1703 ) 1704 1705 def timetostr_sql(self, expression: exp.TimeToStr) -> str: 1706 this = expression.this 1707 if this.is_string: 1708 this = exp.cast(this, exp.DataType.Type.TIMESTAMP) 1709 1710 return self.func("TO_CHAR", this, self.format_time(expression)) 1711 1712 def datesub_sql(self, expression: exp.DateSub) -> str: 1713 value = expression.expression 1714 if value: 1715 value.replace(value * (-1)) 1716 else: 1717 self.unsupported("DateSub cannot be transpiled if the subtracted count is unknown") 1718 1719 return date_delta_sql("DATEADD")(self, expression) 1720 1721 def select_sql(self, expression: exp.Select) -> str: 1722 limit = expression.args.get("limit") 1723 offset = expression.args.get("offset") 1724 if offset and not limit: 1725 expression.limit(exp.Null(), copy=False) 1726 return super().select_sql(expression) 1727 1728 def createable_sql(self, expression: exp.Create, locations: t.DefaultDict) -> str: 1729 is_materialized = 
expression.find(exp.MaterializedProperty) 1730 copy_grants_property = expression.find(exp.CopyGrantsProperty) 1731 1732 if expression.kind == "VIEW" and is_materialized and copy_grants_property: 1733 # For materialized views, COPY GRANTS is located *before* the columns list 1734 # This is in contrast to normal views where COPY GRANTS is located *after* the columns list 1735 # We default CopyGrantsProperty to POST_SCHEMA which means we need to output it POST_NAME if a materialized view is detected 1736 # ref: https://docs.snowflake.com/en/sql-reference/sql/create-materialized-view#syntax 1737 # ref: https://docs.snowflake.com/en/sql-reference/sql/create-view#syntax 1738 post_schema_properties = locations[exp.Properties.Location.POST_SCHEMA] 1739 post_schema_properties.pop(post_schema_properties.index(copy_grants_property)) 1740 1741 this_name = self.sql(expression.this, "this") 1742 copy_grants = self.sql(copy_grants_property) 1743 this_schema = self.schema_columns_sql(expression.this) 1744 this_schema = f"{self.sep()}{this_schema}" if this_schema else "" 1745 1746 return f"{this_name}{self.sep()}{copy_grants}{this_schema}" 1747 1748 return super().createable_sql(expression, locations) 1749 1750 def arrayagg_sql(self, expression: exp.ArrayAgg) -> str: 1751 this = expression.this 1752 1753 # If an ORDER BY clause is present, we need to remove it from ARRAY_AGG 1754 # and add it later as part of the WITHIN GROUP clause 1755 order = this if isinstance(this, exp.Order) else None 1756 if order: 1757 expression.set("this", order.this.pop()) 1758 1759 expr_sql = super().arrayagg_sql(expression) 1760 1761 if order: 1762 expr_sql = self.sql(exp.WithinGroup(this=expr_sql, expression=order)) 1763 1764 return expr_sql 1765 1766 def array_sql(self, expression: exp.Array) -> str: 1767 expressions = expression.expressions 1768 1769 first_expr = seq_get(expressions, 0) 1770 if isinstance(first_expr, exp.Select): 1771 # SELECT AS STRUCT foo AS alias_foo -> ARRAY_AGG(OBJECT_CONSTRUCT('alias_foo', foo)) 1772 if first_expr.text("kind").upper() == "STRUCT": 1773 object_construct_args = [] 1774 for expr in first_expr.expressions: 1775 # Alias case: SELECT AS STRUCT foo AS alias_foo -> OBJECT_CONSTRUCT('alias_foo', foo) 1776 # Column case: SELECT AS STRUCT foo -> OBJECT_CONSTRUCT('foo', foo) 1777 name = expr.this if isinstance(expr, exp.Alias) else expr 1778 1779 object_construct_args.extend([exp.Literal.string(expr.alias_or_name), name]) 1780 1781 array_agg = exp.ArrayAgg( 1782 this=_build_object_construct(args=object_construct_args) 1783 ) 1784 1785 first_expr.set("kind", None) 1786 first_expr.set("expressions", [array_agg]) 1787 1788 return self.sql(first_expr.subquery()) 1789 1790 return inline_array_sql(self, expression) 1791 1792 def currentdate_sql(self, expression: exp.CurrentDate) -> str: 1793 zone = self.sql(expression, "this") 1794 if not zone: 1795 return super().currentdate_sql(expression) 1796 1797 expr = exp.Cast( 1798 this=exp.ConvertTimezone(target_tz=zone, timestamp=exp.CurrentTimestamp()), 1799 to=exp.DataType(this=exp.DataType.Type.DATE), 1800 ) 1801 return self.sql(expr) 1802 1803 def dot_sql(self, expression: exp.Dot) -> str: 1804 this = expression.this 1805 1806 if not this.type: 1807 from sqlglot.optimizer.annotate_types import annotate_types 1808 1809 this = annotate_types(this, dialect=self.dialect) 1810 1811 if not isinstance(this, exp.Dot) and this.is_type(exp.DataType.Type.STRUCT): 1812 # Generate colon notation for the top level STRUCT 1813 return 
f"{self.sql(this)}:{self.sql(expression, 'expression')}" 1814 1815 return super().dot_sql(expression) 1816 1817 def modelattribute_sql(self, expression: exp.ModelAttribute) -> str: 1818 return f"{self.sql(expression, 'this')}!{self.sql(expression, 'expression')}"
Specifies the strategy according to which identifiers should be normalized.
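For illustration, a minimal sketch of Snowflake's uppercase strategy using sqlglot's normalize_identifiers helper (the query is a made-up example):

import sqlglot
from sqlglot.optimizer.normalize_identifiers import normalize_identifiers

# Unquoted names are case-insensitive and normalize to uppercase; quoted
# names keep their exact case.
expr = sqlglot.parse_one('SELECT a, "b" FROM t', read="snowflake")
print(normalize_identifiers(expr, dialect="snowflake").sql(dialect="snowflake"))
# SELECT A, "b" FROM T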
Default NULL ordering method to use if not explicitly set.
Possible values: "nulls_are_small", "nulls_are_large", "nulls_are_last"
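A small sketch of reading this setting off the dialect (the comment reflects Snowflake's documented default sort behavior):

from sqlglot.dialects.dialect import Dialect

# "nulls_are_large" means NULL compares as the largest value, so a plain
# ascending ORDER BY places NULLs last by default in Snowflake.
print(Dialect.get_or_raise("snowflake").NULL_ORDERING)
# nulls_are_large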
Some dialects, such as Snowflake, allow you to reference a CTE column alias in the HAVING clause of the CTE. This flag will cause the CTE alias columns to override any projection aliases in the subquery.
For example,
WITH y(c) AS (
SELECT SUM(a) FROM (SELECT 1 a) AS x HAVING c > 0
) SELECT c FROM y;
will be rewritten as
WITH y(c) AS (
SELECT SUM(a) AS c FROM (SELECT 1 AS a) AS x HAVING c > 0
) SELECT c FROM y;
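A hedged sketch of triggering this rewrite through sqlglot's qualify step (exact qualification details, such as added table prefixes, may vary by version):

import sqlglot
from sqlglot.optimizer.qualify import qualify

sql = "WITH y(c) AS (SELECT SUM(a) FROM (SELECT 1 a) AS x HAVING c > 0) SELECT c FROM y"
# With PREFER_CTE_ALIAS_COLUMN, the CTE column alias c is pushed onto the
# SUM(a) projection so that the HAVING reference resolves as Snowflake would.
print(qualify(sqlglot.parse_one(sql, read="snowflake"), dialect="snowflake").sql(dialect="snowflake"))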
Associates this dialect's time formats with their equivalent Python strftime formats.
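For example, a format string passed to TO_CHAR is translated token by token when transpiling (ts is a placeholder column; the expected output is approximate):

import sqlglot

# YYYY -> %Y, MM -> %m, DD -> %d, HH24 -> %H, MI -> %M, SS -> %S
print(sqlglot.transpile(
    "SELECT TO_CHAR(ts, 'YYYY-MM-DD HH24:MI:SS')",
    read="snowflake",
    write="duckdb",
)[0])
# Roughly: SELECT STRFTIME(CAST(ts AS TIMESTAMP), '%Y-%m-%d %H:%M:%S')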
609 def quote_identifier(self, expression: E, identify: bool = True) -> E: 610 # This disables quoting DUAL in SELECT ... FROM DUAL, because Snowflake treats an 611 # unquoted DUAL keyword in a special way and does not map it to a user-defined table 612 if ( 613 isinstance(expression, exp.Identifier) 614 and isinstance(expression.parent, exp.Table) 615 and expression.name.lower() == "dual" 616 ): 617 return expression # type: ignore 618 619 return super().quote_identifier(expression, identify=identify)
Adds quotes to a given identifier.
Arguments:
- expression: The expression of interest. If it's not an Identifier, this method is a no-op.
- identify: If set to False, the quotes will only be added if the identifier is deemed "unsafe", with respect to its characters and this dialect's normalization strategy.
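A small sketch of the DUAL carve-out (the identifier names are illustrative):

from sqlglot import exp
from sqlglot.dialects.snowflake import Snowflake

dialect = Snowflake()

# An ordinary identifier is quoted as usual
print(dialect.quote_identifier(exp.to_identifier("a")).sql("snowflake"))  # "a"

# An identifier spelled dual inside a Table node is returned untouched,
# so SELECT ... FROM DUAL keeps its special meaning in Snowflake
table = exp.to_table("dual")
print(dialect.quote_identifier(table.this).sql("snowflake"))  # dual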
Mapping of an escaped sequence (\n) to its unescaped version (a literal newline character).
621 class JSONPathTokenizer(jsonpath.JSONPathTokenizer): 622 SINGLE_TOKENS = jsonpath.JSONPathTokenizer.SINGLE_TOKENS.copy() 623 SINGLE_TOKENS.pop("$")
Inherited Members
- sqlglot.tokens.Tokenizer
- Tokenizer
- BIT_STRINGS
- BYTE_STRINGS
- HEX_STRINGS
- RAW_STRINGS
- HEREDOC_STRINGS
- UNICODE_STRINGS
- IDENTIFIERS
- QUOTES
- VAR_SINGLE_TOKENS
- HEREDOC_TAG_IS_IDENTIFIER
- HEREDOC_STRING_ALTERNATIVE
- STRING_ESCAPES_ALLOWED_IN_RAW_STRINGS
- NESTED_COMMENTS
- HINT_START
- TOKENS_PRECEDING_HINT
- WHITE_SPACE
- COMMANDS
- COMMAND_PREFIX_TOKENS
- NUMERIC_LITERALS
- COMMENTS
- dialect
- use_rs_tokenizer
- reset
- tokenize
- tokenize_rs
- size
- sql
- tokens
625 class Parser(parser.Parser): 626 IDENTIFY_PIVOT_STRINGS = True 627 DEFAULT_SAMPLING_METHOD = "BERNOULLI" 628 COLON_IS_VARIANT_EXTRACT = True 629 JSON_EXTRACT_REQUIRES_JSON_EXPRESSION = True 630 631 ID_VAR_TOKENS = { 632 *parser.Parser.ID_VAR_TOKENS, 633 TokenType.EXCEPT, 634 TokenType.MATCH_CONDITION, 635 } 636 637 TABLE_ALIAS_TOKENS = parser.Parser.TABLE_ALIAS_TOKENS | {TokenType.WINDOW} 638 TABLE_ALIAS_TOKENS.discard(TokenType.MATCH_CONDITION) 639 640 COLON_PLACEHOLDER_TOKENS = ID_VAR_TOKENS | {TokenType.NUMBER} 641 642 FUNCTIONS = { 643 **parser.Parser.FUNCTIONS, 644 "APPROX_PERCENTILE": exp.ApproxQuantile.from_arg_list, 645 "ARRAY_CONSTRUCT": lambda args: exp.Array(expressions=args), 646 "ARRAY_CONTAINS": lambda args: exp.ArrayContains( 647 this=seq_get(args, 1), expression=seq_get(args, 0) 648 ), 649 "ARRAY_GENERATE_RANGE": lambda args: exp.GenerateSeries( 650 # ARRAY_GENERATE_RANGE has an exlusive end; we normalize it to be inclusive 651 start=seq_get(args, 0), 652 end=exp.Sub(this=seq_get(args, 1), expression=exp.Literal.number(1)), 653 step=seq_get(args, 2), 654 ), 655 "ARRAY_SORT": exp.SortArray.from_arg_list, 656 "BITXOR": _build_bitwise(exp.BitwiseXor, "BITXOR"), 657 "BIT_XOR": _build_bitwise(exp.BitwiseXor, "BITXOR"), 658 "BITOR": _build_bitwise(exp.BitwiseOr, "BITOR"), 659 "BIT_OR": _build_bitwise(exp.BitwiseOr, "BITOR"), 660 "BITSHIFTLEFT": _build_bitwise(exp.BitwiseLeftShift, "BITSHIFTLEFT"), 661 "BIT_SHIFTLEFT": _build_bitwise(exp.BitwiseLeftShift, "BIT_SHIFTLEFT"), 662 "BITSHIFTRIGHT": _build_bitwise(exp.BitwiseRightShift, "BITSHIFTRIGHT"), 663 "BIT_SHIFTRIGHT": _build_bitwise(exp.BitwiseRightShift, "BIT_SHIFTRIGHT"), 664 "BITANDAGG": exp.BitwiseAndAgg.from_arg_list, 665 "BITAND_AGG": exp.BitwiseAndAgg.from_arg_list, 666 "BIT_AND_AGG": exp.BitwiseAndAgg.from_arg_list, 667 "BIT_ANDAGG": exp.BitwiseAndAgg.from_arg_list, 668 "BITORAGG": exp.BitwiseOrAgg.from_arg_list, 669 "BITOR_AGG": exp.BitwiseOrAgg.from_arg_list, 670 "BIT_OR_AGG": exp.BitwiseOrAgg.from_arg_list, 671 "BIT_ORAGG": exp.BitwiseOrAgg.from_arg_list, 672 "BITXORAGG": exp.BitwiseXorAgg.from_arg_list, 673 "BITXOR_AGG": exp.BitwiseXorAgg.from_arg_list, 674 "BIT_XOR_AGG": exp.BitwiseXorAgg.from_arg_list, 675 "BIT_XORAGG": exp.BitwiseXorAgg.from_arg_list, 676 "BOOLXOR": _build_bitwise(exp.Xor, "BOOLXOR"), 677 "DATE": _build_datetime("DATE", exp.DataType.Type.DATE), 678 "DATE_TRUNC": _date_trunc_to_time, 679 "DATEADD": _build_date_time_add(exp.DateAdd), 680 "DATEDIFF": _build_datediff, 681 "DIV0": _build_if_from_div0, 682 "EDITDISTANCE": lambda args: exp.Levenshtein( 683 this=seq_get(args, 0), expression=seq_get(args, 1), max_dist=seq_get(args, 2) 684 ), 685 "FLATTEN": exp.Explode.from_arg_list, 686 "GET": exp.GetExtract.from_arg_list, 687 "GET_PATH": lambda args, dialect: exp.JSONExtract( 688 this=seq_get(args, 0), 689 expression=dialect.to_json_path(seq_get(args, 1)), 690 requires_json=True, 691 ), 692 "HEX_DECODE_BINARY": exp.Unhex.from_arg_list, 693 "IFF": exp.If.from_arg_list, 694 "MD5_HEX": exp.MD5.from_arg_list, 695 "MD5_BINARY": exp.MD5Digest.from_arg_list, 696 "MD5_NUMBER_LOWER64": exp.MD5NumberLower64.from_arg_list, 697 "MD5_NUMBER_UPPER64": exp.MD5NumberUpper64.from_arg_list, 698 "LAST_DAY": lambda args: exp.LastDay( 699 this=seq_get(args, 0), unit=map_date_part(seq_get(args, 1)) 700 ), 701 "LEN": lambda args: exp.Length(this=seq_get(args, 0), binary=True), 702 "LENGTH": lambda args: exp.Length(this=seq_get(args, 0), binary=True), 703 "NULLIFZERO": _build_if_from_nullifzero, 704 "OBJECT_CONSTRUCT": 
_build_object_construct, 705 "OCTET_LENGTH": exp.ByteLength.from_arg_list, 706 "REGEXP_EXTRACT_ALL": _build_regexp_extract(exp.RegexpExtractAll), 707 "REGEXP_REPLACE": _build_regexp_replace, 708 "REGEXP_SUBSTR": _build_regexp_extract(exp.RegexpExtract), 709 "REGEXP_SUBSTR_ALL": _build_regexp_extract(exp.RegexpExtractAll), 710 "REPLACE": build_replace_with_optional_replacement, 711 "RLIKE": exp.RegexpLike.from_arg_list, 712 "SHA1_BINARY": exp.SHA1Digest.from_arg_list, 713 "SHA1_HEX": exp.SHA.from_arg_list, 714 "SHA2_BINARY": exp.SHA2Digest.from_arg_list, 715 "SHA2_HEX": exp.SHA2.from_arg_list, 716 "SQUARE": lambda args: exp.Pow(this=seq_get(args, 0), expression=exp.Literal.number(2)), 717 "TABLE": lambda args: exp.TableFromRows(this=seq_get(args, 0)), 718 "TIMEADD": _build_date_time_add(exp.TimeAdd), 719 "TIMEDIFF": _build_datediff, 720 "TIMESTAMPADD": _build_date_time_add(exp.DateAdd), 721 "TIMESTAMPDIFF": _build_datediff, 722 "TIMESTAMPFROMPARTS": build_timestamp_from_parts, 723 "TIMESTAMP_FROM_PARTS": build_timestamp_from_parts, 724 "TIMESTAMPNTZFROMPARTS": build_timestamp_from_parts, 725 "TIMESTAMP_NTZ_FROM_PARTS": build_timestamp_from_parts, 726 "TRY_PARSE_JSON": lambda args: exp.ParseJSON(this=seq_get(args, 0), safe=True), 727 "TRY_TO_DATE": _build_datetime("TRY_TO_DATE", exp.DataType.Type.DATE, safe=True), 728 "TRY_TO_TIME": _build_datetime("TRY_TO_TIME", exp.DataType.Type.TIME, safe=True), 729 "TRY_TO_TIMESTAMP": _build_datetime( 730 "TRY_TO_TIMESTAMP", exp.DataType.Type.TIMESTAMP, safe=True 731 ), 732 "TO_CHAR": build_timetostr_or_tochar, 733 "TO_DATE": _build_datetime("TO_DATE", exp.DataType.Type.DATE), 734 "TO_NUMBER": lambda args: exp.ToNumber( 735 this=seq_get(args, 0), 736 format=seq_get(args, 1), 737 precision=seq_get(args, 2), 738 scale=seq_get(args, 3), 739 ), 740 "TO_TIME": _build_datetime("TO_TIME", exp.DataType.Type.TIME), 741 "TO_TIMESTAMP": _build_datetime("TO_TIMESTAMP", exp.DataType.Type.TIMESTAMP), 742 "TO_TIMESTAMP_LTZ": _build_datetime("TO_TIMESTAMP_LTZ", exp.DataType.Type.TIMESTAMPLTZ), 743 "TO_TIMESTAMP_NTZ": _build_datetime("TO_TIMESTAMP_NTZ", exp.DataType.Type.TIMESTAMP), 744 "TO_TIMESTAMP_TZ": _build_datetime("TO_TIMESTAMP_TZ", exp.DataType.Type.TIMESTAMPTZ), 745 "TO_VARCHAR": build_timetostr_or_tochar, 746 "TO_JSON": exp.JSONFormat.from_arg_list, 747 "VECTOR_L2_DISTANCE": exp.EuclideanDistance.from_arg_list, 748 "ZEROIFNULL": _build_if_from_zeroifnull, 749 } 750 FUNCTIONS.pop("PREDICT") 751 752 FUNCTION_PARSERS = { 753 **parser.Parser.FUNCTION_PARSERS, 754 "DATE_PART": lambda self: self._parse_date_part(), 755 "OBJECT_CONSTRUCT_KEEP_NULL": lambda self: self._parse_json_object(), 756 "LISTAGG": lambda self: self._parse_string_agg(), 757 "SEMANTIC_VIEW": lambda self: self._parse_semantic_view(), 758 } 759 FUNCTION_PARSERS.pop("TRIM") 760 761 TIMESTAMPS = parser.Parser.TIMESTAMPS - {TokenType.TIME} 762 763 ALTER_PARSERS = { 764 **parser.Parser.ALTER_PARSERS, 765 "SESSION": lambda self: self._parse_alter_session(), 766 "UNSET": lambda self: self.expression( 767 exp.Set, 768 tag=self._match_text_seq("TAG"), 769 expressions=self._parse_csv(self._parse_id_var), 770 unset=True, 771 ), 772 } 773 774 STATEMENT_PARSERS = { 775 **parser.Parser.STATEMENT_PARSERS, 776 TokenType.GET: lambda self: self._parse_get(), 777 TokenType.PUT: lambda self: self._parse_put(), 778 TokenType.SHOW: lambda self: self._parse_show(), 779 } 780 781 PROPERTY_PARSERS = { 782 **parser.Parser.PROPERTY_PARSERS, 783 "CREDENTIALS": lambda self: self._parse_credentials_property(), 784 
"FILE_FORMAT": lambda self: self._parse_file_format_property(), 785 "LOCATION": lambda self: self._parse_location_property(), 786 "TAG": lambda self: self._parse_tag(), 787 "USING": lambda self: self._match_text_seq("TEMPLATE") 788 and self.expression(exp.UsingTemplateProperty, this=self._parse_statement()), 789 } 790 791 TYPE_CONVERTERS = { 792 # https://docs.snowflake.com/en/sql-reference/data-types-numeric#number 793 exp.DataType.Type.DECIMAL: build_default_decimal_type(precision=38, scale=0), 794 } 795 796 SHOW_PARSERS = { 797 "DATABASES": _show_parser("DATABASES"), 798 "TERSE DATABASES": _show_parser("DATABASES"), 799 "SCHEMAS": _show_parser("SCHEMAS"), 800 "TERSE SCHEMAS": _show_parser("SCHEMAS"), 801 "OBJECTS": _show_parser("OBJECTS"), 802 "TERSE OBJECTS": _show_parser("OBJECTS"), 803 "TABLES": _show_parser("TABLES"), 804 "TERSE TABLES": _show_parser("TABLES"), 805 "VIEWS": _show_parser("VIEWS"), 806 "TERSE VIEWS": _show_parser("VIEWS"), 807 "PRIMARY KEYS": _show_parser("PRIMARY KEYS"), 808 "TERSE PRIMARY KEYS": _show_parser("PRIMARY KEYS"), 809 "IMPORTED KEYS": _show_parser("IMPORTED KEYS"), 810 "TERSE IMPORTED KEYS": _show_parser("IMPORTED KEYS"), 811 "UNIQUE KEYS": _show_parser("UNIQUE KEYS"), 812 "TERSE UNIQUE KEYS": _show_parser("UNIQUE KEYS"), 813 "SEQUENCES": _show_parser("SEQUENCES"), 814 "TERSE SEQUENCES": _show_parser("SEQUENCES"), 815 "STAGES": _show_parser("STAGES"), 816 "COLUMNS": _show_parser("COLUMNS"), 817 "USERS": _show_parser("USERS"), 818 "TERSE USERS": _show_parser("USERS"), 819 "FILE FORMATS": _show_parser("FILE FORMATS"), 820 "FUNCTIONS": _show_parser("FUNCTIONS"), 821 "PROCEDURES": _show_parser("PROCEDURES"), 822 "WAREHOUSES": _show_parser("WAREHOUSES"), 823 } 824 825 CONSTRAINT_PARSERS = { 826 **parser.Parser.CONSTRAINT_PARSERS, 827 "WITH": lambda self: self._parse_with_constraint(), 828 "MASKING": lambda self: self._parse_with_constraint(), 829 "PROJECTION": lambda self: self._parse_with_constraint(), 830 "TAG": lambda self: self._parse_with_constraint(), 831 } 832 833 STAGED_FILE_SINGLE_TOKENS = { 834 TokenType.DOT, 835 TokenType.MOD, 836 TokenType.SLASH, 837 } 838 839 FLATTEN_COLUMNS = ["SEQ", "KEY", "PATH", "INDEX", "VALUE", "THIS"] 840 841 SCHEMA_KINDS = {"OBJECTS", "TABLES", "VIEWS", "SEQUENCES", "UNIQUE KEYS", "IMPORTED KEYS"} 842 843 NON_TABLE_CREATABLES = {"STORAGE INTEGRATION", "TAG", "WAREHOUSE", "STREAMLIT"} 844 845 LAMBDAS = { 846 **parser.Parser.LAMBDAS, 847 TokenType.ARROW: lambda self, expressions: self.expression( 848 exp.Lambda, 849 this=self._replace_lambda( 850 self._parse_assignment(), 851 expressions, 852 ), 853 expressions=[e.this if isinstance(e, exp.Cast) else e for e in expressions], 854 ), 855 } 856 857 COLUMN_OPERATORS = { 858 **parser.Parser.COLUMN_OPERATORS, 859 TokenType.EXCLAMATION: lambda self, this, attr: self.expression( 860 exp.ModelAttribute, this=this, expression=attr 861 ), 862 } 863 864 def _parse_use(self) -> exp.Use: 865 if self._match_text_seq("SECONDARY", "ROLES"): 866 this = self._match_texts(("ALL", "NONE")) and exp.var(self._prev.text.upper()) 867 roles = None if this else self._parse_csv(lambda: self._parse_table(schema=False)) 868 return self.expression( 869 exp.Use, kind="SECONDARY ROLES", this=this, expressions=roles 870 ) 871 872 return super()._parse_use() 873 874 def _negate_range( 875 self, this: t.Optional[exp.Expression] = None 876 ) -> t.Optional[exp.Expression]: 877 if not this: 878 return this 879 880 query = this.args.get("query") 881 if isinstance(this, exp.In) and isinstance(query, exp.Query): 882 
# Snowflake treats `value NOT IN (subquery)` as `VALUE <> ALL (subquery)`, so 883 # we do this conversion here to avoid parsing it into `NOT value IN (subquery)` 884 # which can produce different results (most likely a SnowFlake bug). 885 # 886 # https://docs.snowflake.com/en/sql-reference/functions/in 887 # Context: https://github.com/tobymao/sqlglot/issues/3890 888 return self.expression( 889 exp.NEQ, this=this.this, expression=exp.All(this=query.unnest()) 890 ) 891 892 return self.expression(exp.Not, this=this) 893 894 def _parse_tag(self) -> exp.Tags: 895 return self.expression( 896 exp.Tags, 897 expressions=self._parse_wrapped_csv(self._parse_property), 898 ) 899 900 def _parse_with_constraint(self) -> t.Optional[exp.Expression]: 901 if self._prev.token_type != TokenType.WITH: 902 self._retreat(self._index - 1) 903 904 if self._match_text_seq("MASKING", "POLICY"): 905 policy = self._parse_column() 906 return self.expression( 907 exp.MaskingPolicyColumnConstraint, 908 this=policy.to_dot() if isinstance(policy, exp.Column) else policy, 909 expressions=self._match(TokenType.USING) 910 and self._parse_wrapped_csv(self._parse_id_var), 911 ) 912 if self._match_text_seq("PROJECTION", "POLICY"): 913 policy = self._parse_column() 914 return self.expression( 915 exp.ProjectionPolicyColumnConstraint, 916 this=policy.to_dot() if isinstance(policy, exp.Column) else policy, 917 ) 918 if self._match(TokenType.TAG): 919 return self._parse_tag() 920 921 return None 922 923 def _parse_with_property(self) -> t.Optional[exp.Expression] | t.List[exp.Expression]: 924 if self._match(TokenType.TAG): 925 return self._parse_tag() 926 927 return super()._parse_with_property() 928 929 def _parse_create(self) -> exp.Create | exp.Command: 930 expression = super()._parse_create() 931 if isinstance(expression, exp.Create) and expression.kind in self.NON_TABLE_CREATABLES: 932 # Replace the Table node with the enclosed Identifier 933 expression.this.replace(expression.this.this) 934 935 return expression 936 937 # https://docs.snowflake.com/en/sql-reference/functions/date_part.html 938 # https://docs.snowflake.com/en/sql-reference/functions-date-time.html#label-supported-date-time-parts 939 def _parse_date_part(self: Snowflake.Parser) -> t.Optional[exp.Expression]: 940 this = self._parse_var() or self._parse_type() 941 942 if not this: 943 return None 944 945 self._match(TokenType.COMMA) 946 expression = self._parse_bitwise() 947 this = map_date_part(this) 948 name = this.name.upper() 949 950 if name.startswith("EPOCH"): 951 if name == "EPOCH_MILLISECOND": 952 scale = 10**3 953 elif name == "EPOCH_MICROSECOND": 954 scale = 10**6 955 elif name == "EPOCH_NANOSECOND": 956 scale = 10**9 957 else: 958 scale = None 959 960 ts = self.expression(exp.Cast, this=expression, to=exp.DataType.build("TIMESTAMP")) 961 to_unix: exp.Expression = self.expression(exp.TimeToUnix, this=ts) 962 963 if scale: 964 to_unix = exp.Mul(this=to_unix, expression=exp.Literal.number(scale)) 965 966 return to_unix 967 968 return self.expression(exp.Extract, this=this, expression=expression) 969 970 def _parse_bracket_key_value(self, is_map: bool = False) -> t.Optional[exp.Expression]: 971 if is_map: 972 # Keys are strings in Snowflake's objects, see also: 973 # - https://docs.snowflake.com/en/sql-reference/data-types-semistructured 974 # - https://docs.snowflake.com/en/sql-reference/functions/object_construct 975 return self._parse_slice(self._parse_string()) or self._parse_assignment() 976 977 return 
self._parse_slice(self._parse_alias(self._parse_assignment(), explicit=True)) 978 979 def _parse_lateral(self) -> t.Optional[exp.Lateral]: 980 lateral = super()._parse_lateral() 981 if not lateral: 982 return lateral 983 984 if isinstance(lateral.this, exp.Explode): 985 table_alias = lateral.args.get("alias") 986 columns = [exp.to_identifier(col) for col in self.FLATTEN_COLUMNS] 987 if table_alias and not table_alias.args.get("columns"): 988 table_alias.set("columns", columns) 989 elif not table_alias: 990 exp.alias_(lateral, "_flattened", table=columns, copy=False) 991 992 return lateral 993 994 def _parse_table_parts( 995 self, schema: bool = False, is_db_reference: bool = False, wildcard: bool = False 996 ) -> exp.Table: 997 # https://docs.snowflake.com/en/user-guide/querying-stage 998 if self._match(TokenType.STRING, advance=False): 999 table = self._parse_string() 1000 elif self._match_text_seq("@", advance=False): 1001 table = self._parse_location_path() 1002 else: 1003 table = None 1004 1005 if table: 1006 file_format = None 1007 pattern = None 1008 1009 wrapped = self._match(TokenType.L_PAREN) 1010 while self._curr and wrapped and not self._match(TokenType.R_PAREN): 1011 if self._match_text_seq("FILE_FORMAT", "=>"): 1012 file_format = self._parse_string() or super()._parse_table_parts( 1013 is_db_reference=is_db_reference 1014 ) 1015 elif self._match_text_seq("PATTERN", "=>"): 1016 pattern = self._parse_string() 1017 else: 1018 break 1019 1020 self._match(TokenType.COMMA) 1021 1022 table = self.expression(exp.Table, this=table, format=file_format, pattern=pattern) 1023 else: 1024 table = super()._parse_table_parts(schema=schema, is_db_reference=is_db_reference) 1025 1026 return table 1027 1028 def _parse_table( 1029 self, 1030 schema: bool = False, 1031 joins: bool = False, 1032 alias_tokens: t.Optional[t.Collection[TokenType]] = None, 1033 parse_bracket: bool = False, 1034 is_db_reference: bool = False, 1035 parse_partition: bool = False, 1036 consume_pipe: bool = False, 1037 ) -> t.Optional[exp.Expression]: 1038 table = super()._parse_table( 1039 schema=schema, 1040 joins=joins, 1041 alias_tokens=alias_tokens, 1042 parse_bracket=parse_bracket, 1043 is_db_reference=is_db_reference, 1044 parse_partition=parse_partition, 1045 ) 1046 if isinstance(table, exp.Table) and isinstance(table.this, exp.TableFromRows): 1047 table_from_rows = table.this 1048 for arg in exp.TableFromRows.arg_types: 1049 if arg != "this": 1050 table_from_rows.set(arg, table.args.get(arg)) 1051 1052 table = table_from_rows 1053 1054 return table 1055 1056 def _parse_id_var( 1057 self, 1058 any_token: bool = True, 1059 tokens: t.Optional[t.Collection[TokenType]] = None, 1060 ) -> t.Optional[exp.Expression]: 1061 if self._match_text_seq("IDENTIFIER", "("): 1062 identifier = ( 1063 super()._parse_id_var(any_token=any_token, tokens=tokens) 1064 or self._parse_string() 1065 ) 1066 self._match_r_paren() 1067 return self.expression(exp.Anonymous, this="IDENTIFIER", expressions=[identifier]) 1068 1069 return super()._parse_id_var(any_token=any_token, tokens=tokens) 1070 1071 def _parse_show_snowflake(self, this: str) -> exp.Show: 1072 scope = None 1073 scope_kind = None 1074 1075 # will identify SHOW TERSE SCHEMAS but not SHOW TERSE PRIMARY KEYS 1076 # which is syntactically valid but has no effect on the output 1077 terse = self._tokens[self._index - 2].text.upper() == "TERSE" 1078 1079 history = self._match_text_seq("HISTORY") 1080 1081 like = self._parse_string() if self._match(TokenType.LIKE) else None 1082 1083 if 
self._match(TokenType.IN): 1084 if self._match_text_seq("ACCOUNT"): 1085 scope_kind = "ACCOUNT" 1086 elif self._match_text_seq("CLASS"): 1087 scope_kind = "CLASS" 1088 scope = self._parse_table_parts() 1089 elif self._match_text_seq("APPLICATION"): 1090 scope_kind = "APPLICATION" 1091 if self._match_text_seq("PACKAGE"): 1092 scope_kind += " PACKAGE" 1093 scope = self._parse_table_parts() 1094 elif self._match_set(self.DB_CREATABLES): 1095 scope_kind = self._prev.text.upper() 1096 if self._curr: 1097 scope = self._parse_table_parts() 1098 elif self._curr: 1099 scope_kind = "SCHEMA" if this in self.SCHEMA_KINDS else "TABLE" 1100 scope = self._parse_table_parts() 1101 1102 return self.expression( 1103 exp.Show, 1104 **{ 1105 "terse": terse, 1106 "this": this, 1107 "history": history, 1108 "like": like, 1109 "scope": scope, 1110 "scope_kind": scope_kind, 1111 "starts_with": self._match_text_seq("STARTS", "WITH") and self._parse_string(), 1112 "limit": self._parse_limit(), 1113 "from": self._parse_string() if self._match(TokenType.FROM) else None, 1114 "privileges": self._match_text_seq("WITH", "PRIVILEGES") 1115 and self._parse_csv(lambda: self._parse_var(any_token=True, upper=True)), 1116 }, 1117 ) 1118 1119 def _parse_put(self) -> exp.Put | exp.Command: 1120 if self._curr.token_type != TokenType.STRING: 1121 return self._parse_as_command(self._prev) 1122 1123 return self.expression( 1124 exp.Put, 1125 this=self._parse_string(), 1126 target=self._parse_location_path(), 1127 properties=self._parse_properties(), 1128 ) 1129 1130 def _parse_get(self) -> t.Optional[exp.Expression]: 1131 start = self._prev 1132 1133 # If we detect GET( then we need to parse a function, not a statement 1134 if self._match(TokenType.L_PAREN): 1135 self._retreat(self._index - 2) 1136 return self._parse_expression() 1137 1138 target = self._parse_location_path() 1139 1140 # Parse as command if unquoted file path 1141 if self._curr.token_type == TokenType.URI_START: 1142 return self._parse_as_command(start) 1143 1144 return self.expression( 1145 exp.Get, 1146 this=self._parse_string(), 1147 target=target, 1148 properties=self._parse_properties(), 1149 ) 1150 1151 def _parse_location_property(self) -> exp.LocationProperty: 1152 self._match(TokenType.EQ) 1153 return self.expression(exp.LocationProperty, this=self._parse_location_path()) 1154 1155 def _parse_file_location(self) -> t.Optional[exp.Expression]: 1156 # Parse either a subquery or a staged file 1157 return ( 1158 self._parse_select(table=True, parse_subquery_alias=False) 1159 if self._match(TokenType.L_PAREN, advance=False) 1160 else self._parse_table_parts() 1161 ) 1162 1163 def _parse_location_path(self) -> exp.Var: 1164 start = self._curr 1165 self._advance_any(ignore_reserved=True) 1166 1167 # We avoid consuming a comma token because external tables like @foo and @bar 1168 # can be joined in a query with a comma separator, as well as closing paren 1169 # in case of subqueries 1170 while self._is_connected() and not self._match_set( 1171 (TokenType.COMMA, TokenType.L_PAREN, TokenType.R_PAREN), advance=False 1172 ): 1173 self._advance_any(ignore_reserved=True) 1174 1175 return exp.var(self._find_sql(start, self._prev)) 1176 1177 def _parse_lambda_arg(self) -> t.Optional[exp.Expression]: 1178 this = super()._parse_lambda_arg() 1179 1180 if not this: 1181 return this 1182 1183 typ = self._parse_types() 1184 1185 if typ: 1186 return self.expression(exp.Cast, this=this, to=typ) 1187 1188 return this 1189 1190 def _parse_foreign_key(self) -> exp.ForeignKey: 1191 # 
inline FK, the REFERENCES columns are implied 1192 if self._match(TokenType.REFERENCES, advance=False): 1193 return self.expression(exp.ForeignKey) 1194 1195 # out-of-line FK, the columns are named explicitly 1196 return super()._parse_foreign_key() 1197 1198 def _parse_file_format_property(self) -> exp.FileFormatProperty: 1199 self._match(TokenType.EQ) 1200 if self._match(TokenType.L_PAREN, advance=False): 1201 expressions = self._parse_wrapped_options() 1202 else: 1203 expressions = [self._parse_format_name()] 1204 1205 return self.expression( 1206 exp.FileFormatProperty, 1207 expressions=expressions, 1208 ) 1209 1210 def _parse_credentials_property(self) -> exp.CredentialsProperty: 1211 return self.expression( 1212 exp.CredentialsProperty, 1213 expressions=self._parse_wrapped_options(), 1214 ) 1215 1216 def _parse_semantic_view(self) -> exp.SemanticView: 1217 kwargs: t.Dict[str, t.Any] = {"this": self._parse_table_parts()} 1218 1219 while self._curr and not self._match(TokenType.R_PAREN, advance=False): 1220 if self._match_text_seq("DIMENSIONS"): 1221 kwargs["dimensions"] = self._parse_csv(self._parse_disjunction) 1222 if self._match_text_seq("METRICS"): 1223 kwargs["metrics"] = self._parse_csv(self._parse_disjunction) 1224 if self._match_text_seq("WHERE"): 1225 kwargs["where"] = self._parse_expression() 1226 1227 return self.expression(exp.SemanticView, **kwargs)
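Two parses that illustrate the function mappings above; a sketch, and the printed results are approximate and may shift between sqlglot versions:

import sqlglot

# IFF(...) normalizes to the generic exp.If node
print(repr(sqlglot.parse_one("IFF(x > 0, 1, 0)", read="snowflake")))

# TO_TIMESTAMP with a format string becomes exp.StrToTime, with the
# Snowflake format translated to strftime tokens on the way out
print(sqlglot.parse_one("TO_TIMESTAMP(col, 'yyyy-mm-dd')", read="snowflake").sql("duckdb"))
# roughly: STRPTIME(col, '%Y-%m-%d')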
Parser consumes a list of tokens produced by the Tokenizer and produces a parsed syntax tree.
Arguments:
- error_level: The desired error level. Default: ErrorLevel.IMMEDIATE
- error_message_context: The amount of context to capture from a query string when displaying the error message (in number of characters). Default: 100
- max_errors: Maximum number of error messages to include in a raised ParseError. This is only relevant if error_level is ErrorLevel.RAISE. Default: 3
Inherited Members
- sqlglot.parser.Parser
- Parser
- NO_PAREN_FUNCTIONS
- STRUCT_TYPE_TOKENS
- NESTED_TYPE_TOKENS
- ENUM_TYPE_TOKENS
- AGGREGATE_TYPE_TOKENS
- TYPE_TOKENS
- SIGNED_TO_UNSIGNED_TYPE_TOKEN
- SUBQUERY_PREDICATES
- RESERVED_TOKENS
- DB_CREATABLES
- CREATABLES
- ALTERABLES
- ALIAS_TOKENS
- ARRAY_CONSTRUCTORS
- COMMENT_TABLE_ALIAS_TOKENS
- UPDATE_ALIAS_TOKENS
- TRIM_TYPES
- FUNC_TOKENS
- CONJUNCTION
- ASSIGNMENT
- DISJUNCTION
- EQUALITY
- COMPARISON
- BITWISE
- TERM
- FACTOR
- EXPONENT
- TIMES
- SET_OPERATIONS
- JOIN_METHODS
- JOIN_SIDES
- JOIN_KINDS
- JOIN_HINTS
- CAST_COLUMN_OPERATORS
- EXPRESSION_PARSERS
- UNARY_PARSERS
- STRING_PARSERS
- NUMERIC_PARSERS
- PRIMARY_PARSERS
- PLACEHOLDER_PARSERS
- RANGE_PARSERS
- PIPE_SYNTAX_TRANSFORM_PARSERS
- ALTER_ALTER_PARSERS
- SCHEMA_UNNAMED_CONSTRAINTS
- NO_PAREN_FUNCTION_PARSERS
- INVALID_FUNC_NAME_TOKENS
- FUNCTIONS_WITH_ALIASED_ARGS
- KEY_VALUE_DEFINITIONS
- QUERY_MODIFIER_PARSERS
- QUERY_MODIFIER_TOKENS
- SET_PARSERS
- TYPE_LITERAL_PARSERS
- DDL_SELECT_TOKENS
- PRE_VOLATILE_TOKENS
- TRANSACTION_KIND
- TRANSACTION_CHARACTERISTICS
- CONFLICT_ACTIONS
- CREATE_SEQUENCE
- ISOLATED_LOADING_OPTIONS
- USABLES
- CAST_ACTIONS
- SCHEMA_BINDING_OPTIONS
- PROCEDURE_OPTIONS
- EXECUTE_AS_OPTIONS
- KEY_CONSTRAINT_OPTIONS
- WINDOW_EXCLUDE_OPTIONS
- INSERT_ALTERNATIVES
- CLONE_KEYWORDS
- HISTORICAL_DATA_PREFIX
- HISTORICAL_DATA_KIND
- OPCLASS_FOLLOW_KEYWORDS
- OPTYPE_FOLLOW_TOKENS
- TABLE_INDEX_HINT_TOKENS
- VIEW_ATTRIBUTES
- WINDOW_ALIAS_TOKENS
- WINDOW_BEFORE_PAREN_TOKENS
- WINDOW_SIDES
- JSON_KEY_VALUE_SEPARATOR_TOKENS
- FETCH_TOKENS
- ADD_CONSTRAINT_TOKENS
- DISTINCT_TOKENS
- UNNEST_OFFSET_ALIAS_TOKENS
- SELECT_START_TOKENS
- COPY_INTO_VARLEN_OPTIONS
- IS_JSON_PREDICATE_KIND
- ODBC_DATETIME_LITERALS
- ON_CONDITION_TOKENS
- PRIVILEGE_FOLLOW_TOKENS
- DESCRIBE_STYLES
- ANALYZE_STYLES
- ANALYZE_EXPRESSION_PARSERS
- PARTITION_KEYWORDS
- AMBIGUOUS_ALIAS_TOKENS
- OPERATION_MODIFIERS
- RECURSIVE_CTE_SEARCH_KIND
- MODIFIABLES
- STRICT_CAST
- PREFIXED_PIVOT_COLUMNS
- LOG_DEFAULTS_TO_LN
- TABLESAMPLE_CSV
- SET_REQUIRES_ASSIGNMENT_DELIMITER
- TRIM_PATTERN_FIRST
- STRING_ALIASES
- MODIFIERS_ATTACHED_TO_SET_OP
- SET_OP_MODIFIERS
- NO_PAREN_IF_COMMANDS
- JSON_ARROWS_REQUIRE_JSON_TYPE
- VALUES_FOLLOWED_BY_PAREN
- SUPPORTS_IMPLICIT_UNNEST
- INTERVAL_SPANS
- SUPPORTS_PARTITION_SELECTION
- WRAPPED_TRANSFORM_COLUMN_CONSTRAINT
- OPTIONAL_ALIAS_TOKEN_CTE
- ALTER_RENAME_REQUIRES_COLUMN
- JOINS_HAVE_EQUAL_PRECEDENCE
- ZONE_AWARE_TIMESTAMP_CONSTRUCTOR
- MAP_KEYS_ARE_ARBITRARY_EXPRESSIONS
- ADD_JOIN_ON_TRUE
- error_level
- error_message_context
- max_errors
- dialect
- reset
- parse
- parse_into
- check_errors
- raise_error
- expression
- validate_expression
- parse_set_operation
- build_cast
- errors
- sql
1229 class Tokenizer(tokens.Tokenizer): 1230 STRING_ESCAPES = ["\\", "'"] 1231 HEX_STRINGS = [("x'", "'"), ("X'", "'")] 1232 RAW_STRINGS = ["$$"] 1233 COMMENTS = ["--", "//", ("/*", "*/")] 1234 NESTED_COMMENTS = False 1235 1236 KEYWORDS = { 1237 **tokens.Tokenizer.KEYWORDS, 1238 "BYTEINT": TokenType.INT, 1239 "FILE://": TokenType.URI_START, 1240 "FILE FORMAT": TokenType.FILE_FORMAT, 1241 "GET": TokenType.GET, 1242 "MATCH_CONDITION": TokenType.MATCH_CONDITION, 1243 "MATCH_RECOGNIZE": TokenType.MATCH_RECOGNIZE, 1244 "MINUS": TokenType.EXCEPT, 1245 "NCHAR VARYING": TokenType.VARCHAR, 1246 "PUT": TokenType.PUT, 1247 "REMOVE": TokenType.COMMAND, 1248 "RM": TokenType.COMMAND, 1249 "SAMPLE": TokenType.TABLE_SAMPLE, 1250 "SEMANTIC VIEW": TokenType.SEMANTIC_VIEW, 1251 "SQL_DOUBLE": TokenType.DOUBLE, 1252 "SQL_VARCHAR": TokenType.VARCHAR, 1253 "STAGE": TokenType.STAGE, 1254 "STORAGE INTEGRATION": TokenType.STORAGE_INTEGRATION, 1255 "STREAMLIT": TokenType.STREAMLIT, 1256 "TAG": TokenType.TAG, 1257 "TIMESTAMP_TZ": TokenType.TIMESTAMPTZ, 1258 "TOP": TokenType.TOP, 1259 "WAREHOUSE": TokenType.WAREHOUSE, 1260 } 1261 KEYWORDS.pop("/*+") 1262 1263 SINGLE_TOKENS = { 1264 **tokens.Tokenizer.SINGLE_TOKENS, 1265 "$": TokenType.PARAMETER, 1266 "!": TokenType.EXCLAMATION, 1267 } 1268 1269 VAR_SINGLE_TOKENS = {"$"} 1270 1271 COMMANDS = tokens.Tokenizer.COMMANDS - {TokenType.SHOW}
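A couple of these tokenizer behaviors observable through a round trip (a sketch; outputs are approximate):

import sqlglot

# MINUS tokenizes as EXCEPT
print(sqlglot.transpile("SELECT 1 MINUS SELECT 2", read="snowflake")[0])
# roughly: SELECT 1 EXCEPT SELECT 2

# $$ ... $$ delimits a raw string
print(sqlglot.transpile("SELECT $$hello$$", read="snowflake")[0])
# roughly: SELECT 'hello'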
Inherited Members
- sqlglot.tokens.Tokenizer
- Tokenizer
- BIT_STRINGS
- BYTE_STRINGS
- HEREDOC_STRINGS
- UNICODE_STRINGS
- IDENTIFIERS
- QUOTES
- IDENTIFIER_ESCAPES
- HEREDOC_TAG_IS_IDENTIFIER
- HEREDOC_STRING_ALTERNATIVE
- STRING_ESCAPES_ALLOWED_IN_RAW_STRINGS
- HINT_START
- TOKENS_PRECEDING_HINT
- WHITE_SPACE
- COMMAND_PREFIX_TOKENS
- NUMERIC_LITERALS
- dialect
- use_rs_tokenizer
- reset
- tokenize
- tokenize_rs
- size
- sql
- tokens
1273 class Generator(generator.Generator): 1274 PARAMETER_TOKEN = "$" 1275 MATCHED_BY_SOURCE = False 1276 SINGLE_STRING_INTERVAL = True 1277 JOIN_HINTS = False 1278 TABLE_HINTS = False 1279 QUERY_HINTS = False 1280 AGGREGATE_FILTER_SUPPORTED = False 1281 SUPPORTS_TABLE_COPY = False 1282 COLLATE_IS_FUNC = True 1283 LIMIT_ONLY_LITERALS = True 1284 JSON_KEY_VALUE_PAIR_SEP = "," 1285 INSERT_OVERWRITE = " OVERWRITE INTO" 1286 STRUCT_DELIMITER = ("(", ")") 1287 COPY_PARAMS_ARE_WRAPPED = False 1288 COPY_PARAMS_EQ_REQUIRED = True 1289 STAR_EXCEPT = "EXCLUDE" 1290 SUPPORTS_EXPLODING_PROJECTIONS = False 1291 ARRAY_CONCAT_IS_VAR_LEN = False 1292 SUPPORTS_CONVERT_TIMEZONE = True 1293 EXCEPT_INTERSECT_SUPPORT_ALL_CLAUSE = False 1294 SUPPORTS_MEDIAN = True 1295 ARRAY_SIZE_NAME = "ARRAY_SIZE" 1296 SUPPORTS_DECODE_CASE = True 1297 IS_BOOL_ALLOWED = False 1298 1299 TRANSFORMS = { 1300 **generator.Generator.TRANSFORMS, 1301 exp.ApproxDistinct: rename_func("APPROX_COUNT_DISTINCT"), 1302 exp.ArgMax: rename_func("MAX_BY"), 1303 exp.ArgMin: rename_func("MIN_BY"), 1304 exp.ArrayConcat: lambda self, e: self.arrayconcat_sql(e, name="ARRAY_CAT"), 1305 exp.ArrayContains: lambda self, e: self.func("ARRAY_CONTAINS", e.expression, e.this), 1306 exp.ArrayIntersect: rename_func("ARRAY_INTERSECTION"), 1307 exp.AtTimeZone: lambda self, e: self.func( 1308 "CONVERT_TIMEZONE", e.args.get("zone"), e.this 1309 ), 1310 exp.BitwiseOr: rename_func("BITOR"), 1311 exp.BitwiseXor: rename_func("BITXOR"), 1312 exp.BitwiseAnd: rename_func("BITAND"), 1313 exp.BitwiseAndAgg: rename_func("BITANDAGG"), 1314 exp.BitwiseOrAgg: rename_func("BITORAGG"), 1315 exp.BitwiseXorAgg: rename_func("BITXORAGG"), 1316 exp.BitwiseNot: rename_func("BITNOT"), 1317 exp.BitwiseLeftShift: rename_func("BITSHIFTLEFT"), 1318 exp.BitwiseRightShift: rename_func("BITSHIFTRIGHT"), 1319 exp.Create: transforms.preprocess([_flatten_structured_types_unless_iceberg]), 1320 exp.DateAdd: date_delta_sql("DATEADD"), 1321 exp.DateDiff: date_delta_sql("DATEDIFF"), 1322 exp.DatetimeAdd: date_delta_sql("TIMESTAMPADD"), 1323 exp.DatetimeDiff: timestampdiff_sql, 1324 exp.DateStrToDate: datestrtodate_sql, 1325 exp.DayOfMonth: rename_func("DAYOFMONTH"), 1326 exp.DayOfWeek: rename_func("DAYOFWEEK"), 1327 exp.DayOfWeekIso: rename_func("DAYOFWEEKISO"), 1328 exp.DayOfYear: rename_func("DAYOFYEAR"), 1329 exp.Explode: rename_func("FLATTEN"), 1330 exp.Extract: lambda self, e: self.func( 1331 "DATE_PART", map_date_part(e.this, self.dialect), e.expression 1332 ), 1333 exp.EuclideanDistance: rename_func("VECTOR_L2_DISTANCE"), 1334 exp.FileFormatProperty: lambda self, 1335 e: f"FILE_FORMAT=({self.expressions(e, 'expressions', sep=' ')})", 1336 exp.FromTimeZone: lambda self, e: self.func( 1337 "CONVERT_TIMEZONE", e.args.get("zone"), "'UTC'", e.this 1338 ), 1339 exp.GenerateSeries: lambda self, e: self.func( 1340 "ARRAY_GENERATE_RANGE", e.args["start"], e.args["end"] + 1, e.args.get("step") 1341 ), 1342 exp.GetExtract: rename_func("GET"), 1343 exp.GroupConcat: lambda self, e: groupconcat_sql(self, e, sep=""), 1344 exp.If: if_sql(name="IFF", false_value="NULL"), 1345 exp.JSONExtractArray: _json_extract_value_array_sql, 1346 exp.JSONExtractScalar: lambda self, e: self.func( 1347 "JSON_EXTRACT_PATH_TEXT", e.this, e.expression 1348 ), 1349 exp.JSONObject: lambda self, e: self.func("OBJECT_CONSTRUCT_KEEP_NULL", *e.expressions), 1350 exp.JSONPathRoot: lambda *_: "", 1351 exp.JSONValueArray: _json_extract_value_array_sql, 1352 exp.Levenshtein: unsupported_args("ins_cost", "del_cost", "sub_cost")( 1353 
rename_func("EDITDISTANCE") 1354 ), 1355 exp.LocationProperty: lambda self, e: f"LOCATION={self.sql(e, 'this')}", 1356 exp.LogicalAnd: rename_func("BOOLAND_AGG"), 1357 exp.LogicalOr: rename_func("BOOLOR_AGG"), 1358 exp.Map: lambda self, e: var_map_sql(self, e, "OBJECT_CONSTRUCT"), 1359 exp.MakeInterval: no_make_interval_sql, 1360 exp.Max: max_or_greatest, 1361 exp.Min: min_or_least, 1362 exp.ParseJSON: lambda self, e: self.func( 1363 "TRY_PARSE_JSON" if e.args.get("safe") else "PARSE_JSON", e.this 1364 ), 1365 exp.JSONFormat: rename_func("TO_JSON"), 1366 exp.PartitionedByProperty: lambda self, e: f"PARTITION BY {self.sql(e, 'this')}", 1367 exp.PercentileCont: transforms.preprocess( 1368 [transforms.add_within_group_for_percentiles] 1369 ), 1370 exp.PercentileDisc: transforms.preprocess( 1371 [transforms.add_within_group_for_percentiles] 1372 ), 1373 exp.Pivot: transforms.preprocess([_unqualify_pivot_columns]), 1374 exp.RegexpExtract: _regexpextract_sql, 1375 exp.RegexpExtractAll: _regexpextract_sql, 1376 exp.RegexpILike: _regexpilike_sql, 1377 exp.Rand: rename_func("RANDOM"), 1378 exp.Select: transforms.preprocess( 1379 [ 1380 transforms.eliminate_window_clause, 1381 transforms.eliminate_distinct_on, 1382 transforms.explode_projection_to_unnest(), 1383 transforms.eliminate_semi_and_anti_joins, 1384 _transform_generate_date_array, 1385 _qualify_unnested_columns, 1386 _eliminate_dot_variant_lookup, 1387 ] 1388 ), 1389 exp.SHA: rename_func("SHA1"), 1390 exp.MD5Digest: rename_func("MD5_BINARY"), 1391 exp.MD5NumberLower64: rename_func("MD5_NUMBER_LOWER64"), 1392 exp.MD5NumberUpper64: rename_func("MD5_NUMBER_UPPER64"), 1393 exp.LowerHex: rename_func("TO_CHAR"), 1394 exp.SortArray: rename_func("ARRAY_SORT"), 1395 exp.StarMap: rename_func("OBJECT_CONSTRUCT"), 1396 exp.StartsWith: rename_func("STARTSWITH"), 1397 exp.EndsWith: rename_func("ENDSWITH"), 1398 exp.StrPosition: lambda self, e: strposition_sql( 1399 self, e, func_name="CHARINDEX", supports_position=True 1400 ), 1401 exp.StrToDate: lambda self, e: self.func("DATE", e.this, self.format_time(e)), 1402 exp.StringToArray: rename_func("STRTOK_TO_ARRAY"), 1403 exp.Stuff: rename_func("INSERT"), 1404 exp.StPoint: rename_func("ST_MAKEPOINT"), 1405 exp.TimeAdd: date_delta_sql("TIMEADD"), 1406 exp.Timestamp: no_timestamp_sql, 1407 exp.TimestampAdd: date_delta_sql("TIMESTAMPADD"), 1408 exp.TimestampDiff: lambda self, e: self.func( 1409 "TIMESTAMPDIFF", e.unit, e.expression, e.this 1410 ), 1411 exp.TimestampTrunc: timestamptrunc_sql(), 1412 exp.TimeStrToTime: timestrtotime_sql, 1413 exp.TimeToUnix: lambda self, e: f"EXTRACT(epoch_second FROM {self.sql(e, 'this')})", 1414 exp.ToArray: rename_func("TO_ARRAY"), 1415 exp.ToChar: lambda self, e: self.function_fallback_sql(e), 1416 exp.ToDouble: rename_func("TO_DOUBLE"), 1417 exp.TsOrDsAdd: date_delta_sql("DATEADD", cast=True), 1418 exp.TsOrDsDiff: date_delta_sql("DATEDIFF"), 1419 exp.TsOrDsToDate: lambda self, e: self.func( 1420 "TRY_TO_DATE" if e.args.get("safe") else "TO_DATE", e.this, self.format_time(e) 1421 ), 1422 exp.TsOrDsToTime: lambda self, e: self.func( 1423 "TRY_TO_TIME" if e.args.get("safe") else "TO_TIME", e.this, self.format_time(e) 1424 ), 1425 exp.Unhex: rename_func("HEX_DECODE_BINARY"), 1426 exp.UnixToTime: rename_func("TO_TIMESTAMP"), 1427 exp.Uuid: rename_func("UUID_STRING"), 1428 exp.VarMap: lambda self, e: var_map_sql(self, e, "OBJECT_CONSTRUCT"), 1429 exp.WeekOfYear: rename_func("WEEKOFYEAR"), 1430 exp.Xor: rename_func("BOOLXOR"), 1431 exp.ByteLength: rename_func("OCTET_LENGTH"), 1432 
} 1433 1434 SUPPORTED_JSON_PATH_PARTS = { 1435 exp.JSONPathKey, 1436 exp.JSONPathRoot, 1437 exp.JSONPathSubscript, 1438 } 1439 1440 TYPE_MAPPING = { 1441 **generator.Generator.TYPE_MAPPING, 1442 exp.DataType.Type.BIGDECIMAL: "DOUBLE", 1443 exp.DataType.Type.NESTED: "OBJECT", 1444 exp.DataType.Type.STRUCT: "OBJECT", 1445 exp.DataType.Type.TEXT: "VARCHAR", 1446 } 1447 1448 TOKEN_MAPPING = { 1449 TokenType.AUTO_INCREMENT: "AUTOINCREMENT", 1450 } 1451 1452 PROPERTIES_LOCATION = { 1453 **generator.Generator.PROPERTIES_LOCATION, 1454 exp.CredentialsProperty: exp.Properties.Location.POST_WITH, 1455 exp.LocationProperty: exp.Properties.Location.POST_WITH, 1456 exp.PartitionedByProperty: exp.Properties.Location.POST_SCHEMA, 1457 exp.SetProperty: exp.Properties.Location.UNSUPPORTED, 1458 exp.VolatileProperty: exp.Properties.Location.UNSUPPORTED, 1459 } 1460 1461 UNSUPPORTED_VALUES_EXPRESSIONS = { 1462 exp.Map, 1463 exp.StarMap, 1464 exp.Struct, 1465 exp.VarMap, 1466 } 1467 1468 RESPECT_IGNORE_NULLS_UNSUPPORTED_EXPRESSIONS = (exp.ArrayAgg,) 1469 1470 def with_properties(self, properties: exp.Properties) -> str: 1471 return self.properties(properties, wrapped=False, prefix=self.sep(""), sep=" ") 1472 1473 def values_sql(self, expression: exp.Values, values_as_table: bool = True) -> str: 1474 if expression.find(*self.UNSUPPORTED_VALUES_EXPRESSIONS): 1475 values_as_table = False 1476 1477 return super().values_sql(expression, values_as_table=values_as_table) 1478 1479 def datatype_sql(self, expression: exp.DataType) -> str: 1480 expressions = expression.expressions 1481 if ( 1482 expressions 1483 and expression.is_type(*exp.DataType.STRUCT_TYPES) 1484 and any(isinstance(field_type, exp.DataType) for field_type in expressions) 1485 ): 1486 # The correct syntax is OBJECT [ (<key> <value_type [NOT NULL] [, ...]) ] 1487 return "OBJECT" 1488 1489 return super().datatype_sql(expression) 1490 1491 def tonumber_sql(self, expression: exp.ToNumber) -> str: 1492 return self.func( 1493 "TO_NUMBER", 1494 expression.this, 1495 expression.args.get("format"), 1496 expression.args.get("precision"), 1497 expression.args.get("scale"), 1498 ) 1499 1500 def timestampfromparts_sql(self, expression: exp.TimestampFromParts) -> str: 1501 milli = expression.args.get("milli") 1502 if milli is not None: 1503 milli_to_nano = milli.pop() * exp.Literal.number(1000000) 1504 expression.set("nano", milli_to_nano) 1505 1506 return rename_func("TIMESTAMP_FROM_PARTS")(self, expression) 1507 1508 def cast_sql(self, expression: exp.Cast, safe_prefix: t.Optional[str] = None) -> str: 1509 if expression.is_type(exp.DataType.Type.GEOGRAPHY): 1510 return self.func("TO_GEOGRAPHY", expression.this) 1511 if expression.is_type(exp.DataType.Type.GEOMETRY): 1512 return self.func("TO_GEOMETRY", expression.this) 1513 1514 return super().cast_sql(expression, safe_prefix=safe_prefix) 1515 1516 def trycast_sql(self, expression: exp.TryCast) -> str: 1517 value = expression.this 1518 1519 if value.type is None: 1520 from sqlglot.optimizer.annotate_types import annotate_types 1521 1522 value = annotate_types(value, dialect=self.dialect) 1523 1524 # Snowflake requires that TRY_CAST's value be a string 1525 # If TRY_CAST is being roundtripped (since Snowflake is the only dialect that sets "requires_string") or 1526 # if we can deduce that the value is a string, then we can generate TRY_CAST 1527 if expression.args.get("requires_string") or value.is_type(*exp.DataType.TEXT_TYPES): 1528 return super().trycast_sql(expression) 1529 1530 return 
self.cast_sql(expression) 1531 1532 def log_sql(self, expression: exp.Log) -> str: 1533 if not expression.expression: 1534 return self.func("LN", expression.this) 1535 1536 return super().log_sql(expression) 1537 1538 def unnest_sql(self, expression: exp.Unnest) -> str: 1539 unnest_alias = expression.args.get("alias") 1540 offset = expression.args.get("offset") 1541 1542 unnest_alias_columns = unnest_alias.columns if unnest_alias else [] 1543 value = seq_get(unnest_alias_columns, 0) or exp.to_identifier("value") 1544 1545 columns = [ 1546 exp.to_identifier("seq"), 1547 exp.to_identifier("key"), 1548 exp.to_identifier("path"), 1549 offset.pop() if isinstance(offset, exp.Expression) else exp.to_identifier("index"), 1550 value, 1551 exp.to_identifier("this"), 1552 ] 1553 1554 if unnest_alias: 1555 unnest_alias.set("columns", columns) 1556 else: 1557 unnest_alias = exp.TableAlias(this="_u", columns=columns) 1558 1559 table_input = self.sql(expression.expressions[0]) 1560 if not table_input.startswith("INPUT =>"): 1561 table_input = f"INPUT => {table_input}" 1562 1563 expression_parent = expression.parent 1564 1565 explode = ( 1566 f"FLATTEN({table_input})" 1567 if isinstance(expression_parent, exp.Lateral) 1568 else f"TABLE(FLATTEN({table_input}))" 1569 ) 1570 alias = self.sql(unnest_alias) 1571 alias = f" AS {alias}" if alias else "" 1572 value = ( 1573 "" 1574 if isinstance(expression_parent, (exp.From, exp.Join, exp.Lateral)) 1575 else f"{value} FROM " 1576 ) 1577 1578 return f"{value}{explode}{alias}" 1579 1580 def show_sql(self, expression: exp.Show) -> str: 1581 terse = "TERSE " if expression.args.get("terse") else "" 1582 history = " HISTORY" if expression.args.get("history") else "" 1583 like = self.sql(expression, "like") 1584 like = f" LIKE {like}" if like else "" 1585 1586 scope = self.sql(expression, "scope") 1587 scope = f" {scope}" if scope else "" 1588 1589 scope_kind = self.sql(expression, "scope_kind") 1590 if scope_kind: 1591 scope_kind = f" IN {scope_kind}" 1592 1593 starts_with = self.sql(expression, "starts_with") 1594 if starts_with: 1595 starts_with = f" STARTS WITH {starts_with}" 1596 1597 limit = self.sql(expression, "limit") 1598 1599 from_ = self.sql(expression, "from") 1600 if from_: 1601 from_ = f" FROM {from_}" 1602 1603 privileges = self.expressions(expression, key="privileges", flat=True) 1604 privileges = f" WITH PRIVILEGES {privileges}" if privileges else "" 1605 1606 return f"SHOW {terse}{expression.name}{history}{like}{scope_kind}{scope}{starts_with}{limit}{from_}{privileges}" 1607 1608 def describe_sql(self, expression: exp.Describe) -> str: 1609 # Default to table if kind is unknown 1610 kind_value = expression.args.get("kind") or "TABLE" 1611 kind = f" {kind_value}" if kind_value else "" 1612 this = f" {self.sql(expression, 'this')}" 1613 expressions = self.expressions(expression, flat=True) 1614 expressions = f" {expressions}" if expressions else "" 1615 return f"DESCRIBE{kind}{this}{expressions}" 1616 1617 def generatedasidentitycolumnconstraint_sql( 1618 self, expression: exp.GeneratedAsIdentityColumnConstraint 1619 ) -> str: 1620 start = expression.args.get("start") 1621 start = f" START {start}" if start else "" 1622 increment = expression.args.get("increment") 1623 increment = f" INCREMENT {increment}" if increment else "" 1624 1625 order = expression.args.get("order") 1626 if order is not None: 1627 order_clause = " ORDER" if order else " NOORDER" 1628 else: 1629 order_clause = "" 1630 1631 return f"AUTOINCREMENT{start}{increment}{order_clause}" 
1632 1633 def cluster_sql(self, expression: exp.Cluster) -> str: 1634 return f"CLUSTER BY ({self.expressions(expression, flat=True)})" 1635 1636 def struct_sql(self, expression: exp.Struct) -> str: 1637 if len(expression.expressions) == 1: 1638 arg = expression.expressions[0] 1639 if arg.is_star or (isinstance(arg, exp.ILike) and arg.left.is_star): 1640 # Wildcard syntax: https://docs.snowflake.com/en/sql-reference/data-types-semistructured#object 1641 return f"{{{self.sql(expression.expressions[0])}}}" 1642 1643 keys = [] 1644 values = [] 1645 1646 for i, e in enumerate(expression.expressions): 1647 if isinstance(e, exp.PropertyEQ): 1648 keys.append( 1649 exp.Literal.string(e.name) if isinstance(e.this, exp.Identifier) else e.this 1650 ) 1651 values.append(e.expression) 1652 else: 1653 keys.append(exp.Literal.string(f"_{i}")) 1654 values.append(e) 1655 1656 return self.func("OBJECT_CONSTRUCT", *flatten(zip(keys, values))) 1657 1658 @unsupported_args("weight", "accuracy") 1659 def approxquantile_sql(self, expression: exp.ApproxQuantile) -> str: 1660 return self.func("APPROX_PERCENTILE", expression.this, expression.args.get("quantile")) 1661 1662 def alterset_sql(self, expression: exp.AlterSet) -> str: 1663 exprs = self.expressions(expression, flat=True) 1664 exprs = f" {exprs}" if exprs else "" 1665 file_format = self.expressions(expression, key="file_format", flat=True, sep=" ") 1666 file_format = f" STAGE_FILE_FORMAT = ({file_format})" if file_format else "" 1667 copy_options = self.expressions(expression, key="copy_options", flat=True, sep=" ") 1668 copy_options = f" STAGE_COPY_OPTIONS = ({copy_options})" if copy_options else "" 1669 tag = self.expressions(expression, key="tag", flat=True) 1670 tag = f" TAG {tag}" if tag else "" 1671 1672 return f"SET{exprs}{file_format}{copy_options}{tag}" 1673 1674 def strtotime_sql(self, expression: exp.StrToTime): 1675 safe_prefix = "TRY_" if expression.args.get("safe") else "" 1676 return self.func( 1677 f"{safe_prefix}TO_TIMESTAMP", expression.this, self.format_time(expression) 1678 ) 1679 1680 def timestampsub_sql(self, expression: exp.TimestampSub): 1681 return self.sql( 1682 exp.TimestampAdd( 1683 this=expression.this, 1684 expression=expression.expression * -1, 1685 unit=expression.unit, 1686 ) 1687 ) 1688 1689 def jsonextract_sql(self, expression: exp.JSONExtract): 1690 this = expression.this 1691 1692 # JSON strings are valid coming from other dialects such as BQ so 1693 # for these cases we PARSE_JSON preemptively 1694 if not isinstance(this, (exp.ParseJSON, exp.JSONExtract)) and not expression.args.get( 1695 "requires_json" 1696 ): 1697 this = exp.ParseJSON(this=this) 1698 1699 return self.func( 1700 "GET_PATH", 1701 this, 1702 expression.expression, 1703 ) 1704 1705 def timetostr_sql(self, expression: exp.TimeToStr) -> str: 1706 this = expression.this 1707 if this.is_string: 1708 this = exp.cast(this, exp.DataType.Type.TIMESTAMP) 1709 1710 return self.func("TO_CHAR", this, self.format_time(expression)) 1711 1712 def datesub_sql(self, expression: exp.DateSub) -> str: 1713 value = expression.expression 1714 if value: 1715 value.replace(value * (-1)) 1716 else: 1717 self.unsupported("DateSub cannot be transpiled if the subtracted count is unknown") 1718 1719 return date_delta_sql("DATEADD")(self, expression) 1720 1721 def select_sql(self, expression: exp.Select) -> str: 1722 limit = expression.args.get("limit") 1723 offset = expression.args.get("offset") 1724 if offset and not limit: 1725 expression.limit(exp.Null(), copy=False) 1726 return 
super().select_sql(expression) 1727 1728 def createable_sql(self, expression: exp.Create, locations: t.DefaultDict) -> str: 1729 is_materialized = expression.find(exp.MaterializedProperty) 1730 copy_grants_property = expression.find(exp.CopyGrantsProperty) 1731 1732 if expression.kind == "VIEW" and is_materialized and copy_grants_property: 1733 # For materialized views, COPY GRANTS is located *before* the columns list 1734 # This is in contrast to normal views where COPY GRANTS is located *after* the columns list 1735 # We default CopyGrantsProperty to POST_SCHEMA which means we need to output it POST_NAME if a materialized view is detected 1736 # ref: https://docs.snowflake.com/en/sql-reference/sql/create-materialized-view#syntax 1737 # ref: https://docs.snowflake.com/en/sql-reference/sql/create-view#syntax 1738 post_schema_properties = locations[exp.Properties.Location.POST_SCHEMA] 1739 post_schema_properties.pop(post_schema_properties.index(copy_grants_property)) 1740 1741 this_name = self.sql(expression.this, "this") 1742 copy_grants = self.sql(copy_grants_property) 1743 this_schema = self.schema_columns_sql(expression.this) 1744 this_schema = f"{self.sep()}{this_schema}" if this_schema else "" 1745 1746 return f"{this_name}{self.sep()}{copy_grants}{this_schema}" 1747 1748 return super().createable_sql(expression, locations) 1749 1750 def arrayagg_sql(self, expression: exp.ArrayAgg) -> str: 1751 this = expression.this 1752 1753 # If an ORDER BY clause is present, we need to remove it from ARRAY_AGG 1754 # and add it later as part of the WITHIN GROUP clause 1755 order = this if isinstance(this, exp.Order) else None 1756 if order: 1757 expression.set("this", order.this.pop()) 1758 1759 expr_sql = super().arrayagg_sql(expression) 1760 1761 if order: 1762 expr_sql = self.sql(exp.WithinGroup(this=expr_sql, expression=order)) 1763 1764 return expr_sql 1765 1766 def array_sql(self, expression: exp.Array) -> str: 1767 expressions = expression.expressions 1768 1769 first_expr = seq_get(expressions, 0) 1770 if isinstance(first_expr, exp.Select): 1771 # SELECT AS STRUCT foo AS alias_foo -> ARRAY_AGG(OBJECT_CONSTRUCT('alias_foo', foo)) 1772 if first_expr.text("kind").upper() == "STRUCT": 1773 object_construct_args = [] 1774 for expr in first_expr.expressions: 1775 # Alias case: SELECT AS STRUCT foo AS alias_foo -> OBJECT_CONSTRUCT('alias_foo', foo) 1776 # Column case: SELECT AS STRUCT foo -> OBJECT_CONSTRUCT('foo', foo) 1777 name = expr.this if isinstance(expr, exp.Alias) else expr 1778 1779 object_construct_args.extend([exp.Literal.string(expr.alias_or_name), name]) 1780 1781 array_agg = exp.ArrayAgg( 1782 this=_build_object_construct(args=object_construct_args) 1783 ) 1784 1785 first_expr.set("kind", None) 1786 first_expr.set("expressions", [array_agg]) 1787 1788 return self.sql(first_expr.subquery()) 1789 1790 return inline_array_sql(self, expression) 1791 1792 def currentdate_sql(self, expression: exp.CurrentDate) -> str: 1793 zone = self.sql(expression, "this") 1794 if not zone: 1795 return super().currentdate_sql(expression) 1796 1797 expr = exp.Cast( 1798 this=exp.ConvertTimezone(target_tz=zone, timestamp=exp.CurrentTimestamp()), 1799 to=exp.DataType(this=exp.DataType.Type.DATE), 1800 ) 1801 return self.sql(expr) 1802 1803 def dot_sql(self, expression: exp.Dot) -> str: 1804 this = expression.this 1805 1806 if not this.type: 1807 from sqlglot.optimizer.annotate_types import annotate_types 1808 1809 this = annotate_types(this, dialect=self.dialect) 1810 1811 if not isinstance(this, exp.Dot) 
and this.is_type(exp.DataType.Type.STRUCT): 1812 # Generate colon notation for the top level STRUCT 1813 return f"{self.sql(this)}:{self.sql(expression, 'expression')}" 1814 1815 return super().dot_sql(expression) 1816 1817 def modelattribute_sql(self, expression: exp.ModelAttribute) -> str: 1818 return f"{self.sql(expression, 'this')}!{self.sql(expression, 'expression')}"
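Two of these generator settings observable through transpilation (a sketch; outputs are approximate):

import sqlglot

# STAR_EXCEPT = "EXCLUDE": BigQuery's SELECT * EXCEPT renders as EXCLUDE
print(sqlglot.transpile("SELECT * EXCEPT (a) FROM t", read="bigquery", write="snowflake")[0])
# roughly: SELECT * EXCLUDE (a) FROM t

# exp.If renders via IFF
print(sqlglot.transpile("SELECT IF(cond, 1, 2)", read="duckdb", write="snowflake")[0])
# roughly: SELECT IFF(cond, 1, 2)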
Generator converts a given syntax tree to the corresponding SQL string.
Arguments:
- pretty: Whether to format the produced SQL string. Default: False.
- identify: Determines when an identifier should be quoted. Possible values are: False (default): Never quote, except in cases where it's mandatory by the dialect. True or 'always': Always quote. 'safe': Only quote identifiers that are case insensitive.
- normalize: Whether to normalize identifiers to lowercase. Default: False.
- pad: The pad size in a formatted string. For example, this affects the indentation of a projection in a query, relative to its nesting level. Default: 2.
- indent: The indentation size in a formatted string. For example, this affects the indentation of subqueries and filters under a WHERE clause. Default: 2.
- normalize_functions: How to normalize function names. Possible values are: "upper" or True (default): Convert names to uppercase. "lower": Convert names to lowercase. False: Disables function name normalization.
- unsupported_level: Determines the generator's behavior when it encounters unsupported expressions. Default ErrorLevel.WARN.
- max_unsupported: Maximum number of unsupported messages to include in a raised UnsupportedError. This is only relevant if unsupported_level is ErrorLevel.RAISE. Default: 3
- leading_comma: Whether the comma is leading or trailing in select expressions. This is only relevant when generating in pretty mode. Default: False
- max_text_width: The max number of characters in a segment before creating new lines in pretty mode. The default is on the smaller end because the length only represents a segment and not the true line length. Default: 80
- comments: Whether to preserve comments in the output SQL code. Default: True
1479 def datatype_sql(self, expression: exp.DataType) -> str: 1480 expressions = expression.expressions 1481 if ( 1482 expressions 1483 and expression.is_type(*exp.DataType.STRUCT_TYPES) 1484 and any(isinstance(field_type, exp.DataType) for field_type in expressions) 1485 ): 1486 # The correct syntax is OBJECT [ (<key> <value_type [NOT NULL] [, ...]) ] 1487 return "OBJECT" 1488 1489 return super().datatype_sql(expression)
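A sketch of the collapse to OBJECT when a structured type carries bare (unnamed) field types:

import sqlglot

print(sqlglot.transpile("SELECT CAST(x AS STRUCT<INT64>)", read="bigquery", write="snowflake")[0])
# roughly: SELECT CAST(x AS OBJECT)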
1500 def timestampfromparts_sql(self, expression: exp.TimestampFromParts) -> str: 1501 milli = expression.args.get("milli") 1502 if milli is not None: 1503 milli_to_nano = milli.pop() * exp.Literal.number(1000000) 1504 expression.set("nano", milli_to_nano) 1505 1506 return rename_func("TIMESTAMP_FROM_PARTS")(self, expression)
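A sketch of the millisecond-to-nanosecond folding, using T-SQL's DATETIMEFROMPARTS (whose last argument is milliseconds) as the source:

import sqlglot

print(sqlglot.transpile(
    "SELECT DATETIMEFROMPARTS(2020, 1, 1, 12, 0, 0, 500)",
    read="tsql",
    write="snowflake",
)[0])
# roughly: SELECT TIMESTAMP_FROM_PARTS(2020, 1, 1, 12, 0, 0, 500 * 1000000)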
1508 def cast_sql(self, expression: exp.Cast, safe_prefix: t.Optional[str] = None) -> str: 1509 if expression.is_type(exp.DataType.Type.GEOGRAPHY): 1510 return self.func("TO_GEOGRAPHY", expression.this) 1511 if expression.is_type(exp.DataType.Type.GEOMETRY): 1512 return self.func("TO_GEOMETRY", expression.this) 1513 1514 return super().cast_sql(expression, safe_prefix=safe_prefix)
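A sketch of the geospatial cast rewrites:

import sqlglot

print(sqlglot.transpile("SELECT CAST(x AS GEOGRAPHY)", read="snowflake")[0])
# roughly: SELECT TO_GEOGRAPHY(x)

print(sqlglot.transpile("SELECT CAST(x AS GEOMETRY)", read="snowflake")[0])
# roughly: SELECT TO_GEOMETRY(x)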
1516 def trycast_sql(self, expression: exp.TryCast) -> str: 1517 value = expression.this 1518 1519 if value.type is None: 1520 from sqlglot.optimizer.annotate_types import annotate_types 1521 1522 value = annotate_types(value, dialect=self.dialect) 1523 1524 # Snowflake requires that TRY_CAST's value be a string 1525 # If TRY_CAST is being roundtripped (since Snowflake is the only dialect that sets "requires_string") or 1526 # if we can deduce that the value is a string, then we can generate TRY_CAST 1527 if expression.args.get("requires_string") or value.is_type(*exp.DataType.TEXT_TYPES): 1528 return super().trycast_sql(expression) 1529 1530 return self.cast_sql(expression)
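A sketch of the string check; literal operands make the type deduction visible:

import sqlglot

# A string operand is provably TEXT, so TRY_CAST survives
print(sqlglot.transpile("SELECT TRY_CAST('1' AS INT)", read="duckdb", write="snowflake")[0])
# roughly: SELECT TRY_CAST('1' AS INT)

# A numeric operand is not, so a plain CAST is emitted instead
print(sqlglot.transpile("SELECT TRY_CAST(1.5 AS INT)", read="duckdb", write="snowflake")[0])
# roughly: SELECT CAST(1.5 AS INT)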
1538 def unnest_sql(self, expression: exp.Unnest) -> str: 1539 unnest_alias = expression.args.get("alias") 1540 offset = expression.args.get("offset") 1541 1542 unnest_alias_columns = unnest_alias.columns if unnest_alias else [] 1543 value = seq_get(unnest_alias_columns, 0) or exp.to_identifier("value") 1544 1545 columns = [ 1546 exp.to_identifier("seq"), 1547 exp.to_identifier("key"), 1548 exp.to_identifier("path"), 1549 offset.pop() if isinstance(offset, exp.Expression) else exp.to_identifier("index"), 1550 value, 1551 exp.to_identifier("this"), 1552 ] 1553 1554 if unnest_alias: 1555 unnest_alias.set("columns", columns) 1556 else: 1557 unnest_alias = exp.TableAlias(this="_u", columns=columns) 1558 1559 table_input = self.sql(expression.expressions[0]) 1560 if not table_input.startswith("INPUT =>"): 1561 table_input = f"INPUT => {table_input}" 1562 1563 expression_parent = expression.parent 1564 1565 explode = ( 1566 f"FLATTEN({table_input})" 1567 if isinstance(expression_parent, exp.Lateral) 1568 else f"TABLE(FLATTEN({table_input}))" 1569 ) 1570 alias = self.sql(unnest_alias) 1571 alias = f" AS {alias}" if alias else "" 1572 value = ( 1573 "" 1574 if isinstance(expression_parent, (exp.From, exp.Join, exp.Lateral)) 1575 else f"{value} FROM " 1576 ) 1577 1578 return f"{value}{explode}{alias}"
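A sketch of the UNNEST rewrite; the alias layout in the output is approximate:

import sqlglot

print(sqlglot.transpile(
    "SELECT col FROM UNNEST([1, 2]) AS t(col)",
    read="duckdb",
    write="snowflake",
)[0])
# roughly: SELECT col FROM TABLE(FLATTEN(INPUT => [1, 2])) AS t(seq, key, path, index, col, this)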
1580 def show_sql(self, expression: exp.Show) -> str: 1581 terse = "TERSE " if expression.args.get("terse") else "" 1582 history = " HISTORY" if expression.args.get("history") else "" 1583 like = self.sql(expression, "like") 1584 like = f" LIKE {like}" if like else "" 1585 1586 scope = self.sql(expression, "scope") 1587 scope = f" {scope}" if scope else "" 1588 1589 scope_kind = self.sql(expression, "scope_kind") 1590 if scope_kind: 1591 scope_kind = f" IN {scope_kind}" 1592 1593 starts_with = self.sql(expression, "starts_with") 1594 if starts_with: 1595 starts_with = f" STARTS WITH {starts_with}" 1596 1597 limit = self.sql(expression, "limit") 1598 1599 from_ = self.sql(expression, "from") 1600 if from_: 1601 from_ = f" FROM {from_}" 1602 1603 privileges = self.expressions(expression, key="privileges", flat=True) 1604 privileges = f" WITH PRIVILEGES {privileges}" if privileges else "" 1605 1606 return f"SHOW {terse}{expression.name}{history}{like}{scope_kind}{scope}{starts_with}{limit}{from_}{privileges}"
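A round-trip sketch; the generator emits LIKE before the IN scope, matching the parse order:

import sqlglot

print(sqlglot.transpile("SHOW TERSE TABLES LIKE '%foo%' IN SCHEMA db1.s1", read="snowflake")[0])
# roughly: SHOW TERSE TABLES LIKE '%foo%' IN SCHEMA db1.s1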
1608 def describe_sql(self, expression: exp.Describe) -> str: 1609 # Default to table if kind is unknown 1610 kind_value = expression.args.get("kind") or "TABLE" 1611 kind = f" {kind_value}" if kind_value else "" 1612 this = f" {self.sql(expression, 'this')}" 1613 expressions = self.expressions(expression, flat=True) 1614 expressions = f" {expressions}" if expressions else "" 1615 return f"DESCRIBE{kind}{this}{expressions}"
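A sketch of the TABLE default:

import sqlglot

print(sqlglot.transpile("DESCRIBE t", read="snowflake")[0])
# roughly: DESCRIBE TABLE t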
1617 def generatedasidentitycolumnconstraint_sql( 1618 self, expression: exp.GeneratedAsIdentityColumnConstraint 1619 ) -> str: 1620 start = expression.args.get("start") 1621 start = f" START {start}" if start else "" 1622 increment = expression.args.get("increment") 1623 increment = f" INCREMENT {increment}" if increment else "" 1624 1625 order = expression.args.get("order") 1626 if order is not None: 1627 order_clause = " ORDER" if order else " NOORDER" 1628 else: 1629 order_clause = "" 1630 1631 return f"AUTOINCREMENT{start}{increment}{order_clause}"
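A sketch using a PostgreSQL identity column as the source (assuming the START WITH / INCREMENT BY options parse as shown):

import sqlglot

print(sqlglot.transpile(
    "CREATE TABLE t (id INT GENERATED BY DEFAULT AS IDENTITY (START WITH 1 INCREMENT BY 1))",
    read="postgres",
    write="snowflake",
)[0])
# roughly: CREATE TABLE t (id INT AUTOINCREMENT START 1 INCREMENT 1)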
def struct_sql(self, expression: exp.Struct) -> str:
    if len(expression.expressions) == 1:
        arg = expression.expressions[0]
        if arg.is_star or (isinstance(arg, exp.ILike) and arg.left.is_star):
            # Wildcard syntax: https://docs.snowflake.com/en/sql-reference/data-types-semistructured#object
            return f"{{{self.sql(expression.expressions[0])}}}"

    keys = []
    values = []

    for i, e in enumerate(expression.expressions):
        if isinstance(e, exp.PropertyEQ):
            keys.append(
                exp.Literal.string(e.name) if isinstance(e.this, exp.Identifier) else e.this
            )
            values.append(e.expression)
        else:
            keys.append(exp.Literal.string(f"_{i}"))
            values.append(e)

    return self.func("OBJECT_CONSTRUCT", *flatten(zip(keys, values)))
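Struct literals from dialects with first-class struct syntax become OBJECT_CONSTRUCT calls with alternating key/value arguments, e.g. (column name is invented; output is illustrative):

import sqlglot

print(sqlglot.transpile("SELECT {'a': 1, 'b': col}", read="duckdb", write="snowflake")[0])
# e.g. SELECT OBJECT_CONSTRUCT('a', 1, 'b', col)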
def alterset_sql(self, expression: exp.AlterSet) -> str:
    exprs = self.expressions(expression, flat=True)
    exprs = f" {exprs}" if exprs else ""
    file_format = self.expressions(expression, key="file_format", flat=True, sep=" ")
    file_format = f" STAGE_FILE_FORMAT = ({file_format})" if file_format else ""
    copy_options = self.expressions(expression, key="copy_options", flat=True, sep=" ")
    copy_options = f" STAGE_COPY_OPTIONS = ({copy_options})" if copy_options else ""
    tag = self.expressions(expression, key="tag", flat=True)
    tag = f" TAG {tag}" if tag else ""

    return f"SET{exprs}{file_format}{copy_options}{tag}"
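The tag branch is exercised by ALTER ... SET TAG, which should roundtrip through this method (tag name and value are invented; output is illustrative):

import sqlglot

print(sqlglot.transpile("ALTER TABLE t SET TAG cost_center='eng'", read="snowflake", write="snowflake")[0])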
def jsonextract_sql(self, expression: exp.JSONExtract):
    this = expression.this

    # JSON strings are valid coming from other dialects such as BQ so
    # for these cases we PARSE_JSON preemptively
    if not isinstance(this, (exp.ParseJSON, exp.JSONExtract)) and not expression.args.get(
        "requires_json"
    ):
        this = exp.ParseJSON(this=this)

    return self.func(
        "GET_PATH",
        this,
        expression.expression,
    )
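The preemptive PARSE_JSON matters when the extraction target is a plain string column, which is common when transpiling from dialects like DuckDB or BigQuery (column name and path are invented; output is illustrative):

import sqlglot

print(sqlglot.transpile("SELECT payload -> '$.user.id'", read="duckdb", write="snowflake")[0])
# e.g. SELECT GET_PATH(PARSE_JSON(payload), 'user.id')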
def datesub_sql(self, expression: exp.DateSub) -> str:
    value = expression.expression
    if value:
        value.replace(value * (-1))
    else:
        self.unsupported("DateSub cannot be transpiled if the subtracted count is unknown")

    return date_delta_sql("DATEADD")(self, expression)
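Snowflake has no date-subtraction function, so the amount is negated and DATEADD is emitted instead (column name is invented; output is illustrative):

import sqlglot

print(sqlglot.transpile("SELECT DATE_SUB(d, INTERVAL 3 DAY)", read="mysql", write="snowflake")[0])
# e.g. SELECT DATEADD(DAY, -3, d)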
def createable_sql(self, expression: exp.Create, locations: t.DefaultDict) -> str:
    is_materialized = expression.find(exp.MaterializedProperty)
    copy_grants_property = expression.find(exp.CopyGrantsProperty)

    if expression.kind == "VIEW" and is_materialized and copy_grants_property:
        # For materialized views, COPY GRANTS is located *before* the columns list
        # This is in contrast to normal views where COPY GRANTS is located *after* the columns list
        # We default CopyGrantsProperty to POST_SCHEMA which means we need to output it POST_NAME if a materialized view is detected
        # ref: https://docs.snowflake.com/en/sql-reference/sql/create-materialized-view#syntax
        # ref: https://docs.snowflake.com/en/sql-reference/sql/create-view#syntax
        post_schema_properties = locations[exp.Properties.Location.POST_SCHEMA]
        post_schema_properties.pop(post_schema_properties.index(copy_grants_property))

        this_name = self.sql(expression.this, "this")
        copy_grants = self.sql(copy_grants_property)
        this_schema = self.schema_columns_sql(expression.this)
        this_schema = f"{self.sep()}{this_schema}" if this_schema else ""

        return f"{this_name}{self.sep()}{copy_grants}{this_schema}"

    return super().createable_sql(expression, locations)
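A roundtrip shows COPY GRANTS staying before the column list for materialized views; this sketch assumes the Snowflake parser accepts the same form it generates (view and column names are invented):

import sqlglot

sql = "CREATE MATERIALIZED VIEW v COPY GRANTS (c1) AS SELECT c1 FROM t"
print(sqlglot.transpile(sql, read="snowflake", write="snowflake")[0])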
def arrayagg_sql(self, expression: exp.ArrayAgg) -> str:
    this = expression.this

    # If an ORDER BY clause is present, we need to remove it from ARRAY_AGG
    # and add it later as part of the WITHIN GROUP clause
    order = this if isinstance(this, exp.Order) else None
    if order:
        expression.set("this", order.this.pop())

    expr_sql = super().arrayagg_sql(expression)

    if order:
        expr_sql = self.sql(exp.WithinGroup(this=expr_sql, expression=order))

    return expr_sql
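ARRAY_AGG's inline ORDER BY becomes a WITHIN GROUP clause, e.g. (column names are invented; output is illustrative):

import sqlglot

print(sqlglot.transpile("SELECT ARRAY_AGG(x ORDER BY y DESC)", read="duckdb", write="snowflake")[0])
# e.g. SELECT ARRAY_AGG(x) WITHIN GROUP (ORDER BY y DESC)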
def array_sql(self, expression: exp.Array) -> str:
    expressions = expression.expressions

    first_expr = seq_get(expressions, 0)
    if isinstance(first_expr, exp.Select):
        # SELECT AS STRUCT foo AS alias_foo -> ARRAY_AGG(OBJECT_CONSTRUCT('alias_foo', foo))
        if first_expr.text("kind").upper() == "STRUCT":
            object_construct_args = []
            for expr in first_expr.expressions:
                # Alias case: SELECT AS STRUCT foo AS alias_foo -> OBJECT_CONSTRUCT('alias_foo', foo)
                # Column case: SELECT AS STRUCT foo -> OBJECT_CONSTRUCT('foo', foo)
                name = expr.this if isinstance(expr, exp.Alias) else expr

                object_construct_args.extend([exp.Literal.string(expr.alias_or_name), name])

            array_agg = exp.ArrayAgg(
                this=_build_object_construct(args=object_construct_args)
            )

            first_expr.set("kind", None)
            first_expr.set("expressions", [array_agg])

            return self.sql(first_expr.subquery())

    return inline_array_sql(self, expression)
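BigQuery's ARRAY(SELECT AS STRUCT ...) idiom is rebuilt as an ARRAY_AGG over OBJECT_CONSTRUCT inside a subquery (names are invented; output is illustrative):

import sqlglot

sql = "SELECT ARRAY(SELECT AS STRUCT a AS x, b FROM t)"
print(sqlglot.transpile(sql, read="bigquery", write="snowflake")[0])
# e.g. SELECT (SELECT ARRAY_AGG(OBJECT_CONSTRUCT('x', a, 'b', b)) FROM t)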
def currentdate_sql(self, expression: exp.CurrentDate) -> str:
    zone = self.sql(expression, "this")
    if not zone:
        return super().currentdate_sql(expression)

    expr = exp.Cast(
        this=exp.ConvertTimezone(target_tz=zone, timestamp=exp.CurrentTimestamp()),
        to=exp.DataType(this=exp.DataType.Type.DATE),
    )
    return self.sql(expr)
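Snowflake's CURRENT_DATE takes no time zone argument, hence the CONVERT_TIMEZONE rewrite (output is illustrative):

import sqlglot

print(sqlglot.transpile("SELECT CURRENT_DATE('America/Los_Angeles')", read="bigquery", write="snowflake")[0])
# e.g. SELECT CAST(CONVERT_TIMEZONE('America/Los_Angeles', CURRENT_TIMESTAMP()) AS DATE)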
def dot_sql(self, expression: exp.Dot) -> str:
    this = expression.this

    if not this.type:
        from sqlglot.optimizer.annotate_types import annotate_types

        this = annotate_types(this, dialect=self.dialect)

    if not isinstance(this, exp.Dot) and this.is_type(exp.DataType.Type.STRUCT):
        # Generate colon notation for the top level STRUCT
        return f"{self.sql(this)}:{self.sql(expression, 'expression')}"

    return super().dot_sql(expression)
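The colon rewrite only fires when the left-hand side is known to be a STRUCT, so demonstrating it requires type information. A minimal sketch that annotates the type by hand (the names are hypothetical; output is illustrative):

from sqlglot import exp
from sqlglot.dialects.snowflake import Snowflake

# Build col.field and mark col as a STRUCT so the Snowflake generator
# switches to colon notation for the top-level access
col = exp.column("col")
col.type = exp.DataType.build("STRUCT")
dot = exp.Dot(this=col, expression=exp.to_identifier("field"))
print(Snowflake().generate(dot))
# e.g. col:field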
Inherited Members
- sqlglot.generator.Generator
- Generator
- NULL_ORDERING_SUPPORTED
- IGNORE_NULLS_IN_FUNC
- LOCKING_READS_SUPPORTED
- WRAP_DERIVED_VALUES
- CREATE_FUNCTION_RETURN_AS
- INTERVAL_ALLOWS_PLURAL_FORM
- LIMIT_FETCH
- RENAME_TABLE_WITH_DB
- GROUPINGS_SEP
- INDEX_ON
- QUERY_HINT_SEP
- DUPLICATE_KEY_UPDATE_WITH_SET
- LIMIT_IS_TOP
- RETURNING_END
- EXTRACT_ALLOWS_QUOTES
- TZ_TO_WITH_TIME_ZONE
- NVL2_SUPPORTED
- VALUES_AS_TABLE
- ALTER_TABLE_INCLUDE_COLUMN_KEYWORD
- UNNEST_WITH_ORDINALITY
- SEMI_ANTI_JOIN_WITH_SIDE
- COMPUTED_COLUMN_WITH_TYPE
- TABLESAMPLE_REQUIRES_PARENS
- TABLESAMPLE_SIZE_IS_ROWS
- TABLESAMPLE_KEYWORDS
- TABLESAMPLE_WITH_METHOD
- TABLESAMPLE_SEED_KEYWORD
- DATA_TYPE_SPECIFIERS_ALLOWED
- ENSURE_BOOLS
- CTE_RECURSIVE_KEYWORD_REQUIRED
- SUPPORTS_SINGLE_ARG_CONCAT
- LAST_DAY_SUPPORTS_DATE_PART
- SUPPORTS_TABLE_ALIAS_COLUMNS
- UNPIVOT_ALIASES_ARE_IDENTIFIERS
- SUPPORTS_SELECT_INTO
- SUPPORTS_UNLOGGED_TABLES
- SUPPORTS_CREATE_TABLE_LIKE
- LIKE_PROPERTY_INSIDE_SCHEMA
- MULTI_ARG_DISTINCT
- JSON_TYPE_REQUIRED_FOR_EXTRACTION
- JSON_PATH_BRACKETED_KEY_SUPPORTED
- JSON_PATH_SINGLE_QUOTE_ESCAPE
- CAN_IMPLEMENT_ARRAY_ANY
- SUPPORTS_TO_NUMBER
- SUPPORTS_WINDOW_EXCLUDE
- SET_OP_MODIFIERS
- COPY_HAS_INTO_KEYWORD
- UNICODE_SUBSTITUTE
- HEX_FUNC
- WITH_PROPERTIES_PREFIX
- QUOTE_JSON_PATH
- PAD_FILL_PATTERN_IS_REQUIRED
- SUPPORTS_UNIX_SECONDS
- ALTER_SET_WRAPPED
- NORMALIZE_EXTRACT_DATE_PARTS
- PARSE_JSON_NAME
- ALTER_SET_TYPE
- ARRAY_SIZE_DIM_REQUIRED
- SUPPORTS_BETWEEN_FLAGS
- SUPPORTS_LIKE_QUANTIFIERS
- MATCH_AGAINST_TABLE_PREFIX
- UNSUPPORTED_TYPES
- TIME_PART_SINGULARS
- NAMED_PLACEHOLDER_TOKEN
- EXPRESSION_PRECEDES_PROPERTIES_CREATABLES
- RESERVED_KEYWORDS
- WITH_SEPARATED_COMMENTS
- EXCLUDE_COMMENTS
- UNWRAPPED_INTERVAL_VALUES
- PARAMETERIZABLE_TEXT_TYPES
- EXPRESSIONS_WITHOUT_NESTED_CTES
- SAFE_JSON_PATH_KEY_RE
- SENTINEL_LINE_BREAK
- pretty
- identify
- normalize
- pad
- unsupported_level
- max_unsupported
- leading_comma
- max_text_width
- comments
- dialect
- normalize_functions
- unsupported_messages
- generate
- preprocess
- unsupported
- sep
- seg
- sanitize_comment
- maybe_comment
- wrap
- no_identify
- normalize_func
- indent
- sql
- uncache_sql
- cache_sql
- characterset_sql
- column_parts
- column_sql
- columnposition_sql
- columndef_sql
- columnconstraint_sql
- computedcolumnconstraint_sql
- autoincrementcolumnconstraint_sql
- compresscolumnconstraint_sql
- generatedasrowcolumnconstraint_sql
- periodforsystemtimeconstraint_sql
- notnullcolumnconstraint_sql
- primarykeycolumnconstraint_sql
- uniquecolumnconstraint_sql
- create_sql
- sequenceproperties_sql
- clone_sql
- heredoc_sql
- prepend_ctes
- with_sql
- cte_sql
- tablealias_sql
- bitstring_sql
- hexstring_sql
- bytestring_sql
- unicodestring_sql
- rawstring_sql
- datatypeparam_sql
- directory_sql
- delete_sql
- drop_sql
- set_operation
- set_operations
- fetch_sql
- limitoptions_sql
- filter_sql
- hint_sql
- indexparameters_sql
- index_sql
- identifier_sql
- hex_sql
- lowerhex_sql
- inputoutputformat_sql
- national_sql
- partition_sql
- properties_sql
- root_properties
- properties
- locate_properties
- property_name
- property_sql
- likeproperty_sql
- fallbackproperty_sql
- journalproperty_sql
- freespaceproperty_sql
- checksumproperty_sql
- mergeblockratioproperty_sql
- datablocksizeproperty_sql
- blockcompressionproperty_sql
- isolatedloadingproperty_sql
- partitionboundspec_sql
- partitionedofproperty_sql
- lockingproperty_sql
- withdataproperty_sql
- withsystemversioningproperty_sql
- insert_sql
- introducer_sql
- kill_sql
- pseudotype_sql
- objectidentifier_sql
- onconflict_sql
- returning_sql
- rowformatdelimitedproperty_sql
- withtablehint_sql
- indextablehint_sql
- historicaldata_sql
- table_parts
- table_sql
- tablefromrows_sql
- tablesample_sql
- pivot_sql
- version_sql
- tuple_sql
- update_sql
- var_sql
- into_sql
- from_sql
- groupingsets_sql
- rollup_sql
- cube_sql
- group_sql
- having_sql
- connect_sql
- prior_sql
- join_sql
- lambda_sql
- lateral_op
- lateral_sql
- limit_sql
- offset_sql
- setitem_sql
- set_sql
- queryband_sql
- pragma_sql
- lock_sql
- literal_sql
- escape_str
- loaddata_sql
- null_sql
- boolean_sql
- order_sql
- withfill_sql
- distribute_sql
- sort_sql
- ordered_sql
- matchrecognizemeasure_sql
- matchrecognize_sql
- query_modifiers
- options_modifier
- for_modifiers
- queryoption_sql
- offset_limit_modifiers
- after_limit_modifiers
- schema_sql
- schema_columns_sql
- star_sql
- parameter_sql
- sessionparameter_sql
- placeholder_sql
- subquery_sql
- qualify_sql
- prewhere_sql
- where_sql
- window_sql
- partition_by_sql
- windowspec_sql
- withingroup_sql
- between_sql
- bracket_offset_expressions
- bracket_sql
- all_sql
- any_sql
- exists_sql
- case_sql
- constraint_sql
- nextvaluefor_sql
- extract_sql
- trim_sql
- convert_concat_args
- concat_sql
- concatws_sql
- check_sql
- foreignkey_sql
- primarykey_sql
- if_sql
- matchagainst_sql
- jsonkeyvalue_sql
- jsonpath_sql
- json_path_part
- formatjson_sql
- formatphrase_sql
- jsonobject_sql
- jsonobjectagg_sql
- jsonarray_sql
- jsonarrayagg_sql
- jsoncolumndef_sql
- jsonschema_sql
- jsontable_sql
- openjsoncolumndef_sql
- openjson_sql
- in_sql
- in_unnest_op
- interval_sql
- return_sql
- reference_sql
- anonymous_sql
- paren_sql
- neg_sql
- not_sql
- alias_sql
- pivotalias_sql
- aliases_sql
- atindex_sql
- attimezone_sql
- fromtimezone_sql
- add_sql
- and_sql
- or_sql
- xor_sql
- connector_sql
- bitwiseand_sql
- bitwiseleftshift_sql
- bitwisenot_sql
- bitwiseor_sql
- bitwiserightshift_sql
- bitwisexor_sql
- collate_sql
- command_sql
- comment_sql
- mergetreettlaction_sql
- mergetreettl_sql
- transaction_sql
- commit_sql
- rollback_sql
- altercolumn_sql
- alterindex_sql
- alterdiststyle_sql
- altersortkey_sql
- alterrename_sql
- renamecolumn_sql
- alter_sql
- altersession_sql
- add_column_sql
- droppartition_sql
- addconstraint_sql
- addpartition_sql
- distinct_sql
- ignorenulls_sql
- respectnulls_sql
- havingmax_sql
- intdiv_sql
- dpipe_sql
- div_sql
- safedivide_sql
- overlaps_sql
- distance_sql
- eq_sql
- propertyeq_sql
- escape_sql
- glob_sql
- gt_sql
- gte_sql
- is_sql
- like_sql
- ilike_sql
- similarto_sql
- lt_sql
- lte_sql
- mod_sql
- mul_sql
- neq_sql
- nullsafeeq_sql
- nullsafeneq_sql
- slice_sql
- sub_sql
- jsoncast_sql
- try_sql
- use_sql
- binary
- ceil_floor
- function_fallback_sql
- func
- format_args
- too_wide
- format_time
- expressions
- op_expressions
- naked_property
- tag_sql
- token_sql
- userdefinedfunction_sql
- joinhint_sql
- kwarg_sql
- when_sql
- whens_sql
- merge_sql
- tochar_sql
- dictproperty_sql
- dictrange_sql
- dictsubproperty_sql
- duplicatekeyproperty_sql
- uniquekeyproperty_sql
- distributedbyproperty_sql
- oncluster_sql
- clusteredbyproperty_sql
- anyvalue_sql
- querytransform_sql
- indexconstraintoption_sql
- checkcolumnconstraint_sql
- indexcolumnconstraint_sql
- nvl2_sql
- comprehension_sql
- columnprefix_sql
- opclass_sql
- predict_sql
- generateembedding_sql
- mltranslate_sql
- mlforecast_sql
- featuresattime_sql
- vectorsearch_sql
- forin_sql
- refresh_sql
- toarray_sql
- tsordstotime_sql
- tsordstotimestamp_sql
- tsordstodatetime_sql
- tsordstodate_sql
- unixdate_sql
- lastday_sql
- dateadd_sql
- arrayany_sql
- partitionrange_sql
- truncatetable_sql
- convert_sql
- copyparameter_sql
- credentials_sql
- copy_sql
- semicolon_sql
- datadeletionproperty_sql
- maskingpolicycolumnconstraint_sql
- gapfill_sql
- scope_resolution
- scoperesolution_sql
- parsejson_sql
- rand_sql
- changes_sql
- pad_sql
- summarize_sql
- explodinggenerateseries_sql
- arrayconcat_sql
- converttimezone_sql
- json_sql
- jsonvalue_sql
- conditionalinsert_sql
- multitableinserts_sql
- oncondition_sql
- jsonextractquote_sql
- jsonexists_sql
- apply_sql
- grant_sql
- revoke_sql
- grantprivilege_sql
- grantprincipal_sql
- columns_sql
- overlay_sql
- todouble_sql
- string_sql
- median_sql
- overflowtruncatebehavior_sql
- unixseconds_sql
- arraysize_sql
- attach_sql
- detach_sql
- attachoption_sql
- watermarkcolumnconstraint_sql
- encodeproperty_sql
- includeproperty_sql
- xmlelement_sql
- xmlkeyvalueoption_sql
- partitionbyrangeproperty_sql
- partitionbyrangepropertydynamic_sql
- unpivotcolumns_sql
- analyzesample_sql
- analyzestatistics_sql
- analyzehistogram_sql
- analyzedelete_sql
- analyzelistchainedrows_sql
- analyzevalidate_sql
- analyze_sql
- xmltable_sql
- xmlnamespace_sql
- export_sql
- declare_sql
- declareitem_sql
- recursivewithsearch_sql
- parameterizedagg_sql
- anonymousaggfunc_sql
- combinedaggfunc_sql
- combinedparameterizedagg_sql
- install_sql
- get_put_sql
- translatecharacters_sql
- decodecase_sql
- semanticview_sql
- getextract_sql
- datefromunixdate_sql
- space_sql
- buildproperty_sql
- refreshtriggerproperty_sql