sqlglot.parser
from __future__ import annotations

import logging
import typing as t
from collections import defaultdict

from sqlglot import exp
from sqlglot.errors import ErrorLevel, ParseError, concat_messages, merge_errors
from sqlglot.helper import apply_index_offset, ensure_list, seq_get
from sqlglot.time import format_time
from sqlglot.tokens import Token, Tokenizer, TokenType
from sqlglot.trie import TrieResult, in_trie, new_trie

if t.TYPE_CHECKING:
    from sqlglot._typing import E

logger = logging.getLogger("sqlglot")


def parse_var_map(args: t.List) -> exp.StarMap | exp.VarMap:
    if len(args) == 1 and args[0].is_star:
        return exp.StarMap(this=args[0])

    keys = []
    values = []
    for i in range(0, len(args), 2):
        keys.append(args[i])
        values.append(args[i + 1])

    return exp.VarMap(
        keys=exp.Array(expressions=keys),
        values=exp.Array(expressions=values),
    )


def parse_like(args: t.List) -> exp.Escape | exp.Like:
    like = exp.Like(this=seq_get(args, 1), expression=seq_get(args, 0))
    return exp.Escape(this=like, expression=seq_get(args, 2)) if len(args) > 2 else like


def binary_range_parser(
    expr_type: t.Type[exp.Expression],
) -> t.Callable[[Parser, t.Optional[exp.Expression]], t.Optional[exp.Expression]]:
    return lambda self, this: self._parse_escape(
        self.expression(expr_type, this=this, expression=self._parse_bitwise())
    )
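

# Usage sketch (illustrative, not part of the module): these helpers feed the
# parser's lookup tables below. `parse_like` backs the "LIKE" entry of
# `Parser.FUNCTIONS`, building a LIKE node from two arguments and wrapping it in
# an ESCAPE node when a third argument is present:
#
#   parse_like([pattern, value])          # -> Like(this=value, expression=pattern)
#   parse_like([pattern, value, escape])  # -> Escape(this=Like(...), expression=escape)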


class _Parser(type):
    def __new__(cls, clsname, bases, attrs):
        klass = super().__new__(cls, clsname, bases, attrs)

        klass.SHOW_TRIE = new_trie(key.split(" ") for key in klass.SHOW_PARSERS)
        klass.SET_TRIE = new_trie(key.split(" ") for key in klass.SET_PARSERS)

        return klass


class Parser(metaclass=_Parser):
    """
    Parser consumes a list of tokens produced by the Tokenizer and produces a parsed syntax tree.

    Args:
        error_level: The desired error level.
            Default: ErrorLevel.IMMEDIATE
        error_message_context: Determines the amount of context to capture from a
            query string when displaying the error message (in number of characters).
            Default: 100
        max_errors: Maximum number of error messages to include in a raised ParseError.
            This is only relevant if error_level is ErrorLevel.RAISE.
            Default: 3
    """

    FUNCTIONS: t.Dict[str, t.Callable] = {
        **{name: f.from_arg_list for f in exp.ALL_FUNCTIONS for name in f.sql_names()},
        "DATE_TO_DATE_STR": lambda args: exp.Cast(
            this=seq_get(args, 0),
            to=exp.DataType(this=exp.DataType.Type.TEXT),
        ),
        "GLOB": lambda args: exp.Glob(this=seq_get(args, 1), expression=seq_get(args, 0)),
        "LIKE": parse_like,
        "TIME_TO_TIME_STR": lambda args: exp.Cast(
            this=seq_get(args, 0),
            to=exp.DataType(this=exp.DataType.Type.TEXT),
        ),
        "TS_OR_DS_TO_DATE_STR": lambda args: exp.Substring(
            this=exp.Cast(
                this=seq_get(args, 0),
                to=exp.DataType(this=exp.DataType.Type.TEXT),
            ),
            start=exp.Literal.number(1),
            length=exp.Literal.number(10),
        ),
        "VAR_MAP": parse_var_map,
    }

    NO_PAREN_FUNCTIONS = {
        TokenType.CURRENT_DATE: exp.CurrentDate,
        TokenType.CURRENT_DATETIME: exp.CurrentDate,
        TokenType.CURRENT_TIME: exp.CurrentTime,
        TokenType.CURRENT_TIMESTAMP: exp.CurrentTimestamp,
        TokenType.CURRENT_USER: exp.CurrentUser,
    }

    STRUCT_TYPE_TOKENS = {
        TokenType.NESTED,
        TokenType.STRUCT,
    }

    NESTED_TYPE_TOKENS = {
        TokenType.ARRAY,
        TokenType.LOWCARDINALITY,
        TokenType.MAP,
        TokenType.NULLABLE,
        *STRUCT_TYPE_TOKENS,
    }

    ENUM_TYPE_TOKENS = {
        TokenType.ENUM,
        TokenType.ENUM8,
        TokenType.ENUM16,
    }

    TYPE_TOKENS = {
        TokenType.BIT,
        TokenType.BOOLEAN,
        TokenType.TINYINT,
        TokenType.UTINYINT,
        TokenType.SMALLINT,
        TokenType.USMALLINT,
        TokenType.INT,
        TokenType.UINT,
        TokenType.BIGINT,
        TokenType.UBIGINT,
        TokenType.INT128,
        TokenType.UINT128,
        TokenType.INT256,
        TokenType.UINT256,
        TokenType.MEDIUMINT,
        TokenType.FIXEDSTRING,
        TokenType.FLOAT,
        TokenType.DOUBLE,
        TokenType.CHAR,
        TokenType.NCHAR,
        TokenType.VARCHAR,
        TokenType.NVARCHAR,
        TokenType.TEXT,
        TokenType.MEDIUMTEXT,
        TokenType.LONGTEXT,
        TokenType.MEDIUMBLOB,
        TokenType.LONGBLOB,
        TokenType.BINARY,
        TokenType.VARBINARY,
        TokenType.JSON,
        TokenType.JSONB,
        TokenType.INTERVAL,
        TokenType.TIME,
        TokenType.TIMETZ,
        TokenType.TIMESTAMP,
        TokenType.TIMESTAMPTZ,
        TokenType.TIMESTAMPLTZ,
        TokenType.DATETIME,
        TokenType.DATETIME64,
        TokenType.DATE,
        TokenType.INT4RANGE,
        TokenType.INT4MULTIRANGE,
        TokenType.INT8RANGE,
        TokenType.INT8MULTIRANGE,
        TokenType.NUMRANGE,
        TokenType.NUMMULTIRANGE,
        TokenType.TSRANGE,
        TokenType.TSMULTIRANGE,
        TokenType.TSTZRANGE,
        TokenType.TSTZMULTIRANGE,
        TokenType.DATERANGE,
        TokenType.DATEMULTIRANGE,
        TokenType.DECIMAL,
        TokenType.BIGDECIMAL,
        TokenType.UUID,
        TokenType.GEOGRAPHY,
        TokenType.GEOMETRY,
        TokenType.HLLSKETCH,
        TokenType.HSTORE,
        TokenType.PSEUDO_TYPE,
        TokenType.SUPER,
        TokenType.SERIAL,
        TokenType.SMALLSERIAL,
        TokenType.BIGSERIAL,
        TokenType.XML,
        TokenType.YEAR,
        TokenType.UNIQUEIDENTIFIER,
        TokenType.USERDEFINED,
        TokenType.MONEY,
        TokenType.SMALLMONEY,
        TokenType.ROWVERSION,
        TokenType.IMAGE,
        TokenType.VARIANT,
        TokenType.OBJECT,
        TokenType.INET,
        TokenType.IPADDRESS,
        TokenType.IPPREFIX,
        TokenType.UNKNOWN,
        TokenType.NULL,
        *ENUM_TYPE_TOKENS,
        *NESTED_TYPE_TOKENS,
    }

    SUBQUERY_PREDICATES = {
        TokenType.ANY: exp.Any,
        TokenType.ALL: exp.All,
        TokenType.EXISTS: exp.Exists,
        TokenType.SOME: exp.Any,
    }

    RESERVED_KEYWORDS = {
        *Tokenizer.SINGLE_TOKENS.values(),
        TokenType.SELECT,
    }

    DB_CREATABLES = {
        TokenType.DATABASE,
        TokenType.SCHEMA,
        TokenType.TABLE,
        TokenType.VIEW,
        TokenType.DICTIONARY,
    }

    CREATABLES = {
        TokenType.COLUMN,
        TokenType.FUNCTION,
        TokenType.INDEX,
        TokenType.PROCEDURE,
        *DB_CREATABLES,
    }

    # Tokens that can represent identifiers
    ID_VAR_TOKENS = {
        TokenType.VAR,
        TokenType.ANTI,
        TokenType.APPLY,
        TokenType.ASC,
        TokenType.AUTO_INCREMENT,
        TokenType.BEGIN,
        TokenType.CACHE,
        TokenType.CASE,
        TokenType.COLLATE,
        TokenType.COMMAND,
        TokenType.COMMENT,
        TokenType.COMMIT,
        TokenType.CONSTRAINT,
        TokenType.DEFAULT,
        TokenType.DELETE,
        TokenType.DESC,
        TokenType.DESCRIBE,
        TokenType.DICTIONARY,
        TokenType.DIV,
        TokenType.END,
        TokenType.EXECUTE,
        TokenType.ESCAPE,
        TokenType.FALSE,
        TokenType.FIRST,
        TokenType.FILTER,
        TokenType.FORMAT,
        TokenType.FULL,
        TokenType.IS,
        TokenType.ISNULL,
        TokenType.INTERVAL,
        TokenType.KEEP,
        TokenType.LEFT,
        TokenType.LOAD,
        TokenType.MERGE,
        TokenType.NATURAL,
        TokenType.NEXT,
        TokenType.OFFSET,
        TokenType.ORDINALITY,
        TokenType.OVERWRITE,
        TokenType.PARTITION,
        TokenType.PERCENT,
        TokenType.PIVOT,
        TokenType.PRAGMA,
        TokenType.RANGE,
        TokenType.REFERENCES,
        TokenType.RIGHT,
        TokenType.ROW,
        TokenType.ROWS,
        TokenType.SEMI,
        TokenType.SET,
        TokenType.SETTINGS,
        TokenType.SHOW,
        TokenType.TEMPORARY,
        TokenType.TOP,
        TokenType.TRUE,
        TokenType.UNIQUE,
        TokenType.UNPIVOT,
        TokenType.UPDATE,
        TokenType.VOLATILE,
        TokenType.WINDOW,
        *CREATABLES,
        *SUBQUERY_PREDICATES,
        *TYPE_TOKENS,
        *NO_PAREN_FUNCTIONS,
    }

    INTERVAL_VARS = ID_VAR_TOKENS - {TokenType.END}

    TABLE_ALIAS_TOKENS = ID_VAR_TOKENS - {
        TokenType.APPLY,
        TokenType.ASOF,
        TokenType.FULL,
        TokenType.LEFT,
        TokenType.LOCK,
        TokenType.NATURAL,
        TokenType.OFFSET,
        TokenType.RIGHT,
        TokenType.WINDOW,
    }

    COMMENT_TABLE_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - {TokenType.IS}

    UPDATE_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - {TokenType.SET}

    TRIM_TYPES = {"LEADING", "TRAILING", "BOTH"}

    FUNC_TOKENS = {
        TokenType.COMMAND,
        TokenType.CURRENT_DATE,
        TokenType.CURRENT_DATETIME,
        TokenType.CURRENT_TIMESTAMP,
        TokenType.CURRENT_TIME,
        TokenType.CURRENT_USER,
        TokenType.FILTER,
        TokenType.FIRST,
        TokenType.FORMAT,
        TokenType.GLOB,
        TokenType.IDENTIFIER,
        TokenType.INDEX,
        TokenType.ISNULL,
        TokenType.ILIKE,
        TokenType.INSERT,
        TokenType.LIKE,
        TokenType.MERGE,
        TokenType.OFFSET,
        TokenType.PRIMARY_KEY,
        TokenType.RANGE,
        TokenType.REPLACE,
        TokenType.RLIKE,
        TokenType.ROW,
        TokenType.UNNEST,
        TokenType.VAR,
        TokenType.LEFT,
        TokenType.RIGHT,
        TokenType.DATE,
        TokenType.DATETIME,
        TokenType.TABLE,
        TokenType.TIMESTAMP,
        TokenType.TIMESTAMPTZ,
        TokenType.WINDOW,
        TokenType.XOR,
        *TYPE_TOKENS,
        *SUBQUERY_PREDICATES,
    }

    CONJUNCTION = {
        TokenType.AND: exp.And,
        TokenType.OR: exp.Or,
    }

    EQUALITY = {
        TokenType.EQ: exp.EQ,
        TokenType.NEQ: exp.NEQ,
        TokenType.NULLSAFE_EQ: exp.NullSafeEQ,
    }

    COMPARISON = {
        TokenType.GT: exp.GT,
        TokenType.GTE: exp.GTE,
        TokenType.LT: exp.LT,
        TokenType.LTE: exp.LTE,
    }

    BITWISE = {
        TokenType.AMP: exp.BitwiseAnd,
        TokenType.CARET: exp.BitwiseXor,
        TokenType.PIPE: exp.BitwiseOr,
        TokenType.DPIPE: exp.DPipe,
    }

    TERM = {
        TokenType.DASH: exp.Sub,
        TokenType.PLUS: exp.Add,
        TokenType.MOD: exp.Mod,
        TokenType.COLLATE: exp.Collate,
    }

    FACTOR = {
        TokenType.DIV: exp.IntDiv,
        TokenType.LR_ARROW: exp.Distance,
        TokenType.SLASH: exp.Div,
        TokenType.STAR: exp.Mul,
    }

    TIMES = {
        TokenType.TIME,
        TokenType.TIMETZ,
    }

    TIMESTAMPS = {
        TokenType.TIMESTAMP,
        TokenType.TIMESTAMPTZ,
        TokenType.TIMESTAMPLTZ,
        *TIMES,
    }

    SET_OPERATIONS = {
        TokenType.UNION,
        TokenType.INTERSECT,
        TokenType.EXCEPT,
    }

    JOIN_METHODS = {
        TokenType.NATURAL,
        TokenType.ASOF,
    }

    JOIN_SIDES = {
        TokenType.LEFT,
        TokenType.RIGHT,
        TokenType.FULL,
    }

    JOIN_KINDS = {
        TokenType.INNER,
        TokenType.OUTER,
        TokenType.CROSS,
        TokenType.SEMI,
        TokenType.ANTI,
    }

    JOIN_HINTS: t.Set[str] = set()

    LAMBDAS = {
        TokenType.ARROW: lambda self, expressions: self.expression(
            exp.Lambda,
            this=self._replace_lambda(
                self._parse_conjunction(),
                {node.name for node in expressions},
            ),
            expressions=expressions,
        ),
        TokenType.FARROW: lambda self, expressions: self.expression(
            exp.Kwarg,
            this=exp.var(expressions[0].name),
            expression=self._parse_conjunction(),
        ),
    }

    COLUMN_OPERATORS = {
        TokenType.DOT: None,
        TokenType.DCOLON: lambda self, this, to: self.expression(
            exp.Cast if self.STRICT_CAST else exp.TryCast,
            this=this,
            to=to,
        ),
        TokenType.ARROW: lambda self, this, path: self.expression(
            exp.JSONExtract,
            this=this,
            expression=path,
        ),
        TokenType.DARROW: lambda self, this, path: self.expression(
            exp.JSONExtractScalar,
            this=this,
            expression=path,
        ),
        TokenType.HASH_ARROW: lambda self, this, path: self.expression(
            exp.JSONBExtract,
            this=this,
            expression=path,
        ),
        TokenType.DHASH_ARROW: lambda self, this, path: self.expression(
            exp.JSONBExtractScalar,
            this=this,
            expression=path,
        ),
        TokenType.PLACEHOLDER: lambda self, this, key: self.expression(
            exp.JSONBContains,
            this=this,
            expression=key,
        ),
    }
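
    # Usage sketch (illustrative, not part of the module): COLUMN_OPERATORS maps
    # postfix column operators to AST builders, e.g. "x::INT" becomes a Cast (or a
    # TryCast when STRICT_CAST is False) and "x -> '$.a'" becomes a JSONExtract:
    #
    #   from sqlglot import parse_one
    #   parse_one("SELECT x::INT")      # ... Cast(this=Column(x), to=DataType(INT))
    #   parse_one("SELECT x -> '$.a'")  # ... JSONExtract(this=Column(x), expression='$.a')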

    EXPRESSION_PARSERS = {
        exp.Cluster: lambda self: self._parse_sort(exp.Cluster, TokenType.CLUSTER_BY),
        exp.Column: lambda self: self._parse_column(),
        exp.Condition: lambda self: self._parse_conjunction(),
        exp.DataType: lambda self: self._parse_types(allow_identifiers=False),
        exp.Expression: lambda self: self._parse_statement(),
        exp.From: lambda self: self._parse_from(),
        exp.Group: lambda self: self._parse_group(),
        exp.Having: lambda self: self._parse_having(),
        exp.Identifier: lambda self: self._parse_id_var(),
        exp.Join: lambda self: self._parse_join(),
        exp.Lambda: lambda self: self._parse_lambda(),
        exp.Lateral: lambda self: self._parse_lateral(),
        exp.Limit: lambda self: self._parse_limit(),
        exp.Offset: lambda self: self._parse_offset(),
        exp.Order: lambda self: self._parse_order(),
        exp.Ordered: lambda self: self._parse_ordered(),
        exp.Properties: lambda self: self._parse_properties(),
        exp.Qualify: lambda self: self._parse_qualify(),
        exp.Returning: lambda self: self._parse_returning(),
        exp.Sort: lambda self: self._parse_sort(exp.Sort, TokenType.SORT_BY),
        exp.Table: lambda self: self._parse_table_parts(),
        exp.TableAlias: lambda self: self._parse_table_alias(),
        exp.Where: lambda self: self._parse_where(),
        exp.Window: lambda self: self._parse_named_window(),
        exp.With: lambda self: self._parse_with(),
        "JOIN_TYPE": lambda self: self._parse_join_parts(),
    }

    STATEMENT_PARSERS = {
        TokenType.ALTER: lambda self: self._parse_alter(),
        TokenType.BEGIN: lambda self: self._parse_transaction(),
        TokenType.CACHE: lambda self: self._parse_cache(),
        TokenType.COMMIT: lambda self: self._parse_commit_or_rollback(),
        TokenType.COMMENT: lambda self: self._parse_comment(),
        TokenType.CREATE: lambda self: self._parse_create(),
        TokenType.DELETE: lambda self: self._parse_delete(),
        TokenType.DESC: lambda self: self._parse_describe(),
        TokenType.DESCRIBE: lambda self: self._parse_describe(),
        TokenType.DROP: lambda self: self._parse_drop(),
        TokenType.INSERT: lambda self: self._parse_insert(),
        TokenType.LOAD: lambda self: self._parse_load(),
        TokenType.MERGE: lambda self: self._parse_merge(),
        TokenType.PIVOT: lambda self: self._parse_simplified_pivot(),
        TokenType.PRAGMA: lambda self: self.expression(exp.Pragma, this=self._parse_expression()),
        TokenType.ROLLBACK: lambda self: self._parse_commit_or_rollback(),
        TokenType.SET: lambda self: self._parse_set(),
        TokenType.UNCACHE: lambda self: self._parse_uncache(),
        TokenType.UPDATE: lambda self: self._parse_update(),
        TokenType.USE: lambda self: self.expression(
            exp.Use,
            kind=self._match_texts(("ROLE", "WAREHOUSE", "DATABASE", "SCHEMA"))
            and exp.var(self._prev.text),
            this=self._parse_table(schema=False),
        ),
    }
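
    # Extension sketch (hypothetical): dialect parsers add statements by extending
    # STATEMENT_PARSERS in a subclass; the key is the statement's leading token and
    # the callable runs with the parser positioned just past it. `_parse_show` is a
    # made-up helper name here:
    #
    #   class MyParser(Parser):
    #       STATEMENT_PARSERS = {
    #           **Parser.STATEMENT_PARSERS,
    #           TokenType.SHOW: lambda self: self._parse_show(),
    #       }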

    UNARY_PARSERS = {
        TokenType.PLUS: lambda self: self._parse_unary(),  # Unary + is handled as a no-op
        TokenType.NOT: lambda self: self.expression(exp.Not, this=self._parse_equality()),
        TokenType.TILDA: lambda self: self.expression(exp.BitwiseNot, this=self._parse_unary()),
        TokenType.DASH: lambda self: self.expression(exp.Neg, this=self._parse_unary()),
    }

    PRIMARY_PARSERS = {
        TokenType.STRING: lambda self, token: self.expression(
            exp.Literal, this=token.text, is_string=True
        ),
        TokenType.NUMBER: lambda self, token: self.expression(
            exp.Literal, this=token.text, is_string=False
        ),
        TokenType.STAR: lambda self, _: self.expression(
            exp.Star, **{"except": self._parse_except(), "replace": self._parse_replace()}
        ),
        TokenType.NULL: lambda self, _: self.expression(exp.Null),
        TokenType.TRUE: lambda self, _: self.expression(exp.Boolean, this=True),
        TokenType.FALSE: lambda self, _: self.expression(exp.Boolean, this=False),
        TokenType.BIT_STRING: lambda self, token: self.expression(exp.BitString, this=token.text),
        TokenType.HEX_STRING: lambda self, token: self.expression(exp.HexString, this=token.text),
        TokenType.BYTE_STRING: lambda self, token: self.expression(exp.ByteString, this=token.text),
        TokenType.INTRODUCER: lambda self, token: self._parse_introducer(token),
        TokenType.NATIONAL_STRING: lambda self, token: self.expression(
            exp.National, this=token.text
        ),
        TokenType.RAW_STRING: lambda self, token: self.expression(exp.RawString, this=token.text),
        TokenType.SESSION_PARAMETER: lambda self, _: self._parse_session_parameter(),
    }

    PLACEHOLDER_PARSERS = {
        TokenType.PLACEHOLDER: lambda self: self.expression(exp.Placeholder),
        TokenType.PARAMETER: lambda self: self._parse_parameter(),
        TokenType.COLON: lambda self: self.expression(exp.Placeholder, this=self._prev.text)
        if self._match(TokenType.NUMBER) or self._match_set(self.ID_VAR_TOKENS)
        else None,
    }

    RANGE_PARSERS = {
        TokenType.BETWEEN: lambda self, this: self._parse_between(this),
        TokenType.GLOB: binary_range_parser(exp.Glob),
        TokenType.ILIKE: binary_range_parser(exp.ILike),
        TokenType.IN: lambda self, this: self._parse_in(this),
        TokenType.IRLIKE: binary_range_parser(exp.RegexpILike),
        TokenType.IS: lambda self, this: self._parse_is(this),
        TokenType.LIKE: binary_range_parser(exp.Like),
        TokenType.OVERLAPS: binary_range_parser(exp.Overlaps),
        TokenType.RLIKE: binary_range_parser(exp.RegexpLike),
        TokenType.SIMILAR_TO: binary_range_parser(exp.SimilarTo),
    }

    PROPERTY_PARSERS: t.Dict[str, t.Callable] = {
        "ALGORITHM": lambda self: self._parse_property_assignment(exp.AlgorithmProperty),
        "AUTO_INCREMENT": lambda self: self._parse_property_assignment(exp.AutoIncrementProperty),
        "BLOCKCOMPRESSION": lambda self: self._parse_blockcompression(),
        "CHARACTER SET": lambda self: self._parse_character_set(),
        "CHECKSUM": lambda self: self._parse_checksum(),
        "CLUSTER BY": lambda self: self._parse_cluster(),
        "CLUSTERED": lambda self: self._parse_clustered_by(),
        "COLLATE": lambda self: self._parse_property_assignment(exp.CollateProperty),
        "COMMENT": lambda self: self._parse_property_assignment(exp.SchemaCommentProperty),
        "COPY": lambda self: self._parse_copy_property(),
        "DATABLOCKSIZE": lambda self, **kwargs: self._parse_datablocksize(**kwargs),
        "DEFINER": lambda self: self._parse_definer(),
        "DETERMINISTIC": lambda self: self.expression(
            exp.StabilityProperty, this=exp.Literal.string("IMMUTABLE")
        ),
        "DISTKEY": lambda self: self._parse_distkey(),
        "DISTSTYLE": lambda self: self._parse_property_assignment(exp.DistStyleProperty),
        "ENGINE": lambda self: self._parse_property_assignment(exp.EngineProperty),
        "EXECUTE": lambda self: self._parse_property_assignment(exp.ExecuteAsProperty),
        "EXTERNAL": lambda self: self.expression(exp.ExternalProperty),
        "FALLBACK": lambda self, **kwargs: self._parse_fallback(**kwargs),
        "FORMAT": lambda self: self._parse_property_assignment(exp.FileFormatProperty),
        "FREESPACE": lambda self: self._parse_freespace(),
        "HEAP": lambda self: self.expression(exp.HeapProperty),
        "IMMUTABLE": lambda self: self.expression(
            exp.StabilityProperty, this=exp.Literal.string("IMMUTABLE")
        ),
        "JOURNAL": lambda self, **kwargs: self._parse_journal(**kwargs),
        "LANGUAGE": lambda self: self._parse_property_assignment(exp.LanguageProperty),
        "LAYOUT": lambda self: self._parse_dict_property(this="LAYOUT"),
        "LIFETIME": lambda self: self._parse_dict_range(this="LIFETIME"),
        "LIKE": lambda self: self._parse_create_like(),
        "LOCATION": lambda self: self._parse_property_assignment(exp.LocationProperty),
        "LOCK": lambda self: self._parse_locking(),
        "LOCKING": lambda self: self._parse_locking(),
        "LOG": lambda self, **kwargs: self._parse_log(**kwargs),
        "MATERIALIZED": lambda self: self.expression(exp.MaterializedProperty),
        "MERGEBLOCKRATIO": lambda self, **kwargs: self._parse_mergeblockratio(**kwargs),
        "MULTISET": lambda self: self.expression(exp.SetProperty, multi=True),
        "NO": lambda self: self._parse_no_property(),
        "ON": lambda self: self._parse_on_property(),
        "ORDER BY": lambda self: self._parse_order(skip_order_token=True),
        "PARTITION BY": lambda self: self._parse_partitioned_by(),
        "PARTITIONED BY": lambda self: self._parse_partitioned_by(),
        "PARTITIONED_BY": lambda self: self._parse_partitioned_by(),
        "PRIMARY KEY": lambda self: self._parse_primary_key(in_props=True),
        "RANGE": lambda self: self._parse_dict_range(this="RANGE"),
        "RETURNS": lambda self: self._parse_returns(),
        "ROW": lambda self: self._parse_row(),
        "ROW_FORMAT": lambda self: self._parse_property_assignment(exp.RowFormatProperty),
        "SET": lambda self: self.expression(exp.SetProperty, multi=False),
        "SETTINGS": lambda self: self.expression(
            exp.SettingsProperty, expressions=self._parse_csv(self._parse_set_item)
        ),
        "SORTKEY": lambda self: self._parse_sortkey(),
        "SOURCE": lambda self: self._parse_dict_property(this="SOURCE"),
        "STABLE": lambda self: self.expression(
            exp.StabilityProperty, this=exp.Literal.string("STABLE")
        ),
        "STORED": lambda self: self._parse_stored(),
        "TBLPROPERTIES": lambda self: self._parse_wrapped_csv(self._parse_property),
        "TEMP": lambda self: self.expression(exp.TemporaryProperty),
        "TEMPORARY": lambda self: self.expression(exp.TemporaryProperty),
        "TO": lambda self: self._parse_to_table(),
        "TRANSIENT": lambda self: self.expression(exp.TransientProperty),
        "TTL": lambda self: self._parse_ttl(),
        "USING": lambda self: self._parse_property_assignment(exp.FileFormatProperty),
        "VOLATILE": lambda self: self._parse_volatile_property(),
        "WITH": lambda self: self._parse_with_property(),
    }

    CONSTRAINT_PARSERS = {
        "AUTOINCREMENT": lambda self: self._parse_auto_increment(),
        "AUTO_INCREMENT": lambda self: self._parse_auto_increment(),
        "CASESPECIFIC": lambda self: self.expression(exp.CaseSpecificColumnConstraint, not_=False),
        "CHARACTER SET": lambda self: self.expression(
            exp.CharacterSetColumnConstraint, this=self._parse_var_or_string()
        ),
        "CHECK": lambda self: self.expression(
            exp.CheckColumnConstraint, this=self._parse_wrapped(self._parse_conjunction)
        ),
        "COLLATE": lambda self: self.expression(
            exp.CollateColumnConstraint, this=self._parse_var()
        ),
        "COMMENT": lambda self: self.expression(
            exp.CommentColumnConstraint, this=self._parse_string()
        ),
        "COMPRESS": lambda self: self._parse_compress(),
        "CLUSTERED": lambda self: self.expression(
            exp.ClusteredColumnConstraint, this=self._parse_wrapped_csv(self._parse_ordered)
        ),
        "NONCLUSTERED": lambda self: self.expression(
            exp.NonClusteredColumnConstraint, this=self._parse_wrapped_csv(self._parse_ordered)
        ),
        "DEFAULT": lambda self: self.expression(
            exp.DefaultColumnConstraint, this=self._parse_bitwise()
        ),
        "ENCODE": lambda self: self.expression(exp.EncodeColumnConstraint, this=self._parse_var()),
        "FOREIGN KEY": lambda self: self._parse_foreign_key(),
        "FORMAT": lambda self: self.expression(
            exp.DateFormatColumnConstraint, this=self._parse_var_or_string()
        ),
        "GENERATED": lambda self: self._parse_generated_as_identity(),
        "IDENTITY": lambda self: self._parse_auto_increment(),
        "INLINE": lambda self: self._parse_inline(),
        "LIKE": lambda self: self._parse_create_like(),
        "NOT": lambda self: self._parse_not_constraint(),
        "NULL": lambda self: self.expression(exp.NotNullColumnConstraint, allow_null=True),
        "ON": lambda self: (
            self._match(TokenType.UPDATE)
            and self.expression(exp.OnUpdateColumnConstraint, this=self._parse_function())
        )
        or self.expression(exp.OnProperty, this=self._parse_id_var()),
        "PATH": lambda self: self.expression(exp.PathColumnConstraint, this=self._parse_string()),
        "PRIMARY KEY": lambda self: self._parse_primary_key(),
        "REFERENCES": lambda self: self._parse_references(match=False),
        "TITLE": lambda self: self.expression(
            exp.TitleColumnConstraint, this=self._parse_var_or_string()
        ),
        "TTL": lambda self: self.expression(exp.MergeTreeTTL, expressions=[self._parse_bitwise()]),
        "UNIQUE": lambda self: self._parse_unique(),
        "UPPERCASE": lambda self: self.expression(exp.UppercaseColumnConstraint),
        "WITH": lambda self: self.expression(
            exp.Properties, expressions=self._parse_wrapped_csv(self._parse_property)
        ),
    }

    ALTER_PARSERS = {
        "ADD": lambda self: self._parse_alter_table_add(),
        "ALTER": lambda self: self._parse_alter_table_alter(),
        "DELETE": lambda self: self.expression(exp.Delete, where=self._parse_where()),
        "DROP": lambda self: self._parse_alter_table_drop(),
        "RENAME": lambda self: self._parse_alter_table_rename(),
    }

    SCHEMA_UNNAMED_CONSTRAINTS = {"CHECK", "FOREIGN KEY", "LIKE", "PRIMARY KEY", "UNIQUE"}

    NO_PAREN_FUNCTION_PARSERS = {
        "ANY": lambda self: self.expression(exp.Any, this=self._parse_bitwise()),
        "CASE": lambda self: self._parse_case(),
        "IF": lambda self: self._parse_if(),
        "NEXT": lambda self: self._parse_next_value_for(),
    }

    INVALID_FUNC_NAME_TOKENS = {
        TokenType.IDENTIFIER,
        TokenType.STRING,
    }

    FUNCTIONS_WITH_ALIASED_ARGS = {"STRUCT"}

    FUNCTION_PARSERS = {
        "ANY_VALUE": lambda self: self._parse_any_value(),
        "CAST": lambda self: self._parse_cast(self.STRICT_CAST),
        "CONCAT": lambda self: self._parse_concat(),
        "CONVERT": lambda self: self._parse_convert(self.STRICT_CAST),
        "DECODE": lambda self: self._parse_decode(),
        "EXTRACT": lambda self: self._parse_extract(),
        "JSON_OBJECT": lambda self: self._parse_json_object(),
        "LOG": lambda self: self._parse_logarithm(),
        "MATCH": lambda self: self._parse_match_against(),
        "OPENJSON": lambda self: self._parse_open_json(),
        "POSITION": lambda self: self._parse_position(),
        "SAFE_CAST": lambda self: self._parse_cast(False),
        "STRING_AGG": lambda self: self._parse_string_agg(),
        "SUBSTRING": lambda self: self._parse_substring(),
        "TRIM": lambda self: self._parse_trim(),
        "TRY_CAST": lambda self: self._parse_cast(False),
        "TRY_CONVERT": lambda self: self._parse_convert(False),
    }
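
    # Usage sketch (illustrative, not part of the module): FUNCTION_PARSERS covers
    # functions whose argument lists need special grammar beyond a plain comma-
    # separated expression list, e.g. CAST's "AS <type>" and EXTRACT's
    # "<part> FROM <expr>":
    #
    #   from sqlglot import parse_one
    #   parse_one("SELECT CAST(x AS INT)")         # routed through _parse_cast
    #   parse_one("SELECT EXTRACT(YEAR FROM dt)")  # routed through _parse_extract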
lambda self: ("sample", self._parse_table_sample(as_modifier=True)), 787 TokenType.CLUSTER_BY: lambda self: ( 788 "cluster", 789 self._parse_sort(exp.Cluster, TokenType.CLUSTER_BY), 790 ), 791 TokenType.DISTRIBUTE_BY: lambda self: ( 792 "distribute", 793 self._parse_sort(exp.Distribute, TokenType.DISTRIBUTE_BY), 794 ), 795 TokenType.SORT_BY: lambda self: ("sort", self._parse_sort(exp.Sort, TokenType.SORT_BY)), 796 TokenType.CONNECT_BY: lambda self: ("connect", self._parse_connect(skip_start_token=True)), 797 TokenType.START_WITH: lambda self: ("connect", self._parse_connect()), 798 } 799 800 SET_PARSERS = { 801 "GLOBAL": lambda self: self._parse_set_item_assignment("GLOBAL"), 802 "LOCAL": lambda self: self._parse_set_item_assignment("LOCAL"), 803 "SESSION": lambda self: self._parse_set_item_assignment("SESSION"), 804 "TRANSACTION": lambda self: self._parse_set_transaction(), 805 } 806 807 SHOW_PARSERS: t.Dict[str, t.Callable] = {} 808 809 TYPE_LITERAL_PARSERS: t.Dict[exp.DataType.Type, t.Callable] = {} 810 811 MODIFIABLES = (exp.Subquery, exp.Subqueryable, exp.Table) 812 813 DDL_SELECT_TOKENS = {TokenType.SELECT, TokenType.WITH, TokenType.L_PAREN} 814 815 PRE_VOLATILE_TOKENS = {TokenType.CREATE, TokenType.REPLACE, TokenType.UNIQUE} 816 817 TRANSACTION_KIND = {"DEFERRED", "IMMEDIATE", "EXCLUSIVE"} 818 TRANSACTION_CHARACTERISTICS = { 819 "ISOLATION LEVEL REPEATABLE READ", 820 "ISOLATION LEVEL READ COMMITTED", 821 "ISOLATION LEVEL READ UNCOMMITTED", 822 "ISOLATION LEVEL SERIALIZABLE", 823 "READ WRITE", 824 "READ ONLY", 825 } 826 827 INSERT_ALTERNATIVES = {"ABORT", "FAIL", "IGNORE", "REPLACE", "ROLLBACK"} 828 829 CLONE_KINDS = {"TIMESTAMP", "OFFSET", "STATEMENT"} 830 831 TABLE_INDEX_HINT_TOKENS = {TokenType.FORCE, TokenType.IGNORE, TokenType.USE} 832 833 WINDOW_ALIAS_TOKENS = ID_VAR_TOKENS - {TokenType.ROWS} 834 WINDOW_BEFORE_PAREN_TOKENS = {TokenType.OVER} 835 WINDOW_SIDES = {"FOLLOWING", "PRECEDING"} 836 837 ADD_CONSTRAINT_TOKENS = {TokenType.CONSTRAINT, TokenType.PRIMARY_KEY, TokenType.FOREIGN_KEY} 838 839 DISTINCT_TOKENS = {TokenType.DISTINCT} 840 841 STRICT_CAST = True 842 843 # A NULL arg in CONCAT yields NULL by default 844 CONCAT_NULL_OUTPUTS_STRING = False 845 846 PREFIXED_PIVOT_COLUMNS = False 847 IDENTIFY_PIVOT_STRINGS = False 848 849 LOG_BASE_FIRST = True 850 LOG_DEFAULTS_TO_LN = False 851 852 SUPPORTS_USER_DEFINED_TYPES = True 853 854 __slots__ = ( 855 "error_level", 856 "error_message_context", 857 "max_errors", 858 "sql", 859 "errors", 860 "_tokens", 861 "_index", 862 "_curr", 863 "_next", 864 "_prev", 865 "_prev_comments", 866 "_tokenizer", 867 ) 868 869 # Autofilled 870 TOKENIZER_CLASS: t.Type[Tokenizer] = Tokenizer 871 INDEX_OFFSET: int = 0 872 UNNEST_COLUMN_ONLY: bool = False 873 ALIAS_POST_TABLESAMPLE: bool = False 874 STRICT_STRING_CONCAT = False 875 NORMALIZE_FUNCTIONS = "upper" 876 NULL_ORDERING: str = "nulls_are_small" 877 SHOW_TRIE: t.Dict = {} 878 SET_TRIE: t.Dict = {} 879 FORMAT_MAPPING: t.Dict[str, str] = {} 880 FORMAT_TRIE: t.Dict = {} 881 TIME_MAPPING: t.Dict[str, str] = {} 882 TIME_TRIE: t.Dict = {} 883 884 def __init__( 885 self, 886 error_level: t.Optional[ErrorLevel] = None, 887 error_message_context: int = 100, 888 max_errors: int = 3, 889 ): 890 self.error_level = error_level or ErrorLevel.IMMEDIATE 891 self.error_message_context = error_message_context 892 self.max_errors = max_errors 893 self._tokenizer = self.TOKENIZER_CLASS() 894 self.reset() 895 896 def reset(self): 897 self.sql = "" 898 self.errors = [] 899 self._tokens = [] 900 self._index = 0 901 

    def reset(self):
        self.sql = ""
        self.errors = []
        self._tokens = []
        self._index = 0
        self._curr = None
        self._next = None
        self._prev = None
        self._prev_comments = None

    def parse(
        self, raw_tokens: t.List[Token], sql: t.Optional[str] = None
    ) -> t.List[t.Optional[exp.Expression]]:
        """
        Parses a list of tokens and returns a list of syntax trees, one tree
        per parsed SQL statement.

        Args:
            raw_tokens: The list of tokens.
            sql: The original SQL string, used to produce helpful debug messages.

        Returns:
            The list of the produced syntax trees.
        """
        return self._parse(
            parse_method=self.__class__._parse_statement, raw_tokens=raw_tokens, sql=sql
        )

    def parse_into(
        self,
        expression_types: exp.IntoType,
        raw_tokens: t.List[Token],
        sql: t.Optional[str] = None,
    ) -> t.List[t.Optional[exp.Expression]]:
        """
        Parses a list of tokens into a given Expression type. If a collection of Expression
        types is given instead, this method will try to parse the token list into each one
        of them, stopping at the first for which the parsing succeeds.

        Args:
            expression_types: The expression type(s) to try and parse the token list into.
            raw_tokens: The list of tokens.
            sql: The original SQL string, used to produce helpful debug messages.

        Returns:
            The list of produced syntax trees, parsed into the target Expression type.
        """
        errors = []
        for expression_type in ensure_list(expression_types):
            parser = self.EXPRESSION_PARSERS.get(expression_type)
            if not parser:
                raise TypeError(f"No parser registered for {expression_type}")

            try:
                return self._parse(parser, raw_tokens, sql)
            except ParseError as e:
                e.errors[0]["into_expression"] = expression_type
                errors.append(e)

        raise ParseError(
            f"Failed to parse '{sql or raw_tokens}' into {expression_types}",
            errors=merge_errors(errors),
        ) from errors[-1]
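
    # Usage sketch (illustrative): both entry points consume tokens produced by a
    # Tokenizer; `parse_into` constrains the target node type via EXPRESSION_PARSERS:
    #
    #   sql = "SELECT a FROM t"
    #   (tree,) = Parser().parse(Tokenizer().tokenize(sql), sql)
    #   (where,) = Parser().parse_into(exp.Where, Tokenizer().tokenize("WHERE b = 1"))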
1010 """ 1011 token = token or self._curr or self._prev or Token.string("") 1012 start = token.start 1013 end = token.end + 1 1014 start_context = self.sql[max(start - self.error_message_context, 0) : start] 1015 highlight = self.sql[start:end] 1016 end_context = self.sql[end : end + self.error_message_context] 1017 1018 error = ParseError.new( 1019 f"{message}. Line {token.line}, Col: {token.col}.\n" 1020 f" {start_context}\033[4m{highlight}\033[0m{end_context}", 1021 description=message, 1022 line=token.line, 1023 col=token.col, 1024 start_context=start_context, 1025 highlight=highlight, 1026 end_context=end_context, 1027 ) 1028 1029 if self.error_level == ErrorLevel.IMMEDIATE: 1030 raise error 1031 1032 self.errors.append(error) 1033 1034 def expression( 1035 self, exp_class: t.Type[E], comments: t.Optional[t.List[str]] = None, **kwargs 1036 ) -> E: 1037 """ 1038 Creates a new, validated Expression. 1039 1040 Args: 1041 exp_class: The expression class to instantiate. 1042 comments: An optional list of comments to attach to the expression. 1043 kwargs: The arguments to set for the expression along with their respective values. 1044 1045 Returns: 1046 The target expression. 1047 """ 1048 instance = exp_class(**kwargs) 1049 instance.add_comments(comments) if comments else self._add_comments(instance) 1050 return self.validate_expression(instance) 1051 1052 def _add_comments(self, expression: t.Optional[exp.Expression]) -> None: 1053 if expression and self._prev_comments: 1054 expression.add_comments(self._prev_comments) 1055 self._prev_comments = None 1056 1057 def validate_expression(self, expression: E, args: t.Optional[t.List] = None) -> E: 1058 """ 1059 Validates an Expression, making sure that all its mandatory arguments are set. 1060 1061 Args: 1062 expression: The expression to validate. 1063 args: An optional list of items that was used to instantiate the expression, if it's a Func. 1064 1065 Returns: 1066 The validated expression. 
1067 """ 1068 if self.error_level != ErrorLevel.IGNORE: 1069 for error_message in expression.error_messages(args): 1070 self.raise_error(error_message) 1071 1072 return expression 1073 1074 def _find_sql(self, start: Token, end: Token) -> str: 1075 return self.sql[start.start : end.end + 1] 1076 1077 def _advance(self, times: int = 1) -> None: 1078 self._index += times 1079 self._curr = seq_get(self._tokens, self._index) 1080 self._next = seq_get(self._tokens, self._index + 1) 1081 1082 if self._index > 0: 1083 self._prev = self._tokens[self._index - 1] 1084 self._prev_comments = self._prev.comments 1085 else: 1086 self._prev = None 1087 self._prev_comments = None 1088 1089 def _retreat(self, index: int) -> None: 1090 if index != self._index: 1091 self._advance(index - self._index) 1092 1093 def _parse_command(self) -> exp.Command: 1094 return self.expression(exp.Command, this=self._prev.text, expression=self._parse_string()) 1095 1096 def _parse_comment(self, allow_exists: bool = True) -> exp.Expression: 1097 start = self._prev 1098 exists = self._parse_exists() if allow_exists else None 1099 1100 self._match(TokenType.ON) 1101 1102 kind = self._match_set(self.CREATABLES) and self._prev 1103 if not kind: 1104 return self._parse_as_command(start) 1105 1106 if kind.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE): 1107 this = self._parse_user_defined_function(kind=kind.token_type) 1108 elif kind.token_type == TokenType.TABLE: 1109 this = self._parse_table(alias_tokens=self.COMMENT_TABLE_ALIAS_TOKENS) 1110 elif kind.token_type == TokenType.COLUMN: 1111 this = self._parse_column() 1112 else: 1113 this = self._parse_id_var() 1114 1115 self._match(TokenType.IS) 1116 1117 return self.expression( 1118 exp.Comment, this=this, kind=kind.text, expression=self._parse_string(), exists=exists 1119 ) 1120 1121 def _parse_to_table( 1122 self, 1123 ) -> exp.ToTableProperty: 1124 table = self._parse_table_parts(schema=True) 1125 return self.expression(exp.ToTableProperty, this=table) 1126 1127 # https://clickhouse.com/docs/en/engines/table-engines/mergetree-family/mergetree#mergetree-table-ttl 1128 def _parse_ttl(self) -> exp.Expression: 1129 def _parse_ttl_action() -> t.Optional[exp.Expression]: 1130 this = self._parse_bitwise() 1131 1132 if self._match_text_seq("DELETE"): 1133 return self.expression(exp.MergeTreeTTLAction, this=this, delete=True) 1134 if self._match_text_seq("RECOMPRESS"): 1135 return self.expression( 1136 exp.MergeTreeTTLAction, this=this, recompress=self._parse_bitwise() 1137 ) 1138 if self._match_text_seq("TO", "DISK"): 1139 return self.expression( 1140 exp.MergeTreeTTLAction, this=this, to_disk=self._parse_string() 1141 ) 1142 if self._match_text_seq("TO", "VOLUME"): 1143 return self.expression( 1144 exp.MergeTreeTTLAction, this=this, to_volume=self._parse_string() 1145 ) 1146 1147 return this 1148 1149 expressions = self._parse_csv(_parse_ttl_action) 1150 where = self._parse_where() 1151 group = self._parse_group() 1152 1153 aggregates = None 1154 if group and self._match(TokenType.SET): 1155 aggregates = self._parse_csv(self._parse_set_item) 1156 1157 return self.expression( 1158 exp.MergeTreeTTL, 1159 expressions=expressions, 1160 where=where, 1161 group=group, 1162 aggregates=aggregates, 1163 ) 1164 1165 def _parse_statement(self) -> t.Optional[exp.Expression]: 1166 if self._curr is None: 1167 return None 1168 1169 if self._match_set(self.STATEMENT_PARSERS): 1170 return self.STATEMENT_PARSERS[self._prev.token_type](self) 1171 1172 if self._match_set(Tokenizer.COMMANDS): 

    def _find_sql(self, start: Token, end: Token) -> str:
        return self.sql[start.start : end.end + 1]

    def _advance(self, times: int = 1) -> None:
        self._index += times
        self._curr = seq_get(self._tokens, self._index)
        self._next = seq_get(self._tokens, self._index + 1)

        if self._index > 0:
            self._prev = self._tokens[self._index - 1]
            self._prev_comments = self._prev.comments
        else:
            self._prev = None
            self._prev_comments = None

    def _retreat(self, index: int) -> None:
        if index != self._index:
            self._advance(index - self._index)

    def _parse_command(self) -> exp.Command:
        return self.expression(exp.Command, this=self._prev.text, expression=self._parse_string())

    def _parse_comment(self, allow_exists: bool = True) -> exp.Expression:
        start = self._prev
        exists = self._parse_exists() if allow_exists else None

        self._match(TokenType.ON)

        kind = self._match_set(self.CREATABLES) and self._prev
        if not kind:
            return self._parse_as_command(start)

        if kind.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE):
            this = self._parse_user_defined_function(kind=kind.token_type)
        elif kind.token_type == TokenType.TABLE:
            this = self._parse_table(alias_tokens=self.COMMENT_TABLE_ALIAS_TOKENS)
        elif kind.token_type == TokenType.COLUMN:
            this = self._parse_column()
        else:
            this = self._parse_id_var()

        self._match(TokenType.IS)

        return self.expression(
            exp.Comment, this=this, kind=kind.text, expression=self._parse_string(), exists=exists
        )

    def _parse_to_table(
        self,
    ) -> exp.ToTableProperty:
        table = self._parse_table_parts(schema=True)
        return self.expression(exp.ToTableProperty, this=table)

    # https://clickhouse.com/docs/en/engines/table-engines/mergetree-family/mergetree#mergetree-table-ttl
    def _parse_ttl(self) -> exp.Expression:
        def _parse_ttl_action() -> t.Optional[exp.Expression]:
            this = self._parse_bitwise()

            if self._match_text_seq("DELETE"):
                return self.expression(exp.MergeTreeTTLAction, this=this, delete=True)
            if self._match_text_seq("RECOMPRESS"):
                return self.expression(
                    exp.MergeTreeTTLAction, this=this, recompress=self._parse_bitwise()
                )
            if self._match_text_seq("TO", "DISK"):
                return self.expression(
                    exp.MergeTreeTTLAction, this=this, to_disk=self._parse_string()
                )
            if self._match_text_seq("TO", "VOLUME"):
                return self.expression(
                    exp.MergeTreeTTLAction, this=this, to_volume=self._parse_string()
                )

            return this

        expressions = self._parse_csv(_parse_ttl_action)
        where = self._parse_where()
        group = self._parse_group()

        aggregates = None
        if group and self._match(TokenType.SET):
            aggregates = self._parse_csv(self._parse_set_item)

        return self.expression(
            exp.MergeTreeTTL,
            expressions=expressions,
            where=where,
            group=group,
            aggregates=aggregates,
        )

    def _parse_statement(self) -> t.Optional[exp.Expression]:
        if self._curr is None:
            return None

        if self._match_set(self.STATEMENT_PARSERS):
            return self.STATEMENT_PARSERS[self._prev.token_type](self)

        if self._match_set(Tokenizer.COMMANDS):
            return self._parse_command()

        expression = self._parse_expression()
        expression = self._parse_set_operations(expression) if expression else self._parse_select()
        return self._parse_query_modifiers(expression)

    def _parse_drop(self, exists: bool = False) -> exp.Drop | exp.Command:
        start = self._prev
        temporary = self._match(TokenType.TEMPORARY)
        materialized = self._match_text_seq("MATERIALIZED")

        kind = self._match_set(self.CREATABLES) and self._prev.text
        if not kind:
            return self._parse_as_command(start)

        return self.expression(
            exp.Drop,
            comments=start.comments,
            exists=exists or self._parse_exists(),
            this=self._parse_table(schema=True),
            kind=kind,
            temporary=temporary,
            materialized=materialized,
            cascade=self._match_text_seq("CASCADE"),
            constraints=self._match_text_seq("CONSTRAINTS"),
            purge=self._match_text_seq("PURGE"),
        )

    def _parse_exists(self, not_: bool = False) -> t.Optional[bool]:
        return (
            self._match_text_seq("IF")
            and (not not_ or self._match(TokenType.NOT))
            and self._match(TokenType.EXISTS)
        )

    def _parse_create(self) -> exp.Create | exp.Command:
        # Note: this can't be None because we've matched a statement parser
        start = self._prev
        comments = self._prev_comments

        replace = start.text.upper() == "REPLACE" or self._match_pair(
            TokenType.OR, TokenType.REPLACE
        )
        unique = self._match(TokenType.UNIQUE)

        if self._match_pair(TokenType.TABLE, TokenType.FUNCTION, advance=False):
            self._advance()

        properties = None
        create_token = self._match_set(self.CREATABLES) and self._prev

        if not create_token:
            # exp.Properties.Location.POST_CREATE
            properties = self._parse_properties()
            create_token = self._match_set(self.CREATABLES) and self._prev

            if not properties or not create_token:
                return self._parse_as_command(start)

        exists = self._parse_exists(not_=True)
        this = None
        expression: t.Optional[exp.Expression] = None
        indexes = None
        no_schema_binding = None
        begin = None
        clone = None

        def extend_props(temp_props: t.Optional[exp.Properties]) -> None:
            nonlocal properties
            if properties and temp_props:
                properties.expressions.extend(temp_props.expressions)
            elif temp_props:
                properties = temp_props

        if create_token.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE):
            this = self._parse_user_defined_function(kind=create_token.token_type)

            # exp.Properties.Location.POST_SCHEMA ("schema" here is the UDF's type signature)
            extend_props(self._parse_properties())

            self._match(TokenType.ALIAS)

            if self._match(TokenType.COMMAND):
                expression = self._parse_as_command(self._prev)
            else:
                begin = self._match(TokenType.BEGIN)
                return_ = self._match_text_seq("RETURN")
                expression = self._parse_statement()

                if return_:
                    expression = self.expression(exp.Return, this=expression)
        elif create_token.token_type == TokenType.INDEX:
            this = self._parse_index(index=self._parse_id_var())
        elif create_token.token_type in self.DB_CREATABLES:
            table_parts = self._parse_table_parts(schema=True)

            # exp.Properties.Location.POST_NAME
            self._match(TokenType.COMMA)
            extend_props(self._parse_properties(before=True))

            this = self._parse_schema(this=table_parts)

            # exp.Properties.Location.POST_SCHEMA and POST_WITH
            extend_props(self._parse_properties())

            self._match(TokenType.ALIAS)
            if not self._match_set(self.DDL_SELECT_TOKENS, advance=False):
                # exp.Properties.Location.POST_ALIAS
                extend_props(self._parse_properties())

            expression = self._parse_ddl_select()

            if create_token.token_type == TokenType.TABLE:
                # exp.Properties.Location.POST_EXPRESSION
                extend_props(self._parse_properties())

                indexes = []
                while True:
                    index = self._parse_index()

                    # exp.Properties.Location.POST_INDEX
                    extend_props(self._parse_properties())

                    if not index:
                        break
                    else:
                        self._match(TokenType.COMMA)
                        indexes.append(index)
            elif create_token.token_type == TokenType.VIEW:
                if self._match_text_seq("WITH", "NO", "SCHEMA", "BINDING"):
                    no_schema_binding = True

        if self._match_text_seq("CLONE"):
            clone = self._parse_table(schema=True)
            when = self._match_texts({"AT", "BEFORE"}) and self._prev.text.upper()
            clone_kind = (
                self._match(TokenType.L_PAREN)
                and self._match_texts(self.CLONE_KINDS)
                and self._prev.text.upper()
            )
            clone_expression = self._match(TokenType.FARROW) and self._parse_bitwise()
            self._match(TokenType.R_PAREN)
            clone = self.expression(
                exp.Clone, this=clone, when=when, kind=clone_kind, expression=clone_expression
            )

        return self.expression(
            exp.Create,
            comments=comments,
            this=this,
            kind=create_token.text,
            replace=replace,
            unique=unique,
            expression=expression,
            exists=exists,
            properties=properties,
            indexes=indexes,
            no_schema_binding=no_schema_binding,
            begin=begin,
            clone=clone,
        )

    def _parse_property_before(self) -> t.Optional[exp.Expression]:
        # only used for teradata currently
        self._match(TokenType.COMMA)

        kwargs = {
            "no": self._match_text_seq("NO"),
            "dual": self._match_text_seq("DUAL"),
            "before": self._match_text_seq("BEFORE"),
            "default": self._match_text_seq("DEFAULT"),
            "local": (self._match_text_seq("LOCAL") and "LOCAL")
            or (self._match_text_seq("NOT", "LOCAL") and "NOT LOCAL"),
            "after": self._match_text_seq("AFTER"),
            "minimum": self._match_texts(("MIN", "MINIMUM")),
            "maximum": self._match_texts(("MAX", "MAXIMUM")),
        }

        if self._match_texts(self.PROPERTY_PARSERS):
            parser = self.PROPERTY_PARSERS[self._prev.text.upper()]
            try:
                return parser(self, **{k: v for k, v in kwargs.items() if v})
            except TypeError:
                self.raise_error(f"Cannot parse property '{self._prev.text}'")

        return None

    def _parse_property(self) -> t.Optional[exp.Expression]:
        if self._match_texts(self.PROPERTY_PARSERS):
            return self.PROPERTY_PARSERS[self._prev.text.upper()](self)

        if self._match_pair(TokenType.DEFAULT, TokenType.CHARACTER_SET):
            return self._parse_character_set(default=True)

        if self._match_text_seq("COMPOUND", "SORTKEY"):
            return self._parse_sortkey(compound=True)

        if self._match_text_seq("SQL", "SECURITY"):
            return self.expression(exp.SqlSecurityProperty, definer=self._match_text_seq("DEFINER"))

        assignment = self._match_pair(
            TokenType.VAR, TokenType.EQ, advance=False
        ) or self._match_pair(TokenType.STRING, TokenType.EQ, advance=False)

        if assignment:
            key = self._parse_var_or_string()
            self._match(TokenType.EQ)
            return self.expression(
                exp.Property,
                this=key,
                value=self._parse_column() or self._parse_var(any_token=True),
            )

        return None

    def _parse_stored(self) -> exp.FileFormatProperty:
        self._match(TokenType.ALIAS)

        input_format = self._parse_string() if self._match_text_seq("INPUTFORMAT") else None
        output_format = self._parse_string() if self._match_text_seq("OUTPUTFORMAT") else None

        return self.expression(
            exp.FileFormatProperty,
            this=self.expression(
                exp.InputOutputFormat, input_format=input_format, output_format=output_format
            )
            if input_format or output_format
            else self._parse_var_or_string() or self._parse_number() or self._parse_id_var(),
        )

    def _parse_property_assignment(self, exp_class: t.Type[E]) -> E:
        self._match(TokenType.EQ)
        self._match(TokenType.ALIAS)
        return self.expression(exp_class, this=self._parse_field())

    def _parse_properties(self, before: t.Optional[bool] = None) -> t.Optional[exp.Properties]:
        properties = []
        while True:
            if before:
                prop = self._parse_property_before()
            else:
                prop = self._parse_property()

            if not prop:
                break
            for p in ensure_list(prop):
                properties.append(p)

        if properties:
            return self.expression(exp.Properties, expressions=properties)

        return None

    def _parse_fallback(self, no: bool = False) -> exp.FallbackProperty:
        return self.expression(
            exp.FallbackProperty, no=no, protection=self._match_text_seq("PROTECTION")
        )

    def _parse_volatile_property(self) -> exp.VolatileProperty | exp.StabilityProperty:
        if self._index >= 2:
            pre_volatile_token = self._tokens[self._index - 2]
        else:
            pre_volatile_token = None

        if pre_volatile_token and pre_volatile_token.token_type in self.PRE_VOLATILE_TOKENS:
            return exp.VolatileProperty()

        return self.expression(exp.StabilityProperty, this=exp.Literal.string("VOLATILE"))

    def _parse_with_property(
        self,
    ) -> t.Optional[exp.Expression] | t.List[exp.Expression]:
        if self._match(TokenType.L_PAREN, advance=False):
            return self._parse_wrapped_csv(self._parse_property)

        if self._match_text_seq("JOURNAL"):
            return self._parse_withjournaltable()

        if self._match_text_seq("DATA"):
            return self._parse_withdata(no=False)
        elif self._match_text_seq("NO", "DATA"):
            return self._parse_withdata(no=True)

        if not self._next:
            return None

        return self._parse_withisolatedloading()

    # https://dev.mysql.com/doc/refman/8.0/en/create-view.html
    def _parse_definer(self) -> t.Optional[exp.DefinerProperty]:
        self._match(TokenType.EQ)

        user = self._parse_id_var()
        self._match(TokenType.PARAMETER)
        host = self._parse_id_var() or (self._match(TokenType.MOD) and self._prev.text)

        if not user or not host:
            return None

        return exp.DefinerProperty(this=f"{user}@{host}")

    def _parse_withjournaltable(self) -> exp.WithJournalTableProperty:
        self._match(TokenType.TABLE)
        self._match(TokenType.EQ)
        return self.expression(exp.WithJournalTableProperty, this=self._parse_table_parts())

    def _parse_log(self, no: bool = False) -> exp.LogProperty:
        return self.expression(exp.LogProperty, no=no)

    def _parse_journal(self, **kwargs) -> exp.JournalProperty:
        return self.expression(exp.JournalProperty, **kwargs)
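
    # Dispatch sketch (illustrative): these Teradata-style helpers are reached via
    # PROPERTY_PARSERS; _parse_property_before consumes prefix keywords and forwards
    # them as flags, so e.g. "NO JOURNAL" resolves to:
    #
    #   self._parse_journal(no=True)  # -> JournalProperty(no=True)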

    def _parse_checksum(self) -> exp.ChecksumProperty:
        self._match(TokenType.EQ)

        on = None
        if self._match(TokenType.ON):
            on = True
        elif self._match_text_seq("OFF"):
            on = False

        return self.expression(exp.ChecksumProperty, on=on, default=self._match(TokenType.DEFAULT))

    def _parse_cluster(self) -> exp.Cluster:
        return self.expression(exp.Cluster, expressions=self._parse_csv(self._parse_ordered))

    def _parse_clustered_by(self) -> exp.ClusteredByProperty:
        self._match_text_seq("BY")

        self._match_l_paren()
        expressions = self._parse_csv(self._parse_column)
        self._match_r_paren()

        if self._match_text_seq("SORTED", "BY"):
            self._match_l_paren()
            sorted_by = self._parse_csv(self._parse_ordered)
            self._match_r_paren()
        else:
            sorted_by = None

        self._match(TokenType.INTO)
        buckets = self._parse_number()
        self._match_text_seq("BUCKETS")

        return self.expression(
            exp.ClusteredByProperty,
            expressions=expressions,
            sorted_by=sorted_by,
            buckets=buckets,
        )

    def _parse_copy_property(self) -> t.Optional[exp.CopyGrantsProperty]:
        if not self._match_text_seq("GRANTS"):
            self._retreat(self._index - 1)
            return None

        return self.expression(exp.CopyGrantsProperty)

    def _parse_freespace(self) -> exp.FreespaceProperty:
        self._match(TokenType.EQ)
        return self.expression(
            exp.FreespaceProperty, this=self._parse_number(), percent=self._match(TokenType.PERCENT)
        )

    def _parse_mergeblockratio(
        self, no: bool = False, default: bool = False
    ) -> exp.MergeBlockRatioProperty:
        if self._match(TokenType.EQ):
            return self.expression(
                exp.MergeBlockRatioProperty,
                this=self._parse_number(),
                percent=self._match(TokenType.PERCENT),
            )

        return self.expression(exp.MergeBlockRatioProperty, no=no, default=default)

    def _parse_datablocksize(
        self,
        default: t.Optional[bool] = None,
        minimum: t.Optional[bool] = None,
        maximum: t.Optional[bool] = None,
    ) -> exp.DataBlocksizeProperty:
        self._match(TokenType.EQ)
        size = self._parse_number()

        units = None
        if self._match_texts(("BYTES", "KBYTES", "KILOBYTES")):
            units = self._prev.text

        return self.expression(
            exp.DataBlocksizeProperty,
            size=size,
            units=units,
            default=default,
            minimum=minimum,
            maximum=maximum,
        )

    def _parse_blockcompression(self) -> exp.BlockCompressionProperty:
        self._match(TokenType.EQ)
        always = self._match_text_seq("ALWAYS")
        manual = self._match_text_seq("MANUAL")
        never = self._match_text_seq("NEVER")
        default = self._match_text_seq("DEFAULT")

        autotemp = None
        if self._match_text_seq("AUTOTEMP"):
            autotemp = self._parse_schema()

        return self.expression(
            exp.BlockCompressionProperty,
            always=always,
            manual=manual,
            never=never,
            default=default,
            autotemp=autotemp,
        )

    def _parse_withisolatedloading(self) -> exp.IsolatedLoadingProperty:
        no = self._match_text_seq("NO")
        concurrent = self._match_text_seq("CONCURRENT")
        self._match_text_seq("ISOLATED", "LOADING")
        for_all = self._match_text_seq("FOR", "ALL")
        for_insert = self._match_text_seq("FOR", "INSERT")
        for_none = self._match_text_seq("FOR", "NONE")
        return self.expression(
            exp.IsolatedLoadingProperty,
            no=no,
            concurrent=concurrent,
            for_all=for_all,
            for_insert=for_insert,
            for_none=for_none,
        )

    def _parse_locking(self) -> exp.LockingProperty:
        if self._match(TokenType.TABLE):
            kind = "TABLE"
        elif self._match(TokenType.VIEW):
            kind = "VIEW"
        elif self._match(TokenType.ROW):
            kind = "ROW"
        elif self._match_text_seq("DATABASE"):
            kind = "DATABASE"
        else:
            kind = None

        if kind in ("DATABASE", "TABLE", "VIEW"):
            this = self._parse_table_parts()
        else:
            this = None

        if self._match(TokenType.FOR):
            for_or_in = "FOR"
        elif self._match(TokenType.IN):
            for_or_in = "IN"
        else:
            for_or_in = None

        if self._match_text_seq("ACCESS"):
            lock_type = "ACCESS"
        elif self._match_texts(("EXCL", "EXCLUSIVE")):
            lock_type = "EXCLUSIVE"
        elif self._match_text_seq("SHARE"):
            lock_type = "SHARE"
        elif self._match_text_seq("READ"):
            lock_type = "READ"
        elif self._match_text_seq("WRITE"):
            lock_type = "WRITE"
        elif self._match_text_seq("CHECKSUM"):
            lock_type = "CHECKSUM"
        else:
            lock_type = None

        override = self._match_text_seq("OVERRIDE")

        return self.expression(
            exp.LockingProperty,
            this=this,
            kind=kind,
            for_or_in=for_or_in,
            lock_type=lock_type,
            override=override,
        )

    def _parse_partition_by(self) -> t.List[exp.Expression]:
        if self._match(TokenType.PARTITION_BY):
            return self._parse_csv(self._parse_conjunction)
        return []

    def _parse_partitioned_by(self) -> exp.PartitionedByProperty:
        self._match(TokenType.EQ)
        return self.expression(
            exp.PartitionedByProperty,
            this=self._parse_schema() or self._parse_bracket(self._parse_field()),
        )

    def _parse_withdata(self, no: bool = False) -> exp.WithDataProperty:
        if self._match_text_seq("AND", "STATISTICS"):
            statistics = True
        elif self._match_text_seq("AND", "NO", "STATISTICS"):
            statistics = False
        else:
            statistics = None

        return self.expression(exp.WithDataProperty, no=no, statistics=statistics)

    def _parse_no_property(self) -> t.Optional[exp.NoPrimaryIndexProperty]:
        if self._match_text_seq("PRIMARY", "INDEX"):
            return exp.NoPrimaryIndexProperty()
        return None

    def _parse_on_property(self) -> t.Optional[exp.Expression]:
        if self._match_text_seq("COMMIT", "PRESERVE", "ROWS"):
            return exp.OnCommitProperty()
        if self._match_text_seq("COMMIT", "DELETE", "ROWS"):
            return exp.OnCommitProperty(delete=True)
        return self.expression(exp.OnProperty, this=self._parse_schema(self._parse_id_var()))

    def _parse_distkey(self) -> exp.DistKeyProperty:
        return self.expression(exp.DistKeyProperty, this=self._parse_wrapped(self._parse_id_var))

    def _parse_create_like(self) -> t.Optional[exp.LikeProperty]:
        table = self._parse_table(schema=True)

        options = []
        while self._match_texts(("INCLUDING", "EXCLUDING")):
            this = self._prev.text.upper()

            id_var = self._parse_id_var()
            if not id_var:
                return None

            options.append(
                self.expression(exp.Property, this=this, value=exp.var(id_var.this.upper()))
            )

        return self.expression(exp.LikeProperty, this=table, expressions=options)

    def _parse_sortkey(self, compound: bool = False) -> exp.SortKeyProperty:
        return self.expression(
            exp.SortKeyProperty, this=self._parse_wrapped_id_vars(), compound=compound

    def _parse_character_set(self, default: bool = False) -> exp.CharacterSetProperty:
        self._match(TokenType.EQ)
        return self.expression(
            exp.CharacterSetProperty, this=self._parse_var_or_string(), default=default
        )

    def _parse_returns(self) -> exp.ReturnsProperty:
        value: t.Optional[exp.Expression]
        is_table = self._match(TokenType.TABLE)

        if is_table:
            if self._match(TokenType.LT):
                value = self.expression(
                    exp.Schema,
                    this="TABLE",
                    expressions=self._parse_csv(self._parse_struct_types),
                )
                if not self._match(TokenType.GT):
                    self.raise_error("Expecting >")
            else:
                value = self._parse_schema(exp.var("TABLE"))
        else:
            value = self._parse_types()

        return self.expression(exp.ReturnsProperty, this=value, is_table=is_table)

    def _parse_describe(self) -> exp.Describe:
        kind = self._match_set(self.CREATABLES) and self._prev.text
        this = self._parse_table()
        return self.expression(exp.Describe, this=this, kind=kind)

    def _parse_insert(self) -> exp.Insert:
        comments = ensure_list(self._prev_comments)
        overwrite = self._match(TokenType.OVERWRITE)
        ignore = self._match(TokenType.IGNORE)
        local = self._match_text_seq("LOCAL")
        alternative = None

        if self._match_text_seq("DIRECTORY"):
            this: t.Optional[exp.Expression] = self.expression(
                exp.Directory,
                this=self._parse_var_or_string(),
                local=local,
                row_format=self._parse_row_format(match_row=True),
            )
        else:
            if self._match(TokenType.OR):
                alternative = self._match_texts(self.INSERT_ALTERNATIVES) and self._prev.text

            self._match(TokenType.INTO)
            comments += ensure_list(self._prev_comments)
            self._match(TokenType.TABLE)
            this = self._parse_table(schema=True)

        returning = self._parse_returning()

        return self.expression(
            exp.Insert,
            comments=comments,
            this=this,
            by_name=self._match_text_seq("BY", "NAME"),
            exists=self._parse_exists(),
            partition=self._parse_partition(),
            where=self._match_pair(TokenType.REPLACE, TokenType.WHERE)
            and self._parse_conjunction(),
            expression=self._parse_ddl_select(),
            conflict=self._parse_on_conflict(),
            returning=returning or self._parse_returning(),
            overwrite=overwrite,
            alternative=alternative,
            ignore=ignore,
        )

    def _parse_on_conflict(self) -> t.Optional[exp.OnConflict]:
        conflict = self._match_text_seq("ON", "CONFLICT")
        duplicate = self._match_text_seq("ON", "DUPLICATE", "KEY")

        if not conflict and not duplicate:
            return None

        nothing = None
        expressions = None
        key = None
        constraint = None

        if conflict:
            if self._match_text_seq("ON", "CONSTRAINT"):
                constraint = self._parse_id_var()
            else:
                key = self._parse_csv(self._parse_value)

        self._match_text_seq("DO")
        if self._match_text_seq("NOTHING"):
            nothing = True
        else:
            self._match(TokenType.UPDATE)
            self._match(TokenType.SET)
            expressions = self._parse_csv(self._parse_equality)

        return self.expression(
            exp.OnConflict,
            duplicate=duplicate,
            expressions=expressions,
            nothing=nothing,
            key=key,
            constraint=constraint,
        )

    def _parse_returning(self) -> t.Optional[exp.Returning]:
        if not self._match(TokenType.RETURNING):
            return None
        return self.expression(
            exp.Returning,
            expressions=self._parse_csv(self._parse_expression),
            into=self._match(TokenType.INTO) and self._parse_table_part(),
        )
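
    # A minimal sketch, assuming the bundled Postgres dialect: _parse_insert wires
    # together the optional ON CONFLICT and RETURNING clauses parsed above.
    #
    #   import sqlglot
    #   sqlglot.parse_one(
    #       "INSERT INTO t (a) VALUES (1) ON CONFLICT (a) DO NOTHING RETURNING a",
    #       read="postgres",
    #   )  # -> exp.Insert with its "conflict" and "returning" args set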

    def _parse_row(self) -> t.Optional[exp.RowFormatSerdeProperty | exp.RowFormatDelimitedProperty]:
        if not self._match(TokenType.FORMAT):
            return None
        return self._parse_row_format()

    def _parse_row_format(
        self, match_row: bool = False
    ) -> t.Optional[exp.RowFormatSerdeProperty | exp.RowFormatDelimitedProperty]:
        if match_row and not self._match_pair(TokenType.ROW, TokenType.FORMAT):
            return None

        if self._match_text_seq("SERDE"):
            this = self._parse_string()

            serde_properties = None
            if self._match(TokenType.SERDE_PROPERTIES):
                serde_properties = self.expression(
                    exp.SerdeProperties, expressions=self._parse_wrapped_csv(self._parse_property)
                )

            return self.expression(
                exp.RowFormatSerdeProperty, this=this, serde_properties=serde_properties
            )

        self._match_text_seq("DELIMITED")

        kwargs = {}

        if self._match_text_seq("FIELDS", "TERMINATED", "BY"):
            kwargs["fields"] = self._parse_string()
            if self._match_text_seq("ESCAPED", "BY"):
                kwargs["escaped"] = self._parse_string()
        if self._match_text_seq("COLLECTION", "ITEMS", "TERMINATED", "BY"):
            kwargs["collection_items"] = self._parse_string()
        if self._match_text_seq("MAP", "KEYS", "TERMINATED", "BY"):
            kwargs["map_keys"] = self._parse_string()
        if self._match_text_seq("LINES", "TERMINATED", "BY"):
            kwargs["lines"] = self._parse_string()
        if self._match_text_seq("NULL", "DEFINED", "AS"):
            kwargs["null"] = self._parse_string()

        return self.expression(exp.RowFormatDelimitedProperty, **kwargs)  # type: ignore

    def _parse_load(self) -> exp.LoadData | exp.Command:
        if self._match_text_seq("DATA"):
            local = self._match_text_seq("LOCAL")
            self._match_text_seq("INPATH")
            inpath = self._parse_string()
            overwrite = self._match(TokenType.OVERWRITE)
            self._match_pair(TokenType.INTO, TokenType.TABLE)

            return self.expression(
                exp.LoadData,
                this=self._parse_table(schema=True),
                local=local,
                overwrite=overwrite,
                inpath=inpath,
                partition=self._parse_partition(),
                input_format=self._match_text_seq("INPUTFORMAT") and self._parse_string(),
                serde=self._match_text_seq("SERDE") and self._parse_string(),
            )
        return self._parse_as_command(self._prev)

    def _parse_delete(self) -> exp.Delete:
        # This handles MySQL's "Multiple-Table Syntax"
        # https://dev.mysql.com/doc/refman/8.0/en/delete.html
        tables = None
        comments = self._prev_comments
        if not self._match(TokenType.FROM, advance=False):
            tables = self._parse_csv(self._parse_table) or None

        returning = self._parse_returning()

        return self.expression(
            exp.Delete,
            comments=comments,
            tables=tables,
            this=self._match(TokenType.FROM) and self._parse_table(joins=True),
            using=self._match(TokenType.USING) and self._parse_table(joins=True),
            where=self._parse_where(),
            returning=returning or self._parse_returning(),
            limit=self._parse_limit(),
        )
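
    # A minimal sketch of the multi-table syntax referenced in the comment above,
    # assuming the bundled MySQL dialect:
    #
    #   import sqlglot
    #   sqlglot.parse_one(
    #       "DELETE t1, t2 FROM t1 INNER JOIN t2 WHERE t1.id = t2.id", read="mysql"
    #   )  # -> exp.Delete with the "tables" arg populated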

    def _parse_update(self) -> exp.Update:
        comments = self._prev_comments
        this = self._parse_table(alias_tokens=self.UPDATE_ALIAS_TOKENS)
        expressions = self._match(TokenType.SET) and self._parse_csv(self._parse_equality)
        returning = self._parse_returning()
        return self.expression(
            exp.Update,
            comments=comments,
            **{  # type: ignore
                "this": this,
                "expressions": expressions,
                "from": self._parse_from(joins=True),
                "where": self._parse_where(),
                "returning": returning or self._parse_returning(),
                "limit": self._parse_limit(),
            },
        )

    def _parse_uncache(self) -> exp.Uncache:
        if not self._match(TokenType.TABLE):
            self.raise_error("Expecting TABLE after UNCACHE")

        return self.expression(
            exp.Uncache, exists=self._parse_exists(), this=self._parse_table(schema=True)
        )

    def _parse_cache(self) -> exp.Cache:
        lazy = self._match_text_seq("LAZY")
        self._match(TokenType.TABLE)
        table = self._parse_table(schema=True)

        options = []
        if self._match_text_seq("OPTIONS"):
            self._match_l_paren()
            k = self._parse_string()
            self._match(TokenType.EQ)
            v = self._parse_string()
            options = [k, v]
            self._match_r_paren()

        self._match(TokenType.ALIAS)
        return self.expression(
            exp.Cache,
            this=table,
            lazy=lazy,
            options=options,
            expression=self._parse_select(nested=True),
        )

    def _parse_partition(self) -> t.Optional[exp.Partition]:
        if not self._match(TokenType.PARTITION):
            return None

        return self.expression(
            exp.Partition, expressions=self._parse_wrapped_csv(self._parse_conjunction)
        )

    def _parse_value(self) -> exp.Tuple:
        if self._match(TokenType.L_PAREN):
            expressions = self._parse_csv(self._parse_conjunction)
            self._match_r_paren()
            return self.expression(exp.Tuple, expressions=expressions)

        # In presto we can have VALUES 1, 2 which results in 1 column & 2 rows.
        # https://prestodb.io/docs/current/sql/values.html
        return self.expression(exp.Tuple, expressions=[self._parse_conjunction()])

    def _parse_projections(self) -> t.List[exp.Expression]:
        return self._parse_expressions()

    def _parse_select(
        self, nested: bool = False, table: bool = False, parse_subquery_alias: bool = True
    ) -> t.Optional[exp.Expression]:
        cte = self._parse_with()

        if cte:
            this = self._parse_statement()

            if not this:
                self.raise_error("Failed to parse any statement following CTE")
                return cte

            if "with" in this.arg_types:
                this.set("with", cte)
            else:
                self.raise_error(f"{this.key} does not support CTE")
                this = cte

            return this

        # duckdb supports a leading FROM, e.g. FROM x, which is equivalent to SELECT * FROM x
        from_ = self._parse_from() if self._match(TokenType.FROM, advance=False) else None

        if self._match(TokenType.SELECT):
            comments = self._prev_comments

            hint = self._parse_hint()
            all_ = self._match(TokenType.ALL)
            distinct = self._match_set(self.DISTINCT_TOKENS)

            kind = (
                self._match(TokenType.ALIAS)
                and self._match_texts(("STRUCT", "VALUE"))
                and self._prev.text
            )

            if distinct:
                distinct = self.expression(
                    exp.Distinct,
                    on=self._parse_value() if self._match(TokenType.ON) else None,
                )

            if all_ and distinct:
                self.raise_error("Cannot specify both ALL and DISTINCT after SELECT")

            limit = self._parse_limit(top=True)
            projections = self._parse_projections()

            this = self.expression(
                exp.Select,
                kind=kind,
                hint=hint,
                distinct=distinct,
                expressions=projections,
                limit=limit,
            )
            this.comments = comments
            into = self._parse_into()
            if into:
                this.set("into", into)

            if not from_:
                from_ = self._parse_from()

            if from_:
                this.set("from", from_)

            this = self._parse_query_modifiers(this)
        elif (table or nested) and self._match(TokenType.L_PAREN):
            if self._match(TokenType.PIVOT):
                this = self._parse_simplified_pivot()
            elif self._match(TokenType.FROM):
                this = exp.select("*").from_(
                    t.cast(exp.From, self._parse_from(skip_from_token=True))
                )
            else:
                this = self._parse_table() if table else self._parse_select(nested=True)
                this = self._parse_set_operations(self._parse_query_modifiers(this))

            self._match_r_paren()

            # We return early here so that the UNION isn't attached to the subquery by the
            # following call to _parse_set_operations, but instead becomes the parent node
            return self._parse_subquery(this, parse_alias=parse_subquery_alias)
        elif self._match(TokenType.VALUES):
            this = self.expression(
                exp.Values,
                expressions=self._parse_csv(self._parse_value),
                alias=self._parse_table_alias(),
            )
        elif from_:
            this = exp.select("*").from_(from_.this, copy=False)
        else:
            this = None

        return self._parse_set_operations(this)

    def _parse_with(self, skip_with_token: bool = False) -> t.Optional[exp.With]:
        if not skip_with_token and not self._match(TokenType.WITH):
            return None

        comments = self._prev_comments
        recursive = self._match(TokenType.RECURSIVE)

        expressions = []
        while True:
            expressions.append(self._parse_cte())

            if not self._match(TokenType.COMMA) and not self._match(TokenType.WITH):
                break
            else:
                self._match(TokenType.WITH)

        return self.expression(
            exp.With, comments=comments, expressions=expressions, recursive=recursive
        )

    def _parse_cte(self) -> exp.CTE:
        alias = self._parse_table_alias()
        if not alias or not alias.this:
            self.raise_error("Expected CTE to have alias")

        self._match(TokenType.ALIAS)
        return self.expression(
            exp.CTE, this=self._parse_wrapped(self._parse_statement), alias=alias
        )

    def _parse_table_alias(
        self, alias_tokens: t.Optional[t.Collection[TokenType]] = None
    ) -> t.Optional[exp.TableAlias]:
        any_token = self._match(TokenType.ALIAS)
        alias = (
            self._parse_id_var(any_token=any_token, tokens=alias_tokens or self.TABLE_ALIAS_TOKENS)
            or self._parse_string_as_identifier()
        )

        index = self._index
        if self._match(TokenType.L_PAREN):
            columns = self._parse_csv(self._parse_function_parameter)
            self._match_r_paren() if columns else self._retreat(index)
        else:
            columns = None

        if not alias and not columns:
            return None

        return self.expression(exp.TableAlias, this=alias, columns=columns)

    def _parse_subquery(
        self, this: t.Optional[exp.Expression], parse_alias: bool = True
    ) -> t.Optional[exp.Subquery]:
        if not this:
            return None

        return self.expression(
            exp.Subquery,
            this=this,
            pivots=self._parse_pivots(),
            alias=self._parse_table_alias() if parse_alias else None,
        )
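
    # A minimal sketch: _parse_with and _parse_cte feed back into _parse_select,
    # so with the generic dialect:
    #
    #   import sqlglot
    #   sqlglot.parse_one("WITH x AS (SELECT 1 AS a) SELECT a FROM x")
    #   # -> exp.Select whose "with" arg is an exp.With holding one exp.CTE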

    def _parse_query_modifiers(
        self, this: t.Optional[exp.Expression]
    ) -> t.Optional[exp.Expression]:
        if isinstance(this, self.MODIFIABLES):
            for join in iter(self._parse_join, None):
                this.append("joins", join)
            for lateral in iter(self._parse_lateral, None):
                this.append("laterals", lateral)

            while True:
                if self._match_set(self.QUERY_MODIFIER_PARSERS, advance=False):
                    parser = self.QUERY_MODIFIER_PARSERS[self._curr.token_type]
                    key, expression = parser(self)

                    if expression:
                        this.set(key, expression)
                        if key == "limit":
                            offset = expression.args.pop("offset", None)
                            if offset:
                                this.set("offset", exp.Offset(expression=offset))
                        continue
                break
        return this

    def _parse_hint(self) -> t.Optional[exp.Hint]:
        if self._match(TokenType.HINT):
            hints = []
            for hint in iter(lambda: self._parse_csv(self._parse_function), []):
                hints.extend(hint)

            if not self._match_pair(TokenType.STAR, TokenType.SLASH):
                self.raise_error("Expected */ after HINT")

            return self.expression(exp.Hint, expressions=hints)

        return None

    def _parse_into(self) -> t.Optional[exp.Into]:
        if not self._match(TokenType.INTO):
            return None

        temp = self._match(TokenType.TEMPORARY)
        unlogged = self._match_text_seq("UNLOGGED")
        self._match(TokenType.TABLE)

        return self.expression(
            exp.Into, this=self._parse_table(schema=True), temporary=temp, unlogged=unlogged
        )

    def _parse_from(
        self, joins: bool = False, skip_from_token: bool = False
    ) -> t.Optional[exp.From]:
        if not skip_from_token and not self._match(TokenType.FROM):
            return None

        return self.expression(
            exp.From, comments=self._prev_comments, this=self._parse_table(joins=joins)
        )

    def _parse_match_recognize(self) -> t.Optional[exp.MatchRecognize]:
        if not self._match(TokenType.MATCH_RECOGNIZE):
            return None

        self._match_l_paren()

        partition = self._parse_partition_by()
        order = self._parse_order()
        measures = self._parse_expressions() if self._match_text_seq("MEASURES") else None

        if self._match_text_seq("ONE", "ROW", "PER", "MATCH"):
            rows = exp.var("ONE ROW PER MATCH")
        elif self._match_text_seq("ALL", "ROWS", "PER", "MATCH"):
            text = "ALL ROWS PER MATCH"
            if self._match_text_seq("SHOW", "EMPTY", "MATCHES"):
                text += " SHOW EMPTY MATCHES"
            elif self._match_text_seq("OMIT", "EMPTY", "MATCHES"):
                text += " OMIT EMPTY MATCHES"
            elif self._match_text_seq("WITH", "UNMATCHED", "ROWS"):
                text += " WITH UNMATCHED ROWS"
            rows = exp.var(text)
        else:
            rows = None

        if self._match_text_seq("AFTER", "MATCH", "SKIP"):
            text = "AFTER MATCH SKIP"
            if self._match_text_seq("PAST", "LAST", "ROW"):
                text += " PAST LAST ROW"
            elif self._match_text_seq("TO", "NEXT", "ROW"):
                text += " TO NEXT ROW"
            elif self._match_text_seq("TO", "FIRST"):
                text += f" TO FIRST {self._advance_any().text}"  # type: ignore
            elif self._match_text_seq("TO", "LAST"):
                text += f" TO LAST {self._advance_any().text}"  # type: ignore
            after = exp.var(text)
        else:
            after = None

        if self._match_text_seq("PATTERN"):
            self._match_l_paren()

            if not self._curr:
                self.raise_error("Expecting )", self._curr)

            paren = 1
            start = self._curr

            while self._curr and paren > 0:
                if self._curr.token_type == TokenType.L_PAREN:
                    paren += 1
                if self._curr.token_type == TokenType.R_PAREN:
                    paren -= 1

                end = self._prev
                self._advance()

            if paren > 0:
                self.raise_error("Expecting )", self._curr)

            pattern = exp.var(self._find_sql(start, end))
        else:
            pattern = None

        define = (
            self._parse_csv(
                lambda: self.expression(
                    exp.Alias,
                    alias=self._parse_id_var(any_token=True),
                    this=self._match(TokenType.ALIAS) and self._parse_conjunction(),
                )
            )
            if self._match_text_seq("DEFINE")
            else None
        )

        self._match_r_paren()

        return self.expression(
            exp.MatchRecognize,
            partition_by=partition,
            order=order,
            measures=measures,
            rows=rows,
            after=after,
            pattern=pattern,
            define=define,
            alias=self._parse_table_alias(),
        )

    def _parse_lateral(self) -> t.Optional[exp.Lateral]:
        outer_apply = self._match_pair(TokenType.OUTER, TokenType.APPLY)
        cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY)

        if outer_apply or cross_apply:
            this = self._parse_select(table=True)
            view = None
            outer = not cross_apply
        elif self._match(TokenType.LATERAL):
            this = self._parse_select(table=True)
            view = self._match(TokenType.VIEW)
            outer = self._match(TokenType.OUTER)
        else:
            return None

        if not this:
            this = (
                self._parse_unnest()
                or self._parse_function()
                or self._parse_id_var(any_token=False)
            )

            while self._match(TokenType.DOT):
                this = exp.Dot(
                    this=this,
                    expression=self._parse_function() or self._parse_id_var(any_token=False),
                )

        if view:
            table = self._parse_id_var(any_token=False)
            columns = self._parse_csv(self._parse_id_var) if self._match(TokenType.ALIAS) else []
            table_alias: t.Optional[exp.TableAlias] = self.expression(
                exp.TableAlias, this=table, columns=columns
            )
        elif isinstance(this, exp.Subquery) and this.alias:
            # Ensures parity between the Subquery's and the Lateral's "alias" args
            table_alias = this.args["alias"].copy()
        else:
            table_alias = self._parse_table_alias()

        return self.expression(exp.Lateral, this=this, view=view, outer=outer, alias=table_alias)

    def _parse_join_parts(
        self,
    ) -> t.Tuple[t.Optional[Token], t.Optional[Token], t.Optional[Token]]:
        return (
            self._match_set(self.JOIN_METHODS) and self._prev,
            self._match_set(self.JOIN_SIDES) and self._prev,
            self._match_set(self.JOIN_KINDS) and self._prev,
        )
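
    # A rough sketch, assuming the bundled Snowflake dialect: MATCH_RECOGNIZE lands
    # in exp.MatchRecognize, with the PATTERN body captured verbatim.
    #
    #   import sqlglot
    #   sqlglot.parse_one(
    #       "SELECT * FROM t MATCH_RECOGNIZE "
    #       "(PARTITION BY a ORDER BY b PATTERN (x+) DEFINE x AS c > 10) AS mr",
    #       read="snowflake",
    #   )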
kwargs["hint"] = hint 2384 2385 if self._match(TokenType.ON): 2386 kwargs["on"] = self._parse_conjunction() 2387 elif self._match(TokenType.USING): 2388 kwargs["using"] = self._parse_wrapped_id_vars() 2389 elif not (kind and kind.token_type == TokenType.CROSS): 2390 index = self._index 2391 joins = self._parse_joins() 2392 2393 if joins and self._match(TokenType.ON): 2394 kwargs["on"] = self._parse_conjunction() 2395 elif joins and self._match(TokenType.USING): 2396 kwargs["using"] = self._parse_wrapped_id_vars() 2397 else: 2398 joins = None 2399 self._retreat(index) 2400 2401 kwargs["this"].set("joins", joins) 2402 2403 comments = [c for token in (method, side, kind) if token for c in token.comments] 2404 return self.expression(exp.Join, comments=comments, **kwargs) 2405 2406 def _parse_index( 2407 self, 2408 index: t.Optional[exp.Expression] = None, 2409 ) -> t.Optional[exp.Index]: 2410 if index: 2411 unique = None 2412 primary = None 2413 amp = None 2414 2415 self._match(TokenType.ON) 2416 self._match(TokenType.TABLE) # hive 2417 table = self._parse_table_parts(schema=True) 2418 else: 2419 unique = self._match(TokenType.UNIQUE) 2420 primary = self._match_text_seq("PRIMARY") 2421 amp = self._match_text_seq("AMP") 2422 2423 if not self._match(TokenType.INDEX): 2424 return None 2425 2426 index = self._parse_id_var() 2427 table = None 2428 2429 using = self._parse_field() if self._match(TokenType.USING) else None 2430 2431 if self._match(TokenType.L_PAREN, advance=False): 2432 columns = self._parse_wrapped_csv(self._parse_ordered) 2433 else: 2434 columns = None 2435 2436 return self.expression( 2437 exp.Index, 2438 this=index, 2439 table=table, 2440 using=using, 2441 columns=columns, 2442 unique=unique, 2443 primary=primary, 2444 amp=amp, 2445 partition_by=self._parse_partition_by(), 2446 ) 2447 2448 def _parse_table_hints(self) -> t.Optional[t.List[exp.Expression]]: 2449 hints: t.List[exp.Expression] = [] 2450 if self._match_pair(TokenType.WITH, TokenType.L_PAREN): 2451 # https://learn.microsoft.com/en-us/sql/t-sql/queries/hints-transact-sql-table?view=sql-server-ver16 2452 hints.append( 2453 self.expression( 2454 exp.WithTableHint, 2455 expressions=self._parse_csv( 2456 lambda: self._parse_function() or self._parse_var(any_token=True) 2457 ), 2458 ) 2459 ) 2460 self._match_r_paren() 2461 else: 2462 # https://dev.mysql.com/doc/refman/8.0/en/index-hints.html 2463 while self._match_set(self.TABLE_INDEX_HINT_TOKENS): 2464 hint = exp.IndexTableHint(this=self._prev.text.upper()) 2465 2466 self._match_texts({"INDEX", "KEY"}) 2467 if self._match(TokenType.FOR): 2468 hint.set("target", self._advance_any() and self._prev.text.upper()) 2469 2470 hint.set("expressions", self._parse_wrapped_id_vars()) 2471 hints.append(hint) 2472 2473 return hints or None 2474 2475 def _parse_table_part(self, schema: bool = False) -> t.Optional[exp.Expression]: 2476 return ( 2477 (not schema and self._parse_function(optional_parens=False)) 2478 or self._parse_id_var(any_token=False) 2479 or self._parse_string_as_identifier() 2480 or self._parse_placeholder() 2481 ) 2482 2483 def _parse_table_parts(self, schema: bool = False) -> exp.Table: 2484 catalog = None 2485 db = None 2486 table = self._parse_table_part(schema=schema) 2487 2488 while self._match(TokenType.DOT): 2489 if catalog: 2490 # This allows nesting the table in arbitrarily many dot expressions if needed 2491 table = self.expression( 2492 exp.Dot, this=table, expression=self._parse_table_part(schema=schema) 2493 ) 2494 else: 2495 catalog = db 2496 db = table 

    def _parse_table_parts(self, schema: bool = False) -> exp.Table:
        catalog = None
        db = None
        table = self._parse_table_part(schema=schema)

        while self._match(TokenType.DOT):
            if catalog:
                # This allows nesting the table in arbitrarily many dot expressions if needed
                table = self.expression(
                    exp.Dot, this=table, expression=self._parse_table_part(schema=schema)
                )
            else:
                catalog = db
                db = table
                table = self._parse_table_part(schema=schema)

        if not table:
            self.raise_error(f"Expected table name but got {self._curr}")

        return self.expression(
            exp.Table, this=table, db=db, catalog=catalog, pivots=self._parse_pivots()
        )

    def _parse_table(
        self,
        schema: bool = False,
        joins: bool = False,
        alias_tokens: t.Optional[t.Collection[TokenType]] = None,
        parse_bracket: bool = False,
    ) -> t.Optional[exp.Expression]:
        lateral = self._parse_lateral()
        if lateral:
            return lateral

        unnest = self._parse_unnest()
        if unnest:
            return unnest

        values = self._parse_derived_table_values()
        if values:
            return values

        subquery = self._parse_select(table=True)
        if subquery:
            if not subquery.args.get("pivots"):
                subquery.set("pivots", self._parse_pivots())
            return subquery

        bracket = parse_bracket and self._parse_bracket(None)
        bracket = self.expression(exp.Table, this=bracket) if bracket else None
        this: exp.Expression = bracket or self._parse_table_parts(schema=schema)

        if schema:
            return self._parse_schema(this=this)

        if self.ALIAS_POST_TABLESAMPLE:
            table_sample = self._parse_table_sample()

        alias = self._parse_table_alias(alias_tokens=alias_tokens or self.TABLE_ALIAS_TOKENS)
        if alias:
            this.set("alias", alias)

        if not this.args.get("pivots"):
            this.set("pivots", self._parse_pivots())

        this.set("hints", self._parse_table_hints())

        if not self.ALIAS_POST_TABLESAMPLE:
            table_sample = self._parse_table_sample()

        if table_sample:
            table_sample.set("this", this)
            this = table_sample

        if joins:
            for join in iter(self._parse_join, None):
                this.append("joins", join)

        return this

    def _parse_unnest(self, with_alias: bool = True) -> t.Optional[exp.Unnest]:
        if not self._match(TokenType.UNNEST):
            return None

        expressions = self._parse_wrapped_csv(self._parse_type)
        ordinality = self._match_pair(TokenType.WITH, TokenType.ORDINALITY)

        alias = self._parse_table_alias() if with_alias else None

        if alias and self.UNNEST_COLUMN_ONLY:
            if alias.args.get("columns"):
                self.raise_error("Unexpected extra column alias in unnest.")

            alias.set("columns", [alias.this])
            alias.set("this", None)

        offset = None
        if self._match_pair(TokenType.WITH, TokenType.OFFSET):
            self._match(TokenType.ALIAS)
            offset = self._parse_id_var() or exp.to_identifier("offset")

        return self.expression(
            exp.Unnest, expressions=expressions, ordinality=ordinality, alias=alias, offset=offset
        )

    def _parse_derived_table_values(self) -> t.Optional[exp.Values]:
        is_derived = self._match_pair(TokenType.L_PAREN, TokenType.VALUES)
        if not is_derived and not self._match(TokenType.VALUES):
            return None

        expressions = self._parse_csv(self._parse_value)
        alias = self._parse_table_alias()

        if is_derived:
            self._match_r_paren()

        return self.expression(
            exp.Values, expressions=expressions, alias=alias or self._parse_table_alias()
        )
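
    # A minimal sketch, assuming the bundled Presto dialect, where the UNNEST alias
    # names both the table and its columns:
    #
    #   import sqlglot
    #   sqlglot.parse_one("SELECT x FROM UNNEST(ARRAY[1, 2]) AS t(x)", read="presto")
    #   # -> the FROM source is an exp.Unnest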

    def _parse_table_sample(self, as_modifier: bool = False) -> t.Optional[exp.TableSample]:
        if not self._match(TokenType.TABLE_SAMPLE) and not (
            as_modifier and self._match_text_seq("USING", "SAMPLE")
        ):
            return None

        bucket_numerator = None
        bucket_denominator = None
        bucket_field = None
        percent = None
        rows = None
        size = None
        seed = None

        kind = (
            self._prev.text if self._prev.token_type == TokenType.TABLE_SAMPLE else "USING SAMPLE"
        )
        method = self._parse_var(tokens=(TokenType.ROW,))

        self._match(TokenType.L_PAREN)

        num = self._parse_number()

        if self._match_text_seq("BUCKET"):
            bucket_numerator = self._parse_number()
            self._match_text_seq("OUT", "OF")
            bucket_denominator = self._parse_number()
            self._match(TokenType.ON)
            bucket_field = self._parse_field()
        elif self._match_set((TokenType.PERCENT, TokenType.MOD)):
            percent = num
        elif self._match(TokenType.ROWS):
            rows = num
        else:
            size = num

        self._match(TokenType.R_PAREN)

        if self._match(TokenType.L_PAREN):
            method = self._parse_var()
            seed = self._match(TokenType.COMMA) and self._parse_number()
            self._match_r_paren()
        elif self._match_texts(("SEED", "REPEATABLE")):
            seed = self._parse_wrapped(self._parse_number)

        return self.expression(
            exp.TableSample,
            method=method,
            bucket_numerator=bucket_numerator,
            bucket_denominator=bucket_denominator,
            bucket_field=bucket_field,
            percent=percent,
            rows=rows,
            size=size,
            seed=seed,
            kind=kind,
        )

    def _parse_pivots(self) -> t.Optional[t.List[exp.Pivot]]:
        return list(iter(self._parse_pivot, None)) or None

    def _parse_joins(self) -> t.Optional[t.List[exp.Join]]:
        return list(iter(self._parse_join, None)) or None

    # https://duckdb.org/docs/sql/statements/pivot
    def _parse_simplified_pivot(self) -> exp.Pivot:
        def _parse_on() -> t.Optional[exp.Expression]:
            this = self._parse_bitwise()
            return self._parse_in(this) if self._match(TokenType.IN) else this

        this = self._parse_table()
        expressions = self._match(TokenType.ON) and self._parse_csv(_parse_on)
        using = self._match(TokenType.USING) and self._parse_csv(
            lambda: self._parse_alias(self._parse_function())
        )
        group = self._parse_group()
        return self.expression(
            exp.Pivot, this=this, expressions=expressions, using=using, group=group
        )
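
    # A minimal sketch of the simplified PIVOT form documented above, assuming the
    # bundled DuckDB dialect:
    #
    #   import sqlglot
    #   sqlglot.parse_one("PIVOT cities ON year USING SUM(population)", read="duckdb")
    #   # -> exp.Pivot with "this", "expressions" (ON) and "using" populated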

    def _parse_pivot(self) -> t.Optional[exp.Pivot]:
        index = self._index
        include_nulls = None

        if self._match(TokenType.PIVOT):
            unpivot = False
        elif self._match(TokenType.UNPIVOT):
            unpivot = True

            # https://docs.databricks.com/en/sql/language-manual/sql-ref-syntax-qry-select-unpivot.html#syntax
            if self._match_text_seq("INCLUDE", "NULLS"):
                include_nulls = True
            elif self._match_text_seq("EXCLUDE", "NULLS"):
                include_nulls = False
        else:
            return None

        expressions = []
        field = None

        if not self._match(TokenType.L_PAREN):
            self._retreat(index)
            return None

        if unpivot:
            expressions = self._parse_csv(self._parse_column)
        else:
            expressions = self._parse_csv(lambda: self._parse_alias(self._parse_function()))

        if not expressions:
            self.raise_error("Failed to parse PIVOT's aggregation list")

        if not self._match(TokenType.FOR):
            self.raise_error("Expecting FOR")

        value = self._parse_column()

        if not self._match(TokenType.IN):
            self.raise_error("Expecting IN")

        field = self._parse_in(value, alias=True)

        self._match_r_paren()

        pivot = self.expression(
            exp.Pivot,
            expressions=expressions,
            field=field,
            unpivot=unpivot,
            include_nulls=include_nulls,
        )

        if not self._match_set((TokenType.PIVOT, TokenType.UNPIVOT), advance=False):
            pivot.set("alias", self._parse_table_alias())

        if not unpivot:
            names = self._pivot_column_names(t.cast(t.List[exp.Expression], expressions))

            columns: t.List[exp.Expression] = []
            for fld in pivot.args["field"].expressions:
                field_name = fld.sql() if self.IDENTIFY_PIVOT_STRINGS else fld.alias_or_name
                for name in names:
                    if self.PREFIXED_PIVOT_COLUMNS:
                        name = f"{name}_{field_name}" if name else field_name
                    else:
                        name = f"{field_name}_{name}" if name else field_name

                    columns.append(exp.to_identifier(name))

            pivot.set("columns", columns)

        return pivot

    def _pivot_column_names(self, aggregations: t.List[exp.Expression]) -> t.List[str]:
        return [agg.alias for agg in aggregations]

    def _parse_where(self, skip_where_token: bool = False) -> t.Optional[exp.Where]:
        if not skip_where_token and not self._match(TokenType.WHERE):
            return None

        return self.expression(
            exp.Where, comments=self._prev_comments, this=self._parse_conjunction()
        )

    def _parse_group(self, skip_group_by_token: bool = False) -> t.Optional[exp.Group]:
        if not skip_group_by_token and not self._match(TokenType.GROUP_BY):
            return None

        elements = defaultdict(list)

        if self._match(TokenType.ALL):
            return self.expression(exp.Group, all=True)

        while True:
            expressions = self._parse_csv(self._parse_conjunction)
            if expressions:
                elements["expressions"].extend(expressions)

            grouping_sets = self._parse_grouping_sets()
            if grouping_sets:
                elements["grouping_sets"].extend(grouping_sets)

            rollup = None
            cube = None
            totals = None

            with_ = self._match(TokenType.WITH)
            if self._match(TokenType.ROLLUP):
                rollup = with_ or self._parse_wrapped_csv(self._parse_column)
                elements["rollup"].extend(ensure_list(rollup))

            if self._match(TokenType.CUBE):
                cube = with_ or self._parse_wrapped_csv(self._parse_column)
                elements["cube"].extend(ensure_list(cube))

            if self._match_text_seq("TOTALS"):
                totals = True
                elements["totals"] = True  # type: ignore

            if not (grouping_sets or rollup or cube or totals):
                break

        return self.expression(exp.Group, **elements)  # type: ignore

    def _parse_grouping_sets(self) -> t.Optional[t.List[exp.Expression]]:
        if not self._match(TokenType.GROUPING_SETS):
            return None

        return self._parse_wrapped_csv(self._parse_grouping_set)

    def _parse_grouping_set(self) -> t.Optional[exp.Expression]:
        if self._match(TokenType.L_PAREN):
            grouping_set = self._parse_csv(self._parse_column)
            self._match_r_paren()
            return self.expression(exp.Tuple, expressions=grouping_set)

        return self._parse_column()

    def _parse_having(self, skip_having_token: bool = False) -> t.Optional[exp.Having]:
        if not skip_having_token and not self._match(TokenType.HAVING):
            return None
        return self.expression(exp.Having, this=self._parse_conjunction())

    def _parse_qualify(self) -> t.Optional[exp.Qualify]:
        if not self._match(TokenType.QUALIFY):
            return None
        return self.expression(exp.Qualify, this=self._parse_conjunction())
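
    # A minimal sketch with the generic dialect: every GROUP BY variant above lands
    # in a single exp.Group node.
    #
    #   import sqlglot
    #   sqlglot.parse_one("SELECT a, SUM(b) FROM t GROUP BY GROUPING SETS ((a), ())")
    #   # -> exp.Group with its "grouping_sets" arg populated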

    def _parse_connect(self, skip_start_token: bool = False) -> t.Optional[exp.Connect]:
        if skip_start_token:
            start = None
        elif self._match(TokenType.START_WITH):
            start = self._parse_conjunction()
        else:
            return None

        self._match(TokenType.CONNECT_BY)
        self.NO_PAREN_FUNCTION_PARSERS["PRIOR"] = lambda self: self.expression(
            exp.Prior, this=self._parse_bitwise()
        )
        connect = self._parse_conjunction()
        self.NO_PAREN_FUNCTION_PARSERS.pop("PRIOR")
        return self.expression(exp.Connect, start=start, connect=connect)

    def _parse_order(
        self, this: t.Optional[exp.Expression] = None, skip_order_token: bool = False
    ) -> t.Optional[exp.Expression]:
        if not skip_order_token and not self._match(TokenType.ORDER_BY):
            return this

        return self.expression(
            exp.Order, this=this, expressions=self._parse_csv(self._parse_ordered)
        )

    def _parse_sort(self, exp_class: t.Type[E], token: TokenType) -> t.Optional[E]:
        if not self._match(token):
            return None
        return self.expression(exp_class, expressions=self._parse_csv(self._parse_ordered))

    def _parse_ordered(self) -> exp.Ordered:
        this = self._parse_conjunction()
        self._match(TokenType.ASC)

        is_desc = self._match(TokenType.DESC)
        is_nulls_first = self._match_text_seq("NULLS", "FIRST")
        is_nulls_last = self._match_text_seq("NULLS", "LAST")
        desc = is_desc or False
        asc = not desc
        nulls_first = is_nulls_first or False
        explicitly_null_ordered = is_nulls_first or is_nulls_last

        if (
            not explicitly_null_ordered
            and (
                (asc and self.NULL_ORDERING == "nulls_are_small")
                or (desc and self.NULL_ORDERING != "nulls_are_small")
            )
            and self.NULL_ORDERING != "nulls_are_last"
        ):
            nulls_first = True

        return self.expression(exp.Ordered, this=this, desc=desc, nulls_first=nulls_first)

    def _parse_limit(
        self, this: t.Optional[exp.Expression] = None, top: bool = False
    ) -> t.Optional[exp.Expression]:
        if self._match(TokenType.TOP if top else TokenType.LIMIT):
            comments = self._prev_comments
            if top:
                limit_paren = self._match(TokenType.L_PAREN)
                expression = self._parse_number()

                if limit_paren:
                    self._match_r_paren()
            else:
                expression = self._parse_term()

            if self._match(TokenType.COMMA):
                offset = expression
                expression = self._parse_term()
            else:
                offset = None

            limit_exp = self.expression(
                exp.Limit, this=this, expression=expression, offset=offset, comments=comments
            )

            return limit_exp

        if self._match(TokenType.FETCH):
            direction = self._match_set((TokenType.FIRST, TokenType.NEXT))
            direction = self._prev.text if direction else "FIRST"

            count = self._parse_number()
            percent = self._match(TokenType.PERCENT)

            self._match_set((TokenType.ROW, TokenType.ROWS))

            only = self._match_text_seq("ONLY")
            with_ties = self._match_text_seq("WITH", "TIES")

            if only and with_ties:
                self.raise_error("Cannot specify both ONLY and WITH TIES in FETCH clause")

            return self.expression(
                exp.Fetch,
                direction=direction,
                count=count,
                percent=percent,
                with_ties=with_ties,
            )

        return this
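
    # A minimal sketch, assuming the bundled Oracle dialect: hierarchical queries go
    # through _parse_connect, which temporarily registers a parser for PRIOR.
    #
    #   import sqlglot
    #   sqlglot.parse_one(
    #       "SELECT id FROM t START WITH parent_id IS NULL CONNECT BY PRIOR id = parent_id",
    #       read="oracle",
    #   )  # -> exp.Select with a "connect" arg (exp.Connect)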

    def _parse_offset(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]:
        if not self._match(TokenType.OFFSET):
            return this

        count = self._parse_term()
        self._match_set((TokenType.ROW, TokenType.ROWS))
        return self.expression(exp.Offset, this=this, expression=count)

    def _parse_locks(self) -> t.List[exp.Lock]:
        locks = []
        while True:
            if self._match_text_seq("FOR", "UPDATE"):
                update = True
            elif self._match_text_seq("FOR", "SHARE") or self._match_text_seq(
                "LOCK", "IN", "SHARE", "MODE"
            ):
                update = False
            else:
                break

            expressions = None
            if self._match_text_seq("OF"):
                expressions = self._parse_csv(lambda: self._parse_table(schema=True))

            wait: t.Optional[bool | exp.Expression] = None
            if self._match_text_seq("NOWAIT"):
                wait = True
            elif self._match_text_seq("WAIT"):
                wait = self._parse_primary()
            elif self._match_text_seq("SKIP", "LOCKED"):
                wait = False

            locks.append(
                self.expression(exp.Lock, update=update, expressions=expressions, wait=wait)
            )

        return locks

    def _parse_set_operations(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        if not self._match_set(self.SET_OPERATIONS):
            return this

        token_type = self._prev.token_type

        if token_type == TokenType.UNION:
            expression = exp.Union
        elif token_type == TokenType.EXCEPT:
            expression = exp.Except
        else:
            expression = exp.Intersect

        return self.expression(
            expression,
            this=this,
            distinct=self._match(TokenType.DISTINCT) or not self._match(TokenType.ALL),
            by_name=self._match_text_seq("BY", "NAME"),
            expression=self._parse_set_operations(self._parse_select(nested=True)),
        )

    def _parse_expression(self) -> t.Optional[exp.Expression]:
        return self._parse_alias(self._parse_conjunction())

    def _parse_conjunction(self) -> t.Optional[exp.Expression]:
        return self._parse_tokens(self._parse_equality, self.CONJUNCTION)

    def _parse_equality(self) -> t.Optional[exp.Expression]:
        return self._parse_tokens(self._parse_comparison, self.EQUALITY)

    def _parse_comparison(self) -> t.Optional[exp.Expression]:
        return self._parse_tokens(self._parse_range, self.COMPARISON)

    def _parse_range(self) -> t.Optional[exp.Expression]:
        this = self._parse_bitwise()
        negate = self._match(TokenType.NOT)

        if self._match_set(self.RANGE_PARSERS):
            expression = self.RANGE_PARSERS[self._prev.token_type](self, this)
            if not expression:
                return this

            this = expression
        elif self._match(TokenType.ISNULL):
            this = self.expression(exp.Is, this=this, expression=exp.Null())

        # Postgres supports ISNULL and NOTNULL for conditions.
        # https://blog.andreiavram.ro/postgresql-null-composite-type/
        if self._match(TokenType.NOTNULL):
            this = self.expression(exp.Is, this=this, expression=exp.Null())
            this = self.expression(exp.Not, this=this)

        if negate:
            this = self.expression(exp.Not, this=this)

        if self._match(TokenType.IS):
            this = self._parse_is(this)

        return this

    def _parse_is(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        index = self._index - 1
        negate = self._match(TokenType.NOT)

        if self._match_text_seq("DISTINCT", "FROM"):
            klass = exp.NullSafeEQ if negate else exp.NullSafeNEQ
            return self.expression(klass, this=this, expression=self._parse_expression())

        expression = self._parse_null() or self._parse_boolean()
        if not expression:
            self._retreat(index)
            return None

        this = self.expression(exp.Is, this=this, expression=expression)
        return self.expression(exp.Not, this=this) if negate else this

    def _parse_in(self, this: t.Optional[exp.Expression], alias: bool = False) -> exp.In:
        unnest = self._parse_unnest(with_alias=False)
        if unnest:
            this = self.expression(exp.In, this=this, unnest=unnest)
        elif self._match(TokenType.L_PAREN):
            expressions = self._parse_csv(lambda: self._parse_select_or_expression(alias=alias))

            if len(expressions) == 1 and isinstance(expressions[0], exp.Subqueryable):
                this = self.expression(exp.In, this=this, query=expressions[0])
            else:
                this = self.expression(exp.In, this=this, expressions=expressions)

            self._match_r_paren(this)
        else:
            this = self.expression(exp.In, this=this, field=self._parse_field())

        return this

    def _parse_between(self, this: exp.Expression) -> exp.Between:
        low = self._parse_bitwise()
        self._match(TokenType.AND)
        high = self._parse_bitwise()
        return self.expression(exp.Between, this=this, low=low, high=high)

    def _parse_escape(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        if not self._match(TokenType.ESCAPE):
            return this
        return self.expression(exp.Escape, this=this, expression=self._parse_string())

    def _parse_interval(self) -> t.Optional[exp.Interval]:
        index = self._index

        if not self._match(TokenType.INTERVAL):
            return None

        if self._match(TokenType.STRING, advance=False):
            this = self._parse_primary()
        else:
            this = self._parse_term()

        if not this:
            self._retreat(index)
            return None

        unit = self._parse_function() or self._parse_var()

        # Most dialects support, e.g., the form INTERVAL '5' day, thus we try to parse
        # each INTERVAL expression into this canonical form so it's easy to transpile
        if this and this.is_number:
            this = exp.Literal.string(this.name)
        elif this and this.is_string:
            parts = this.name.split()

            if len(parts) == 2:
                if unit:
                    # this is not actually a unit, it's something else
                    unit = None
                    self._retreat(self._index - 1)
                else:
                    this = exp.Literal.string(parts[0])
                    unit = self.expression(exp.Var, this=parts[1])

        return self.expression(exp.Interval, this=this, unit=unit)
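
    # A minimal sketch of the canonicalization described in the comment above, with
    # the generic dialect:
    #
    #   import sqlglot
    #   sqlglot.transpile("SELECT INTERVAL '5 day'")
    #   # -> ["SELECT INTERVAL '5' day"]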

    def _parse_bitwise(self) -> t.Optional[exp.Expression]:
        this = self._parse_term()

        while True:
            if self._match_set(self.BITWISE):
                this = self.expression(
                    self.BITWISE[self._prev.token_type],
                    this=this,
                    expression=self._parse_term(),
                )
            elif self._match(TokenType.DQMARK):
                this = self.expression(exp.Coalesce, this=this, expressions=self._parse_term())
            elif self._match_pair(TokenType.LT, TokenType.LT):
                this = self.expression(
                    exp.BitwiseLeftShift, this=this, expression=self._parse_term()
                )
            elif self._match_pair(TokenType.GT, TokenType.GT):
                this = self.expression(
                    exp.BitwiseRightShift, this=this, expression=self._parse_term()
                )
            else:
                break

        return this

    def _parse_term(self) -> t.Optional[exp.Expression]:
        return self._parse_tokens(self._parse_factor, self.TERM)

    def _parse_factor(self) -> t.Optional[exp.Expression]:
        return self._parse_tokens(self._parse_unary, self.FACTOR)

    def _parse_unary(self) -> t.Optional[exp.Expression]:
        if self._match_set(self.UNARY_PARSERS):
            return self.UNARY_PARSERS[self._prev.token_type](self)
        return self._parse_at_time_zone(self._parse_type())

    def _parse_type(self) -> t.Optional[exp.Expression]:
        interval = self._parse_interval()
        if interval:
            return interval

        index = self._index
        data_type = self._parse_types(check_func=True, allow_identifiers=False)
        this = self._parse_column()

        if data_type:
            if isinstance(this, exp.Literal):
                parser = self.TYPE_LITERAL_PARSERS.get(data_type.this)
                if parser:
                    return parser(self, this, data_type)
                return self.expression(exp.Cast, this=this, to=data_type)
            if not data_type.expressions:
                self._retreat(index)
                return self._parse_column()
            return self._parse_column_ops(data_type)

        return this

    def _parse_type_size(self) -> t.Optional[exp.DataTypeParam]:
        this = self._parse_type()
        if not this:
            return None

        return self.expression(
            exp.DataTypeParam, this=this, expression=self._parse_var(any_token=True)
        )

    def _parse_types(
        self, check_func: bool = False, schema: bool = False, allow_identifiers: bool = True
    ) -> t.Optional[exp.Expression]:
        index = self._index

        prefix = self._match_text_seq("SYSUDTLIB", ".")

        if not self._match_set(self.TYPE_TOKENS):
            identifier = allow_identifiers and self._parse_id_var(
                any_token=False, tokens=(TokenType.VAR,)
            )

            if identifier:
                tokens = self._tokenizer.tokenize(identifier.name)

                if len(tokens) != 1:
                    self.raise_error("Unexpected identifier", self._prev)

                if tokens[0].token_type in self.TYPE_TOKENS:
                    self._prev = tokens[0]
                elif self.SUPPORTS_USER_DEFINED_TYPES:
                    return identifier
                else:
                    return None
            else:
                return None

        type_token = self._prev.token_type

        if type_token == TokenType.PSEUDO_TYPE:
            return self.expression(exp.PseudoType, this=self._prev.text)

        nested = type_token in self.NESTED_TYPE_TOKENS
        is_struct = type_token in self.STRUCT_TYPE_TOKENS
        expressions = None
        maybe_func = False

        if self._match(TokenType.L_PAREN):
            if is_struct:
                expressions = self._parse_csv(self._parse_struct_types)
            elif nested:
                expressions = self._parse_csv(
                    lambda: self._parse_types(
                        check_func=check_func, schema=schema, allow_identifiers=allow_identifiers
                    )
                )
            elif type_token in self.ENUM_TYPE_TOKENS:
                expressions = self._parse_csv(self._parse_equality)
            else:
                expressions = self._parse_csv(self._parse_type_size)

            if not expressions or not self._match(TokenType.R_PAREN):
                self._retreat(index)
                return None

            maybe_func = True

        this: t.Optional[exp.Expression] = None
        values: t.Optional[t.List[exp.Expression]] = None

        if nested and self._match(TokenType.LT):
            if is_struct:
                expressions = self._parse_csv(self._parse_struct_types)
            else:
                expressions = self._parse_csv(
                    lambda: self._parse_types(
                        check_func=check_func, schema=schema, allow_identifiers=allow_identifiers
                    )
                )

            if not self._match(TokenType.GT):
                self.raise_error("Expecting >")

            if self._match_set((TokenType.L_BRACKET, TokenType.L_PAREN)):
                values = self._parse_csv(self._parse_conjunction)
                self._match_set((TokenType.R_BRACKET, TokenType.R_PAREN))

        if type_token in self.TIMESTAMPS:
            if self._match_text_seq("WITH", "TIME", "ZONE"):
                maybe_func = False
                tz_type = (
                    exp.DataType.Type.TIMETZ
                    if type_token in self.TIMES
                    else exp.DataType.Type.TIMESTAMPTZ
                )
                this = exp.DataType(this=tz_type, expressions=expressions)
            elif self._match_text_seq("WITH", "LOCAL", "TIME", "ZONE"):
                maybe_func = False
                this = exp.DataType(this=exp.DataType.Type.TIMESTAMPLTZ, expressions=expressions)
            elif self._match_text_seq("WITHOUT", "TIME", "ZONE"):
                maybe_func = False
        elif type_token == TokenType.INTERVAL:
            if self._match_text_seq("YEAR", "TO", "MONTH"):
                span: t.Optional[t.List[exp.Expression]] = [exp.IntervalYearToMonthSpan()]
            elif self._match_text_seq("DAY", "TO", "SECOND"):
                span = [exp.IntervalDayToSecondSpan()]
            else:
                span = None

            unit = not span and self._parse_var()
            if not unit:
                this = self.expression(
                    exp.DataType, this=exp.DataType.Type.INTERVAL, expressions=span
                )
            else:
                this = self.expression(exp.Interval, unit=unit)

        if maybe_func and check_func:
            index2 = self._index
            peek = self._parse_string()

            if not peek:
                self._retreat(index)
                return None

            self._retreat(index2)

        if not this:
            this = exp.DataType(
                this=exp.DataType.Type[type_token.value],
                expressions=expressions,
                nested=nested,
                values=values,
                prefix=prefix,
            )

        while self._match_pair(TokenType.L_BRACKET, TokenType.R_BRACKET):
            this = exp.DataType(this=exp.DataType.Type.ARRAY, expressions=[this], nested=True)

        return this

    def _parse_struct_types(self) -> t.Optional[exp.Expression]:
        this = self._parse_type() or self._parse_id_var()
        self._match(TokenType.COLON)
        return self._parse_column_def(this)

    def _parse_at_time_zone(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        if not self._match_text_seq("AT", "TIME", "ZONE"):
            return this
        return self.expression(exp.AtTimeZone, this=this, zone=self._parse_unary())

    def _parse_column(self) -> t.Optional[exp.Expression]:
        this = self._parse_field()
        if isinstance(this, exp.Identifier):
            this = self.expression(exp.Column, this=this)
        elif not this:
            return self._parse_bracket(this)
        return self._parse_column_ops(this)
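
    # A minimal sketch with the generic dialect: _parse_types folds time zone
    # modifiers into distinct DataType variants.
    #
    #   import sqlglot
    #   sqlglot.parse_one("SELECT CAST(x AS TIMESTAMP WITH TIME ZONE)")
    #   # -> the cast target is DataType.Type.TIMESTAMPTZ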

    def _parse_column_ops(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        this = self._parse_bracket(this)

        while self._match_set(self.COLUMN_OPERATORS):
            op_token = self._prev.token_type
            op = self.COLUMN_OPERATORS.get(op_token)

            if op_token == TokenType.DCOLON:
                field = self._parse_types()
                if not field:
                    self.raise_error("Expected type")
            elif op and self._curr:
                self._advance()
                value = self._prev.text
                field = (
                    exp.Literal.number(value)
                    if self._prev.token_type == TokenType.NUMBER
                    else exp.Literal.string(value)
                )
            else:
                field = self._parse_field(anonymous_func=True, any_token=True)

            if isinstance(field, exp.Func):
                # bigquery allows function calls like x.y.count(...)
                # SAFE.SUBSTR(...)
                # https://cloud.google.com/bigquery/docs/reference/standard-sql/functions-reference#function_call_rules
                this = self._replace_columns_with_dots(this)

            if op:
                this = op(self, this, field)
            elif isinstance(this, exp.Column) and not this.args.get("catalog"):
                this = self.expression(
                    exp.Column,
                    this=field,
                    table=this.this,
                    db=this.args.get("table"),
                    catalog=this.args.get("db"),
                )
            else:
                this = self.expression(exp.Dot, this=this, expression=field)

            this = self._parse_bracket(this)

        return this

    def _parse_primary(self) -> t.Optional[exp.Expression]:
        if self._match_set(self.PRIMARY_PARSERS):
            token_type = self._prev.token_type
            primary = self.PRIMARY_PARSERS[token_type](self, self._prev)

            if token_type == TokenType.STRING:
                expressions = [primary]
                while self._match(TokenType.STRING):
                    expressions.append(exp.Literal.string(self._prev.text))

                if len(expressions) > 1:
                    return self.expression(exp.Concat, expressions=expressions)

            return primary

        if self._match_pair(TokenType.DOT, TokenType.NUMBER):
            return exp.Literal.number(f"0.{self._prev.text}")

        if self._match(TokenType.L_PAREN):
            comments = self._prev_comments
            query = self._parse_select()

            if query:
                expressions = [query]
            else:
                expressions = self._parse_expressions()

            this = self._parse_query_modifiers(seq_get(expressions, 0))

            if isinstance(this, exp.Subqueryable):
                this = self._parse_set_operations(
                    self._parse_subquery(this=this, parse_alias=False)
                )
            elif len(expressions) > 1:
                this = self.expression(exp.Tuple, expressions=expressions)
            else:
                this = self.expression(exp.Paren, this=self._parse_set_operations(this))

            if this:
                this.add_comments(comments)

            self._match_r_paren(expression=this)
            return this

        return None

    def _parse_field(
        self,
        any_token: bool = False,
        tokens: t.Optional[t.Collection[TokenType]] = None,
        anonymous_func: bool = False,
    ) -> t.Optional[exp.Expression]:
        return (
            self._parse_primary()
            or self._parse_function(anonymous=anonymous_func)
            or self._parse_id_var(any_token=any_token, tokens=tokens)
        )
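
    # A minimal sketch, assuming the bundled Postgres dialect: the DCOLON branch
    # above turns the :: shorthand into the same node CAST produces.
    #
    #   import sqlglot
    #   sqlglot.parse_one("SELECT x::INT", read="postgres")
    #   # -> the projection is an exp.Cast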

    def _parse_function(
        self,
        functions: t.Optional[t.Dict[str, t.Callable]] = None,
        anonymous: bool = False,
        optional_parens: bool = True,
    ) -> t.Optional[exp.Expression]:
        if not self._curr:
            return None

        token_type = self._curr.token_type
        this = self._curr.text
        upper = this.upper()

        parser = self.NO_PAREN_FUNCTION_PARSERS.get(upper)
        if optional_parens and parser and token_type not in self.INVALID_FUNC_NAME_TOKENS:
            self._advance()
            return parser(self)

        if not self._next or self._next.token_type != TokenType.L_PAREN:
            if optional_parens and token_type in self.NO_PAREN_FUNCTIONS:
                self._advance()
                return self.expression(self.NO_PAREN_FUNCTIONS[token_type])

            return None

        if token_type not in self.FUNC_TOKENS:
            return None

        self._advance(2)

        parser = self.FUNCTION_PARSERS.get(upper)
        if parser and not anonymous:
            this = parser(self)
        else:
            subquery_predicate = self.SUBQUERY_PREDICATES.get(token_type)

            if subquery_predicate and self._curr.token_type in (TokenType.SELECT, TokenType.WITH):
                this = self.expression(subquery_predicate, this=self._parse_select())
                self._match_r_paren()
                return this

            if functions is None:
                functions = self.FUNCTIONS

            function = functions.get(upper)

            alias = upper in self.FUNCTIONS_WITH_ALIASED_ARGS
            args = self._parse_csv(lambda: self._parse_lambda(alias=alias))

            if function and not anonymous:
                func = self.validate_expression(function(args), args)
                if not self.NORMALIZE_FUNCTIONS:
                    func.meta["name"] = this
                this = func
            else:
                this = self.expression(exp.Anonymous, this=this, expressions=args)

        self._match_r_paren(this)
        return self._parse_window(this)

    def _parse_function_parameter(self) -> t.Optional[exp.Expression]:
        return self._parse_column_def(self._parse_id_var())

    def _parse_user_defined_function(
        self, kind: t.Optional[TokenType] = None
    ) -> t.Optional[exp.Expression]:
        this = self._parse_id_var()

        while self._match(TokenType.DOT):
            this = self.expression(exp.Dot, this=this, expression=self._parse_id_var())

        if not self._match(TokenType.L_PAREN):
            return this

        expressions = self._parse_csv(self._parse_function_parameter)
        self._match_r_paren()
        return self.expression(
            exp.UserDefinedFunction, this=this, expressions=expressions, wrapped=True
        )

    def _parse_introducer(self, token: Token) -> exp.Introducer | exp.Identifier:
        literal = self._parse_primary()
        if literal:
            return self.expression(exp.Introducer, this=token.text, expression=literal)

        return self.expression(exp.Identifier, this=token.text)

    def _parse_session_parameter(self) -> exp.SessionParameter:
        kind = None
        this = self._parse_id_var() or self._parse_primary()

        if this and self._match(TokenType.DOT):
            kind = this.name
            this = self._parse_var() or self._parse_primary()

        return self.expression(exp.SessionParameter, this=this, kind=kind)

    def _parse_lambda(self, alias: bool = False) -> t.Optional[exp.Expression]:
        index = self._index

        if self._match(TokenType.L_PAREN):
            expressions = t.cast(
                t.List[t.Optional[exp.Expression]], self._parse_csv(self._parse_id_var)
            )

            if not self._match(TokenType.R_PAREN):
                self._retreat(index)
        else:
            expressions = [self._parse_id_var()]

        if self._match_set(self.LAMBDAS):
            return self.LAMBDAS[self._prev.token_type](self, expressions)

        self._retreat(index)

        this: t.Optional[exp.Expression]

        if self._match(TokenType.DISTINCT):
            this = self.expression(
                exp.Distinct, expressions=self._parse_csv(self._parse_conjunction)
            )
        else:
            this = self._parse_select_or_expression(alias=alias)

        return self._parse_limit(self._parse_order(self._parse_respect_or_ignore_nulls(this)))
3560 3561 if not self.errors: 3562 try: 3563 if self._parse_select(nested=True): 3564 return this 3565 except ParseError: 3566 pass 3567 finally: 3568 self.errors.clear() 3569 self._retreat(index) 3570 3571 if not self._match(TokenType.L_PAREN): 3572 return this 3573 3574 args = self._parse_csv(lambda: self._parse_constraint() or self._parse_field_def()) 3575 3576 self._match_r_paren() 3577 return self.expression(exp.Schema, this=this, expressions=args) 3578 3579 def _parse_field_def(self) -> t.Optional[exp.Expression]: 3580 return self._parse_column_def(self._parse_field(any_token=True)) 3581 3582 def _parse_column_def(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 3583 # column defs are not really columns, they're identifiers 3584 if isinstance(this, exp.Column): 3585 this = this.this 3586 3587 kind = self._parse_types(schema=True) 3588 3589 if self._match_text_seq("FOR", "ORDINALITY"): 3590 return self.expression(exp.ColumnDef, this=this, ordinality=True) 3591 3592 constraints: t.List[exp.Expression] = [] 3593 3594 if not kind and self._match(TokenType.ALIAS): 3595 constraints.append( 3596 self.expression( 3597 exp.ComputedColumnConstraint, 3598 this=self._parse_conjunction(), 3599 persisted=self._match_text_seq("PERSISTED"), 3600 not_null=self._match_pair(TokenType.NOT, TokenType.NULL), 3601 ) 3602 ) 3603 3604 while True: 3605 constraint = self._parse_column_constraint() 3606 if not constraint: 3607 break 3608 constraints.append(constraint) 3609 3610 if not kind and not constraints: 3611 return this 3612 3613 return self.expression(exp.ColumnDef, this=this, kind=kind, constraints=constraints) 3614 3615 def _parse_auto_increment( 3616 self, 3617 ) -> exp.GeneratedAsIdentityColumnConstraint | exp.AutoIncrementColumnConstraint: 3618 start = None 3619 increment = None 3620 3621 if self._match(TokenType.L_PAREN, advance=False): 3622 args = self._parse_wrapped_csv(self._parse_bitwise) 3623 start = seq_get(args, 0) 3624 increment = seq_get(args, 1) 3625 elif self._match_text_seq("START"): 3626 start = self._parse_bitwise() 3627 self._match_text_seq("INCREMENT") 3628 increment = self._parse_bitwise() 3629 3630 if start and increment: 3631 return exp.GeneratedAsIdentityColumnConstraint(start=start, increment=increment) 3632 3633 return exp.AutoIncrementColumnConstraint() 3634 3635 def _parse_compress(self) -> exp.CompressColumnConstraint: 3636 if self._match(TokenType.L_PAREN, advance=False): 3637 return self.expression( 3638 exp.CompressColumnConstraint, this=self._parse_wrapped_csv(self._parse_bitwise) 3639 ) 3640 3641 return self.expression(exp.CompressColumnConstraint, this=self._parse_bitwise()) 3642 3643 def _parse_generated_as_identity(self) -> exp.GeneratedAsIdentityColumnConstraint: 3644 if self._match_text_seq("BY", "DEFAULT"): 3645 on_null = self._match_pair(TokenType.ON, TokenType.NULL) 3646 this = self.expression( 3647 exp.GeneratedAsIdentityColumnConstraint, this=False, on_null=on_null 3648 ) 3649 else: 3650 self._match_text_seq("ALWAYS") 3651 this = self.expression(exp.GeneratedAsIdentityColumnConstraint, this=True) 3652 3653 self._match(TokenType.ALIAS) 3654 identity = self._match_text_seq("IDENTITY") 3655 3656 if self._match(TokenType.L_PAREN): 3657 if self._match(TokenType.START_WITH): 3658 this.set("start", self._parse_bitwise()) 3659 if self._match_text_seq("INCREMENT", "BY"): 3660 this.set("increment", self._parse_bitwise()) 3661 if self._match_text_seq("MINVALUE"): 3662 this.set("minvalue", self._parse_bitwise()) 3663 if 
self._match_text_seq("MAXVALUE"): 3664 this.set("maxvalue", self._parse_bitwise()) 3665 3666 if self._match_text_seq("CYCLE"): 3667 this.set("cycle", True) 3668 elif self._match_text_seq("NO", "CYCLE"): 3669 this.set("cycle", False) 3670 3671 if not identity: 3672 this.set("expression", self._parse_bitwise()) 3673 3674 self._match_r_paren() 3675 3676 return this 3677 3678 def _parse_inline(self) -> exp.InlineLengthColumnConstraint: 3679 self._match_text_seq("LENGTH") 3680 return self.expression(exp.InlineLengthColumnConstraint, this=self._parse_bitwise()) 3681 3682 def _parse_not_constraint( 3683 self, 3684 ) -> t.Optional[exp.Expression]: 3685 if self._match_text_seq("NULL"): 3686 return self.expression(exp.NotNullColumnConstraint) 3687 if self._match_text_seq("CASESPECIFIC"): 3688 return self.expression(exp.CaseSpecificColumnConstraint, not_=True) 3689 if self._match_text_seq("FOR", "REPLICATION"): 3690 return self.expression(exp.NotForReplicationColumnConstraint) 3691 return None 3692 3693 def _parse_column_constraint(self) -> t.Optional[exp.Expression]: 3694 if self._match(TokenType.CONSTRAINT): 3695 this = self._parse_id_var() 3696 else: 3697 this = None 3698 3699 if self._match_texts(self.CONSTRAINT_PARSERS): 3700 return self.expression( 3701 exp.ColumnConstraint, 3702 this=this, 3703 kind=self.CONSTRAINT_PARSERS[self._prev.text.upper()](self), 3704 ) 3705 3706 return this 3707 3708 def _parse_constraint(self) -> t.Optional[exp.Expression]: 3709 if not self._match(TokenType.CONSTRAINT): 3710 return self._parse_unnamed_constraint(constraints=self.SCHEMA_UNNAMED_CONSTRAINTS) 3711 3712 this = self._parse_id_var() 3713 expressions = [] 3714 3715 while True: 3716 constraint = self._parse_unnamed_constraint() or self._parse_function() 3717 if not constraint: 3718 break 3719 expressions.append(constraint) 3720 3721 return self.expression(exp.Constraint, this=this, expressions=expressions) 3722 3723 def _parse_unnamed_constraint( 3724 self, constraints: t.Optional[t.Collection[str]] = None 3725 ) -> t.Optional[exp.Expression]: 3726 if not self._match_texts(constraints or self.CONSTRAINT_PARSERS): 3727 return None 3728 3729 constraint = self._prev.text.upper() 3730 if constraint not in self.CONSTRAINT_PARSERS: 3731 self.raise_error(f"No parser found for schema constraint {constraint}.") 3732 3733 return self.CONSTRAINT_PARSERS[constraint](self) 3734 3735 def _parse_unique(self) -> exp.UniqueColumnConstraint: 3736 self._match_text_seq("KEY") 3737 return self.expression( 3738 exp.UniqueColumnConstraint, this=self._parse_schema(self._parse_id_var(any_token=False)) 3739 ) 3740 3741 def _parse_key_constraint_options(self) -> t.List[str]: 3742 options = [] 3743 while True: 3744 if not self._curr: 3745 break 3746 3747 if self._match(TokenType.ON): 3748 action = None 3749 on = self._advance_any() and self._prev.text 3750 3751 if self._match_text_seq("NO", "ACTION"): 3752 action = "NO ACTION" 3753 elif self._match_text_seq("CASCADE"): 3754 action = "CASCADE" 3755 elif self._match_pair(TokenType.SET, TokenType.NULL): 3756 action = "SET NULL" 3757 elif self._match_pair(TokenType.SET, TokenType.DEFAULT): 3758 action = "SET DEFAULT" 3759 else: 3760 self.raise_error("Invalid key constraint") 3761 3762 options.append(f"ON {on} {action}") 3763 elif self._match_text_seq("NOT", "ENFORCED"): 3764 options.append("NOT ENFORCED") 3765 elif self._match_text_seq("DEFERRABLE"): 3766 options.append("DEFERRABLE") 3767 elif self._match_text_seq("INITIALLY", "DEFERRED"): 3768 options.append("INITIALLY DEFERRED") 3769 
elif self._match_text_seq("NORELY"): 3770 options.append("NORELY") 3771 elif self._match_text_seq("MATCH", "FULL"): 3772 options.append("MATCH FULL") 3773 else: 3774 break 3775 3776 return options 3777 3778 def _parse_references(self, match: bool = True) -> t.Optional[exp.Reference]: 3779 if match and not self._match(TokenType.REFERENCES): 3780 return None 3781 3782 expressions = None 3783 this = self._parse_table(schema=True) 3784 options = self._parse_key_constraint_options() 3785 return self.expression(exp.Reference, this=this, expressions=expressions, options=options) 3786 3787 def _parse_foreign_key(self) -> exp.ForeignKey: 3788 expressions = self._parse_wrapped_id_vars() 3789 reference = self._parse_references() 3790 options = {} 3791 3792 while self._match(TokenType.ON): 3793 if not self._match_set((TokenType.DELETE, TokenType.UPDATE)): 3794 self.raise_error("Expected DELETE or UPDATE") 3795 3796 kind = self._prev.text.lower() 3797 3798 if self._match_text_seq("NO", "ACTION"): 3799 action = "NO ACTION" 3800 elif self._match(TokenType.SET): 3801 self._match_set((TokenType.NULL, TokenType.DEFAULT)) 3802 action = "SET " + self._prev.text.upper() 3803 else: 3804 self._advance() 3805 action = self._prev.text.upper() 3806 3807 options[kind] = action 3808 3809 return self.expression( 3810 exp.ForeignKey, expressions=expressions, reference=reference, **options # type: ignore 3811 ) 3812 3813 def _parse_primary_key( 3814 self, wrapped_optional: bool = False, in_props: bool = False 3815 ) -> exp.PrimaryKeyColumnConstraint | exp.PrimaryKey: 3816 desc = ( 3817 self._match_set((TokenType.ASC, TokenType.DESC)) 3818 and self._prev.token_type == TokenType.DESC 3819 ) 3820 3821 if not in_props and not self._match(TokenType.L_PAREN, advance=False): 3822 return self.expression(exp.PrimaryKeyColumnConstraint, desc=desc) 3823 3824 expressions = self._parse_wrapped_csv(self._parse_field, optional=wrapped_optional) 3825 options = self._parse_key_constraint_options() 3826 return self.expression(exp.PrimaryKey, expressions=expressions, options=options) 3827 3828 def _parse_bracket(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 3829 if not self._match_set((TokenType.L_BRACKET, TokenType.L_BRACE)): 3830 return this 3831 3832 bracket_kind = self._prev.token_type 3833 3834 if self._match(TokenType.COLON): 3835 expressions: t.List[exp.Expression] = [ 3836 self.expression(exp.Slice, expression=self._parse_conjunction()) 3837 ] 3838 else: 3839 expressions = self._parse_csv( 3840 lambda: self._parse_slice( 3841 self._parse_alias(self._parse_conjunction(), explicit=True) 3842 ) 3843 ) 3844 3845 # https://duckdb.org/docs/sql/data_types/struct.html#creating-structs 3846 if bracket_kind == TokenType.L_BRACE: 3847 this = self.expression(exp.Struct, expressions=expressions) 3848 elif not this or this.name.upper() == "ARRAY": 3849 this = self.expression(exp.Array, expressions=expressions) 3850 else: 3851 expressions = apply_index_offset(this, expressions, -self.INDEX_OFFSET) 3852 this = self.expression(exp.Bracket, this=this, expressions=expressions) 3853 3854 if not self._match(TokenType.R_BRACKET) and bracket_kind == TokenType.L_BRACKET: 3855 self.raise_error("Expected ]") 3856 elif not self._match(TokenType.R_BRACE) and bracket_kind == TokenType.L_BRACE: 3857 self.raise_error("Expected }") 3858 3859 self._add_comments(this) 3860 return self._parse_bracket(this) 3861 3862 def _parse_slice(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 3863 if self._match(TokenType.COLON): 
3864 return self.expression(exp.Slice, this=this, expression=self._parse_conjunction()) 3865 return this 3866 3867 def _parse_case(self) -> t.Optional[exp.Expression]: 3868 ifs = [] 3869 default = None 3870 3871 comments = self._prev_comments 3872 expression = self._parse_conjunction() 3873 3874 while self._match(TokenType.WHEN): 3875 this = self._parse_conjunction() 3876 self._match(TokenType.THEN) 3877 then = self._parse_conjunction() 3878 ifs.append(self.expression(exp.If, this=this, true=then)) 3879 3880 if self._match(TokenType.ELSE): 3881 default = self._parse_conjunction() 3882 3883 if not self._match(TokenType.END): 3884 self.raise_error("Expected END after CASE", self._prev) 3885 3886 return self._parse_window( 3887 self.expression(exp.Case, comments=comments, this=expression, ifs=ifs, default=default) 3888 ) 3889 3890 def _parse_if(self) -> t.Optional[exp.Expression]: 3891 if self._match(TokenType.L_PAREN): 3892 args = self._parse_csv(self._parse_conjunction) 3893 this = self.validate_expression(exp.If.from_arg_list(args), args) 3894 self._match_r_paren() 3895 else: 3896 index = self._index - 1 3897 condition = self._parse_conjunction() 3898 3899 if not condition: 3900 self._retreat(index) 3901 return None 3902 3903 self._match(TokenType.THEN) 3904 true = self._parse_conjunction() 3905 false = self._parse_conjunction() if self._match(TokenType.ELSE) else None 3906 self._match(TokenType.END) 3907 this = self.expression(exp.If, this=condition, true=true, false=false) 3908 3909 return self._parse_window(this) 3910 3911 def _parse_next_value_for(self) -> t.Optional[exp.Expression]: 3912 if not self._match_text_seq("VALUE", "FOR"): 3913 self._retreat(self._index - 1) 3914 return None 3915 3916 return self.expression( 3917 exp.NextValueFor, 3918 this=self._parse_column(), 3919 order=self._match(TokenType.OVER) and self._parse_wrapped(self._parse_order), 3920 ) 3921 3922 def _parse_extract(self) -> exp.Extract: 3923 this = self._parse_function() or self._parse_var() or self._parse_type() 3924 3925 if self._match(TokenType.FROM): 3926 return self.expression(exp.Extract, this=this, expression=self._parse_bitwise()) 3927 3928 if not self._match(TokenType.COMMA): 3929 self.raise_error("Expected FROM or comma after EXTRACT", self._prev) 3930 3931 return self.expression(exp.Extract, this=this, expression=self._parse_bitwise()) 3932 3933 def _parse_any_value(self) -> exp.AnyValue: 3934 this = self._parse_lambda() 3935 is_max = None 3936 having = None 3937 3938 if self._match(TokenType.HAVING): 3939 self._match_texts(("MAX", "MIN")) 3940 is_max = self._prev.text == "MAX" 3941 having = self._parse_column() 3942 3943 return self.expression(exp.AnyValue, this=this, having=having, max=is_max) 3944 3945 def _parse_cast(self, strict: bool) -> exp.Expression: 3946 this = self._parse_conjunction() 3947 3948 if not self._match(TokenType.ALIAS): 3949 if self._match(TokenType.COMMA): 3950 return self.expression(exp.CastToStrType, this=this, to=self._parse_string()) 3951 3952 self.raise_error("Expected AS after CAST") 3953 3954 fmt = None 3955 to = self._parse_types() 3956 3957 if not to: 3958 self.raise_error("Expected TYPE after CAST") 3959 elif isinstance(to, exp.Identifier): 3960 to = exp.DataType.build(to.name, udt=True) 3961 elif to.this == exp.DataType.Type.CHAR: 3962 if self._match(TokenType.CHARACTER_SET): 3963 to = self.expression(exp.CharacterSet, this=self._parse_var_or_string()) 3964 elif self._match(TokenType.FORMAT): 3965 fmt_string = self._parse_string() 3966 fmt = 
self._parse_at_time_zone(fmt_string) 3967 3968 if to.this in exp.DataType.TEMPORAL_TYPES: 3969 this = self.expression( 3970 exp.StrToDate if to.this == exp.DataType.Type.DATE else exp.StrToTime, 3971 this=this, 3972 format=exp.Literal.string( 3973 format_time( 3974 fmt_string.this if fmt_string else "", 3975 self.FORMAT_MAPPING or self.TIME_MAPPING, 3976 self.FORMAT_TRIE or self.TIME_TRIE, 3977 ) 3978 ), 3979 ) 3980 3981 if isinstance(fmt, exp.AtTimeZone) and isinstance(this, exp.StrToTime): 3982 this.set("zone", fmt.args["zone"]) 3983 3984 return this 3985 3986 return self.expression(exp.Cast if strict else exp.TryCast, this=this, to=to, format=fmt) 3987 3988 def _parse_concat(self) -> t.Optional[exp.Expression]: 3989 args = self._parse_csv(self._parse_conjunction) 3990 if self.CONCAT_NULL_OUTPUTS_STRING: 3991 args = [ 3992 exp.func("COALESCE", exp.cast(arg, "text"), exp.Literal.string("")) 3993 for arg in args 3994 if arg 3995 ] 3996 3997 # Some dialects (e.g. Trino) don't allow a single-argument CONCAT call, so when 3998 # we find such a call we replace it with its argument. 3999 if len(args) == 1: 4000 return args[0] 4001 4002 return self.expression( 4003 exp.Concat if self.STRICT_STRING_CONCAT else exp.SafeConcat, expressions=args 4004 ) 4005 4006 def _parse_string_agg(self) -> exp.Expression: 4007 if self._match(TokenType.DISTINCT): 4008 args: t.List[t.Optional[exp.Expression]] = [ 4009 self.expression(exp.Distinct, expressions=[self._parse_conjunction()]) 4010 ] 4011 if self._match(TokenType.COMMA): 4012 args.extend(self._parse_csv(self._parse_conjunction)) 4013 else: 4014 args = self._parse_csv(self._parse_conjunction) # type: ignore 4015 4016 index = self._index 4017 if not self._match(TokenType.R_PAREN) and args: 4018 # postgres: STRING_AGG([DISTINCT] expression, separator [ORDER BY expression1 {ASC | DESC} [, ...]]) 4019 # bigquery: STRING_AGG([DISTINCT] expression [, separator] [ORDER BY key [{ASC | DESC}] [, ... ]] [LIMIT n]) 4020 args[-1] = self._parse_limit(this=self._parse_order(this=args[-1])) 4021 return self.expression(exp.GroupConcat, this=args[0], separator=seq_get(args, 1)) 4022 4023 # Checks if we can parse an order clause: WITHIN GROUP (ORDER BY <order_by_expression_list> [ASC | DESC]). 4024 # This is done "manually", instead of letting _parse_window parse it into an exp.WithinGroup node, so that 4025 # the STRING_AGG call is parsed like in MySQL / SQLite and can thus be transpiled more easily to them. 
4026 if not self._match_text_seq("WITHIN", "GROUP"): 4027 self._retreat(index) 4028 return self.validate_expression(exp.GroupConcat.from_arg_list(args), args) 4029 4030 self._match_l_paren() # The corresponding match_r_paren will be called in parse_function (caller) 4031 order = self._parse_order(this=seq_get(args, 0)) 4032 return self.expression(exp.GroupConcat, this=order, separator=seq_get(args, 1)) 4033 4034 def _parse_convert(self, strict: bool) -> t.Optional[exp.Expression]: 4035 this = self._parse_bitwise() 4036 4037 if self._match(TokenType.USING): 4038 to: t.Optional[exp.Expression] = self.expression( 4039 exp.CharacterSet, this=self._parse_var() 4040 ) 4041 elif self._match(TokenType.COMMA): 4042 to = self._parse_types() 4043 else: 4044 to = None 4045 4046 return self.expression(exp.Cast if strict else exp.TryCast, this=this, to=to) 4047 4048 def _parse_decode(self) -> t.Optional[exp.Decode | exp.Case]: 4049 """ 4050 There are generally two variants of the DECODE function: 4051 4052 - DECODE(bin, charset) 4053 - DECODE(expression, search, result [, search, result] ... [, default]) 4054 4055 The second variant will always be parsed into a CASE expression. Note that NULL 4056 needs special treatment, since we need to explicitly check for it with `IS NULL`, 4057 instead of relying on pattern matching. 4058 """ 4059 args = self._parse_csv(self._parse_conjunction) 4060 4061 if len(args) < 3: 4062 return self.expression(exp.Decode, this=seq_get(args, 0), charset=seq_get(args, 1)) 4063 4064 expression, *expressions = args 4065 if not expression: 4066 return None 4067 4068 ifs = [] 4069 for search, result in zip(expressions[::2], expressions[1::2]): 4070 if not search or not result: 4071 return None 4072 4073 if isinstance(search, exp.Literal): 4074 ifs.append( 4075 exp.If(this=exp.EQ(this=expression.copy(), expression=search), true=result) 4076 ) 4077 elif isinstance(search, exp.Null): 4078 ifs.append( 4079 exp.If(this=exp.Is(this=expression.copy(), expression=exp.Null()), true=result) 4080 ) 4081 else: 4082 cond = exp.or_( 4083 exp.EQ(this=expression.copy(), expression=search), 4084 exp.and_( 4085 exp.Is(this=expression.copy(), expression=exp.Null()), 4086 exp.Is(this=search.copy(), expression=exp.Null()), 4087 copy=False, 4088 ), 4089 copy=False, 4090 ) 4091 ifs.append(exp.If(this=cond, true=result)) 4092 4093 return exp.Case(ifs=ifs, default=expressions[-1] if len(expressions) % 2 == 1 else None) 4094 4095 def _parse_json_key_value(self) -> t.Optional[exp.JSONKeyValue]: 4096 self._match_text_seq("KEY") 4097 key = self._parse_field() 4098 self._match(TokenType.COLON) 4099 self._match_text_seq("VALUE") 4100 value = self._parse_field() 4101 4102 if not key and not value: 4103 return None 4104 return self.expression(exp.JSONKeyValue, this=key, expression=value) 4105 4106 def _parse_json_object(self) -> exp.JSONObject: 4107 star = self._parse_star() 4108 expressions = [star] if star else self._parse_csv(self._parse_json_key_value) 4109 4110 null_handling = None 4111 if self._match_text_seq("NULL", "ON", "NULL"): 4112 null_handling = "NULL ON NULL" 4113 elif self._match_text_seq("ABSENT", "ON", "NULL"): 4114 null_handling = "ABSENT ON NULL" 4115 4116 unique_keys = None 4117 if self._match_text_seq("WITH", "UNIQUE"): 4118 unique_keys = True 4119 elif self._match_text_seq("WITHOUT", "UNIQUE"): 4120 unique_keys = False 4121 4122 self._match_text_seq("KEYS") 4123 4124 return_type = self._match_text_seq("RETURNING") and self._parse_type() 4125 format_json = self._match_text_seq("FORMAT", 
"JSON") 4126 encoding = self._match_text_seq("ENCODING") and self._parse_var() 4127 4128 return self.expression( 4129 exp.JSONObject, 4130 expressions=expressions, 4131 null_handling=null_handling, 4132 unique_keys=unique_keys, 4133 return_type=return_type, 4134 format_json=format_json, 4135 encoding=encoding, 4136 ) 4137 4138 def _parse_logarithm(self) -> exp.Func: 4139 # Default argument order is base, expression 4140 args = self._parse_csv(self._parse_range) 4141 4142 if len(args) > 1: 4143 if not self.LOG_BASE_FIRST: 4144 args.reverse() 4145 return exp.Log.from_arg_list(args) 4146 4147 return self.expression( 4148 exp.Ln if self.LOG_DEFAULTS_TO_LN else exp.Log, this=seq_get(args, 0) 4149 ) 4150 4151 def _parse_match_against(self) -> exp.MatchAgainst: 4152 expressions = self._parse_csv(self._parse_column) 4153 4154 self._match_text_seq(")", "AGAINST", "(") 4155 4156 this = self._parse_string() 4157 4158 if self._match_text_seq("IN", "NATURAL", "LANGUAGE", "MODE"): 4159 modifier = "IN NATURAL LANGUAGE MODE" 4160 if self._match_text_seq("WITH", "QUERY", "EXPANSION"): 4161 modifier = f"{modifier} WITH QUERY EXPANSION" 4162 elif self._match_text_seq("IN", "BOOLEAN", "MODE"): 4163 modifier = "IN BOOLEAN MODE" 4164 elif self._match_text_seq("WITH", "QUERY", "EXPANSION"): 4165 modifier = "WITH QUERY EXPANSION" 4166 else: 4167 modifier = None 4168 4169 return self.expression( 4170 exp.MatchAgainst, this=this, expressions=expressions, modifier=modifier 4171 ) 4172 4173 # https://learn.microsoft.com/en-us/sql/t-sql/functions/openjson-transact-sql?view=sql-server-ver16 4174 def _parse_open_json(self) -> exp.OpenJSON: 4175 this = self._parse_bitwise() 4176 path = self._match(TokenType.COMMA) and self._parse_string() 4177 4178 def _parse_open_json_column_def() -> exp.OpenJSONColumnDef: 4179 this = self._parse_field(any_token=True) 4180 kind = self._parse_types() 4181 path = self._parse_string() 4182 as_json = self._match_pair(TokenType.ALIAS, TokenType.JSON) 4183 4184 return self.expression( 4185 exp.OpenJSONColumnDef, this=this, kind=kind, path=path, as_json=as_json 4186 ) 4187 4188 expressions = None 4189 if self._match_pair(TokenType.R_PAREN, TokenType.WITH): 4190 self._match_l_paren() 4191 expressions = self._parse_csv(_parse_open_json_column_def) 4192 4193 return self.expression(exp.OpenJSON, this=this, path=path, expressions=expressions) 4194 4195 def _parse_position(self, haystack_first: bool = False) -> exp.StrPosition: 4196 args = self._parse_csv(self._parse_bitwise) 4197 4198 if self._match(TokenType.IN): 4199 return self.expression( 4200 exp.StrPosition, this=self._parse_bitwise(), substr=seq_get(args, 0) 4201 ) 4202 4203 if haystack_first: 4204 haystack = seq_get(args, 0) 4205 needle = seq_get(args, 1) 4206 else: 4207 needle = seq_get(args, 0) 4208 haystack = seq_get(args, 1) 4209 4210 return self.expression( 4211 exp.StrPosition, this=haystack, substr=needle, position=seq_get(args, 2) 4212 ) 4213 4214 def _parse_join_hint(self, func_name: str) -> exp.JoinHint: 4215 args = self._parse_csv(self._parse_table) 4216 return exp.JoinHint(this=func_name.upper(), expressions=args) 4217 4218 def _parse_substring(self) -> exp.Substring: 4219 # Postgres supports the form: substring(string [from int] [for int]) 4220 # https://www.postgresql.org/docs/9.1/functions-string.html @ Table 9-6 4221 4222 args = t.cast(t.List[t.Optional[exp.Expression]], self._parse_csv(self._parse_bitwise)) 4223 4224 if self._match(TokenType.FROM): 4225 args.append(self._parse_bitwise()) 4226 if self._match(TokenType.FOR): 
4227 args.append(self._parse_bitwise()) 4228 4229 return self.validate_expression(exp.Substring.from_arg_list(args), args) 4230 4231 def _parse_trim(self) -> exp.Trim: 4232 # https://www.w3resource.com/sql/character-functions/trim.php 4233 # https://docs.oracle.com/javadb/10.8.3.0/ref/rreftrimfunc.html 4234 4235 position = None 4236 collation = None 4237 4238 if self._match_texts(self.TRIM_TYPES): 4239 position = self._prev.text.upper() 4240 4241 expression = self._parse_bitwise() 4242 if self._match_set((TokenType.FROM, TokenType.COMMA)): 4243 this = self._parse_bitwise() 4244 else: 4245 this = expression 4246 expression = None 4247 4248 if self._match(TokenType.COLLATE): 4249 collation = self._parse_bitwise() 4250 4251 return self.expression( 4252 exp.Trim, this=this, position=position, expression=expression, collation=collation 4253 ) 4254 4255 def _parse_window_clause(self) -> t.Optional[t.List[exp.Expression]]: 4256 return self._match(TokenType.WINDOW) and self._parse_csv(self._parse_named_window) 4257 4258 def _parse_named_window(self) -> t.Optional[exp.Expression]: 4259 return self._parse_window(self._parse_id_var(), alias=True) 4260 4261 def _parse_respect_or_ignore_nulls( 4262 self, this: t.Optional[exp.Expression] 4263 ) -> t.Optional[exp.Expression]: 4264 if self._match_text_seq("IGNORE", "NULLS"): 4265 return self.expression(exp.IgnoreNulls, this=this) 4266 if self._match_text_seq("RESPECT", "NULLS"): 4267 return self.expression(exp.RespectNulls, this=this) 4268 return this 4269 4270 def _parse_window( 4271 self, this: t.Optional[exp.Expression], alias: bool = False 4272 ) -> t.Optional[exp.Expression]: 4273 if self._match_pair(TokenType.FILTER, TokenType.L_PAREN): 4274 self._match(TokenType.WHERE) 4275 this = self.expression( 4276 exp.Filter, this=this, expression=self._parse_where(skip_where_token=True) 4277 ) 4278 self._match_r_paren() 4279 4280 # T-SQL allows the OVER (...) syntax after WITHIN GROUP. 4281 # https://learn.microsoft.com/en-us/sql/t-sql/functions/percentile-disc-transact-sql?view=sql-server-ver16 4282 if self._match_text_seq("WITHIN", "GROUP"): 4283 order = self._parse_wrapped(self._parse_order) 4284 this = self.expression(exp.WithinGroup, this=this, expression=order) 4285 4286 # SQL spec defines an optional [ { IGNORE | RESPECT } NULLS ] OVER 4287 # Some dialects choose to implement and some do not. 4288 # https://dev.mysql.com/doc/refman/8.0/en/window-function-descriptions.html 4289 4290 # There is some code above in _parse_lambda that handles 4291 # SELECT FIRST_VALUE(TABLE.COLUMN IGNORE|RESPECT NULLS) OVER ... 4292 4293 # The below changes handle 4294 # SELECT FIRST_VALUE(TABLE.COLUMN) IGNORE|RESPECT NULLS OVER ... 4295 4296 # Oracle allows both formats 4297 # (https://docs.oracle.com/en/database/oracle/oracle-database/19/sqlrf/img_text/first_value.html) 4298 # and Snowflake chose to do the same for familiarity 4299 # https://docs.snowflake.com/en/sql-reference/functions/first_value.html#usage-notes 4300 this = self._parse_respect_or_ignore_nulls(this) 4301 4302 # bigquery select from window x AS (partition by ...) 
4303 if alias: 4304 over = None 4305 self._match(TokenType.ALIAS) 4306 elif not self._match_set(self.WINDOW_BEFORE_PAREN_TOKENS): 4307 return this 4308 else: 4309 over = self._prev.text.upper() 4310 4311 if not self._match(TokenType.L_PAREN): 4312 return self.expression( 4313 exp.Window, this=this, alias=self._parse_id_var(False), over=over 4314 ) 4315 4316 window_alias = self._parse_id_var(any_token=False, tokens=self.WINDOW_ALIAS_TOKENS) 4317 4318 first = self._match(TokenType.FIRST) 4319 if self._match_text_seq("LAST"): 4320 first = False 4321 4322 partition, order = self._parse_partition_and_order() 4323 kind = self._match_set((TokenType.ROWS, TokenType.RANGE)) and self._prev.text 4324 4325 if kind: 4326 self._match(TokenType.BETWEEN) 4327 start = self._parse_window_spec() 4328 self._match(TokenType.AND) 4329 end = self._parse_window_spec() 4330 4331 spec = self.expression( 4332 exp.WindowSpec, 4333 kind=kind, 4334 start=start["value"], 4335 start_side=start["side"], 4336 end=end["value"], 4337 end_side=end["side"], 4338 ) 4339 else: 4340 spec = None 4341 4342 self._match_r_paren() 4343 4344 window = self.expression( 4345 exp.Window, 4346 this=this, 4347 partition_by=partition, 4348 order=order, 4349 spec=spec, 4350 alias=window_alias, 4351 over=over, 4352 first=first, 4353 ) 4354 4355 # This covers Oracle's FIRST/LAST syntax: aggregate KEEP (...) OVER (...) 4356 if self._match_set(self.WINDOW_BEFORE_PAREN_TOKENS, advance=False): 4357 return self._parse_window(window, alias=alias) 4358 4359 return window 4360 4361 def _parse_partition_and_order( 4362 self, 4363 ) -> t.Tuple[t.List[exp.Expression], t.Optional[exp.Expression]]: 4364 return self._parse_partition_by(), self._parse_order() 4365 4366 def _parse_window_spec(self) -> t.Dict[str, t.Optional[str | exp.Expression]]: 4367 self._match(TokenType.BETWEEN) 4368 4369 return { 4370 "value": ( 4371 (self._match_text_seq("UNBOUNDED") and "UNBOUNDED") 4372 or (self._match_text_seq("CURRENT", "ROW") and "CURRENT ROW") 4373 or self._parse_bitwise() 4374 ), 4375 "side": self._match_texts(self.WINDOW_SIDES) and self._prev.text, 4376 } 4377 4378 def _parse_alias( 4379 self, this: t.Optional[exp.Expression], explicit: bool = False 4380 ) -> t.Optional[exp.Expression]: 4381 any_token = self._match(TokenType.ALIAS) 4382 4383 if explicit and not any_token: 4384 return this 4385 4386 if self._match(TokenType.L_PAREN): 4387 aliases = self.expression( 4388 exp.Aliases, 4389 this=this, 4390 expressions=self._parse_csv(lambda: self._parse_id_var(any_token)), 4391 ) 4392 self._match_r_paren(aliases) 4393 return aliases 4394 4395 alias = self._parse_id_var(any_token) 4396 4397 if alias: 4398 return self.expression(exp.Alias, this=this, alias=alias) 4399 4400 return this 4401 4402 def _parse_id_var( 4403 self, 4404 any_token: bool = True, 4405 tokens: t.Optional[t.Collection[TokenType]] = None, 4406 ) -> t.Optional[exp.Expression]: 4407 identifier = self._parse_identifier() 4408 4409 if identifier: 4410 return identifier 4411 4412 if (any_token and self._advance_any()) or self._match_set(tokens or self.ID_VAR_TOKENS): 4413 quoted = self._prev.token_type == TokenType.STRING 4414 return exp.Identifier(this=self._prev.text, quoted=quoted) 4415 4416 return None 4417 4418 def _parse_string(self) -> t.Optional[exp.Expression]: 4419 if self._match(TokenType.STRING): 4420 return self.PRIMARY_PARSERS[TokenType.STRING](self, self._prev) 4421 return self._parse_placeholder() 4422 4423 def _parse_string_as_identifier(self) -> t.Optional[exp.Identifier]: 4424 return 
exp.to_identifier(self._match(TokenType.STRING) and self._prev.text, quoted=True) 4425 4426 def _parse_number(self) -> t.Optional[exp.Expression]: 4427 if self._match(TokenType.NUMBER): 4428 return self.PRIMARY_PARSERS[TokenType.NUMBER](self, self._prev) 4429 return self._parse_placeholder() 4430 4431 def _parse_identifier(self) -> t.Optional[exp.Expression]: 4432 if self._match(TokenType.IDENTIFIER): 4433 return self.expression(exp.Identifier, this=self._prev.text, quoted=True) 4434 return self._parse_placeholder() 4435 4436 def _parse_var( 4437 self, any_token: bool = False, tokens: t.Optional[t.Collection[TokenType]] = None 4438 ) -> t.Optional[exp.Expression]: 4439 if ( 4440 (any_token and self._advance_any()) 4441 or self._match(TokenType.VAR) 4442 or (self._match_set(tokens) if tokens else False) 4443 ): 4444 return self.expression(exp.Var, this=self._prev.text) 4445 return self._parse_placeholder() 4446 4447 def _advance_any(self) -> t.Optional[Token]: 4448 if self._curr and self._curr.token_type not in self.RESERVED_KEYWORDS: 4449 self._advance() 4450 return self._prev 4451 return None 4452 4453 def _parse_var_or_string(self) -> t.Optional[exp.Expression]: 4454 return self._parse_var() or self._parse_string() 4455 4456 def _parse_null(self) -> t.Optional[exp.Expression]: 4457 if self._match(TokenType.NULL): 4458 return self.PRIMARY_PARSERS[TokenType.NULL](self, self._prev) 4459 return self._parse_placeholder() 4460 4461 def _parse_boolean(self) -> t.Optional[exp.Expression]: 4462 if self._match(TokenType.TRUE): 4463 return self.PRIMARY_PARSERS[TokenType.TRUE](self, self._prev) 4464 if self._match(TokenType.FALSE): 4465 return self.PRIMARY_PARSERS[TokenType.FALSE](self, self._prev) 4466 return self._parse_placeholder() 4467 4468 def _parse_star(self) -> t.Optional[exp.Expression]: 4469 if self._match(TokenType.STAR): 4470 return self.PRIMARY_PARSERS[TokenType.STAR](self, self._prev) 4471 return self._parse_placeholder() 4472 4473 def _parse_parameter(self) -> exp.Parameter: 4474 wrapped = self._match(TokenType.L_BRACE) 4475 this = self._parse_var() or self._parse_identifier() or self._parse_primary() 4476 self._match(TokenType.R_BRACE) 4477 return self.expression(exp.Parameter, this=this, wrapped=wrapped) 4478 4479 def _parse_placeholder(self) -> t.Optional[exp.Expression]: 4480 if self._match_set(self.PLACEHOLDER_PARSERS): 4481 placeholder = self.PLACEHOLDER_PARSERS[self._prev.token_type](self) 4482 if placeholder: 4483 return placeholder 4484 self._advance(-1) 4485 return None 4486 4487 def _parse_except(self) -> t.Optional[t.List[exp.Expression]]: 4488 if not self._match(TokenType.EXCEPT): 4489 return None 4490 if self._match(TokenType.L_PAREN, advance=False): 4491 return self._parse_wrapped_csv(self._parse_column) 4492 return self._parse_csv(self._parse_column) 4493 4494 def _parse_replace(self) -> t.Optional[t.List[exp.Expression]]: 4495 if not self._match(TokenType.REPLACE): 4496 return None 4497 if self._match(TokenType.L_PAREN, advance=False): 4498 return self._parse_wrapped_csv(self._parse_expression) 4499 return self._parse_expressions() 4500 4501 def _parse_csv( 4502 self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA 4503 ) -> t.List[exp.Expression]: 4504 parse_result = parse_method() 4505 items = [parse_result] if parse_result is not None else [] 4506 4507 while self._match(sep): 4508 self._add_comments(parse_result) 4509 parse_result = parse_method() 4510 if parse_result is not None: 4511 items.append(parse_result) 4512 4513 return items 4514 4515 def 
_parse_tokens( 4516 self, parse_method: t.Callable, expressions: t.Dict 4517 ) -> t.Optional[exp.Expression]: 4518 this = parse_method() 4519 4520 while self._match_set(expressions): 4521 this = self.expression( 4522 expressions[self._prev.token_type], 4523 this=this, 4524 comments=self._prev_comments, 4525 expression=parse_method(), 4526 ) 4527 4528 return this 4529 4530 def _parse_wrapped_id_vars(self, optional: bool = False) -> t.List[exp.Expression]: 4531 return self._parse_wrapped_csv(self._parse_id_var, optional=optional) 4532 4533 def _parse_wrapped_csv( 4534 self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA, optional: bool = False 4535 ) -> t.List[exp.Expression]: 4536 return self._parse_wrapped( 4537 lambda: self._parse_csv(parse_method, sep=sep), optional=optional 4538 ) 4539 4540 def _parse_wrapped(self, parse_method: t.Callable, optional: bool = False) -> t.Any: 4541 wrapped = self._match(TokenType.L_PAREN) 4542 if not wrapped and not optional: 4543 self.raise_error("Expecting (") 4544 parse_result = parse_method() 4545 if wrapped: 4546 self._match_r_paren() 4547 return parse_result 4548 4549 def _parse_expressions(self) -> t.List[exp.Expression]: 4550 return self._parse_csv(self._parse_expression) 4551 4552 def _parse_select_or_expression(self, alias: bool = False) -> t.Optional[exp.Expression]: 4553 return self._parse_select() or self._parse_set_operations( 4554 self._parse_expression() if alias else self._parse_conjunction() 4555 ) 4556 4557 def _parse_ddl_select(self) -> t.Optional[exp.Expression]: 4558 return self._parse_query_modifiers( 4559 self._parse_set_operations(self._parse_select(nested=True, parse_subquery_alias=False)) 4560 ) 4561 4562 def _parse_transaction(self) -> exp.Transaction | exp.Command: 4563 this = None 4564 if self._match_texts(self.TRANSACTION_KIND): 4565 this = self._prev.text 4566 4567 self._match_texts({"TRANSACTION", "WORK"}) 4568 4569 modes = [] 4570 while True: 4571 mode = [] 4572 while self._match(TokenType.VAR): 4573 mode.append(self._prev.text) 4574 4575 if mode: 4576 modes.append(" ".join(mode)) 4577 if not self._match(TokenType.COMMA): 4578 break 4579 4580 return self.expression(exp.Transaction, this=this, modes=modes) 4581 4582 def _parse_commit_or_rollback(self) -> exp.Commit | exp.Rollback: 4583 chain = None 4584 savepoint = None 4585 is_rollback = self._prev.token_type == TokenType.ROLLBACK 4586 4587 self._match_texts({"TRANSACTION", "WORK"}) 4588 4589 if self._match_text_seq("TO"): 4590 self._match_text_seq("SAVEPOINT") 4591 savepoint = self._parse_id_var() 4592 4593 if self._match(TokenType.AND): 4594 chain = not self._match_text_seq("NO") 4595 self._match_text_seq("CHAIN") 4596 4597 if is_rollback: 4598 return self.expression(exp.Rollback, savepoint=savepoint) 4599 4600 return self.expression(exp.Commit, chain=chain) 4601 4602 def _parse_add_column(self) -> t.Optional[exp.Expression]: 4603 if not self._match_text_seq("ADD"): 4604 return None 4605 4606 self._match(TokenType.COLUMN) 4607 exists_column = self._parse_exists(not_=True) 4608 expression = self._parse_field_def() 4609 4610 if expression: 4611 expression.set("exists", exists_column) 4612 4613 # https://docs.databricks.com/delta/update-schema.html#explicitly-update-schema-to-add-columns 4614 if self._match_texts(("FIRST", "AFTER")): 4615 position = self._prev.text 4616 column_position = self.expression( 4617 exp.ColumnPosition, this=self._parse_column(), position=position 4618 ) 4619 expression.set("position", column_position) 4620 4621 return expression 4622 
4623 def _parse_drop_column(self) -> t.Optional[exp.Drop | exp.Command]: 4624 drop = self._match(TokenType.DROP) and self._parse_drop() 4625 if drop and not isinstance(drop, exp.Command): 4626 drop.set("kind", drop.args.get("kind", "COLUMN")) 4627 return drop 4628 4629 # https://docs.aws.amazon.com/athena/latest/ug/alter-table-drop-partition.html 4630 def _parse_drop_partition(self, exists: t.Optional[bool] = None) -> exp.DropPartition: 4631 return self.expression( 4632 exp.DropPartition, expressions=self._parse_csv(self._parse_partition), exists=exists 4633 ) 4634 4635 def _parse_add_constraint(self) -> exp.AddConstraint: 4636 this = None 4637 kind = self._prev.token_type 4638 4639 if kind == TokenType.CONSTRAINT: 4640 this = self._parse_id_var() 4641 4642 if self._match_text_seq("CHECK"): 4643 expression = self._parse_wrapped(self._parse_conjunction) 4644 enforced = self._match_text_seq("ENFORCED") 4645 4646 return self.expression( 4647 exp.AddConstraint, this=this, expression=expression, enforced=enforced 4648 ) 4649 4650 if kind == TokenType.FOREIGN_KEY or self._match(TokenType.FOREIGN_KEY): 4651 expression = self._parse_foreign_key() 4652 elif kind == TokenType.PRIMARY_KEY or self._match(TokenType.PRIMARY_KEY): 4653 expression = self._parse_primary_key() 4654 else: 4655 expression = None 4656 4657 return self.expression(exp.AddConstraint, this=this, expression=expression) 4658 4659 def _parse_alter_table_add(self) -> t.List[exp.Expression]: 4660 index = self._index - 1 4661 4662 if self._match_set(self.ADD_CONSTRAINT_TOKENS): 4663 return self._parse_csv(self._parse_add_constraint) 4664 4665 self._retreat(index) 4666 return self._parse_csv(self._parse_add_column) 4667 4668 def _parse_alter_table_alter(self) -> exp.AlterColumn: 4669 self._match(TokenType.COLUMN) 4670 column = self._parse_field(any_token=True) 4671 4672 if self._match_pair(TokenType.DROP, TokenType.DEFAULT): 4673 return self.expression(exp.AlterColumn, this=column, drop=True) 4674 if self._match_pair(TokenType.SET, TokenType.DEFAULT): 4675 return self.expression(exp.AlterColumn, this=column, default=self._parse_conjunction()) 4676 4677 self._match_text_seq("SET", "DATA") 4678 return self.expression( 4679 exp.AlterColumn, 4680 this=column, 4681 dtype=self._match_text_seq("TYPE") and self._parse_types(), 4682 collate=self._match(TokenType.COLLATE) and self._parse_term(), 4683 using=self._match(TokenType.USING) and self._parse_conjunction(), 4684 ) 4685 4686 def _parse_alter_table_drop(self) -> t.List[exp.Expression]: 4687 index = self._index - 1 4688 4689 partition_exists = self._parse_exists() 4690 if self._match(TokenType.PARTITION, advance=False): 4691 return self._parse_csv(lambda: self._parse_drop_partition(exists=partition_exists)) 4692 4693 self._retreat(index) 4694 return self._parse_csv(self._parse_drop_column) 4695 4696 def _parse_alter_table_rename(self) -> exp.RenameTable: 4697 self._match_text_seq("TO") 4698 return self.expression(exp.RenameTable, this=self._parse_table(schema=True)) 4699 4700 def _parse_alter(self) -> exp.AlterTable | exp.Command: 4701 start = self._prev 4702 4703 if not self._match(TokenType.TABLE): 4704 return self._parse_as_command(start) 4705 4706 exists = self._parse_exists() 4707 this = self._parse_table(schema=True) 4708 4709 if self._next: 4710 self._advance() 4711 4712 parser = self.ALTER_PARSERS.get(self._prev.text.upper()) if self._prev else None 4713 if parser: 4714 actions = ensure_list(parser(self)) 4715 4716 if not self._curr: 4717 return self.expression( 4718 exp.AlterTable, 
4719 this=this, 4720 exists=exists, 4721 actions=actions, 4722 ) 4723 return self._parse_as_command(start) 4724 4725 def _parse_merge(self) -> exp.Merge: 4726 self._match(TokenType.INTO) 4727 target = self._parse_table() 4728 4729 if target and self._match(TokenType.ALIAS, advance=False): 4730 target.set("alias", self._parse_table_alias()) 4731 4732 self._match(TokenType.USING) 4733 using = self._parse_table() 4734 4735 self._match(TokenType.ON) 4736 on = self._parse_conjunction() 4737 4738 whens = [] 4739 while self._match(TokenType.WHEN): 4740 matched = not self._match(TokenType.NOT) 4741 self._match_text_seq("MATCHED") 4742 source = ( 4743 False 4744 if self._match_text_seq("BY", "TARGET") 4745 else self._match_text_seq("BY", "SOURCE") 4746 ) 4747 condition = self._parse_conjunction() if self._match(TokenType.AND) else None 4748 4749 self._match(TokenType.THEN) 4750 4751 if self._match(TokenType.INSERT): 4752 _this = self._parse_star() 4753 if _this: 4754 then: t.Optional[exp.Expression] = self.expression(exp.Insert, this=_this) 4755 else: 4756 then = self.expression( 4757 exp.Insert, 4758 this=self._parse_value(), 4759 expression=self._match(TokenType.VALUES) and self._parse_value(), 4760 ) 4761 elif self._match(TokenType.UPDATE): 4762 expressions = self._parse_star() 4763 if expressions: 4764 then = self.expression(exp.Update, expressions=expressions) 4765 else: 4766 then = self.expression( 4767 exp.Update, 4768 expressions=self._match(TokenType.SET) 4769 and self._parse_csv(self._parse_equality), 4770 ) 4771 elif self._match(TokenType.DELETE): 4772 then = self.expression(exp.Var, this=self._prev.text) 4773 else: 4774 then = None 4775 4776 whens.append( 4777 self.expression( 4778 exp.When, 4779 matched=matched, 4780 source=source, 4781 condition=condition, 4782 then=then, 4783 ) 4784 ) 4785 4786 return self.expression( 4787 exp.Merge, 4788 this=target, 4789 using=using, 4790 on=on, 4791 expressions=whens, 4792 ) 4793 4794 def _parse_show(self) -> t.Optional[exp.Expression]: 4795 parser = self._find_parser(self.SHOW_PARSERS, self.SHOW_TRIE) 4796 if parser: 4797 return parser(self) 4798 self._advance() 4799 return self.expression(exp.Show, this=self._prev.text.upper()) 4800 4801 def _parse_set_item_assignment( 4802 self, kind: t.Optional[str] = None 4803 ) -> t.Optional[exp.Expression]: 4804 index = self._index 4805 4806 if kind in {"GLOBAL", "SESSION"} and self._match_text_seq("TRANSACTION"): 4807 return self._parse_set_transaction(global_=kind == "GLOBAL") 4808 4809 left = self._parse_primary() or self._parse_id_var() 4810 4811 if not self._match_texts(("=", "TO")): 4812 self._retreat(index) 4813 return None 4814 4815 right = self._parse_statement() or self._parse_id_var() 4816 this = self.expression(exp.EQ, this=left, expression=right) 4817 4818 return self.expression(exp.SetItem, this=this, kind=kind) 4819 4820 def _parse_set_transaction(self, global_: bool = False) -> exp.Expression: 4821 self._match_text_seq("TRANSACTION") 4822 characteristics = self._parse_csv( 4823 lambda: self._parse_var_from_options(self.TRANSACTION_CHARACTERISTICS) 4824 ) 4825 return self.expression( 4826 exp.SetItem, 4827 expressions=characteristics, 4828 kind="TRANSACTION", 4829 **{"global": global_}, # type: ignore 4830 ) 4831 4832 def _parse_set_item(self) -> t.Optional[exp.Expression]: 4833 parser = self._find_parser(self.SET_PARSERS, self.SET_TRIE) 4834 return parser(self) if parser else self._parse_set_item_assignment(kind=None) 4835 4836 def _parse_set(self, unset: bool = False, tag: bool = False) -> 
exp.Set | exp.Command: 4837 index = self._index 4838 set_ = self.expression( 4839 exp.Set, expressions=self._parse_csv(self._parse_set_item), unset=unset, tag=tag 4840 ) 4841 4842 if self._curr: 4843 self._retreat(index) 4844 return self._parse_as_command(self._prev) 4845 4846 return set_ 4847 4848 def _parse_var_from_options(self, options: t.Collection[str]) -> t.Optional[exp.Var]: 4849 for option in options: 4850 if self._match_text_seq(*option.split(" ")): 4851 return exp.var(option) 4852 return None 4853 4854 def _parse_as_command(self, start: Token) -> exp.Command: 4855 while self._curr: 4856 self._advance() 4857 text = self._find_sql(start, self._prev) 4858 size = len(start.text) 4859 return exp.Command(this=text[:size], expression=text[size:]) 4860 4861 def _parse_dict_property(self, this: str) -> exp.DictProperty: 4862 settings = [] 4863 4864 self._match_l_paren() 4865 kind = self._parse_id_var() 4866 4867 if self._match(TokenType.L_PAREN): 4868 while True: 4869 key = self._parse_id_var() 4870 value = self._parse_primary() 4871 4872 if not key and value is None: 4873 break 4874 settings.append(self.expression(exp.DictSubProperty, this=key, value=value)) 4875 self._match(TokenType.R_PAREN) 4876 4877 self._match_r_paren() 4878 4879 return self.expression( 4880 exp.DictProperty, 4881 this=this, 4882 kind=kind.this if kind else None, 4883 settings=settings, 4884 ) 4885 4886 def _parse_dict_range(self, this: str) -> exp.DictRange: 4887 self._match_l_paren() 4888 has_min = self._match_text_seq("MIN") 4889 if has_min: 4890 min = self._parse_var() or self._parse_primary() 4891 self._match_text_seq("MAX") 4892 max = self._parse_var() or self._parse_primary() 4893 else: 4894 max = self._parse_var() or self._parse_primary() 4895 min = exp.Literal.number(0) 4896 self._match_r_paren() 4897 return self.expression(exp.DictRange, this=this, min=min, max=max) 4898 4899 def _find_parser( 4900 self, parsers: t.Dict[str, t.Callable], trie: t.Dict 4901 ) -> t.Optional[t.Callable]: 4902 if not self._curr: 4903 return None 4904 4905 index = self._index 4906 this = [] 4907 while True: 4908 # The current token might be multiple words 4909 curr = self._curr.text.upper() 4910 key = curr.split(" ") 4911 this.append(curr) 4912 4913 self._advance() 4914 result, trie = in_trie(trie, key) 4915 if result == TrieResult.FAILED: 4916 break 4917 4918 if result == TrieResult.EXISTS: 4919 subparser = parsers[" ".join(this)] 4920 return subparser 4921 4922 self._retreat(index) 4923 return None 4924 4925 def _match(self, token_type, advance=True, expression=None): 4926 if not self._curr: 4927 return None 4928 4929 if self._curr.token_type == token_type: 4930 if advance: 4931 self._advance() 4932 self._add_comments(expression) 4933 return True 4934 4935 return None 4936 4937 def _match_set(self, types, advance=True): 4938 if not self._curr: 4939 return None 4940 4941 if self._curr.token_type in types: 4942 if advance: 4943 self._advance() 4944 return True 4945 4946 return None 4947 4948 def _match_pair(self, token_type_a, token_type_b, advance=True): 4949 if not self._curr or not self._next: 4950 return None 4951 4952 if self._curr.token_type == token_type_a and self._next.token_type == token_type_b: 4953 if advance: 4954 self._advance(2) 4955 return True 4956 4957 return None 4958 4959 def _match_l_paren(self, expression: t.Optional[exp.Expression] = None) -> None: 4960 if not self._match(TokenType.L_PAREN, expression=expression): 4961 self.raise_error("Expecting (") 4962 4963 def _match_r_paren(self, expression: 
t.Optional[exp.Expression] = None) -> None: 4964 if not self._match(TokenType.R_PAREN, expression=expression): 4965 self.raise_error("Expecting )") 4966 4967 def _match_texts(self, texts, advance=True): 4968 if self._curr and self._curr.text.upper() in texts: 4969 if advance: 4970 self._advance() 4971 return True 4972 return False 4973 4974 def _match_text_seq(self, *texts, advance=True): 4975 index = self._index 4976 for text in texts: 4977 if self._curr and self._curr.text.upper() == text: 4978 self._advance() 4979 else: 4980 self._retreat(index) 4981 return False 4982 4983 if not advance: 4984 self._retreat(index) 4985 4986 return True 4987 4988 @t.overload 4989 def _replace_columns_with_dots(self, this: exp.Expression) -> exp.Expression: 4990 ... 4991 4992 @t.overload 4993 def _replace_columns_with_dots( 4994 self, this: t.Optional[exp.Expression] 4995 ) -> t.Optional[exp.Expression]: 4996 ... 4997 4998 def _replace_columns_with_dots(self, this): 4999 if isinstance(this, exp.Dot): 5000 exp.replace_children(this, self._replace_columns_with_dots) 5001 elif isinstance(this, exp.Column): 5002 exp.replace_children(this, self._replace_columns_with_dots) 5003 table = this.args.get("table") 5004 this = ( 5005 self.expression(exp.Dot, this=table, expression=this.this) if table else this.this 5006 ) 5007 5008 return this 5009 5010 def _replace_lambda( 5011 self, node: t.Optional[exp.Expression], lambda_variables: t.Set[str] 5012 ) -> t.Optional[exp.Expression]: 5013 if not node: 5014 return node 5015 5016 for column in node.find_all(exp.Column): 5017 if column.parts[0].name in lambda_variables: 5018 dot_or_id = column.to_dot() if column.table else column.this 5019 parent = column.parent 5020 5021 while isinstance(parent, exp.Dot): 5022 if not isinstance(parent.parent, exp.Dot): 5023 parent.replace(dot_or_id) 5024 break 5025 parent = parent.parent 5026 else: 5027 if column is node: 5028 node = dot_or_id 5029 else: 5030 column.replace(dot_or_id) 5031 return node
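Taken together, the methods above implement a single recursive-descent pass over the token stream produced by the Tokenizer. The short sketches below exercise that machinery through the public sqlglot API; they are illustrative examples written against the behavior documented above, not part of the module source.

from sqlglot.parser import Parser
from sqlglot.tokens import Tokenizer

sql = "SELECT CAST(price AS TEXT) AS p FROM items WHERE id = 1"

# The tokenizer produces the token list that Parser.parse consumes; parse
# returns one (optional) expression tree per semicolon-separated statement.
# sqlglot.parse_one wraps this same tokenize-then-parse flow.
tokens = Tokenizer().tokenize(sql)
ast = Parser().parse(tokens)[0]

print(repr(ast))  # nested exp.* nodes: exp.Select, exp.Cast, exp.Where, ...
print(ast.sql())  # renders SQL text back from the tree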
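_parse_primary is also where SQL's adjacent-string-literal syntax is handled: after the first STRING token it keeps consuming STRING tokens and folds them into one node. A minimal sketch, assuming the default dialect:

import sqlglot
from sqlglot import exp

# Adjacent string literals are collected into a single exp.Concat instead of
# being left as separate expressions.
tree = sqlglot.parse_one("SELECT 'foo' 'bar' FROM t")
assert tree.find(exp.Concat) is not None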
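_parse_function resolves the uppercased name against FUNCTION_PARSERS and the FUNCTIONS registry, and anything unrecognized is preserved as exp.Anonymous rather than rejected. A sketch (MY_UDF is a hypothetical, unregistered name):

import sqlglot
from sqlglot import exp

# A registered name parses into its typed node ...
known = sqlglot.parse_one("SELECT SUBSTRING(x, 1, 10) FROM t")
assert known.find(exp.Substring) is not None

# ... while an unknown one falls back to exp.Anonymous, keeping the original
# name and argument list intact.
anon = sqlglot.parse_one("SELECT MY_UDF(x, 1) FROM t").find(exp.Anonymous)
assert anon is not None and anon.this == "MY_UDF"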
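_parse_bracket decides between a subscript and an ARRAY constructor by inspecting the expression on its left, and apply_index_offset shifts subscript indices by the dialect's INDEX_OFFSET so 0-based and 1-based dialects can be transpiled between. A sketch (the ARRAY form is parsed with the postgres dialect, which supports that constructor):

import sqlglot
from sqlglot import exp

# items[1] is a subscript, so it becomes exp.Bracket ...
assert sqlglot.parse_one("SELECT items[1] FROM t").find(exp.Bracket) is not None

# ... while ARRAY[1, 2] hits the name-is-ARRAY branch and becomes exp.Array.
assert sqlglot.parse_one("SELECT ARRAY[1, 2]", read="postgres").find(exp.Array) is not None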
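_parse_cast backs both CAST and TRY_CAST; the strict flag chosen by the caller decides whether an exp.Cast or an exp.TryCast node is built. A sketch:

import sqlglot
from sqlglot import exp

assert sqlglot.parse_one("SELECT CAST(x AS INT) FROM t").find(exp.Cast) is not None
assert sqlglot.parse_one("SELECT TRY_CAST(x AS INT) FROM t").find(exp.TryCast) is not None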
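The _parse_decode docstring above spells out the rewrite: in dialects that route DECODE through it (Oracle is one), the search/result form comes back as a searched CASE, with NULL searches turned into IS NULL tests. A sketch under that assumption:

import sqlglot
from sqlglot import exp

tree = sqlglot.parse_one(
    "SELECT DECODE(status, 1, 'new', NULL, 'unknown', 'other') FROM t",
    read="oracle",
)

# The multi-argument variant parses straight into exp.Case rather than into a
# DECODE function node.
case = tree.find(exp.Case)
assert case is not None
print(case.sql())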
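_parse_window stitches FILTER, WITHIN GROUP, IGNORE/RESPECT NULLS, and the OVER clause around whatever function call precedes them, with the frame parsed into an exp.WindowSpec by _parse_window_spec. A sketch:

import sqlglot
from sqlglot import exp

sql = (
    "SELECT FIRST_VALUE(x) IGNORE NULLS OVER "
    "(PARTITION BY g ORDER BY ts ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW) "
    "FROM t"
)
window = sqlglot.parse_one(sql).find(exp.Window)
assert window is not None
print(repr(window.args["spec"]))  # kind='ROWS' plus start/end values and sides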
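_parse_alter dispatches on the keyword following the table name via ALTER_PARSERS and collects the results into AlterTable actions; if any tokens are left unconsumed it falls back to a raw exp.Command. A sketch:

import sqlglot
from sqlglot import exp

alter = sqlglot.parse_one("ALTER TABLE t ADD COLUMN c INT")
assert isinstance(alter, exp.AlterTable)

# Each parsed action is itself an expression; ADD COLUMN yields exp.ColumnDef.
assert isinstance(alter.args["actions"][0], exp.ColumnDef)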
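_parse_merge builds one exp.When per WHEN branch, storing the THEN action as an exp.Insert, an exp.Update, or a bare DELETE var. A sketch:

import sqlglot
from sqlglot import exp

merge = sqlglot.parse_one(
    "MERGE INTO target AS t USING source AS s ON t.id = s.id "
    "WHEN MATCHED THEN UPDATE SET t.v = s.v "
    "WHEN NOT MATCHED THEN INSERT (id, v) VALUES (s.id, s.v)"
)
assert isinstance(merge, exp.Merge)

# merge.expressions holds the exp.When branches in source order.
assert all(isinstance(when, exp.When) for when in merge.expressions)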
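_parse_set is deliberately forgiving: when every SET item parses cleanly it returns exp.Set, and when tokens remain it retreats and wraps the whole statement as an opaque exp.Command instead of raising. A sketch of the clean path:

import sqlglot
from sqlglot import exp

stmt = sqlglot.parse_one("SET x = 1")
assert isinstance(stmt, exp.Set)

# Each assignment is an exp.SetItem wrapping an exp.EQ of name and value.
item = stmt.expressions[0]
assert isinstance(item, exp.SetItem) and isinstance(item.this, exp.EQ)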
TokenType.USERDEFINED, 194 TokenType.MONEY, 195 TokenType.SMALLMONEY, 196 TokenType.ROWVERSION, 197 TokenType.IMAGE, 198 TokenType.VARIANT, 199 TokenType.OBJECT, 200 TokenType.INET, 201 TokenType.IPADDRESS, 202 TokenType.IPPREFIX, 203 TokenType.UNKNOWN, 204 TokenType.NULL, 205 *ENUM_TYPE_TOKENS, 206 *NESTED_TYPE_TOKENS, 207 } 208 209 SUBQUERY_PREDICATES = { 210 TokenType.ANY: exp.Any, 211 TokenType.ALL: exp.All, 212 TokenType.EXISTS: exp.Exists, 213 TokenType.SOME: exp.Any, 214 } 215 216 RESERVED_KEYWORDS = { 217 *Tokenizer.SINGLE_TOKENS.values(), 218 TokenType.SELECT, 219 } 220 221 DB_CREATABLES = { 222 TokenType.DATABASE, 223 TokenType.SCHEMA, 224 TokenType.TABLE, 225 TokenType.VIEW, 226 TokenType.DICTIONARY, 227 } 228 229 CREATABLES = { 230 TokenType.COLUMN, 231 TokenType.FUNCTION, 232 TokenType.INDEX, 233 TokenType.PROCEDURE, 234 *DB_CREATABLES, 235 } 236 237 # Tokens that can represent identifiers 238 ID_VAR_TOKENS = { 239 TokenType.VAR, 240 TokenType.ANTI, 241 TokenType.APPLY, 242 TokenType.ASC, 243 TokenType.AUTO_INCREMENT, 244 TokenType.BEGIN, 245 TokenType.CACHE, 246 TokenType.CASE, 247 TokenType.COLLATE, 248 TokenType.COMMAND, 249 TokenType.COMMENT, 250 TokenType.COMMIT, 251 TokenType.CONSTRAINT, 252 TokenType.DEFAULT, 253 TokenType.DELETE, 254 TokenType.DESC, 255 TokenType.DESCRIBE, 256 TokenType.DICTIONARY, 257 TokenType.DIV, 258 TokenType.END, 259 TokenType.EXECUTE, 260 TokenType.ESCAPE, 261 TokenType.FALSE, 262 TokenType.FIRST, 263 TokenType.FILTER, 264 TokenType.FORMAT, 265 TokenType.FULL, 266 TokenType.IS, 267 TokenType.ISNULL, 268 TokenType.INTERVAL, 269 TokenType.KEEP, 270 TokenType.LEFT, 271 TokenType.LOAD, 272 TokenType.MERGE, 273 TokenType.NATURAL, 274 TokenType.NEXT, 275 TokenType.OFFSET, 276 TokenType.ORDINALITY, 277 TokenType.OVERWRITE, 278 TokenType.PARTITION, 279 TokenType.PERCENT, 280 TokenType.PIVOT, 281 TokenType.PRAGMA, 282 TokenType.RANGE, 283 TokenType.REFERENCES, 284 TokenType.RIGHT, 285 TokenType.ROW, 286 TokenType.ROWS, 287 TokenType.SEMI, 288 TokenType.SET, 289 TokenType.SETTINGS, 290 TokenType.SHOW, 291 TokenType.TEMPORARY, 292 TokenType.TOP, 293 TokenType.TRUE, 294 TokenType.UNIQUE, 295 TokenType.UNPIVOT, 296 TokenType.UPDATE, 297 TokenType.VOLATILE, 298 TokenType.WINDOW, 299 *CREATABLES, 300 *SUBQUERY_PREDICATES, 301 *TYPE_TOKENS, 302 *NO_PAREN_FUNCTIONS, 303 } 304 305 INTERVAL_VARS = ID_VAR_TOKENS - {TokenType.END} 306 307 TABLE_ALIAS_TOKENS = ID_VAR_TOKENS - { 308 TokenType.APPLY, 309 TokenType.ASOF, 310 TokenType.FULL, 311 TokenType.LEFT, 312 TokenType.LOCK, 313 TokenType.NATURAL, 314 TokenType.OFFSET, 315 TokenType.RIGHT, 316 TokenType.WINDOW, 317 } 318 319 COMMENT_TABLE_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - {TokenType.IS} 320 321 UPDATE_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - {TokenType.SET} 322 323 TRIM_TYPES = {"LEADING", "TRAILING", "BOTH"} 324 325 FUNC_TOKENS = { 326 TokenType.COMMAND, 327 TokenType.CURRENT_DATE, 328 TokenType.CURRENT_DATETIME, 329 TokenType.CURRENT_TIMESTAMP, 330 TokenType.CURRENT_TIME, 331 TokenType.CURRENT_USER, 332 TokenType.FILTER, 333 TokenType.FIRST, 334 TokenType.FORMAT, 335 TokenType.GLOB, 336 TokenType.IDENTIFIER, 337 TokenType.INDEX, 338 TokenType.ISNULL, 339 TokenType.ILIKE, 340 TokenType.INSERT, 341 TokenType.LIKE, 342 TokenType.MERGE, 343 TokenType.OFFSET, 344 TokenType.PRIMARY_KEY, 345 TokenType.RANGE, 346 TokenType.REPLACE, 347 TokenType.RLIKE, 348 TokenType.ROW, 349 TokenType.UNNEST, 350 TokenType.VAR, 351 TokenType.LEFT, 352 TokenType.RIGHT, 353 TokenType.DATE, 354 TokenType.DATETIME, 355 TokenType.TABLE, 356 

    CONJUNCTION = {
        TokenType.AND: exp.And,
        TokenType.OR: exp.Or,
    }

    EQUALITY = {
        TokenType.EQ: exp.EQ,
        TokenType.NEQ: exp.NEQ,
        TokenType.NULLSAFE_EQ: exp.NullSafeEQ,
    }

    COMPARISON = {
        TokenType.GT: exp.GT,
        TokenType.GTE: exp.GTE,
        TokenType.LT: exp.LT,
        TokenType.LTE: exp.LTE,
    }

    BITWISE = {
        TokenType.AMP: exp.BitwiseAnd,
        TokenType.CARET: exp.BitwiseXor,
        TokenType.PIPE: exp.BitwiseOr,
        TokenType.DPIPE: exp.DPipe,
    }

    TERM = {
        TokenType.DASH: exp.Sub,
        TokenType.PLUS: exp.Add,
        TokenType.MOD: exp.Mod,
        TokenType.COLLATE: exp.Collate,
    }

    FACTOR = {
        TokenType.DIV: exp.IntDiv,
        TokenType.LR_ARROW: exp.Distance,
        TokenType.SLASH: exp.Div,
        TokenType.STAR: exp.Mul,
    }

    TIMES = {
        TokenType.TIME,
        TokenType.TIMETZ,
    }

    TIMESTAMPS = {
        TokenType.TIMESTAMP,
        TokenType.TIMESTAMPTZ,
        TokenType.TIMESTAMPLTZ,
        *TIMES,
    }

    SET_OPERATIONS = {
        TokenType.UNION,
        TokenType.INTERSECT,
        TokenType.EXCEPT,
    }

    JOIN_METHODS = {
        TokenType.NATURAL,
        TokenType.ASOF,
    }

    JOIN_SIDES = {
        TokenType.LEFT,
        TokenType.RIGHT,
        TokenType.FULL,
    }

    JOIN_KINDS = {
        TokenType.INNER,
        TokenType.OUTER,
        TokenType.CROSS,
        TokenType.SEMI,
        TokenType.ANTI,
    }

    JOIN_HINTS: t.Set[str] = set()

    LAMBDAS = {
        TokenType.ARROW: lambda self, expressions: self.expression(
            exp.Lambda,
            this=self._replace_lambda(
                self._parse_conjunction(),
                {node.name for node in expressions},
            ),
            expressions=expressions,
        ),
        TokenType.FARROW: lambda self, expressions: self.expression(
            exp.Kwarg,
            this=exp.var(expressions[0].name),
            expression=self._parse_conjunction(),
        ),
    }

    COLUMN_OPERATORS = {
        TokenType.DOT: None,
        TokenType.DCOLON: lambda self, this, to: self.expression(
            exp.Cast if self.STRICT_CAST else exp.TryCast, this=this, to=to
        ),
        TokenType.ARROW: lambda self, this, path: self.expression(
            exp.JSONExtract, this=this, expression=path
        ),
        TokenType.DARROW: lambda self, this, path: self.expression(
            exp.JSONExtractScalar, this=this, expression=path
        ),
        TokenType.HASH_ARROW: lambda self, this, path: self.expression(
            exp.JSONBExtract, this=this, expression=path
        ),
        TokenType.DHASH_ARROW: lambda self, this, path: self.expression(
            exp.JSONBExtractScalar, this=this, expression=path
        ),
        TokenType.PLACEHOLDER: lambda self, this, key: self.expression(
            exp.JSONBContains, this=this, expression=key
        ),
    }
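
    # COLUMN_OPERATORS sketch (illustrative): these entries drive postfix
    # operators applied to a column, e.g. "x::INT" flows through the DCOLON
    # entry and becomes a Cast (or TryCast when STRICT_CAST is False), while
    # "x -> path" becomes a JSONExtract:
    #
    #     >>> import sqlglot
    #     >>> from sqlglot import exp
    #     >>> sqlglot.parse_one("SELECT x::INT").find(exp.Cast).sql()
    #     'CAST(x AS INT)'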

    EXPRESSION_PARSERS = {
        exp.Cluster: lambda self: self._parse_sort(exp.Cluster, TokenType.CLUSTER_BY),
        exp.Column: lambda self: self._parse_column(),
        exp.Condition: lambda self: self._parse_conjunction(),
        exp.DataType: lambda self: self._parse_types(allow_identifiers=False),
        exp.Expression: lambda self: self._parse_statement(),
        exp.From: lambda self: self._parse_from(),
        exp.Group: lambda self: self._parse_group(),
        exp.Having: lambda self: self._parse_having(),
        exp.Identifier: lambda self: self._parse_id_var(),
        exp.Join: lambda self: self._parse_join(),
        exp.Lambda: lambda self: self._parse_lambda(),
        exp.Lateral: lambda self: self._parse_lateral(),
        exp.Limit: lambda self: self._parse_limit(),
        exp.Offset: lambda self: self._parse_offset(),
        exp.Order: lambda self: self._parse_order(),
        exp.Ordered: lambda self: self._parse_ordered(),
        exp.Properties: lambda self: self._parse_properties(),
        exp.Qualify: lambda self: self._parse_qualify(),
        exp.Returning: lambda self: self._parse_returning(),
        exp.Sort: lambda self: self._parse_sort(exp.Sort, TokenType.SORT_BY),
        exp.Table: lambda self: self._parse_table_parts(),
        exp.TableAlias: lambda self: self._parse_table_alias(),
        exp.Where: lambda self: self._parse_where(),
        exp.Window: lambda self: self._parse_named_window(),
        exp.With: lambda self: self._parse_with(),
        "JOIN_TYPE": lambda self: self._parse_join_parts(),
    }

    STATEMENT_PARSERS = {
        TokenType.ALTER: lambda self: self._parse_alter(),
        TokenType.BEGIN: lambda self: self._parse_transaction(),
        TokenType.CACHE: lambda self: self._parse_cache(),
        TokenType.COMMIT: lambda self: self._parse_commit_or_rollback(),
        TokenType.COMMENT: lambda self: self._parse_comment(),
        TokenType.CREATE: lambda self: self._parse_create(),
        TokenType.DELETE: lambda self: self._parse_delete(),
        TokenType.DESC: lambda self: self._parse_describe(),
        TokenType.DESCRIBE: lambda self: self._parse_describe(),
        TokenType.DROP: lambda self: self._parse_drop(),
        TokenType.INSERT: lambda self: self._parse_insert(),
        TokenType.LOAD: lambda self: self._parse_load(),
        TokenType.MERGE: lambda self: self._parse_merge(),
        TokenType.PIVOT: lambda self: self._parse_simplified_pivot(),
        TokenType.PRAGMA: lambda self: self.expression(exp.Pragma, this=self._parse_expression()),
        TokenType.ROLLBACK: lambda self: self._parse_commit_or_rollback(),
        TokenType.SET: lambda self: self._parse_set(),
        TokenType.UNCACHE: lambda self: self._parse_uncache(),
        TokenType.UPDATE: lambda self: self._parse_update(),
        TokenType.USE: lambda self: self.expression(
            exp.Use,
            kind=self._match_texts(("ROLE", "WAREHOUSE", "DATABASE", "SCHEMA"))
            and exp.var(self._prev.text),
            this=self._parse_table(schema=False),
        ),
    }

    UNARY_PARSERS = {
        TokenType.PLUS: lambda self: self._parse_unary(),  # Unary + is handled as a no-op
        TokenType.NOT: lambda self: self.expression(exp.Not, this=self._parse_equality()),
        TokenType.TILDA: lambda self: self.expression(exp.BitwiseNot, this=self._parse_unary()),
        TokenType.DASH: lambda self: self.expression(exp.Neg, this=self._parse_unary()),
    }
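
    # Statement dispatch sketch: _parse_statement (defined further below)
    # matches the current token against STATEMENT_PARSERS, so TokenType.CREATE
    # routes to _parse_create, TokenType.UPDATE to _parse_update, and so on;
    # tokens registered as commands fall back to a generic exp.Command node.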

    PRIMARY_PARSERS = {
        TokenType.STRING: lambda self, token: self.expression(
            exp.Literal, this=token.text, is_string=True
        ),
        TokenType.NUMBER: lambda self, token: self.expression(
            exp.Literal, this=token.text, is_string=False
        ),
        TokenType.STAR: lambda self, _: self.expression(
            exp.Star, **{"except": self._parse_except(), "replace": self._parse_replace()}
        ),
        TokenType.NULL: lambda self, _: self.expression(exp.Null),
        TokenType.TRUE: lambda self, _: self.expression(exp.Boolean, this=True),
        TokenType.FALSE: lambda self, _: self.expression(exp.Boolean, this=False),
        TokenType.BIT_STRING: lambda self, token: self.expression(exp.BitString, this=token.text),
        TokenType.HEX_STRING: lambda self, token: self.expression(exp.HexString, this=token.text),
        TokenType.BYTE_STRING: lambda self, token: self.expression(exp.ByteString, this=token.text),
        TokenType.INTRODUCER: lambda self, token: self._parse_introducer(token),
        TokenType.NATIONAL_STRING: lambda self, token: self.expression(
            exp.National, this=token.text
        ),
        TokenType.RAW_STRING: lambda self, token: self.expression(exp.RawString, this=token.text),
        TokenType.SESSION_PARAMETER: lambda self, _: self._parse_session_parameter(),
    }

    PLACEHOLDER_PARSERS = {
        TokenType.PLACEHOLDER: lambda self: self.expression(exp.Placeholder),
        TokenType.PARAMETER: lambda self: self._parse_parameter(),
        TokenType.COLON: lambda self: self.expression(exp.Placeholder, this=self._prev.text)
        if self._match(TokenType.NUMBER) or self._match_set(self.ID_VAR_TOKENS)
        else None,
    }

    RANGE_PARSERS = {
        TokenType.BETWEEN: lambda self, this: self._parse_between(this),
        TokenType.GLOB: binary_range_parser(exp.Glob),
        TokenType.ILIKE: binary_range_parser(exp.ILike),
        TokenType.IN: lambda self, this: self._parse_in(this),
        TokenType.IRLIKE: binary_range_parser(exp.RegexpILike),
        TokenType.IS: lambda self, this: self._parse_is(this),
        TokenType.LIKE: binary_range_parser(exp.Like),
        TokenType.OVERLAPS: binary_range_parser(exp.Overlaps),
        TokenType.RLIKE: binary_range_parser(exp.RegexpLike),
        TokenType.SIMILAR_TO: binary_range_parser(exp.SimilarTo),
    }
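
    # RANGE_PARSERS sketch: binary_range_parser (module level) returns a
    # closure that parses "<this> <op> <bitwise expr>" plus an optional ESCAPE
    # clause, which is how the LIKE/ILIKE/GLOB/RLIKE entries above are wired:
    #
    #     >>> import sqlglot
    #     >>> from sqlglot import exp
    #     >>> sqlglot.parse_one("SELECT 1 BETWEEN 0 AND 2").find(exp.Between).sql()
    #     '1 BETWEEN 0 AND 2'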

    PROPERTY_PARSERS: t.Dict[str, t.Callable] = {
        "ALGORITHM": lambda self: self._parse_property_assignment(exp.AlgorithmProperty),
        "AUTO_INCREMENT": lambda self: self._parse_property_assignment(exp.AutoIncrementProperty),
        "BLOCKCOMPRESSION": lambda self: self._parse_blockcompression(),
        "CHARACTER SET": lambda self: self._parse_character_set(),
        "CHECKSUM": lambda self: self._parse_checksum(),
        "CLUSTER BY": lambda self: self._parse_cluster(),
        "CLUSTERED": lambda self: self._parse_clustered_by(),
        "COLLATE": lambda self: self._parse_property_assignment(exp.CollateProperty),
        "COMMENT": lambda self: self._parse_property_assignment(exp.SchemaCommentProperty),
        "COPY": lambda self: self._parse_copy_property(),
        "DATABLOCKSIZE": lambda self, **kwargs: self._parse_datablocksize(**kwargs),
        "DEFINER": lambda self: self._parse_definer(),
        "DETERMINISTIC": lambda self: self.expression(
            exp.StabilityProperty, this=exp.Literal.string("IMMUTABLE")
        ),
        "DISTKEY": lambda self: self._parse_distkey(),
        "DISTSTYLE": lambda self: self._parse_property_assignment(exp.DistStyleProperty),
        "ENGINE": lambda self: self._parse_property_assignment(exp.EngineProperty),
        "EXECUTE": lambda self: self._parse_property_assignment(exp.ExecuteAsProperty),
        "EXTERNAL": lambda self: self.expression(exp.ExternalProperty),
        "FALLBACK": lambda self, **kwargs: self._parse_fallback(**kwargs),
        "FORMAT": lambda self: self._parse_property_assignment(exp.FileFormatProperty),
        "FREESPACE": lambda self: self._parse_freespace(),
        "HEAP": lambda self: self.expression(exp.HeapProperty),
        "IMMUTABLE": lambda self: self.expression(
            exp.StabilityProperty, this=exp.Literal.string("IMMUTABLE")
        ),
        "JOURNAL": lambda self, **kwargs: self._parse_journal(**kwargs),
        "LANGUAGE": lambda self: self._parse_property_assignment(exp.LanguageProperty),
        "LAYOUT": lambda self: self._parse_dict_property(this="LAYOUT"),
        "LIFETIME": lambda self: self._parse_dict_range(this="LIFETIME"),
        "LIKE": lambda self: self._parse_create_like(),
        "LOCATION": lambda self: self._parse_property_assignment(exp.LocationProperty),
        "LOCK": lambda self: self._parse_locking(),
        "LOCKING": lambda self: self._parse_locking(),
        "LOG": lambda self, **kwargs: self._parse_log(**kwargs),
        "MATERIALIZED": lambda self: self.expression(exp.MaterializedProperty),
        "MERGEBLOCKRATIO": lambda self, **kwargs: self._parse_mergeblockratio(**kwargs),
        "MULTISET": lambda self: self.expression(exp.SetProperty, multi=True),
        "NO": lambda self: self._parse_no_property(),
        "ON": lambda self: self._parse_on_property(),
        "ORDER BY": lambda self: self._parse_order(skip_order_token=True),
        "PARTITION BY": lambda self: self._parse_partitioned_by(),
        "PARTITIONED BY": lambda self: self._parse_partitioned_by(),
        "PARTITIONED_BY": lambda self: self._parse_partitioned_by(),
        "PRIMARY KEY": lambda self: self._parse_primary_key(in_props=True),
        "RANGE": lambda self: self._parse_dict_range(this="RANGE"),
        "RETURNS": lambda self: self._parse_returns(),
        "ROW": lambda self: self._parse_row(),
        "ROW_FORMAT": lambda self: self._parse_property_assignment(exp.RowFormatProperty),
        "SET": lambda self: self.expression(exp.SetProperty, multi=False),
        "SETTINGS": lambda self: self.expression(
            exp.SettingsProperty, expressions=self._parse_csv(self._parse_set_item)
        ),
        "SORTKEY": lambda self: self._parse_sortkey(),
        "SOURCE": lambda self: self._parse_dict_property(this="SOURCE"),
        "STABLE": lambda self: self.expression(
            exp.StabilityProperty, this=exp.Literal.string("STABLE")
        ),
        "STORED": lambda self: self._parse_stored(),
        "TBLPROPERTIES": lambda self: self._parse_wrapped_csv(self._parse_property),
        "TEMP": lambda self: self.expression(exp.TemporaryProperty),
        "TEMPORARY": lambda self: self.expression(exp.TemporaryProperty),
        "TO": lambda self: self._parse_to_table(),
        "TRANSIENT": lambda self: self.expression(exp.TransientProperty),
        "TTL": lambda self: self._parse_ttl(),
        "USING": lambda self: self._parse_property_assignment(exp.FileFormatProperty),
        "VOLATILE": lambda self: self._parse_volatile_property(),
        "WITH": lambda self: self._parse_with_property(),
    }
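
    # PROPERTY_PARSERS sketch: _parse_properties (below) repeatedly matches a
    # key from this table, so a DDL tail like "ENGINE=InnoDB" is handled by
    # the "ENGINE" entry via _parse_property_assignment(exp.EngineProperty),
    # and unrecognized "key = value" pairs fall back to a generic exp.Property.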

    CONSTRAINT_PARSERS = {
        "AUTOINCREMENT": lambda self: self._parse_auto_increment(),
        "AUTO_INCREMENT": lambda self: self._parse_auto_increment(),
        "CASESPECIFIC": lambda self: self.expression(exp.CaseSpecificColumnConstraint, not_=False),
        "CHARACTER SET": lambda self: self.expression(
            exp.CharacterSetColumnConstraint, this=self._parse_var_or_string()
        ),
        "CHECK": lambda self: self.expression(
            exp.CheckColumnConstraint, this=self._parse_wrapped(self._parse_conjunction)
        ),
        "COLLATE": lambda self: self.expression(
            exp.CollateColumnConstraint, this=self._parse_var()
        ),
        "COMMENT": lambda self: self.expression(
            exp.CommentColumnConstraint, this=self._parse_string()
        ),
        "COMPRESS": lambda self: self._parse_compress(),
        "CLUSTERED": lambda self: self.expression(
            exp.ClusteredColumnConstraint, this=self._parse_wrapped_csv(self._parse_ordered)
        ),
        "NONCLUSTERED": lambda self: self.expression(
            exp.NonClusteredColumnConstraint, this=self._parse_wrapped_csv(self._parse_ordered)
        ),
        "DEFAULT": lambda self: self.expression(
            exp.DefaultColumnConstraint, this=self._parse_bitwise()
        ),
        "ENCODE": lambda self: self.expression(exp.EncodeColumnConstraint, this=self._parse_var()),
        "FOREIGN KEY": lambda self: self._parse_foreign_key(),
        "FORMAT": lambda self: self.expression(
            exp.DateFormatColumnConstraint, this=self._parse_var_or_string()
        ),
        "GENERATED": lambda self: self._parse_generated_as_identity(),
        "IDENTITY": lambda self: self._parse_auto_increment(),
        "INLINE": lambda self: self._parse_inline(),
        "LIKE": lambda self: self._parse_create_like(),
        "NOT": lambda self: self._parse_not_constraint(),
        "NULL": lambda self: self.expression(exp.NotNullColumnConstraint, allow_null=True),
        "ON": lambda self: (
            self._match(TokenType.UPDATE)
            and self.expression(exp.OnUpdateColumnConstraint, this=self._parse_function())
        )
        or self.expression(exp.OnProperty, this=self._parse_id_var()),
        "PATH": lambda self: self.expression(exp.PathColumnConstraint, this=self._parse_string()),
        "PRIMARY KEY": lambda self: self._parse_primary_key(),
        "REFERENCES": lambda self: self._parse_references(match=False),
        "TITLE": lambda self: self.expression(
            exp.TitleColumnConstraint, this=self._parse_var_or_string()
        ),
        "TTL": lambda self: self.expression(exp.MergeTreeTTL, expressions=[self._parse_bitwise()]),
        "UNIQUE": lambda self: self._parse_unique(),
        "UPPERCASE": lambda self: self.expression(exp.UppercaseColumnConstraint),
        "WITH": lambda self: self.expression(
            exp.Properties, expressions=self._parse_wrapped_csv(self._parse_property)
        ),
    }

    ALTER_PARSERS = {
        "ADD": lambda self: self._parse_alter_table_add(),
        "ALTER": lambda self: self._parse_alter_table_alter(),
        "DELETE": lambda self: self.expression(exp.Delete, where=self._parse_where()),
        "DROP": lambda self: self._parse_alter_table_drop(),
        "RENAME": lambda self: self._parse_alter_table_rename(),
    }

    SCHEMA_UNNAMED_CONSTRAINTS = {"CHECK", "FOREIGN KEY", "LIKE", "PRIMARY KEY", "UNIQUE"}

    NO_PAREN_FUNCTION_PARSERS = {
        "ANY": lambda self: self.expression(exp.Any, this=self._parse_bitwise()),
        "CASE": lambda self: self._parse_case(),
        "IF": lambda self: self._parse_if(),
        "NEXT": lambda self: self._parse_next_value_for(),
    }

    INVALID_FUNC_NAME_TOKENS = {
        TokenType.IDENTIFIER,
        TokenType.STRING,
    }

    FUNCTIONS_WITH_ALIASED_ARGS = {"STRUCT"}

    FUNCTION_PARSERS = {
        "ANY_VALUE": lambda self: self._parse_any_value(),
        "CAST": lambda self: self._parse_cast(self.STRICT_CAST),
        "CONCAT": lambda self: self._parse_concat(),
        "CONVERT": lambda self: self._parse_convert(self.STRICT_CAST),
        "DECODE": lambda self: self._parse_decode(),
        "EXTRACT": lambda self: self._parse_extract(),
        "JSON_OBJECT": lambda self: self._parse_json_object(),
        "LOG": lambda self: self._parse_logarithm(),
        "MATCH": lambda self: self._parse_match_against(),
        "OPENJSON": lambda self: self._parse_open_json(),
        "POSITION": lambda self: self._parse_position(),
        "SAFE_CAST": lambda self: self._parse_cast(False),
        "STRING_AGG": lambda self: self._parse_string_agg(),
        "SUBSTRING": lambda self: self._parse_substring(),
        "TRIM": lambda self: self._parse_trim(),
        "TRY_CAST": lambda self: self._parse_cast(False),
        "TRY_CONVERT": lambda self: self._parse_convert(False),
    }
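
    # FUNCTION_PARSERS sketch: these functions have argument syntax that is
    # not a plain comma-separated list, e.g. CAST(x AS INT) or
    # EXTRACT(YEAR FROM d), so they bypass the generic FUNCTIONS table:
    #
    #     >>> import sqlglot
    #     >>> from sqlglot import exp
    #     >>> sqlglot.parse_one("SELECT CAST(x AS INT)").find(exp.Cast).to.sql()
    #     'INT'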

    QUERY_MODIFIER_PARSERS = {
        TokenType.MATCH_RECOGNIZE: lambda self: ("match", self._parse_match_recognize()),
        TokenType.WHERE: lambda self: ("where", self._parse_where()),
        TokenType.GROUP_BY: lambda self: ("group", self._parse_group()),
        TokenType.HAVING: lambda self: ("having", self._parse_having()),
        TokenType.QUALIFY: lambda self: ("qualify", self._parse_qualify()),
        TokenType.WINDOW: lambda self: ("windows", self._parse_window_clause()),
        TokenType.ORDER_BY: lambda self: ("order", self._parse_order()),
        TokenType.LIMIT: lambda self: ("limit", self._parse_limit()),
        TokenType.FETCH: lambda self: ("limit", self._parse_limit()),
        TokenType.OFFSET: lambda self: ("offset", self._parse_offset()),
        TokenType.FOR: lambda self: ("locks", self._parse_locks()),
        TokenType.LOCK: lambda self: ("locks", self._parse_locks()),
        TokenType.TABLE_SAMPLE: lambda self: ("sample", self._parse_table_sample(as_modifier=True)),
        TokenType.USING: lambda self: ("sample", self._parse_table_sample(as_modifier=True)),
        TokenType.CLUSTER_BY: lambda self: (
            "cluster",
            self._parse_sort(exp.Cluster, TokenType.CLUSTER_BY),
        ),
        TokenType.DISTRIBUTE_BY: lambda self: (
            "distribute",
            self._parse_sort(exp.Distribute, TokenType.DISTRIBUTE_BY),
        ),
        TokenType.SORT_BY: lambda self: ("sort", self._parse_sort(exp.Sort, TokenType.SORT_BY)),
        TokenType.CONNECT_BY: lambda self: ("connect", self._parse_connect(skip_start_token=True)),
        TokenType.START_WITH: lambda self: ("connect", self._parse_connect()),
    }

    SET_PARSERS = {
        "GLOBAL": lambda self: self._parse_set_item_assignment("GLOBAL"),
        "LOCAL": lambda self: self._parse_set_item_assignment("LOCAL"),
        "SESSION": lambda self: self._parse_set_item_assignment("SESSION"),
        "TRANSACTION": lambda self: self._parse_set_transaction(),
    }

    SHOW_PARSERS: t.Dict[str, t.Callable] = {}

    TYPE_LITERAL_PARSERS: t.Dict[exp.DataType.Type, t.Callable] = {}

    MODIFIABLES = (exp.Subquery, exp.Subqueryable, exp.Table)

    DDL_SELECT_TOKENS = {TokenType.SELECT, TokenType.WITH, TokenType.L_PAREN}

    PRE_VOLATILE_TOKENS = {TokenType.CREATE, TokenType.REPLACE, TokenType.UNIQUE}

    TRANSACTION_KIND = {"DEFERRED", "IMMEDIATE", "EXCLUSIVE"}
    TRANSACTION_CHARACTERISTICS = {
        "ISOLATION LEVEL REPEATABLE READ",
        "ISOLATION LEVEL READ COMMITTED",
        "ISOLATION LEVEL READ UNCOMMITTED",
        "ISOLATION LEVEL SERIALIZABLE",
        "READ WRITE",
        "READ ONLY",
    }

    INSERT_ALTERNATIVES = {"ABORT", "FAIL", "IGNORE", "REPLACE", "ROLLBACK"}

    CLONE_KINDS = {"TIMESTAMP", "OFFSET", "STATEMENT"}

    TABLE_INDEX_HINT_TOKENS = {TokenType.FORCE, TokenType.IGNORE, TokenType.USE}

    WINDOW_ALIAS_TOKENS = ID_VAR_TOKENS - {TokenType.ROWS}
    WINDOW_BEFORE_PAREN_TOKENS = {TokenType.OVER}
    WINDOW_SIDES = {"FOLLOWING", "PRECEDING"}

    ADD_CONSTRAINT_TOKENS = {TokenType.CONSTRAINT, TokenType.PRIMARY_KEY, TokenType.FOREIGN_KEY}

    DISTINCT_TOKENS = {TokenType.DISTINCT}

    STRICT_CAST = True

    # A NULL arg in CONCAT yields NULL by default
    CONCAT_NULL_OUTPUTS_STRING = False

    PREFIXED_PIVOT_COLUMNS = False
    IDENTIFY_PIVOT_STRINGS = False

    LOG_BASE_FIRST = True
    LOG_DEFAULTS_TO_LN = False

    SUPPORTS_USER_DEFINED_TYPES = True

    __slots__ = (
        "error_level",
        "error_message_context",
        "max_errors",
        "sql",
        "errors",
        "_tokens",
        "_index",
        "_curr",
        "_next",
        "_prev",
        "_prev_comments",
        "_tokenizer",
    )

    # Autofilled
    TOKENIZER_CLASS: t.Type[Tokenizer] = Tokenizer
    INDEX_OFFSET: int = 0
    UNNEST_COLUMN_ONLY: bool = False
    ALIAS_POST_TABLESAMPLE: bool = False
    STRICT_STRING_CONCAT = False
    NORMALIZE_FUNCTIONS = "upper"
    NULL_ORDERING: str = "nulls_are_small"
    SHOW_TRIE: t.Dict = {}
    SET_TRIE: t.Dict = {}
    FORMAT_MAPPING: t.Dict[str, str] = {}
    FORMAT_TRIE: t.Dict = {}
    TIME_MAPPING: t.Dict[str, str] = {}
    TIME_TRIE: t.Dict = {}
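
    # Dialect hook sketch (hypothetical subclass, for illustration only): the
    # flags and "Autofilled" attributes above are overridden per dialect to
    # change parsing behavior without touching the methods below.
    #
    #     class MyParser(Parser):
    #         STRICT_CAST = False        # CAST(...) builds TryCast instead
    #         LOG_DEFAULTS_TO_LN = True  # LOG(x) parses as LN(x)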

    def __init__(
        self,
        error_level: t.Optional[ErrorLevel] = None,
        error_message_context: int = 100,
        max_errors: int = 3,
    ):
        self.error_level = error_level or ErrorLevel.IMMEDIATE
        self.error_message_context = error_message_context
        self.max_errors = max_errors
        self._tokenizer = self.TOKENIZER_CLASS()
        self.reset()

    def reset(self):
        self.sql = ""
        self.errors = []
        self._tokens = []
        self._index = 0
        self._curr = None
        self._next = None
        self._prev = None
        self._prev_comments = None

    def parse(
        self, raw_tokens: t.List[Token], sql: t.Optional[str] = None
    ) -> t.List[t.Optional[exp.Expression]]:
        """
        Parses a list of tokens and returns a list of syntax trees, one tree
        per parsed SQL statement.

        Args:
            raw_tokens: The list of tokens.
            sql: The original SQL string, used to produce helpful debug messages.

        Returns:
            The list of the produced syntax trees.
        """
        return self._parse(
            parse_method=self.__class__._parse_statement, raw_tokens=raw_tokens, sql=sql
        )
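
    # Usage sketch for parse() (assumes the public sqlglot entry points):
    #
    #     >>> from sqlglot import Parser, Tokenizer
    #     >>> sql = "SELECT 1; SELECT 2"
    #     >>> [e.sql() for e in Parser().parse(Tokenizer().tokenize(sql), sql)]
    #     ['SELECT 1', 'SELECT 2']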
943 """ 944 errors = [] 945 for expression_type in ensure_list(expression_types): 946 parser = self.EXPRESSION_PARSERS.get(expression_type) 947 if not parser: 948 raise TypeError(f"No parser registered for {expression_type}") 949 950 try: 951 return self._parse(parser, raw_tokens, sql) 952 except ParseError as e: 953 e.errors[0]["into_expression"] = expression_type 954 errors.append(e) 955 956 raise ParseError( 957 f"Failed to parse '{sql or raw_tokens}' into {expression_types}", 958 errors=merge_errors(errors), 959 ) from errors[-1] 960 961 def _parse( 962 self, 963 parse_method: t.Callable[[Parser], t.Optional[exp.Expression]], 964 raw_tokens: t.List[Token], 965 sql: t.Optional[str] = None, 966 ) -> t.List[t.Optional[exp.Expression]]: 967 self.reset() 968 self.sql = sql or "" 969 970 total = len(raw_tokens) 971 chunks: t.List[t.List[Token]] = [[]] 972 973 for i, token in enumerate(raw_tokens): 974 if token.token_type == TokenType.SEMICOLON: 975 if i < total - 1: 976 chunks.append([]) 977 else: 978 chunks[-1].append(token) 979 980 expressions = [] 981 982 for tokens in chunks: 983 self._index = -1 984 self._tokens = tokens 985 self._advance() 986 987 expressions.append(parse_method(self)) 988 989 if self._index < len(self._tokens): 990 self.raise_error("Invalid expression / Unexpected token") 991 992 self.check_errors() 993 994 return expressions 995 996 def check_errors(self) -> None: 997 """Logs or raises any found errors, depending on the chosen error level setting.""" 998 if self.error_level == ErrorLevel.WARN: 999 for error in self.errors: 1000 logger.error(str(error)) 1001 elif self.error_level == ErrorLevel.RAISE and self.errors: 1002 raise ParseError( 1003 concat_messages(self.errors, self.max_errors), 1004 errors=merge_errors(self.errors), 1005 ) 1006 1007 def raise_error(self, message: str, token: t.Optional[Token] = None) -> None: 1008 """ 1009 Appends an error in the list of recorded errors or raises it, depending on the chosen 1010 error level setting. 1011 """ 1012 token = token or self._curr or self._prev or Token.string("") 1013 start = token.start 1014 end = token.end + 1 1015 start_context = self.sql[max(start - self.error_message_context, 0) : start] 1016 highlight = self.sql[start:end] 1017 end_context = self.sql[end : end + self.error_message_context] 1018 1019 error = ParseError.new( 1020 f"{message}. Line {token.line}, Col: {token.col}.\n" 1021 f" {start_context}\033[4m{highlight}\033[0m{end_context}", 1022 description=message, 1023 line=token.line, 1024 col=token.col, 1025 start_context=start_context, 1026 highlight=highlight, 1027 end_context=end_context, 1028 ) 1029 1030 if self.error_level == ErrorLevel.IMMEDIATE: 1031 raise error 1032 1033 self.errors.append(error) 1034 1035 def expression( 1036 self, exp_class: t.Type[E], comments: t.Optional[t.List[str]] = None, **kwargs 1037 ) -> E: 1038 """ 1039 Creates a new, validated Expression. 1040 1041 Args: 1042 exp_class: The expression class to instantiate. 1043 comments: An optional list of comments to attach to the expression. 1044 kwargs: The arguments to set for the expression along with their respective values. 1045 1046 Returns: 1047 The target expression. 
1048 """ 1049 instance = exp_class(**kwargs) 1050 instance.add_comments(comments) if comments else self._add_comments(instance) 1051 return self.validate_expression(instance) 1052 1053 def _add_comments(self, expression: t.Optional[exp.Expression]) -> None: 1054 if expression and self._prev_comments: 1055 expression.add_comments(self._prev_comments) 1056 self._prev_comments = None 1057 1058 def validate_expression(self, expression: E, args: t.Optional[t.List] = None) -> E: 1059 """ 1060 Validates an Expression, making sure that all its mandatory arguments are set. 1061 1062 Args: 1063 expression: The expression to validate. 1064 args: An optional list of items that was used to instantiate the expression, if it's a Func. 1065 1066 Returns: 1067 The validated expression. 1068 """ 1069 if self.error_level != ErrorLevel.IGNORE: 1070 for error_message in expression.error_messages(args): 1071 self.raise_error(error_message) 1072 1073 return expression 1074 1075 def _find_sql(self, start: Token, end: Token) -> str: 1076 return self.sql[start.start : end.end + 1] 1077 1078 def _advance(self, times: int = 1) -> None: 1079 self._index += times 1080 self._curr = seq_get(self._tokens, self._index) 1081 self._next = seq_get(self._tokens, self._index + 1) 1082 1083 if self._index > 0: 1084 self._prev = self._tokens[self._index - 1] 1085 self._prev_comments = self._prev.comments 1086 else: 1087 self._prev = None 1088 self._prev_comments = None 1089 1090 def _retreat(self, index: int) -> None: 1091 if index != self._index: 1092 self._advance(index - self._index) 1093 1094 def _parse_command(self) -> exp.Command: 1095 return self.expression(exp.Command, this=self._prev.text, expression=self._parse_string()) 1096 1097 def _parse_comment(self, allow_exists: bool = True) -> exp.Expression: 1098 start = self._prev 1099 exists = self._parse_exists() if allow_exists else None 1100 1101 self._match(TokenType.ON) 1102 1103 kind = self._match_set(self.CREATABLES) and self._prev 1104 if not kind: 1105 return self._parse_as_command(start) 1106 1107 if kind.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE): 1108 this = self._parse_user_defined_function(kind=kind.token_type) 1109 elif kind.token_type == TokenType.TABLE: 1110 this = self._parse_table(alias_tokens=self.COMMENT_TABLE_ALIAS_TOKENS) 1111 elif kind.token_type == TokenType.COLUMN: 1112 this = self._parse_column() 1113 else: 1114 this = self._parse_id_var() 1115 1116 self._match(TokenType.IS) 1117 1118 return self.expression( 1119 exp.Comment, this=this, kind=kind.text, expression=self._parse_string(), exists=exists 1120 ) 1121 1122 def _parse_to_table( 1123 self, 1124 ) -> exp.ToTableProperty: 1125 table = self._parse_table_parts(schema=True) 1126 return self.expression(exp.ToTableProperty, this=table) 1127 1128 # https://clickhouse.com/docs/en/engines/table-engines/mergetree-family/mergetree#mergetree-table-ttl 1129 def _parse_ttl(self) -> exp.Expression: 1130 def _parse_ttl_action() -> t.Optional[exp.Expression]: 1131 this = self._parse_bitwise() 1132 1133 if self._match_text_seq("DELETE"): 1134 return self.expression(exp.MergeTreeTTLAction, this=this, delete=True) 1135 if self._match_text_seq("RECOMPRESS"): 1136 return self.expression( 1137 exp.MergeTreeTTLAction, this=this, recompress=self._parse_bitwise() 1138 ) 1139 if self._match_text_seq("TO", "DISK"): 1140 return self.expression( 1141 exp.MergeTreeTTLAction, this=this, to_disk=self._parse_string() 1142 ) 1143 if self._match_text_seq("TO", "VOLUME"): 1144 return self.expression( 1145 

    # https://clickhouse.com/docs/en/engines/table-engines/mergetree-family/mergetree#mergetree-table-ttl
    def _parse_ttl(self) -> exp.Expression:
        def _parse_ttl_action() -> t.Optional[exp.Expression]:
            this = self._parse_bitwise()

            if self._match_text_seq("DELETE"):
                return self.expression(exp.MergeTreeTTLAction, this=this, delete=True)
            if self._match_text_seq("RECOMPRESS"):
                return self.expression(
                    exp.MergeTreeTTLAction, this=this, recompress=self._parse_bitwise()
                )
            if self._match_text_seq("TO", "DISK"):
                return self.expression(
                    exp.MergeTreeTTLAction, this=this, to_disk=self._parse_string()
                )
            if self._match_text_seq("TO", "VOLUME"):
                return self.expression(
                    exp.MergeTreeTTLAction, this=this, to_volume=self._parse_string()
                )

            return this

        expressions = self._parse_csv(_parse_ttl_action)
        where = self._parse_where()
        group = self._parse_group()

        aggregates = None
        if group and self._match(TokenType.SET):
            aggregates = self._parse_csv(self._parse_set_item)

        return self.expression(
            exp.MergeTreeTTL,
            expressions=expressions,
            where=where,
            group=group,
            aggregates=aggregates,
        )

    def _parse_statement(self) -> t.Optional[exp.Expression]:
        if self._curr is None:
            return None

        if self._match_set(self.STATEMENT_PARSERS):
            return self.STATEMENT_PARSERS[self._prev.token_type](self)

        if self._match_set(Tokenizer.COMMANDS):
            return self._parse_command()

        expression = self._parse_expression()
        expression = self._parse_set_operations(expression) if expression else self._parse_select()
        return self._parse_query_modifiers(expression)

    def _parse_drop(self, exists: bool = False) -> exp.Drop | exp.Command:
        start = self._prev
        temporary = self._match(TokenType.TEMPORARY)
        materialized = self._match_text_seq("MATERIALIZED")

        kind = self._match_set(self.CREATABLES) and self._prev.text
        if not kind:
            return self._parse_as_command(start)

        return self.expression(
            exp.Drop,
            comments=start.comments,
            exists=exists or self._parse_exists(),
            this=self._parse_table(schema=True),
            kind=kind,
            temporary=temporary,
            materialized=materialized,
            cascade=self._match_text_seq("CASCADE"),
            constraints=self._match_text_seq("CONSTRAINTS"),
            purge=self._match_text_seq("PURGE"),
        )

    def _parse_exists(self, not_: bool = False) -> t.Optional[bool]:
        return (
            self._match_text_seq("IF")
            and (not not_ or self._match(TokenType.NOT))
            and self._match(TokenType.EXISTS)
        )
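
    # _parse_exists sketch: consumes "IF [NOT] EXISTS" and reports whether it
    # was present, e.g. "DROP TABLE IF EXISTS t" ends up with exists=True:
    #
    #     >>> import sqlglot
    #     >>> sqlglot.parse_one("DROP TABLE IF EXISTS t").args["exists"]
    #     True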

    def _parse_create(self) -> exp.Create | exp.Command:
        # Note: this can't be None because we've matched a statement parser
        start = self._prev
        comments = self._prev_comments

        replace = start.text.upper() == "REPLACE" or self._match_pair(
            TokenType.OR, TokenType.REPLACE
        )
        unique = self._match(TokenType.UNIQUE)

        if self._match_pair(TokenType.TABLE, TokenType.FUNCTION, advance=False):
            self._advance()

        properties = None
        create_token = self._match_set(self.CREATABLES) and self._prev

        if not create_token:
            # exp.Properties.Location.POST_CREATE
            properties = self._parse_properties()
            create_token = self._match_set(self.CREATABLES) and self._prev

            if not properties or not create_token:
                return self._parse_as_command(start)

        exists = self._parse_exists(not_=True)
        this = None
        expression: t.Optional[exp.Expression] = None
        indexes = None
        no_schema_binding = None
        begin = None
        clone = None

        def extend_props(temp_props: t.Optional[exp.Properties]) -> None:
            nonlocal properties
            if properties and temp_props:
                properties.expressions.extend(temp_props.expressions)
            elif temp_props:
                properties = temp_props

        if create_token.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE):
            this = self._parse_user_defined_function(kind=create_token.token_type)

            # exp.Properties.Location.POST_SCHEMA ("schema" here is the UDF's type signature)
            extend_props(self._parse_properties())

            self._match(TokenType.ALIAS)

            if self._match(TokenType.COMMAND):
                expression = self._parse_as_command(self._prev)
            else:
                begin = self._match(TokenType.BEGIN)
                return_ = self._match_text_seq("RETURN")
                expression = self._parse_statement()

                if return_:
                    expression = self.expression(exp.Return, this=expression)
        elif create_token.token_type == TokenType.INDEX:
            this = self._parse_index(index=self._parse_id_var())
        elif create_token.token_type in self.DB_CREATABLES:
            table_parts = self._parse_table_parts(schema=True)

            # exp.Properties.Location.POST_NAME
            self._match(TokenType.COMMA)
            extend_props(self._parse_properties(before=True))

            this = self._parse_schema(this=table_parts)

            # exp.Properties.Location.POST_SCHEMA and POST_WITH
            extend_props(self._parse_properties())

            self._match(TokenType.ALIAS)
            if not self._match_set(self.DDL_SELECT_TOKENS, advance=False):
                # exp.Properties.Location.POST_ALIAS
                extend_props(self._parse_properties())

            expression = self._parse_ddl_select()

            if create_token.token_type == TokenType.TABLE:
                # exp.Properties.Location.POST_EXPRESSION
                extend_props(self._parse_properties())

                indexes = []
                while True:
                    index = self._parse_index()

                    # exp.Properties.Location.POST_INDEX
                    extend_props(self._parse_properties())

                    if not index:
                        break
                    else:
                        self._match(TokenType.COMMA)
                        indexes.append(index)
            elif create_token.token_type == TokenType.VIEW:
                if self._match_text_seq("WITH", "NO", "SCHEMA", "BINDING"):
                    no_schema_binding = True

        if self._match_text_seq("CLONE"):
            clone = self._parse_table(schema=True)
            when = self._match_texts({"AT", "BEFORE"}) and self._prev.text.upper()
            clone_kind = (
                self._match(TokenType.L_PAREN)
                and self._match_texts(self.CLONE_KINDS)
                and self._prev.text.upper()
            )
            clone_expression = self._match(TokenType.FARROW) and self._parse_bitwise()
            self._match(TokenType.R_PAREN)
            clone = self.expression(
                exp.Clone, this=clone, when=when, kind=clone_kind, expression=clone_expression
            )

        return self.expression(
            exp.Create,
            comments=comments,
            this=this,
            kind=create_token.text,
            replace=replace,
            unique=unique,
            expression=expression,
            exists=exists,
            properties=properties,
            indexes=indexes,
            no_schema_binding=no_schema_binding,
            begin=begin,
            clone=clone,
        )
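
    # _parse_create sketch: "CREATE OR REPLACE VIEW v AS SELECT 1" yields an
    # exp.Create with replace=True, kind='VIEW' and the SELECT as expression:
    #
    #     >>> import sqlglot
    #     >>> sqlglot.parse_one("CREATE OR REPLACE VIEW v AS SELECT 1").args["replace"]
    #     True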

    def _parse_property_before(self) -> t.Optional[exp.Expression]:
        # only used for teradata currently
        self._match(TokenType.COMMA)

        kwargs = {
            "no": self._match_text_seq("NO"),
            "dual": self._match_text_seq("DUAL"),
            "before": self._match_text_seq("BEFORE"),
            "default": self._match_text_seq("DEFAULT"),
            "local": (self._match_text_seq("LOCAL") and "LOCAL")
            or (self._match_text_seq("NOT", "LOCAL") and "NOT LOCAL"),
            "after": self._match_text_seq("AFTER"),
            "minimum": self._match_texts(("MIN", "MINIMUM")),
            "maximum": self._match_texts(("MAX", "MAXIMUM")),
        }

        if self._match_texts(self.PROPERTY_PARSERS):
            parser = self.PROPERTY_PARSERS[self._prev.text.upper()]
            try:
                return parser(self, **{k: v for k, v in kwargs.items() if v})
            except TypeError:
                self.raise_error(f"Cannot parse property '{self._prev.text}'")

        return None

    def _parse_property(self) -> t.Optional[exp.Expression]:
        if self._match_texts(self.PROPERTY_PARSERS):
            return self.PROPERTY_PARSERS[self._prev.text.upper()](self)

        if self._match_pair(TokenType.DEFAULT, TokenType.CHARACTER_SET):
            return self._parse_character_set(default=True)

        if self._match_text_seq("COMPOUND", "SORTKEY"):
            return self._parse_sortkey(compound=True)

        if self._match_text_seq("SQL", "SECURITY"):
            return self.expression(exp.SqlSecurityProperty, definer=self._match_text_seq("DEFINER"))

        assignment = self._match_pair(
            TokenType.VAR, TokenType.EQ, advance=False
        ) or self._match_pair(TokenType.STRING, TokenType.EQ, advance=False)

        if assignment:
            key = self._parse_var_or_string()
            self._match(TokenType.EQ)
            return self.expression(
                exp.Property,
                this=key,
                value=self._parse_column() or self._parse_var(any_token=True),
            )

        return None

    def _parse_stored(self) -> exp.FileFormatProperty:
        self._match(TokenType.ALIAS)

        input_format = self._parse_string() if self._match_text_seq("INPUTFORMAT") else None
        output_format = self._parse_string() if self._match_text_seq("OUTPUTFORMAT") else None

        return self.expression(
            exp.FileFormatProperty,
            this=self.expression(
                exp.InputOutputFormat, input_format=input_format, output_format=output_format
            )
            if input_format or output_format
            else self._parse_var_or_string() or self._parse_number() or self._parse_id_var(),
        )

    def _parse_property_assignment(self, exp_class: t.Type[E]) -> E:
        self._match(TokenType.EQ)
        self._match(TokenType.ALIAS)
        return self.expression(exp_class, this=self._parse_field())

    def _parse_properties(self, before: t.Optional[bool] = None) -> t.Optional[exp.Properties]:
        properties = []
        while True:
            if before:
                prop = self._parse_property_before()
            else:
                prop = self._parse_property()

            if not prop:
                break
            for p in ensure_list(prop):
                properties.append(p)

        if properties:
            return self.expression(exp.Properties, expressions=properties)

        return None

    def _parse_fallback(self, no: bool = False) -> exp.FallbackProperty:
        return self.expression(
            exp.FallbackProperty, no=no, protection=self._match_text_seq("PROTECTION")
        )

    def _parse_volatile_property(self) -> exp.VolatileProperty | exp.StabilityProperty:
        if self._index >= 2:
            pre_volatile_token = self._tokens[self._index - 2]
        else:
            pre_volatile_token = None

        if pre_volatile_token and pre_volatile_token.token_type in self.PRE_VOLATILE_TOKENS:
            return exp.VolatileProperty()

        return self.expression(exp.StabilityProperty, this=exp.Literal.string("VOLATILE"))

    def _parse_with_property(
        self,
    ) -> t.Optional[exp.Expression] | t.List[exp.Expression]:
        if self._match(TokenType.L_PAREN, advance=False):
            return self._parse_wrapped_csv(self._parse_property)

        if self._match_text_seq("JOURNAL"):
            return self._parse_withjournaltable()

        if self._match_text_seq("DATA"):
            return self._parse_withdata(no=False)
        elif self._match_text_seq("NO", "DATA"):
            return self._parse_withdata(no=True)

        if not self._next:
            return None

        return self._parse_withisolatedloading()
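
    # Property plumbing sketch: _parse_properties drains properties in a loop,
    # so "CREATE TABLE t (x INT) ENGINE=InnoDB COMMENT='c'" collects an
    # exp.EngineProperty and an exp.SchemaCommentProperty into a single
    # exp.Properties node on the resulting Create expression.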

    # https://dev.mysql.com/doc/refman/8.0/en/create-view.html
    def _parse_definer(self) -> t.Optional[exp.DefinerProperty]:
        self._match(TokenType.EQ)

        user = self._parse_id_var()
        self._match(TokenType.PARAMETER)
        host = self._parse_id_var() or (self._match(TokenType.MOD) and self._prev.text)

        if not user or not host:
            return None

        return exp.DefinerProperty(this=f"{user}@{host}")

    def _parse_withjournaltable(self) -> exp.WithJournalTableProperty:
        self._match(TokenType.TABLE)
        self._match(TokenType.EQ)
        return self.expression(exp.WithJournalTableProperty, this=self._parse_table_parts())

    def _parse_log(self, no: bool = False) -> exp.LogProperty:
        return self.expression(exp.LogProperty, no=no)

    def _parse_journal(self, **kwargs) -> exp.JournalProperty:
        return self.expression(exp.JournalProperty, **kwargs)

    def _parse_checksum(self) -> exp.ChecksumProperty:
        self._match(TokenType.EQ)

        on = None
        if self._match(TokenType.ON):
            on = True
        elif self._match_text_seq("OFF"):
            on = False

        return self.expression(exp.ChecksumProperty, on=on, default=self._match(TokenType.DEFAULT))

    def _parse_cluster(self) -> exp.Cluster:
        return self.expression(exp.Cluster, expressions=self._parse_csv(self._parse_ordered))

    def _parse_clustered_by(self) -> exp.ClusteredByProperty:
        self._match_text_seq("BY")

        self._match_l_paren()
        expressions = self._parse_csv(self._parse_column)
        self._match_r_paren()

        if self._match_text_seq("SORTED", "BY"):
            self._match_l_paren()
            sorted_by = self._parse_csv(self._parse_ordered)
            self._match_r_paren()
        else:
            sorted_by = None

        self._match(TokenType.INTO)
        buckets = self._parse_number()
        self._match_text_seq("BUCKETS")

        return self.expression(
            exp.ClusteredByProperty,
            expressions=expressions,
            sorted_by=sorted_by,
            buckets=buckets,
        )

    def _parse_copy_property(self) -> t.Optional[exp.CopyGrantsProperty]:
        if not self._match_text_seq("GRANTS"):
            self._retreat(self._index - 1)
            return None

        return self.expression(exp.CopyGrantsProperty)

    def _parse_freespace(self) -> exp.FreespaceProperty:
        self._match(TokenType.EQ)
        return self.expression(
            exp.FreespaceProperty, this=self._parse_number(), percent=self._match(TokenType.PERCENT)
        )

    def _parse_mergeblockratio(
        self, no: bool = False, default: bool = False
    ) -> exp.MergeBlockRatioProperty:
        if self._match(TokenType.EQ):
            return self.expression(
                exp.MergeBlockRatioProperty,
                this=self._parse_number(),
                percent=self._match(TokenType.PERCENT),
            )

        return self.expression(exp.MergeBlockRatioProperty, no=no, default=default)

    def _parse_datablocksize(
        self,
        default: t.Optional[bool] = None,
        minimum: t.Optional[bool] = None,
        maximum: t.Optional[bool] = None,
    ) -> exp.DataBlocksizeProperty:
        self._match(TokenType.EQ)
        size = self._parse_number()

        units = None
        if self._match_texts(("BYTES", "KBYTES", "KILOBYTES")):
            units = self._prev.text

        return self.expression(
            exp.DataBlocksizeProperty,
            size=size,
            units=units,
            default=default,
            minimum=minimum,
            maximum=maximum,
        )
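
    # Teradata property family note (inferred from the methods above):
    # _parse_checksum, _parse_freespace, _parse_mergeblockratio and
    # _parse_datablocksize all share the "KEY = value [units]" shape, so each
    # consumes an optional EQ token before reading its value; DATABLOCKSIZE
    # additionally accepts BYTES/KBYTES/KILOBYTES as units.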
1577 default = self._match_text_seq("DEFAULT") 1578 1579 autotemp = None 1580 if self._match_text_seq("AUTOTEMP"): 1581 autotemp = self._parse_schema() 1582 1583 return self.expression( 1584 exp.BlockCompressionProperty, 1585 always=always, 1586 manual=manual, 1587 never=never, 1588 default=default, 1589 autotemp=autotemp, 1590 ) 1591 1592 def _parse_withisolatedloading(self) -> exp.IsolatedLoadingProperty: 1593 no = self._match_text_seq("NO") 1594 concurrent = self._match_text_seq("CONCURRENT") 1595 self._match_text_seq("ISOLATED", "LOADING") 1596 for_all = self._match_text_seq("FOR", "ALL") 1597 for_insert = self._match_text_seq("FOR", "INSERT") 1598 for_none = self._match_text_seq("FOR", "NONE") 1599 return self.expression( 1600 exp.IsolatedLoadingProperty, 1601 no=no, 1602 concurrent=concurrent, 1603 for_all=for_all, 1604 for_insert=for_insert, 1605 for_none=for_none, 1606 ) 1607 1608 def _parse_locking(self) -> exp.LockingProperty: 1609 if self._match(TokenType.TABLE): 1610 kind = "TABLE" 1611 elif self._match(TokenType.VIEW): 1612 kind = "VIEW" 1613 elif self._match(TokenType.ROW): 1614 kind = "ROW" 1615 elif self._match_text_seq("DATABASE"): 1616 kind = "DATABASE" 1617 else: 1618 kind = None 1619 1620 if kind in ("DATABASE", "TABLE", "VIEW"): 1621 this = self._parse_table_parts() 1622 else: 1623 this = None 1624 1625 if self._match(TokenType.FOR): 1626 for_or_in = "FOR" 1627 elif self._match(TokenType.IN): 1628 for_or_in = "IN" 1629 else: 1630 for_or_in = None 1631 1632 if self._match_text_seq("ACCESS"): 1633 lock_type = "ACCESS" 1634 elif self._match_texts(("EXCL", "EXCLUSIVE")): 1635 lock_type = "EXCLUSIVE" 1636 elif self._match_text_seq("SHARE"): 1637 lock_type = "SHARE" 1638 elif self._match_text_seq("READ"): 1639 lock_type = "READ" 1640 elif self._match_text_seq("WRITE"): 1641 lock_type = "WRITE" 1642 elif self._match_text_seq("CHECKSUM"): 1643 lock_type = "CHECKSUM" 1644 else: 1645 lock_type = None 1646 1647 override = self._match_text_seq("OVERRIDE") 1648 1649 return self.expression( 1650 exp.LockingProperty, 1651 this=this, 1652 kind=kind, 1653 for_or_in=for_or_in, 1654 lock_type=lock_type, 1655 override=override, 1656 ) 1657 1658 def _parse_partition_by(self) -> t.List[exp.Expression]: 1659 if self._match(TokenType.PARTITION_BY): 1660 return self._parse_csv(self._parse_conjunction) 1661 return [] 1662 1663 def _parse_partitioned_by(self) -> exp.PartitionedByProperty: 1664 self._match(TokenType.EQ) 1665 return self.expression( 1666 exp.PartitionedByProperty, 1667 this=self._parse_schema() or self._parse_bracket(self._parse_field()), 1668 ) 1669 1670 def _parse_withdata(self, no: bool = False) -> exp.WithDataProperty: 1671 if self._match_text_seq("AND", "STATISTICS"): 1672 statistics = True 1673 elif self._match_text_seq("AND", "NO", "STATISTICS"): 1674 statistics = False 1675 else: 1676 statistics = None 1677 1678 return self.expression(exp.WithDataProperty, no=no, statistics=statistics) 1679 1680 def _parse_no_property(self) -> t.Optional[exp.NoPrimaryIndexProperty]: 1681 if self._match_text_seq("PRIMARY", "INDEX"): 1682 return exp.NoPrimaryIndexProperty() 1683 return None 1684 1685 def _parse_on_property(self) -> t.Optional[exp.Expression]: 1686 if self._match_text_seq("COMMIT", "PRESERVE", "ROWS"): 1687 return exp.OnCommitProperty() 1688 if self._match_text_seq("COMMIT", "DELETE", "ROWS"): 1689 return exp.OnCommitProperty(delete=True) 1690 return self.expression(exp.OnProperty, this=self._parse_schema(self._parse_id_var())) 1691 1692 def _parse_distkey(self) -> 

    def _parse_distkey(self) -> exp.DistKeyProperty:
        return self.expression(exp.DistKeyProperty, this=self._parse_wrapped(self._parse_id_var))

    def _parse_create_like(self) -> t.Optional[exp.LikeProperty]:
        table = self._parse_table(schema=True)

        options = []
        while self._match_texts(("INCLUDING", "EXCLUDING")):
            this = self._prev.text.upper()

            id_var = self._parse_id_var()
            if not id_var:
                return None

            options.append(
                self.expression(exp.Property, this=this, value=exp.var(id_var.this.upper()))
            )

        return self.expression(exp.LikeProperty, this=table, expressions=options)

    def _parse_sortkey(self, compound: bool = False) -> exp.SortKeyProperty:
        return self.expression(
            exp.SortKeyProperty, this=self._parse_wrapped_id_vars(), compound=compound
        )

    def _parse_character_set(self, default: bool = False) -> exp.CharacterSetProperty:
        self._match(TokenType.EQ)
        return self.expression(
            exp.CharacterSetProperty, this=self._parse_var_or_string(), default=default
        )

    def _parse_returns(self) -> exp.ReturnsProperty:
        value: t.Optional[exp.Expression]
        is_table = self._match(TokenType.TABLE)

        if is_table:
            if self._match(TokenType.LT):
                value = self.expression(
                    exp.Schema,
                    this="TABLE",
                    expressions=self._parse_csv(self._parse_struct_types),
                )
                if not self._match(TokenType.GT):
                    self.raise_error("Expecting >")
            else:
                value = self._parse_schema(exp.var("TABLE"))
        else:
            value = self._parse_types()

        return self.expression(exp.ReturnsProperty, this=value, is_table=is_table)

    def _parse_describe(self) -> exp.Describe:
        kind = self._match_set(self.CREATABLES) and self._prev.text
        this = self._parse_table()
        return self.expression(exp.Describe, this=this, kind=kind)

    def _parse_insert(self) -> exp.Insert:
        comments = ensure_list(self._prev_comments)
        overwrite = self._match(TokenType.OVERWRITE)
        ignore = self._match(TokenType.IGNORE)
        local = self._match_text_seq("LOCAL")
        alternative = None

        if self._match_text_seq("DIRECTORY"):
            this: t.Optional[exp.Expression] = self.expression(
                exp.Directory,
                this=self._parse_var_or_string(),
                local=local,
                row_format=self._parse_row_format(match_row=True),
            )
        else:
            if self._match(TokenType.OR):
                alternative = self._match_texts(self.INSERT_ALTERNATIVES) and self._prev.text

            self._match(TokenType.INTO)
            comments += ensure_list(self._prev_comments)
            self._match(TokenType.TABLE)
            this = self._parse_table(schema=True)

        returning = self._parse_returning()

        return self.expression(
            exp.Insert,
            comments=comments,
            this=this,
            by_name=self._match_text_seq("BY", "NAME"),
            exists=self._parse_exists(),
            partition=self._parse_partition(),
            where=self._match_pair(TokenType.REPLACE, TokenType.WHERE)
            and self._parse_conjunction(),
            expression=self._parse_ddl_select(),
            conflict=self._parse_on_conflict(),
            returning=returning or self._parse_returning(),
            overwrite=overwrite,
            alternative=alternative,
            ignore=ignore,
        )
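
    # _parse_insert sketch: "INSERT OR REPLACE INTO t VALUES (1)" records the
    # conflict alternative on the node:
    #
    #     >>> import sqlglot
    #     >>> sqlglot.parse_one("INSERT OR REPLACE INTO t VALUES (1)").args["alternative"]
    #     'REPLACE'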

    def _parse_on_conflict(self) -> t.Optional[exp.OnConflict]:
        conflict = self._match_text_seq("ON", "CONFLICT")
        duplicate = self._match_text_seq("ON", "DUPLICATE", "KEY")

        if not conflict and not duplicate:
            return None

        nothing = None
        expressions = None
        key = None
        constraint = None

        if conflict:
            if self._match_text_seq("ON", "CONSTRAINT"):
                constraint = self._parse_id_var()
            else:
                key = self._parse_csv(self._parse_value)

        self._match_text_seq("DO")
        if self._match_text_seq("NOTHING"):
            nothing = True
        else:
            self._match(TokenType.UPDATE)
            self._match(TokenType.SET)
            expressions = self._parse_csv(self._parse_equality)

        return self.expression(
            exp.OnConflict,
            duplicate=duplicate,
            expressions=expressions,
            nothing=nothing,
            key=key,
            constraint=constraint,
        )

    def _parse_returning(self) -> t.Optional[exp.Returning]:
        if not self._match(TokenType.RETURNING):
            return None
        return self.expression(
            exp.Returning,
            expressions=self._parse_csv(self._parse_expression),
            into=self._match(TokenType.INTO) and self._parse_table_part(),
        )

    def _parse_row(self) -> t.Optional[exp.RowFormatSerdeProperty | exp.RowFormatDelimitedProperty]:
        if not self._match(TokenType.FORMAT):
            return None
        return self._parse_row_format()

    def _parse_row_format(
        self, match_row: bool = False
    ) -> t.Optional[exp.RowFormatSerdeProperty | exp.RowFormatDelimitedProperty]:
        if match_row and not self._match_pair(TokenType.ROW, TokenType.FORMAT):
            return None

        if self._match_text_seq("SERDE"):
            this = self._parse_string()

            serde_properties = None
            if self._match(TokenType.SERDE_PROPERTIES):
                serde_properties = self.expression(
                    exp.SerdeProperties, expressions=self._parse_wrapped_csv(self._parse_property)
                )

            return self.expression(
                exp.RowFormatSerdeProperty, this=this, serde_properties=serde_properties
            )

        self._match_text_seq("DELIMITED")

        kwargs = {}

        if self._match_text_seq("FIELDS", "TERMINATED", "BY"):
            kwargs["fields"] = self._parse_string()
            if self._match_text_seq("ESCAPED", "BY"):
                kwargs["escaped"] = self._parse_string()
        if self._match_text_seq("COLLECTION", "ITEMS", "TERMINATED", "BY"):
            kwargs["collection_items"] = self._parse_string()
        if self._match_text_seq("MAP", "KEYS", "TERMINATED", "BY"):
            kwargs["map_keys"] = self._parse_string()
        if self._match_text_seq("LINES", "TERMINATED", "BY"):
            kwargs["lines"] = self._parse_string()
        if self._match_text_seq("NULL", "DEFINED", "AS"):
            kwargs["null"] = self._parse_string()

        return self.expression(exp.RowFormatDelimitedProperty, **kwargs)  # type: ignore

    def _parse_load(self) -> exp.LoadData | exp.Command:
        if self._match_text_seq("DATA"):
            local = self._match_text_seq("LOCAL")
            self._match_text_seq("INPATH")
            inpath = self._parse_string()
            overwrite = self._match(TokenType.OVERWRITE)
            self._match_pair(TokenType.INTO, TokenType.TABLE)

            return self.expression(
                exp.LoadData,
                this=self._parse_table(schema=True),
                local=local,
                overwrite=overwrite,
                inpath=inpath,
                partition=self._parse_partition(),
                input_format=self._match_text_seq("INPUTFORMAT") and self._parse_string(),
                serde=self._match_text_seq("SERDE") and self._parse_string(),
            )
        return self._parse_as_command(self._prev)
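
    # Hive row-format sketch: _parse_row_format above handles both the
    # "SERDE 'class'" and the "DELIMITED FIELDS TERMINATED BY ..." spellings,
    # collecting the DELIMITED options into RowFormatDelimitedProperty kwargs.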

    def _parse_delete(self) -> exp.Delete:
        # This handles MySQL's "Multiple-Table Syntax"
        # https://dev.mysql.com/doc/refman/8.0/en/delete.html
        tables = None
        comments = self._prev_comments
        if not self._match(TokenType.FROM, advance=False):
            tables = self._parse_csv(self._parse_table) or None

        returning = self._parse_returning()

        return self.expression(
            exp.Delete,
            comments=comments,
            tables=tables,
            this=self._match(TokenType.FROM) and self._parse_table(joins=True),
            using=self._match(TokenType.USING) and self._parse_table(joins=True),
            where=self._parse_where(),
            returning=returning or self._parse_returning(),
            limit=self._parse_limit(),
        )

    def _parse_update(self) -> exp.Update:
        comments = self._prev_comments
        this = self._parse_table(alias_tokens=self.UPDATE_ALIAS_TOKENS)
        expressions = self._match(TokenType.SET) and self._parse_csv(self._parse_equality)
        returning = self._parse_returning()
        return self.expression(
            exp.Update,
            comments=comments,
            **{  # type: ignore
                "this": this,
                "expressions": expressions,
                "from": self._parse_from(joins=True),
                "where": self._parse_where(),
                "returning": returning or self._parse_returning(),
                "limit": self._parse_limit(),
            },
        )

    def _parse_uncache(self) -> exp.Uncache:
        if not self._match(TokenType.TABLE):
            self.raise_error("Expecting TABLE after UNCACHE")

        return self.expression(
            exp.Uncache, exists=self._parse_exists(), this=self._parse_table(schema=True)
        )

    def _parse_cache(self) -> exp.Cache:
        lazy = self._match_text_seq("LAZY")
        self._match(TokenType.TABLE)
        table = self._parse_table(schema=True)

        options = []
        if self._match_text_seq("OPTIONS"):
            self._match_l_paren()
            k = self._parse_string()
            self._match(TokenType.EQ)
            v = self._parse_string()
            options = [k, v]
            self._match_r_paren()

        self._match(TokenType.ALIAS)
        return self.expression(
            exp.Cache,
            this=table,
            lazy=lazy,
            options=options,
            expression=self._parse_select(nested=True),
        )

    def _parse_partition(self) -> t.Optional[exp.Partition]:
        if not self._match(TokenType.PARTITION):
            return None

        return self.expression(
            exp.Partition, expressions=self._parse_wrapped_csv(self._parse_conjunction)
        )
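
    # Multi-table DELETE sketch (MySQL): when DELETE is not immediately
    # followed by FROM, _parse_delete stores the leading table list in
    # "tables", e.g. "DELETE t1 FROM t1 JOIN t2 ON t1.id = t2.id" keeps t1
    # there while the FROM table becomes "this".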

    def _parse_value(self) -> exp.Tuple:
        if self._match(TokenType.L_PAREN):
            expressions = self._parse_csv(self._parse_conjunction)
            self._match_r_paren()
            return self.expression(exp.Tuple, expressions=expressions)

        # In presto we can have VALUES 1, 2 which results in 1 column & 2 rows.
        # https://prestodb.io/docs/current/sql/values.html
        return self.expression(exp.Tuple, expressions=[self._parse_conjunction()])

    def _parse_projections(self) -> t.List[exp.Expression]:
        return self._parse_expressions()

    def _parse_select(
        self, nested: bool = False, table: bool = False, parse_subquery_alias: bool = True
    ) -> t.Optional[exp.Expression]:
        cte = self._parse_with()

        if cte:
            this = self._parse_statement()

            if not this:
                self.raise_error("Failed to parse any statement following CTE")
                return cte

            if "with" in this.arg_types:
                this.set("with", cte)
            else:
                self.raise_error(f"{this.key} does not support CTE")
                this = cte

            return this

        # duckdb supports leading with FROM x
        from_ = self._parse_from() if self._match(TokenType.FROM, advance=False) else None

        if self._match(TokenType.SELECT):
            comments = self._prev_comments

            hint = self._parse_hint()
            all_ = self._match(TokenType.ALL)
            distinct = self._match_set(self.DISTINCT_TOKENS)

            kind = (
                self._match(TokenType.ALIAS)
                and self._match_texts(("STRUCT", "VALUE"))
                and self._prev.text
            )

            if distinct:
                distinct = self.expression(
                    exp.Distinct,
                    on=self._parse_value() if self._match(TokenType.ON) else None,
                )

            if all_ and distinct:
                self.raise_error("Cannot specify both ALL and DISTINCT after SELECT")

            limit = self._parse_limit(top=True)
            projections = self._parse_projections()

            this = self.expression(
                exp.Select,
                kind=kind,
                hint=hint,
                distinct=distinct,
                expressions=projections,
                limit=limit,
            )
            this.comments = comments

            into = self._parse_into()
            if into:
                this.set("into", into)

            if not from_:
                from_ = self._parse_from()

            if from_:
                this.set("from", from_)

            this = self._parse_query_modifiers(this)
        elif (table or nested) and self._match(TokenType.L_PAREN):
            if self._match(TokenType.PIVOT):
                this = self._parse_simplified_pivot()
            elif self._match(TokenType.FROM):
                this = exp.select("*").from_(
                    t.cast(exp.From, self._parse_from(skip_from_token=True))
                )
            else:
                this = self._parse_table() if table else self._parse_select(nested=True)
                this = self._parse_set_operations(self._parse_query_modifiers(this))

            self._match_r_paren()

            # We return early here so that the UNION isn't attached to the subquery by the
            # following call to _parse_set_operations, but instead becomes the parent node
            return self._parse_subquery(this, parse_alias=parse_subquery_alias)
        elif self._match(TokenType.VALUES):
            this = self.expression(
                exp.Values,
                expressions=self._parse_csv(self._parse_value),
                alias=self._parse_table_alias(),
            )
        elif from_:
            this = exp.select("*").from_(from_.this, copy=False)
        else:
            this = None

        return self._parse_set_operations(this)
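
    # Sketch of the flow above (illustrative, assuming the public API): a
    # duckdb-style leading FROM, e.g.
    #   >>> import sqlglot
    #   >>> sqlglot.parse_one("FROM t", read="duckdb")
    # should take the from_-first branch and expand to SELECT * FROM t, while a
    # regular "SELECT ... FROM ..." takes the TokenType.SELECT branch and then
    # collects its query modifiers.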

    def _parse_with(self, skip_with_token: bool = False) -> t.Optional[exp.With]:
        if not skip_with_token and not self._match(TokenType.WITH):
            return None

        comments = self._prev_comments
        recursive = self._match(TokenType.RECURSIVE)

        expressions = []
        while True:
            expressions.append(self._parse_cte())

            if not self._match(TokenType.COMMA) and not self._match(TokenType.WITH):
                break
            else:
                self._match(TokenType.WITH)

        return self.expression(
            exp.With, comments=comments, expressions=expressions, recursive=recursive
        )

    def _parse_cte(self) -> exp.CTE:
        alias = self._parse_table_alias()
        if not alias or not alias.this:
            self.raise_error("Expected CTE to have alias")

        self._match(TokenType.ALIAS)
        return self.expression(
            exp.CTE, this=self._parse_wrapped(self._parse_statement), alias=alias
        )

    def _parse_table_alias(
        self, alias_tokens: t.Optional[t.Collection[TokenType]] = None
    ) -> t.Optional[exp.TableAlias]:
        any_token = self._match(TokenType.ALIAS)
        alias = (
            self._parse_id_var(any_token=any_token, tokens=alias_tokens or self.TABLE_ALIAS_TOKENS)
            or self._parse_string_as_identifier()
        )

        index = self._index
        if self._match(TokenType.L_PAREN):
            columns = self._parse_csv(self._parse_function_parameter)
            self._match_r_paren() if columns else self._retreat(index)
        else:
            columns = None

        if not alias and not columns:
            return None

        return self.expression(exp.TableAlias, this=alias, columns=columns)

    def _parse_subquery(
        self, this: t.Optional[exp.Expression], parse_alias: bool = True
    ) -> t.Optional[exp.Subquery]:
        if not this:
            return None

        return self.expression(
            exp.Subquery,
            this=this,
            pivots=self._parse_pivots(),
            alias=self._parse_table_alias() if parse_alias else None,
        )

    def _parse_query_modifiers(
        self, this: t.Optional[exp.Expression]
    ) -> t.Optional[exp.Expression]:
        if isinstance(this, self.MODIFIABLES):
            for join in iter(self._parse_join, None):
                this.append("joins", join)
            for lateral in iter(self._parse_lateral, None):
                this.append("laterals", lateral)

            while True:
                if self._match_set(self.QUERY_MODIFIER_PARSERS, advance=False):
                    parser = self.QUERY_MODIFIER_PARSERS[self._curr.token_type]
                    key, expression = parser(self)

                    if expression:
                        this.set(key, expression)
                        if key == "limit":
                            offset = expression.args.pop("offset", None)
                            if offset:
                                this.set("offset", exp.Offset(expression=offset))
                        continue
                break
        return this

    def _parse_hint(self) -> t.Optional[exp.Hint]:
        if self._match(TokenType.HINT):
            hints = []
            for hint in iter(lambda: self._parse_csv(self._parse_function), []):
                hints.extend(hint)

            if not self._match_pair(TokenType.STAR, TokenType.SLASH):
                self.raise_error("Expected */ after HINT")

            return self.expression(exp.Hint, expressions=hints)

        return None

    def _parse_into(self) -> t.Optional[exp.Into]:
        if not self._match(TokenType.INTO):
            return None

        temp = self._match(TokenType.TEMPORARY)
        unlogged = self._match_text_seq("UNLOGGED")
        self._match(TokenType.TABLE)

        return self.expression(
            exp.Into, this=self._parse_table(schema=True), temporary=temp, unlogged=unlogged
        )

    def _parse_from(
        self, joins: bool = False, skip_from_token: bool = False
    ) -> t.Optional[exp.From]:
        if not skip_from_token and not self._match(TokenType.FROM):
            return None

        return self.expression(
            exp.From, comments=self._prev_comments, this=self._parse_table(joins=joins)
        )
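
    # The method below consumes a full MATCH_RECOGNIZE(...) clause, e.g.
    #   ... MATCH_RECOGNIZE (PARTITION BY a ORDER BY x MEASURES ...
    #       PATTERN (A+ B) DEFINE A AS ..., B AS ...)
    # The PATTERN body is captured verbatim (balancing parentheses by hand) and
    # stored as an exp.Var, since its regex-like syntax is not ordinary SQL.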

    def _parse_match_recognize(self) -> t.Optional[exp.MatchRecognize]:
        if not self._match(TokenType.MATCH_RECOGNIZE):
            return None

        self._match_l_paren()

        partition = self._parse_partition_by()
        order = self._parse_order()
        measures = self._parse_expressions() if self._match_text_seq("MEASURES") else None

        if self._match_text_seq("ONE", "ROW", "PER", "MATCH"):
            rows = exp.var("ONE ROW PER MATCH")
        elif self._match_text_seq("ALL", "ROWS", "PER", "MATCH"):
            text = "ALL ROWS PER MATCH"
            if self._match_text_seq("SHOW", "EMPTY", "MATCHES"):
                text += " SHOW EMPTY MATCHES"
            elif self._match_text_seq("OMIT", "EMPTY", "MATCHES"):
                text += " OMIT EMPTY MATCHES"
            elif self._match_text_seq("WITH", "UNMATCHED", "ROWS"):
                text += " WITH UNMATCHED ROWS"
            rows = exp.var(text)
        else:
            rows = None

        if self._match_text_seq("AFTER", "MATCH", "SKIP"):
            text = "AFTER MATCH SKIP"
            if self._match_text_seq("PAST", "LAST", "ROW"):
                text += " PAST LAST ROW"
            elif self._match_text_seq("TO", "NEXT", "ROW"):
                text += " TO NEXT ROW"
            elif self._match_text_seq("TO", "FIRST"):
                text += f" TO FIRST {self._advance_any().text}"  # type: ignore
            elif self._match_text_seq("TO", "LAST"):
                text += f" TO LAST {self._advance_any().text}"  # type: ignore
            after = exp.var(text)
        else:
            after = None

        if self._match_text_seq("PATTERN"):
            self._match_l_paren()

            if not self._curr:
                self.raise_error("Expecting )", self._curr)

            paren = 1
            start = self._curr

            while self._curr and paren > 0:
                if self._curr.token_type == TokenType.L_PAREN:
                    paren += 1
                if self._curr.token_type == TokenType.R_PAREN:
                    paren -= 1

                end = self._prev
                self._advance()

            if paren > 0:
                self.raise_error("Expecting )", self._curr)

            pattern = exp.var(self._find_sql(start, end))
        else:
            pattern = None

        define = (
            self._parse_csv(
                lambda: self.expression(
                    exp.Alias,
                    alias=self._parse_id_var(any_token=True),
                    this=self._match(TokenType.ALIAS) and self._parse_conjunction(),
                )
            )
            if self._match_text_seq("DEFINE")
            else None
        )

        self._match_r_paren()

        return self.expression(
            exp.MatchRecognize,
            partition_by=partition,
            order=order,
            measures=measures,
            rows=rows,
            after=after,
            pattern=pattern,
            define=define,
            alias=self._parse_table_alias(),
        )

    def _parse_lateral(self) -> t.Optional[exp.Lateral]:
        outer_apply = self._match_pair(TokenType.OUTER, TokenType.APPLY)
        cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY)

        if outer_apply or cross_apply:
            this = self._parse_select(table=True)
            view = None
            outer = not cross_apply
        elif self._match(TokenType.LATERAL):
            this = self._parse_select(table=True)
            view = self._match(TokenType.VIEW)
            outer = self._match(TokenType.OUTER)
        else:
            return None

        if not this:
            this = (
                self._parse_unnest()
                or self._parse_function()
                or self._parse_id_var(any_token=False)
            )

            while self._match(TokenType.DOT):
                this = exp.Dot(
                    this=this,
                    expression=self._parse_function() or self._parse_id_var(any_token=False),
                )

        if view:
            table = self._parse_id_var(any_token=False)
            columns = self._parse_csv(self._parse_id_var) if self._match(TokenType.ALIAS) else []
            table_alias: t.Optional[exp.TableAlias] = self.expression(
                exp.TableAlias, this=table, columns=columns
            )
        elif isinstance(this, exp.Subquery) and this.alias:
            # Ensures parity between the Subquery's and the Lateral's "alias" args
            table_alias = this.args["alias"].copy()
        else:
            table_alias = self._parse_table_alias()

        return self.expression(exp.Lateral, this=this, view=view, outer=outer, alias=table_alias)

    def _parse_join_parts(
        self,
    ) -> t.Tuple[t.Optional[Token], t.Optional[Token], t.Optional[Token]]:
        return (
            self._match_set(self.JOIN_METHODS) and self._prev,
            self._match_set(self.JOIN_SIDES) and self._prev,
            self._match_set(self.JOIN_KINDS) and self._prev,
        )

    def _parse_join(
        self, skip_join_token: bool = False, parse_bracket: bool = False
    ) -> t.Optional[exp.Join]:
        if self._match(TokenType.COMMA):
            return self.expression(exp.Join, this=self._parse_table())

        index = self._index
        method, side, kind = self._parse_join_parts()
        hint = self._prev.text if self._match_texts(self.JOIN_HINTS) else None
        join = self._match(TokenType.JOIN)

        if not skip_join_token and not join:
            self._retreat(index)
            kind = None
            method = None
            side = None

        outer_apply = self._match_pair(TokenType.OUTER, TokenType.APPLY, False)
        cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY, False)

        if not skip_join_token and not join and not outer_apply and not cross_apply:
            return None

        if outer_apply:
            side = Token(TokenType.LEFT, "LEFT")

        kwargs: t.Dict[str, t.Any] = {"this": self._parse_table(parse_bracket=parse_bracket)}

        if method:
            kwargs["method"] = method.text
        if side:
            kwargs["side"] = side.text
        if kind:
            kwargs["kind"] = kind.text
        if hint:
            kwargs["hint"] = hint

        if self._match(TokenType.ON):
            kwargs["on"] = self._parse_conjunction()
        elif self._match(TokenType.USING):
            kwargs["using"] = self._parse_wrapped_id_vars()
        elif not (kind and kind.token_type == TokenType.CROSS):
            index = self._index
            joins = self._parse_joins()

            if joins and self._match(TokenType.ON):
                kwargs["on"] = self._parse_conjunction()
            elif joins and self._match(TokenType.USING):
                kwargs["using"] = self._parse_wrapped_id_vars()
            else:
                joins = None
                self._retreat(index)

            kwargs["this"].set("joins", joins)

        comments = [c for token in (method, side, kind) if token for c in token.comments]
        return self.expression(exp.Join, comments=comments, **kwargs)

    def _parse_index(
        self,
        index: t.Optional[exp.Expression] = None,
    ) -> t.Optional[exp.Index]:
        if index:
            unique = None
            primary = None
            amp = None

            self._match(TokenType.ON)
            self._match(TokenType.TABLE)  # hive
            table = self._parse_table_parts(schema=True)
        else:
            unique = self._match(TokenType.UNIQUE)
            primary = self._match_text_seq("PRIMARY")
            amp = self._match_text_seq("AMP")

            if not self._match(TokenType.INDEX):
                return None

            index = self._parse_id_var()
            table = None

        using = self._parse_field() if self._match(TokenType.USING) else None

        if self._match(TokenType.L_PAREN, advance=False):
            columns = self._parse_wrapped_csv(self._parse_ordered)
        else:
            columns = None

        return self.expression(
            exp.Index,
            this=index,
            table=table,
            using=using,
            columns=columns,
            unique=unique,
            primary=primary,
            amp=amp,
            partition_by=self._parse_partition_by(),
        )

    def _parse_table_hints(self) -> t.Optional[t.List[exp.Expression]]:
        hints: t.List[exp.Expression] = []
        if self._match_pair(TokenType.WITH, TokenType.L_PAREN):
            # https://learn.microsoft.com/en-us/sql/t-sql/queries/hints-transact-sql-table?view=sql-server-ver16
            hints.append(
                self.expression(
                    exp.WithTableHint,
                    expressions=self._parse_csv(
                        lambda: self._parse_function() or self._parse_var(any_token=True)
                    ),
                )
            )
            self._match_r_paren()
        else:
            # https://dev.mysql.com/doc/refman/8.0/en/index-hints.html
            while self._match_set(self.TABLE_INDEX_HINT_TOKENS):
                hint = exp.IndexTableHint(this=self._prev.text.upper())

                self._match_texts({"INDEX", "KEY"})
                if self._match(TokenType.FOR):
                    hint.set("target", self._advance_any() and self._prev.text.upper())

                hint.set("expressions", self._parse_wrapped_id_vars())
                hints.append(hint)

        return hints or None

    def _parse_table_part(self, schema: bool = False) -> t.Optional[exp.Expression]:
        return (
            (not schema and self._parse_function(optional_parens=False))
            or self._parse_id_var(any_token=False)
            or self._parse_string_as_identifier()
            or self._parse_placeholder()
        )

    def _parse_table_parts(self, schema: bool = False) -> exp.Table:
        catalog = None
        db = None
        table = self._parse_table_part(schema=schema)

        while self._match(TokenType.DOT):
            if catalog:
                # This allows nesting the table in arbitrarily many dot expressions if needed
                table = self.expression(
                    exp.Dot, this=table, expression=self._parse_table_part(schema=schema)
                )
            else:
                catalog = db
                db = table
                table = self._parse_table_part(schema=schema)

        if not table:
            self.raise_error(f"Expected table name but got {self._curr}")

        return self.expression(
            exp.Table, this=table, db=db, catalog=catalog, pivots=self._parse_pivots()
        )

    def _parse_table(
        self,
        schema: bool = False,
        joins: bool = False,
        alias_tokens: t.Optional[t.Collection[TokenType]] = None,
        parse_bracket: bool = False,
    ) -> t.Optional[exp.Expression]:
        lateral = self._parse_lateral()
        if lateral:
            return lateral

        unnest = self._parse_unnest()
        if unnest:
            return unnest

        values = self._parse_derived_table_values()
        if values:
            return values

        subquery = self._parse_select(table=True)
        if subquery:
            if not subquery.args.get("pivots"):
                subquery.set("pivots", self._parse_pivots())
            return subquery

        bracket = parse_bracket and self._parse_bracket(None)
        bracket = self.expression(exp.Table, this=bracket) if bracket else None
        this: exp.Expression = bracket or self._parse_table_parts(schema=schema)

        if schema:
            return self._parse_schema(this=this)

        if self.ALIAS_POST_TABLESAMPLE:
            table_sample = self._parse_table_sample()

        alias = self._parse_table_alias(alias_tokens=alias_tokens or self.TABLE_ALIAS_TOKENS)
        if alias:
            this.set("alias", alias)

        if not this.args.get("pivots"):
            this.set("pivots", self._parse_pivots())

        this.set("hints", self._parse_table_hints())

        if not self.ALIAS_POST_TABLESAMPLE:
            table_sample = self._parse_table_sample()

        if table_sample:
            table_sample.set("this", this)
            this = table_sample

        if joins:
            for join in iter(self._parse_join, None):
                this.append("joins", join)

        return this
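
    # Sketch of the table-source precedence implemented above (illustrative):
    # LATERAL, UNNEST, VALUES and subqueries are each tried in turn before
    # falling back to a plain (possibly dotted) table name, so e.g.
    #   >>> import sqlglot
    #   >>> sqlglot.parse_one("SELECT * FROM UNNEST([1, 2]) AS x", read="bigquery")
    # should never reach _parse_table_parts, because _parse_unnest claims the
    # input first.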

    def _parse_unnest(self, with_alias: bool = True) -> t.Optional[exp.Unnest]:
        if not self._match(TokenType.UNNEST):
            return None

        expressions = self._parse_wrapped_csv(self._parse_type)
        ordinality = self._match_pair(TokenType.WITH, TokenType.ORDINALITY)

        alias = self._parse_table_alias() if with_alias else None

        if alias and self.UNNEST_COLUMN_ONLY:
            if alias.args.get("columns"):
                self.raise_error("Unexpected extra column alias in unnest.")

            alias.set("columns", [alias.this])
            alias.set("this", None)

        offset = None
        if self._match_pair(TokenType.WITH, TokenType.OFFSET):
            self._match(TokenType.ALIAS)
            offset = self._parse_id_var() or exp.to_identifier("offset")

        return self.expression(
            exp.Unnest, expressions=expressions, ordinality=ordinality, alias=alias, offset=offset
        )

    def _parse_derived_table_values(self) -> t.Optional[exp.Values]:
        is_derived = self._match_pair(TokenType.L_PAREN, TokenType.VALUES)
        if not is_derived and not self._match(TokenType.VALUES):
            return None

        expressions = self._parse_csv(self._parse_value)
        alias = self._parse_table_alias()

        if is_derived:
            self._match_r_paren()

        return self.expression(
            exp.Values, expressions=expressions, alias=alias or self._parse_table_alias()
        )

    def _parse_table_sample(self, as_modifier: bool = False) -> t.Optional[exp.TableSample]:
        if not self._match(TokenType.TABLE_SAMPLE) and not (
            as_modifier and self._match_text_seq("USING", "SAMPLE")
        ):
            return None

        bucket_numerator = None
        bucket_denominator = None
        bucket_field = None
        percent = None
        rows = None
        size = None
        seed = None

        kind = (
            self._prev.text if self._prev.token_type == TokenType.TABLE_SAMPLE else "USING SAMPLE"
        )
        method = self._parse_var(tokens=(TokenType.ROW,))

        self._match(TokenType.L_PAREN)

        num = self._parse_number()

        if self._match_text_seq("BUCKET"):
            bucket_numerator = self._parse_number()
            self._match_text_seq("OUT", "OF")
            bucket_denominator = self._parse_number()
            self._match(TokenType.ON)
            bucket_field = self._parse_field()
        elif self._match_set((TokenType.PERCENT, TokenType.MOD)):
            percent = num
        elif self._match(TokenType.ROWS):
            rows = num
        else:
            size = num

        self._match(TokenType.R_PAREN)

        if self._match(TokenType.L_PAREN):
            method = self._parse_var()
            seed = self._match(TokenType.COMMA) and self._parse_number()
            self._match_r_paren()
        elif self._match_texts(("SEED", "REPEATABLE")):
            seed = self._parse_wrapped(self._parse_number)

        return self.expression(
            exp.TableSample,
            method=method,
            bucket_numerator=bucket_numerator,
            bucket_denominator=bucket_denominator,
            bucket_field=bucket_field,
            percent=percent,
            rows=rows,
            size=size,
            seed=seed,
            kind=kind,
        )

    def _parse_pivots(self) -> t.Optional[t.List[exp.Pivot]]:
        return list(iter(self._parse_pivot, None)) or None

    def _parse_joins(self) -> t.Optional[t.List[exp.Join]]:
        return list(iter(self._parse_join, None)) or None
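
    # e.g. (illustrative) DuckDB's simplified pivot syntax, handled below:
    #   PIVOT cities ON year USING SUM(population) GROUP BY country
    # parses into an exp.Pivot with "this"=cities, the ON expressions, the USING
    # aggregations and an optional GROUP BY, unlike the standard trailing
    # PIVOT(...) table operator handled by _parse_pivot.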

    # https://duckdb.org/docs/sql/statements/pivot
    def _parse_simplified_pivot(self) -> exp.Pivot:
        def _parse_on() -> t.Optional[exp.Expression]:
            this = self._parse_bitwise()
            return self._parse_in(this) if self._match(TokenType.IN) else this

        this = self._parse_table()
        expressions = self._match(TokenType.ON) and self._parse_csv(_parse_on)
        using = self._match(TokenType.USING) and self._parse_csv(
            lambda: self._parse_alias(self._parse_function())
        )
        group = self._parse_group()
        return self.expression(
            exp.Pivot, this=this, expressions=expressions, using=using, group=group
        )

    def _parse_pivot(self) -> t.Optional[exp.Pivot]:
        index = self._index
        include_nulls = None

        if self._match(TokenType.PIVOT):
            unpivot = False
        elif self._match(TokenType.UNPIVOT):
            unpivot = True

            # https://docs.databricks.com/en/sql/language-manual/sql-ref-syntax-qry-select-unpivot.html#syntax
            if self._match_text_seq("INCLUDE", "NULLS"):
                include_nulls = True
            elif self._match_text_seq("EXCLUDE", "NULLS"):
                include_nulls = False
        else:
            return None

        expressions = []
        field = None

        if not self._match(TokenType.L_PAREN):
            self._retreat(index)
            return None

        if unpivot:
            expressions = self._parse_csv(self._parse_column)
        else:
            expressions = self._parse_csv(lambda: self._parse_alias(self._parse_function()))

        if not expressions:
            self.raise_error("Failed to parse PIVOT's aggregation list")

        if not self._match(TokenType.FOR):
            self.raise_error("Expecting FOR")

        value = self._parse_column()

        if not self._match(TokenType.IN):
            self.raise_error("Expecting IN")

        field = self._parse_in(value, alias=True)

        self._match_r_paren()

        pivot = self.expression(
            exp.Pivot,
            expressions=expressions,
            field=field,
            unpivot=unpivot,
            include_nulls=include_nulls,
        )

        if not self._match_set((TokenType.PIVOT, TokenType.UNPIVOT), advance=False):
            pivot.set("alias", self._parse_table_alias())

        if not unpivot:
            names = self._pivot_column_names(t.cast(t.List[exp.Expression], expressions))

            columns: t.List[exp.Expression] = []
            for fld in pivot.args["field"].expressions:
                field_name = fld.sql() if self.IDENTIFY_PIVOT_STRINGS else fld.alias_or_name
                for name in names:
                    if self.PREFIXED_PIVOT_COLUMNS:
                        name = f"{name}_{field_name}" if name else field_name
                    else:
                        name = f"{field_name}_{name}" if name else field_name

                    columns.append(exp.to_identifier(name))

            pivot.set("columns", columns)

        return pivot

    def _pivot_column_names(self, aggregations: t.List[exp.Expression]) -> t.List[str]:
        return [agg.alias for agg in aggregations]

    def _parse_where(self, skip_where_token: bool = False) -> t.Optional[exp.Where]:
        if not skip_where_token and not self._match(TokenType.WHERE):
            return None

        return self.expression(
            exp.Where, comments=self._prev_comments, this=self._parse_conjunction()
        )
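
    # The GROUP BY parser below accepts plain expression lists as well as
    # GROUPING SETS, ROLLUP and CUBE (also in the trailing WITH ROLLUP / WITH
    # CUBE form) and ClickHouse-style WITH TOTALS, accumulating everything into
    # a single exp.Group, e.g.
    #   GROUP BY a, GROUPING SETS ((a), (a, b)), ROLLUP (c) WITH TOTALS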

    def _parse_group(self, skip_group_by_token: bool = False) -> t.Optional[exp.Group]:
        if not skip_group_by_token and not self._match(TokenType.GROUP_BY):
            return None

        elements = defaultdict(list)

        if self._match(TokenType.ALL):
            return self.expression(exp.Group, all=True)

        while True:
            expressions = self._parse_csv(self._parse_conjunction)
            if expressions:
                elements["expressions"].extend(expressions)

            grouping_sets = self._parse_grouping_sets()
            if grouping_sets:
                elements["grouping_sets"].extend(grouping_sets)

            rollup = None
            cube = None
            totals = None

            with_ = self._match(TokenType.WITH)
            if self._match(TokenType.ROLLUP):
                rollup = with_ or self._parse_wrapped_csv(self._parse_column)
                elements["rollup"].extend(ensure_list(rollup))

            if self._match(TokenType.CUBE):
                cube = with_ or self._parse_wrapped_csv(self._parse_column)
                elements["cube"].extend(ensure_list(cube))

            if self._match_text_seq("TOTALS"):
                totals = True
                elements["totals"] = True  # type: ignore

            if not (grouping_sets or rollup or cube or totals):
                break

        return self.expression(exp.Group, **elements)  # type: ignore

    def _parse_grouping_sets(self) -> t.Optional[t.List[exp.Expression]]:
        if not self._match(TokenType.GROUPING_SETS):
            return None

        return self._parse_wrapped_csv(self._parse_grouping_set)

    def _parse_grouping_set(self) -> t.Optional[exp.Expression]:
        if self._match(TokenType.L_PAREN):
            grouping_set = self._parse_csv(self._parse_column)
            self._match_r_paren()
            return self.expression(exp.Tuple, expressions=grouping_set)

        return self._parse_column()

    def _parse_having(self, skip_having_token: bool = False) -> t.Optional[exp.Having]:
        if not skip_having_token and not self._match(TokenType.HAVING):
            return None
        return self.expression(exp.Having, this=self._parse_conjunction())

    def _parse_qualify(self) -> t.Optional[exp.Qualify]:
        if not self._match(TokenType.QUALIFY):
            return None
        return self.expression(exp.Qualify, this=self._parse_conjunction())

    def _parse_connect(self, skip_start_token: bool = False) -> t.Optional[exp.Connect]:
        if skip_start_token:
            start = None
        elif self._match(TokenType.START_WITH):
            start = self._parse_conjunction()
        else:
            return None

        self._match(TokenType.CONNECT_BY)
        self.NO_PAREN_FUNCTION_PARSERS["PRIOR"] = lambda self: self.expression(
            exp.Prior, this=self._parse_bitwise()
        )
        connect = self._parse_conjunction()
        self.NO_PAREN_FUNCTION_PARSERS.pop("PRIOR")
        return self.expression(exp.Connect, start=start, connect=connect)

    def _parse_order(
        self, this: t.Optional[exp.Expression] = None, skip_order_token: bool = False
    ) -> t.Optional[exp.Expression]:
        if not skip_order_token and not self._match(TokenType.ORDER_BY):
            return this

        return self.expression(
            exp.Order, this=this, expressions=self._parse_csv(self._parse_ordered)
        )

    def _parse_sort(self, exp_class: t.Type[E], token: TokenType) -> t.Optional[E]:
        if not self._match(token):
            return None
        return self.expression(exp_class, expressions=self._parse_csv(self._parse_ordered))
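
    # Illustrative note: NULL ordering below is normalized per dialect. With
    # NULL_ORDERING == "nulls_are_small", an ascending key without an explicit
    # NULLS FIRST/LAST is marked nulls_first=True, so e.g.
    #   ORDER BY x ASC  ->  exp.Ordered(desc=False, nulls_first=True)
    # unless the dialect pins NULL_ORDERING to "nulls_are_last".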

    def _parse_ordered(self) -> exp.Ordered:
        this = self._parse_conjunction()
        self._match(TokenType.ASC)

        is_desc = self._match(TokenType.DESC)
        is_nulls_first = self._match_text_seq("NULLS", "FIRST")
        is_nulls_last = self._match_text_seq("NULLS", "LAST")
        desc = is_desc or False
        asc = not desc
        nulls_first = is_nulls_first or False
        explicitly_null_ordered = is_nulls_first or is_nulls_last

        if (
            not explicitly_null_ordered
            and (
                (asc and self.NULL_ORDERING == "nulls_are_small")
                or (desc and self.NULL_ORDERING != "nulls_are_small")
            )
            and self.NULL_ORDERING != "nulls_are_last"
        ):
            nulls_first = True

        return self.expression(exp.Ordered, this=this, desc=desc, nulls_first=nulls_first)

    def _parse_limit(
        self, this: t.Optional[exp.Expression] = None, top: bool = False
    ) -> t.Optional[exp.Expression]:
        if self._match(TokenType.TOP if top else TokenType.LIMIT):
            comments = self._prev_comments
            if top:
                limit_paren = self._match(TokenType.L_PAREN)
                expression = self._parse_number()

                if limit_paren:
                    self._match_r_paren()
            else:
                expression = self._parse_term()

            if self._match(TokenType.COMMA):
                offset = expression
                expression = self._parse_term()
            else:
                offset = None

            limit_exp = self.expression(
                exp.Limit, this=this, expression=expression, offset=offset, comments=comments
            )

            return limit_exp

        if self._match(TokenType.FETCH):
            direction = self._match_set((TokenType.FIRST, TokenType.NEXT))
            direction = self._prev.text if direction else "FIRST"

            count = self._parse_number()
            percent = self._match(TokenType.PERCENT)

            self._match_set((TokenType.ROW, TokenType.ROWS))

            only = self._match_text_seq("ONLY")
            with_ties = self._match_text_seq("WITH", "TIES")

            if only and with_ties:
                self.raise_error("Cannot specify both ONLY and WITH TIES in FETCH clause")

            return self.expression(
                exp.Fetch,
                direction=direction,
                count=count,
                percent=percent,
                with_ties=with_ties,
            )

        return this

    def _parse_offset(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]:
        if not self._match(TokenType.OFFSET):
            return this

        count = self._parse_term()
        self._match_set((TokenType.ROW, TokenType.ROWS))
        return self.expression(exp.Offset, this=this, expression=count)

    def _parse_locks(self) -> t.List[exp.Lock]:
        locks = []
        while True:
            if self._match_text_seq("FOR", "UPDATE"):
                update = True
            elif self._match_text_seq("FOR", "SHARE") or self._match_text_seq(
                "LOCK", "IN", "SHARE", "MODE"
            ):
                update = False
            else:
                break

            expressions = None
            if self._match_text_seq("OF"):
                expressions = self._parse_csv(lambda: self._parse_table(schema=True))

            wait: t.Optional[bool | exp.Expression] = None
            if self._match_text_seq("NOWAIT"):
                wait = True
            elif self._match_text_seq("WAIT"):
                wait = self._parse_primary()
            elif self._match_text_seq("SKIP", "LOCKED"):
                wait = False

            locks.append(
                self.expression(exp.Lock, update=update, expressions=expressions, wait=wait)
            )

        return locks

    def _parse_set_operations(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        if not self._match_set(self.SET_OPERATIONS):
            return this

        token_type = self._prev.token_type

        if token_type == TokenType.UNION:
            expression = exp.Union
        elif token_type == TokenType.EXCEPT:
            expression = exp.Except
        else:
            expression = exp.Intersect

        return self.expression(
            expression,
            this=this,
            distinct=self._match(TokenType.DISTINCT) or not self._match(TokenType.ALL),
            by_name=self._match_text_seq("BY", "NAME"),
            expression=self._parse_set_operations(self._parse_select(nested=True)),
        )
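
    # e.g. (sketch) set operations above are parsed right-recursively:
    #   a UNION b UNION c  ->  Union(this=a, expression=Union(this=b, expression=c))
    # and the "distinct" flag defaults to True unless ALL is given, matching
    # standard SQL semantics.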

    def _parse_expression(self) -> t.Optional[exp.Expression]:
        return self._parse_alias(self._parse_conjunction())

    def _parse_conjunction(self) -> t.Optional[exp.Expression]:
        return self._parse_tokens(self._parse_equality, self.CONJUNCTION)

    def _parse_equality(self) -> t.Optional[exp.Expression]:
        return self._parse_tokens(self._parse_comparison, self.EQUALITY)

    def _parse_comparison(self) -> t.Optional[exp.Expression]:
        return self._parse_tokens(self._parse_range, self.COMPARISON)

    def _parse_range(self) -> t.Optional[exp.Expression]:
        this = self._parse_bitwise()
        negate = self._match(TokenType.NOT)

        if self._match_set(self.RANGE_PARSERS):
            expression = self.RANGE_PARSERS[self._prev.token_type](self, this)
            if not expression:
                return this

            this = expression
        elif self._match(TokenType.ISNULL):
            this = self.expression(exp.Is, this=this, expression=exp.Null())

        # Postgres supports ISNULL and NOTNULL for conditions.
        # https://blog.andreiavram.ro/postgresql-null-composite-type/
        if self._match(TokenType.NOTNULL):
            this = self.expression(exp.Is, this=this, expression=exp.Null())
            this = self.expression(exp.Not, this=this)

        if negate:
            this = self.expression(exp.Not, this=this)

        if self._match(TokenType.IS):
            this = self._parse_is(this)

        return this

    def _parse_is(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        index = self._index - 1
        negate = self._match(TokenType.NOT)

        if self._match_text_seq("DISTINCT", "FROM"):
            klass = exp.NullSafeEQ if negate else exp.NullSafeNEQ
            return self.expression(klass, this=this, expression=self._parse_expression())

        expression = self._parse_null() or self._parse_boolean()
        if not expression:
            self._retreat(index)
            return None

        this = self.expression(exp.Is, this=this, expression=expression)
        return self.expression(exp.Not, this=this) if negate else this

    def _parse_in(self, this: t.Optional[exp.Expression], alias: bool = False) -> exp.In:
        unnest = self._parse_unnest(with_alias=False)
        if unnest:
            this = self.expression(exp.In, this=this, unnest=unnest)
        elif self._match(TokenType.L_PAREN):
            expressions = self._parse_csv(lambda: self._parse_select_or_expression(alias=alias))

            if len(expressions) == 1 and isinstance(expressions[0], exp.Subqueryable):
                this = self.expression(exp.In, this=this, query=expressions[0])
            else:
                this = self.expression(exp.In, this=this, expressions=expressions)

            self._match_r_paren(this)
        else:
            this = self.expression(exp.In, this=this, field=self._parse_field())

        return this

    def _parse_between(self, this: exp.Expression) -> exp.Between:
        low = self._parse_bitwise()
        self._match(TokenType.AND)
        high = self._parse_bitwise()
        return self.expression(exp.Between, this=this, low=low, high=high)

    def _parse_escape(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        if not self._match(TokenType.ESCAPE):
            return this
        return self.expression(exp.Escape, this=this, expression=self._parse_string())
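
    # Illustrative example for the canonicalization performed below:
    #   INTERVAL '5 day'  and  INTERVAL 5 day
    # are both normalized to the two-part form INTERVAL '5' day, i.e. an
    # exp.Interval(this=Literal.string("5"), unit=Var("day")), which keeps
    # transpilation between dialects straightforward.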

    def _parse_interval(self) -> t.Optional[exp.Interval]:
        index = self._index

        if not self._match(TokenType.INTERVAL):
            return None

        if self._match(TokenType.STRING, advance=False):
            this = self._parse_primary()
        else:
            this = self._parse_term()

        if not this:
            self._retreat(index)
            return None

        unit = self._parse_function() or self._parse_var()

        # Most dialects support, e.g., the form INTERVAL '5' day, thus we try to parse
        # each INTERVAL expression into this canonical form so it's easy to transpile
        if this and this.is_number:
            this = exp.Literal.string(this.name)
        elif this and this.is_string:
            parts = this.name.split()

            if len(parts) == 2:
                if unit:
                    # this is not actually a unit, it's something else
                    unit = None
                    self._retreat(self._index - 1)
                else:
                    this = exp.Literal.string(parts[0])
                    unit = self.expression(exp.Var, this=parts[1])

        return self.expression(exp.Interval, this=this, unit=unit)

    def _parse_bitwise(self) -> t.Optional[exp.Expression]:
        this = self._parse_term()

        while True:
            if self._match_set(self.BITWISE):
                this = self.expression(
                    self.BITWISE[self._prev.token_type],
                    this=this,
                    expression=self._parse_term(),
                )
            elif self._match(TokenType.DQMARK):
                this = self.expression(exp.Coalesce, this=this, expressions=self._parse_term())
            elif self._match_pair(TokenType.LT, TokenType.LT):
                this = self.expression(
                    exp.BitwiseLeftShift, this=this, expression=self._parse_term()
                )
            elif self._match_pair(TokenType.GT, TokenType.GT):
                this = self.expression(
                    exp.BitwiseRightShift, this=this, expression=self._parse_term()
                )
            else:
                break

        return this

    def _parse_term(self) -> t.Optional[exp.Expression]:
        return self._parse_tokens(self._parse_factor, self.TERM)

    def _parse_factor(self) -> t.Optional[exp.Expression]:
        return self._parse_tokens(self._parse_unary, self.FACTOR)

    def _parse_unary(self) -> t.Optional[exp.Expression]:
        if self._match_set(self.UNARY_PARSERS):
            return self.UNARY_PARSERS[self._prev.token_type](self)
        return self._parse_at_time_zone(self._parse_type())

    def _parse_type(self) -> t.Optional[exp.Expression]:
        interval = self._parse_interval()
        if interval:
            return interval

        index = self._index
        data_type = self._parse_types(check_func=True, allow_identifiers=False)
        this = self._parse_column()

        if data_type:
            if isinstance(this, exp.Literal):
                parser = self.TYPE_LITERAL_PARSERS.get(data_type.this)
                if parser:
                    return parser(self, this, data_type)
                return self.expression(exp.Cast, this=this, to=data_type)
            if not data_type.expressions:
                self._retreat(index)
                return self._parse_column()
            return self._parse_column_ops(data_type)

        return this

    def _parse_type_size(self) -> t.Optional[exp.DataTypeParam]:
        this = self._parse_type()
        if not this:
            return None

        return self.expression(
            exp.DataTypeParam, this=this, expression=self._parse_var(any_token=True)
        )
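
    # Note (illustrative): when no known type keyword matches, the identifier
    # fallback below re-tokenizes the name; a single-token match is treated as a
    # type, and anything else is only accepted when SUPPORTS_USER_DEFINED_TYPES
    # is set, so e.g. CAST(x AS my_udt) parses on dialects allowing UDTs.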

    def _parse_types(
        self, check_func: bool = False, schema: bool = False, allow_identifiers: bool = True
    ) -> t.Optional[exp.Expression]:
        index = self._index

        prefix = self._match_text_seq("SYSUDTLIB", ".")

        if not self._match_set(self.TYPE_TOKENS):
            identifier = allow_identifiers and self._parse_id_var(
                any_token=False, tokens=(TokenType.VAR,)
            )

            if identifier:
                tokens = self._tokenizer.tokenize(identifier.name)

                if len(tokens) != 1:
                    self.raise_error("Unexpected identifier", self._prev)

                if tokens[0].token_type in self.TYPE_TOKENS:
                    self._prev = tokens[0]
                elif self.SUPPORTS_USER_DEFINED_TYPES:
                    return identifier
                else:
                    return None
            else:
                return None

        type_token = self._prev.token_type

        if type_token == TokenType.PSEUDO_TYPE:
            return self.expression(exp.PseudoType, this=self._prev.text)

        nested = type_token in self.NESTED_TYPE_TOKENS
        is_struct = type_token in self.STRUCT_TYPE_TOKENS
        expressions = None
        maybe_func = False

        if self._match(TokenType.L_PAREN):
            if is_struct:
                expressions = self._parse_csv(self._parse_struct_types)
            elif nested:
                expressions = self._parse_csv(
                    lambda: self._parse_types(
                        check_func=check_func, schema=schema, allow_identifiers=allow_identifiers
                    )
                )
            elif type_token in self.ENUM_TYPE_TOKENS:
                expressions = self._parse_csv(self._parse_equality)
            else:
                expressions = self._parse_csv(self._parse_type_size)

            if not expressions or not self._match(TokenType.R_PAREN):
                self._retreat(index)
                return None

            maybe_func = True

        this: t.Optional[exp.Expression] = None
        values: t.Optional[t.List[exp.Expression]] = None

        if nested and self._match(TokenType.LT):
            if is_struct:
                expressions = self._parse_csv(self._parse_struct_types)
            else:
                expressions = self._parse_csv(
                    lambda: self._parse_types(
                        check_func=check_func, schema=schema, allow_identifiers=allow_identifiers
                    )
                )

            if not self._match(TokenType.GT):
                self.raise_error("Expecting >")

            if self._match_set((TokenType.L_BRACKET, TokenType.L_PAREN)):
                values = self._parse_csv(self._parse_conjunction)
                self._match_set((TokenType.R_BRACKET, TokenType.R_PAREN))

        if type_token in self.TIMESTAMPS:
            if self._match_text_seq("WITH", "TIME", "ZONE"):
                maybe_func = False
                tz_type = (
                    exp.DataType.Type.TIMETZ
                    if type_token in self.TIMES
                    else exp.DataType.Type.TIMESTAMPTZ
                )
                this = exp.DataType(this=tz_type, expressions=expressions)
            elif self._match_text_seq("WITH", "LOCAL", "TIME", "ZONE"):
                maybe_func = False
                this = exp.DataType(this=exp.DataType.Type.TIMESTAMPLTZ, expressions=expressions)
            elif self._match_text_seq("WITHOUT", "TIME", "ZONE"):
                maybe_func = False
        elif type_token == TokenType.INTERVAL:
            if self._match_text_seq("YEAR", "TO", "MONTH"):
                span: t.Optional[t.List[exp.Expression]] = [exp.IntervalYearToMonthSpan()]
            elif self._match_text_seq("DAY", "TO", "SECOND"):
                span = [exp.IntervalDayToSecondSpan()]
            else:
                span = None

            unit = not span and self._parse_var()
            if not unit:
                this = self.expression(
                    exp.DataType, this=exp.DataType.Type.INTERVAL, expressions=span
                )
            else:
                this = self.expression(exp.Interval, unit=unit)

        if maybe_func and check_func:
            index2 = self._index
            peek = self._parse_string()

            if not peek:
                self._retreat(index)
                return None

            self._retreat(index2)

        if not this:
            this = exp.DataType(
                this=exp.DataType.Type[type_token.value],
                expressions=expressions,
                nested=nested,
                values=values,
                prefix=prefix,
            )

        while self._match_pair(TokenType.L_BRACKET, TokenType.R_BRACKET):
            this = exp.DataType(this=exp.DataType.Type.ARRAY, expressions=[this], nested=True)

        return this
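
    # e.g. (illustrative) both parenthesized and angle-bracket nested types are
    # accepted above: STRUCT<a INT, b TEXT> and ClickHouse's Nested(a Int32)
    # produce an exp.DataType with nested=True, and trailing [] pairs wrap the
    # result in ARRAY types, so INT[][] becomes ARRAY<ARRAY<INT>>.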

    def _parse_struct_types(self) -> t.Optional[exp.Expression]:
        this = self._parse_type() or self._parse_id_var()
        self._match(TokenType.COLON)
        return self._parse_column_def(this)

    def _parse_at_time_zone(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        if not self._match_text_seq("AT", "TIME", "ZONE"):
            return this
        return self.expression(exp.AtTimeZone, this=this, zone=self._parse_unary())

    def _parse_column(self) -> t.Optional[exp.Expression]:
        this = self._parse_field()
        if isinstance(this, exp.Identifier):
            this = self.expression(exp.Column, this=this)
        elif not this:
            return self._parse_bracket(this)
        return self._parse_column_ops(this)

    def _parse_column_ops(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        this = self._parse_bracket(this)

        while self._match_set(self.COLUMN_OPERATORS):
            op_token = self._prev.token_type
            op = self.COLUMN_OPERATORS.get(op_token)

            if op_token == TokenType.DCOLON:
                field = self._parse_types()
                if not field:
                    self.raise_error("Expected type")
            elif op and self._curr:
                self._advance()
                value = self._prev.text
                field = (
                    exp.Literal.number(value)
                    if self._prev.token_type == TokenType.NUMBER
                    else exp.Literal.string(value)
                )
            else:
                field = self._parse_field(anonymous_func=True, any_token=True)

            if isinstance(field, exp.Func):
                # bigquery allows function calls like x.y.count(...)
                # SAFE.SUBSTR(...)
                # https://cloud.google.com/bigquery/docs/reference/standard-sql/functions-reference#function_call_rules
                this = self._replace_columns_with_dots(this)

            if op:
                this = op(self, this, field)
            elif isinstance(this, exp.Column) and not this.args.get("catalog"):
                this = self.expression(
                    exp.Column,
                    this=field,
                    table=this.this,
                    db=this.args.get("table"),
                    catalog=this.args.get("db"),
                )
            else:
                this = self.expression(exp.Dot, this=this, expression=field)

            this = self._parse_bracket(this)
        return this

    def _parse_primary(self) -> t.Optional[exp.Expression]:
        if self._match_set(self.PRIMARY_PARSERS):
            token_type = self._prev.token_type
            primary = self.PRIMARY_PARSERS[token_type](self, self._prev)

            if token_type == TokenType.STRING:
                expressions = [primary]
                while self._match(TokenType.STRING):
                    expressions.append(exp.Literal.string(self._prev.text))

                if len(expressions) > 1:
                    return self.expression(exp.Concat, expressions=expressions)

            return primary

        if self._match_pair(TokenType.DOT, TokenType.NUMBER):
            return exp.Literal.number(f"0.{self._prev.text}")

        if self._match(TokenType.L_PAREN):
            comments = self._prev_comments
            query = self._parse_select()

            if query:
                expressions = [query]
            else:
                expressions = self._parse_expressions()

            this = self._parse_query_modifiers(seq_get(expressions, 0))

            if isinstance(this, exp.Subqueryable):
                this = self._parse_set_operations(
                    self._parse_subquery(this=this, parse_alias=False)
                )
            elif len(expressions) > 1:
                this = self.expression(exp.Tuple, expressions=expressions)
            else:
                this = self.expression(exp.Paren, this=self._parse_set_operations(this))

            if this:
                this.add_comments(comments)

            self._match_r_paren(expression=this)
            return this

        return None
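
    # Sketch of _parse_primary's behavior above (illustrative): adjacent string
    # literals are folded into exp.Concat ('a' 'b' -> Concat), a parenthesized
    # SELECT becomes a subquery, "(1, 2)" becomes an exp.Tuple, and "(1)" a
    # plain exp.Paren.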

    def _parse_field(
        self,
        any_token: bool = False,
        tokens: t.Optional[t.Collection[TokenType]] = None,
        anonymous_func: bool = False,
    ) -> t.Optional[exp.Expression]:
        return (
            self._parse_primary()
            or self._parse_function(anonymous=anonymous_func)
            or self._parse_id_var(any_token=any_token, tokens=tokens)
        )

    def _parse_function(
        self,
        functions: t.Optional[t.Dict[str, t.Callable]] = None,
        anonymous: bool = False,
        optional_parens: bool = True,
    ) -> t.Optional[exp.Expression]:
        if not self._curr:
            return None

        token_type = self._curr.token_type
        this = self._curr.text
        upper = this.upper()

        parser = self.NO_PAREN_FUNCTION_PARSERS.get(upper)
        if optional_parens and parser and token_type not in self.INVALID_FUNC_NAME_TOKENS:
            self._advance()
            return parser(self)

        if not self._next or self._next.token_type != TokenType.L_PAREN:
            if optional_parens and token_type in self.NO_PAREN_FUNCTIONS:
                self._advance()
                return self.expression(self.NO_PAREN_FUNCTIONS[token_type])

            return None

        if token_type not in self.FUNC_TOKENS:
            return None

        self._advance(2)

        parser = self.FUNCTION_PARSERS.get(upper)
        if parser and not anonymous:
            this = parser(self)
        else:
            subquery_predicate = self.SUBQUERY_PREDICATES.get(token_type)

            if subquery_predicate and self._curr.token_type in (TokenType.SELECT, TokenType.WITH):
                this = self.expression(subquery_predicate, this=self._parse_select())
                self._match_r_paren()
                return this

            if functions is None:
                functions = self.FUNCTIONS

            function = functions.get(upper)

            alias = upper in self.FUNCTIONS_WITH_ALIASED_ARGS
            args = self._parse_csv(lambda: self._parse_lambda(alias=alias))

            if function and not anonymous:
                func = self.validate_expression(function(args), args)
                if not self.NORMALIZE_FUNCTIONS:
                    func.meta["name"] = this
                this = func
            else:
                this = self.expression(exp.Anonymous, this=this, expressions=args)

        self._match_r_paren(this)
        return self._parse_window(this)

    def _parse_function_parameter(self) -> t.Optional[exp.Expression]:
        return self._parse_column_def(self._parse_id_var())

    def _parse_user_defined_function(
        self, kind: t.Optional[TokenType] = None
    ) -> t.Optional[exp.Expression]:
        this = self._parse_id_var()

        while self._match(TokenType.DOT):
            this = self.expression(exp.Dot, this=this, expression=self._parse_id_var())

        if not self._match(TokenType.L_PAREN):
            return this

        expressions = self._parse_csv(self._parse_function_parameter)
        self._match_r_paren()
        return self.expression(
            exp.UserDefinedFunction, this=this, expressions=expressions, wrapped=True
        )

    def _parse_introducer(self, token: Token) -> exp.Introducer | exp.Identifier:
        literal = self._parse_primary()
        if literal:
            return self.expression(exp.Introducer, this=token.text, expression=literal)

        return self.expression(exp.Identifier, this=token.text)

    def _parse_session_parameter(self) -> exp.SessionParameter:
        kind = None
        this = self._parse_id_var() or self._parse_primary()

        if this and self._match(TokenType.DOT):
            kind = this.name
            this = self._parse_var() or self._parse_primary()

        return self.expression(exp.SessionParameter, this=this, kind=kind)
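
    # e.g. (illustrative) the lambda parser below handles forms such as
    #   x -> x + 1      and      (x, y) -> x + y
    # (the arrow token is dialect-specific, per self.LAMBDAS); when no lambda
    # arrow follows, it backtracks and parses a regular expression or a
    # DISTINCT argument list instead.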

    def _parse_lambda(self, alias: bool = False) -> t.Optional[exp.Expression]:
        index = self._index

        if self._match(TokenType.L_PAREN):
            expressions = t.cast(
                t.List[t.Optional[exp.Expression]], self._parse_csv(self._parse_id_var)
            )

            if not self._match(TokenType.R_PAREN):
                self._retreat(index)
        else:
            expressions = [self._parse_id_var()]

        if self._match_set(self.LAMBDAS):
            return self.LAMBDAS[self._prev.token_type](self, expressions)

        self._retreat(index)

        this: t.Optional[exp.Expression]

        if self._match(TokenType.DISTINCT):
            this = self.expression(
                exp.Distinct, expressions=self._parse_csv(self._parse_conjunction)
            )
        else:
            this = self._parse_select_or_expression(alias=alias)

        return self._parse_limit(self._parse_order(self._parse_respect_or_ignore_nulls(this)))

    def _parse_schema(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]:
        index = self._index

        if not self.errors:
            try:
                if self._parse_select(nested=True):
                    return this
            except ParseError:
                pass
            finally:
                self.errors.clear()
                self._retreat(index)

        if not self._match(TokenType.L_PAREN):
            return this

        args = self._parse_csv(lambda: self._parse_constraint() or self._parse_field_def())

        self._match_r_paren()
        return self.expression(exp.Schema, this=this, expressions=args)

    def _parse_field_def(self) -> t.Optional[exp.Expression]:
        return self._parse_column_def(self._parse_field(any_token=True))

    def _parse_column_def(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        # column defs are not really columns, they're identifiers
        if isinstance(this, exp.Column):
            this = this.this

        kind = self._parse_types(schema=True)

        if self._match_text_seq("FOR", "ORDINALITY"):
            return self.expression(exp.ColumnDef, this=this, ordinality=True)

        constraints: t.List[exp.Expression] = []

        if not kind and self._match(TokenType.ALIAS):
            constraints.append(
                self.expression(
                    exp.ComputedColumnConstraint,
                    this=self._parse_conjunction(),
                    persisted=self._match_text_seq("PERSISTED"),
                    not_null=self._match_pair(TokenType.NOT, TokenType.NULL),
                )
            )

        while True:
            constraint = self._parse_column_constraint()
            if not constraint:
                break
            constraints.append(constraint)

        if not kind and not constraints:
            return this

        return self.expression(exp.ColumnDef, this=this, kind=kind, constraints=constraints)

    def _parse_auto_increment(
        self,
    ) -> exp.GeneratedAsIdentityColumnConstraint | exp.AutoIncrementColumnConstraint:
        start = None
        increment = None

        if self._match(TokenType.L_PAREN, advance=False):
            args = self._parse_wrapped_csv(self._parse_bitwise)
            start = seq_get(args, 0)
            increment = seq_get(args, 1)
        elif self._match_text_seq("START"):
            start = self._parse_bitwise()
            self._match_text_seq("INCREMENT")
            increment = self._parse_bitwise()

        if start and increment:
            return exp.GeneratedAsIdentityColumnConstraint(start=start, increment=increment)

        return exp.AutoIncrementColumnConstraint()

    def _parse_compress(self) -> exp.CompressColumnConstraint:
        if self._match(TokenType.L_PAREN, advance=False):
            return self.expression(
                exp.CompressColumnConstraint, this=self._parse_wrapped_csv(self._parse_bitwise)
            )

        return self.expression(exp.CompressColumnConstraint, this=self._parse_bitwise())

    def _parse_generated_as_identity(self) -> exp.GeneratedAsIdentityColumnConstraint:
        if self._match_text_seq("BY", "DEFAULT"):
            on_null = self._match_pair(TokenType.ON, TokenType.NULL)
            this = self.expression(
                exp.GeneratedAsIdentityColumnConstraint, this=False, on_null=on_null
            )
        else:
            self._match_text_seq("ALWAYS")
            this = self.expression(exp.GeneratedAsIdentityColumnConstraint, this=True)

        self._match(TokenType.ALIAS)
        identity = self._match_text_seq("IDENTITY")

        if self._match(TokenType.L_PAREN):
            if self._match(TokenType.START_WITH):
                this.set("start", self._parse_bitwise())
            if self._match_text_seq("INCREMENT", "BY"):
                this.set("increment", self._parse_bitwise())
            if self._match_text_seq("MINVALUE"):
                this.set("minvalue", self._parse_bitwise())
            if self._match_text_seq("MAXVALUE"):
                this.set("maxvalue", self._parse_bitwise())

            if self._match_text_seq("CYCLE"):
                this.set("cycle", True)
            elif self._match_text_seq("NO", "CYCLE"):
                this.set("cycle", False)

            if not identity:
                this.set("expression", self._parse_bitwise())

            self._match_r_paren()

        return this

    def _parse_inline(self) -> exp.InlineLengthColumnConstraint:
        self._match_text_seq("LENGTH")
        return self.expression(exp.InlineLengthColumnConstraint, this=self._parse_bitwise())

    def _parse_not_constraint(
        self,
    ) -> t.Optional[exp.Expression]:
        if self._match_text_seq("NULL"):
            return self.expression(exp.NotNullColumnConstraint)
        if self._match_text_seq("CASESPECIFIC"):
            return self.expression(exp.CaseSpecificColumnConstraint, not_=True)
        if self._match_text_seq("FOR", "REPLICATION"):
            return self.expression(exp.NotForReplicationColumnConstraint)
        return None

    def _parse_column_constraint(self) -> t.Optional[exp.Expression]:
        if self._match(TokenType.CONSTRAINT):
            this = self._parse_id_var()
        else:
            this = None

        if self._match_texts(self.CONSTRAINT_PARSERS):
            return self.expression(
                exp.ColumnConstraint,
                this=this,
                kind=self.CONSTRAINT_PARSERS[self._prev.text.upper()](self),
            )

        return this

    def _parse_constraint(self) -> t.Optional[exp.Expression]:
        if not self._match(TokenType.CONSTRAINT):
            return self._parse_unnamed_constraint(constraints=self.SCHEMA_UNNAMED_CONSTRAINTS)

        this = self._parse_id_var()
        expressions = []

        while True:
            constraint = self._parse_unnamed_constraint() or self._parse_function()
            if not constraint:
                break
            expressions.append(constraint)

        return self.expression(exp.Constraint, this=this, expressions=expressions)

    def _parse_unnamed_constraint(
        self, constraints: t.Optional[t.Collection[str]] = None
    ) -> t.Optional[exp.Expression]:
        if not self._match_texts(constraints or self.CONSTRAINT_PARSERS):
            return None

        constraint = self._prev.text.upper()
        if constraint not in self.CONSTRAINT_PARSERS:
            self.raise_error(f"No parser found for schema constraint {constraint}.")

        return self.CONSTRAINT_PARSERS[constraint](self)

    def _parse_unique(self) -> exp.UniqueColumnConstraint:
        self._match_text_seq("KEY")
        return self.expression(
            exp.UniqueColumnConstraint, this=self._parse_schema(self._parse_id_var(any_token=False))
        )

    def _parse_key_constraint_options(self) -> t.List[str]:
        options = []
        while True:
            if not self._curr:
                break

            if self._match(TokenType.ON):
                action = None
                on = self._advance_any() and self._prev.text

                if self._match_text_seq("NO", "ACTION"):
                    action = "NO ACTION"
                elif self._match_text_seq("CASCADE"):
                    action = "CASCADE"
                elif self._match_pair(TokenType.SET, TokenType.NULL):
                    action = "SET NULL"
                elif self._match_pair(TokenType.SET, TokenType.DEFAULT):
                    action = "SET DEFAULT"
                else:
                    self.raise_error("Invalid key constraint")

                options.append(f"ON {on} {action}")
            elif self._match_text_seq("NOT", "ENFORCED"):
                options.append("NOT ENFORCED")
            elif self._match_text_seq("DEFERRABLE"):
                options.append("DEFERRABLE")
            elif self._match_text_seq("INITIALLY", "DEFERRED"):
                options.append("INITIALLY DEFERRED")
            elif self._match_text_seq("NORELY"):
                options.append("NORELY")
            elif self._match_text_seq("MATCH", "FULL"):
                options.append("MATCH FULL")
            else:
                break

        return options

    def _parse_references(self, match: bool = True) -> t.Optional[exp.Reference]:
        if match and not self._match(TokenType.REFERENCES):
            return None

        expressions = None
        this = self._parse_table(schema=True)
        options = self._parse_key_constraint_options()
        return self.expression(exp.Reference, this=this, expressions=expressions, options=options)

    def _parse_foreign_key(self) -> exp.ForeignKey:
        expressions = self._parse_wrapped_id_vars()
        reference = self._parse_references()
        options = {}

        while self._match(TokenType.ON):
            if not self._match_set((TokenType.DELETE, TokenType.UPDATE)):
                self.raise_error("Expected DELETE or UPDATE")

            kind = self._prev.text.lower()

            if self._match_text_seq("NO", "ACTION"):
                action = "NO ACTION"
            elif self._match(TokenType.SET):
                self._match_set((TokenType.NULL, TokenType.DEFAULT))
                action = "SET " + self._prev.text.upper()
            else:
                self._advance()
                action = self._prev.text.upper()

            options[kind] = action

        return self.expression(
            exp.ForeignKey, expressions=expressions, reference=reference, **options  # type: ignore
        )

    def _parse_primary_key(
        self, wrapped_optional: bool = False, in_props: bool = False
    ) -> exp.PrimaryKeyColumnConstraint | exp.PrimaryKey:
        desc = (
            self._match_set((TokenType.ASC, TokenType.DESC))
            and self._prev.token_type == TokenType.DESC
        )

        if not in_props and not self._match(TokenType.L_PAREN, advance=False):
            return self.expression(exp.PrimaryKeyColumnConstraint, desc=desc)

        expressions = self._parse_wrapped_csv(self._parse_field, optional=wrapped_optional)
        options = self._parse_key_constraint_options()
        return self.expression(exp.PrimaryKey, expressions=expressions, options=options)
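
    # Illustrative note: bracket parsing below covers three shapes, e.g.
    #   {'a': 1}   -> exp.Struct (duckdb struct literal)
    #   ARRAY[1,2] -> exp.Array
    #   col[1]     -> exp.Bracket, with indices normalized via apply_index_offset
    # and the dialect's INDEX_OFFSET so transpiled output stays 0- or 1-based
    # as appropriate.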
explicit=True) 3843 ) 3844 ) 3845 3846 # https://duckdb.org/docs/sql/data_types/struct.html#creating-structs 3847 if bracket_kind == TokenType.L_BRACE: 3848 this = self.expression(exp.Struct, expressions=expressions) 3849 elif not this or this.name.upper() == "ARRAY": 3850 this = self.expression(exp.Array, expressions=expressions) 3851 else: 3852 expressions = apply_index_offset(this, expressions, -self.INDEX_OFFSET) 3853 this = self.expression(exp.Bracket, this=this, expressions=expressions) 3854 3855 if not self._match(TokenType.R_BRACKET) and bracket_kind == TokenType.L_BRACKET: 3856 self.raise_error("Expected ]") 3857 elif not self._match(TokenType.R_BRACE) and bracket_kind == TokenType.L_BRACE: 3858 self.raise_error("Expected }") 3859 3860 self._add_comments(this) 3861 return self._parse_bracket(this) 3862 3863 def _parse_slice(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 3864 if self._match(TokenType.COLON): 3865 return self.expression(exp.Slice, this=this, expression=self._parse_conjunction()) 3866 return this 3867 3868 def _parse_case(self) -> t.Optional[exp.Expression]: 3869 ifs = [] 3870 default = None 3871 3872 comments = self._prev_comments 3873 expression = self._parse_conjunction() 3874 3875 while self._match(TokenType.WHEN): 3876 this = self._parse_conjunction() 3877 self._match(TokenType.THEN) 3878 then = self._parse_conjunction() 3879 ifs.append(self.expression(exp.If, this=this, true=then)) 3880 3881 if self._match(TokenType.ELSE): 3882 default = self._parse_conjunction() 3883 3884 if not self._match(TokenType.END): 3885 self.raise_error("Expected END after CASE", self._prev) 3886 3887 return self._parse_window( 3888 self.expression(exp.Case, comments=comments, this=expression, ifs=ifs, default=default) 3889 ) 3890 3891 def _parse_if(self) -> t.Optional[exp.Expression]: 3892 if self._match(TokenType.L_PAREN): 3893 args = self._parse_csv(self._parse_conjunction) 3894 this = self.validate_expression(exp.If.from_arg_list(args), args) 3895 self._match_r_paren() 3896 else: 3897 index = self._index - 1 3898 condition = self._parse_conjunction() 3899 3900 if not condition: 3901 self._retreat(index) 3902 return None 3903 3904 self._match(TokenType.THEN) 3905 true = self._parse_conjunction() 3906 false = self._parse_conjunction() if self._match(TokenType.ELSE) else None 3907 self._match(TokenType.END) 3908 this = self.expression(exp.If, this=condition, true=true, false=false) 3909 3910 return self._parse_window(this) 3911 3912 def _parse_next_value_for(self) -> t.Optional[exp.Expression]: 3913 if not self._match_text_seq("VALUE", "FOR"): 3914 self._retreat(self._index - 1) 3915 return None 3916 3917 return self.expression( 3918 exp.NextValueFor, 3919 this=self._parse_column(), 3920 order=self._match(TokenType.OVER) and self._parse_wrapped(self._parse_order), 3921 ) 3922 3923 def _parse_extract(self) -> exp.Extract: 3924 this = self._parse_function() or self._parse_var() or self._parse_type() 3925 3926 if self._match(TokenType.FROM): 3927 return self.expression(exp.Extract, this=this, expression=self._parse_bitwise()) 3928 3929 if not self._match(TokenType.COMMA): 3930 self.raise_error("Expected FROM or comma after EXTRACT", self._prev) 3931 3932 return self.expression(exp.Extract, this=this, expression=self._parse_bitwise()) 3933 3934 def _parse_any_value(self) -> exp.AnyValue: 3935 this = self._parse_lambda() 3936 is_max = None 3937 having = None 3938 3939 if self._match(TokenType.HAVING): 3940 self._match_texts(("MAX", "MIN")) 3941 is_max = 
self._prev.text == "MAX" 3942 having = self._parse_column() 3943 3944 return self.expression(exp.AnyValue, this=this, having=having, max=is_max) 3945 3946 def _parse_cast(self, strict: bool) -> exp.Expression: 3947 this = self._parse_conjunction() 3948 3949 if not self._match(TokenType.ALIAS): 3950 if self._match(TokenType.COMMA): 3951 return self.expression(exp.CastToStrType, this=this, to=self._parse_string()) 3952 3953 self.raise_error("Expected AS after CAST") 3954 3955 fmt = None 3956 to = self._parse_types() 3957 3958 if not to: 3959 self.raise_error("Expected TYPE after CAST") 3960 elif isinstance(to, exp.Identifier): 3961 to = exp.DataType.build(to.name, udt=True) 3962 elif to.this == exp.DataType.Type.CHAR: 3963 if self._match(TokenType.CHARACTER_SET): 3964 to = self.expression(exp.CharacterSet, this=self._parse_var_or_string()) 3965 elif self._match(TokenType.FORMAT): 3966 fmt_string = self._parse_string() 3967 fmt = self._parse_at_time_zone(fmt_string) 3968 3969 if to.this in exp.DataType.TEMPORAL_TYPES: 3970 this = self.expression( 3971 exp.StrToDate if to.this == exp.DataType.Type.DATE else exp.StrToTime, 3972 this=this, 3973 format=exp.Literal.string( 3974 format_time( 3975 fmt_string.this if fmt_string else "", 3976 self.FORMAT_MAPPING or self.TIME_MAPPING, 3977 self.FORMAT_TRIE or self.TIME_TRIE, 3978 ) 3979 ), 3980 ) 3981 3982 if isinstance(fmt, exp.AtTimeZone) and isinstance(this, exp.StrToTime): 3983 this.set("zone", fmt.args["zone"]) 3984 3985 return this 3986 3987 return self.expression(exp.Cast if strict else exp.TryCast, this=this, to=to, format=fmt) 3988 3989 def _parse_concat(self) -> t.Optional[exp.Expression]: 3990 args = self._parse_csv(self._parse_conjunction) 3991 if self.CONCAT_NULL_OUTPUTS_STRING: 3992 args = [ 3993 exp.func("COALESCE", exp.cast(arg, "text"), exp.Literal.string("")) 3994 for arg in args 3995 if arg 3996 ] 3997 3998 # Some dialects (e.g. Trino) don't allow a single-argument CONCAT call, so when 3999 # we find such a call we replace it with its argument. 4000 if len(args) == 1: 4001 return args[0] 4002 4003 return self.expression( 4004 exp.Concat if self.STRICT_STRING_CONCAT else exp.SafeConcat, expressions=args 4005 ) 4006 4007 def _parse_string_agg(self) -> exp.Expression: 4008 if self._match(TokenType.DISTINCT): 4009 args: t.List[t.Optional[exp.Expression]] = [ 4010 self.expression(exp.Distinct, expressions=[self._parse_conjunction()]) 4011 ] 4012 if self._match(TokenType.COMMA): 4013 args.extend(self._parse_csv(self._parse_conjunction)) 4014 else: 4015 args = self._parse_csv(self._parse_conjunction) # type: ignore 4016 4017 index = self._index 4018 if not self._match(TokenType.R_PAREN) and args: 4019 # postgres: STRING_AGG([DISTINCT] expression, separator [ORDER BY expression1 {ASC | DESC} [, ...]]) 4020 # bigquery: STRING_AGG([DISTINCT] expression [, separator] [ORDER BY key [{ASC | DESC}] [, ... ]] [LIMIT n]) 4021 args[-1] = self._parse_limit(this=self._parse_order(this=args[-1])) 4022 return self.expression(exp.GroupConcat, this=args[0], separator=seq_get(args, 1)) 4023 4024 # Checks if we can parse an order clause: WITHIN GROUP (ORDER BY <order_by_expression_list> [ASC | DESC]). 4025 # This is done "manually", instead of letting _parse_window parse it into an exp.WithinGroup node, so that 4026 # the STRING_AGG call is parsed like in MySQL / SQLite and can thus be transpiled more easily to them. 
4027 if not self._match_text_seq("WITHIN", "GROUP"): 4028 self._retreat(index) 4029 return self.validate_expression(exp.GroupConcat.from_arg_list(args), args) 4030 4031 self._match_l_paren() # The corresponding match_r_paren will be called in parse_function (caller) 4032 order = self._parse_order(this=seq_get(args, 0)) 4033 return self.expression(exp.GroupConcat, this=order, separator=seq_get(args, 1)) 4034 4035 def _parse_convert(self, strict: bool) -> t.Optional[exp.Expression]: 4036 this = self._parse_bitwise() 4037 4038 if self._match(TokenType.USING): 4039 to: t.Optional[exp.Expression] = self.expression( 4040 exp.CharacterSet, this=self._parse_var() 4041 ) 4042 elif self._match(TokenType.COMMA): 4043 to = self._parse_types() 4044 else: 4045 to = None 4046 4047 return self.expression(exp.Cast if strict else exp.TryCast, this=this, to=to) 4048 4049 def _parse_decode(self) -> t.Optional[exp.Decode | exp.Case]: 4050 """ 4051 There are generally two variants of the DECODE function: 4052 4053 - DECODE(bin, charset) 4054 - DECODE(expression, search, result [, search, result] ... [, default]) 4055 4056 The second variant will always be parsed into a CASE expression. Note that NULL 4057 needs special treatment, since we need to explicitly check for it with `IS NULL`, 4058 instead of relying on pattern matching. 4059 """ 4060 args = self._parse_csv(self._parse_conjunction) 4061 4062 if len(args) < 3: 4063 return self.expression(exp.Decode, this=seq_get(args, 0), charset=seq_get(args, 1)) 4064 4065 expression, *expressions = args 4066 if not expression: 4067 return None 4068 4069 ifs = [] 4070 for search, result in zip(expressions[::2], expressions[1::2]): 4071 if not search or not result: 4072 return None 4073 4074 if isinstance(search, exp.Literal): 4075 ifs.append( 4076 exp.If(this=exp.EQ(this=expression.copy(), expression=search), true=result) 4077 ) 4078 elif isinstance(search, exp.Null): 4079 ifs.append( 4080 exp.If(this=exp.Is(this=expression.copy(), expression=exp.Null()), true=result) 4081 ) 4082 else: 4083 cond = exp.or_( 4084 exp.EQ(this=expression.copy(), expression=search), 4085 exp.and_( 4086 exp.Is(this=expression.copy(), expression=exp.Null()), 4087 exp.Is(this=search.copy(), expression=exp.Null()), 4088 copy=False, 4089 ), 4090 copy=False, 4091 ) 4092 ifs.append(exp.If(this=cond, true=result)) 4093 4094 return exp.Case(ifs=ifs, default=expressions[-1] if len(expressions) % 2 == 1 else None) 4095 4096 def _parse_json_key_value(self) -> t.Optional[exp.JSONKeyValue]: 4097 self._match_text_seq("KEY") 4098 key = self._parse_field() 4099 self._match(TokenType.COLON) 4100 self._match_text_seq("VALUE") 4101 value = self._parse_field() 4102 4103 if not key and not value: 4104 return None 4105 return self.expression(exp.JSONKeyValue, this=key, expression=value) 4106 4107 def _parse_json_object(self) -> exp.JSONObject: 4108 star = self._parse_star() 4109 expressions = [star] if star else self._parse_csv(self._parse_json_key_value) 4110 4111 null_handling = None 4112 if self._match_text_seq("NULL", "ON", "NULL"): 4113 null_handling = "NULL ON NULL" 4114 elif self._match_text_seq("ABSENT", "ON", "NULL"): 4115 null_handling = "ABSENT ON NULL" 4116 4117 unique_keys = None 4118 if self._match_text_seq("WITH", "UNIQUE"): 4119 unique_keys = True 4120 elif self._match_text_seq("WITHOUT", "UNIQUE"): 4121 unique_keys = False 4122 4123 self._match_text_seq("KEYS") 4124 4125 return_type = self._match_text_seq("RETURNING") and self._parse_type() 4126 format_json = self._match_text_seq("FORMAT", 
"JSON") 4127 encoding = self._match_text_seq("ENCODING") and self._parse_var() 4128 4129 return self.expression( 4130 exp.JSONObject, 4131 expressions=expressions, 4132 null_handling=null_handling, 4133 unique_keys=unique_keys, 4134 return_type=return_type, 4135 format_json=format_json, 4136 encoding=encoding, 4137 ) 4138 4139 def _parse_logarithm(self) -> exp.Func: 4140 # Default argument order is base, expression 4141 args = self._parse_csv(self._parse_range) 4142 4143 if len(args) > 1: 4144 if not self.LOG_BASE_FIRST: 4145 args.reverse() 4146 return exp.Log.from_arg_list(args) 4147 4148 return self.expression( 4149 exp.Ln if self.LOG_DEFAULTS_TO_LN else exp.Log, this=seq_get(args, 0) 4150 ) 4151 4152 def _parse_match_against(self) -> exp.MatchAgainst: 4153 expressions = self._parse_csv(self._parse_column) 4154 4155 self._match_text_seq(")", "AGAINST", "(") 4156 4157 this = self._parse_string() 4158 4159 if self._match_text_seq("IN", "NATURAL", "LANGUAGE", "MODE"): 4160 modifier = "IN NATURAL LANGUAGE MODE" 4161 if self._match_text_seq("WITH", "QUERY", "EXPANSION"): 4162 modifier = f"{modifier} WITH QUERY EXPANSION" 4163 elif self._match_text_seq("IN", "BOOLEAN", "MODE"): 4164 modifier = "IN BOOLEAN MODE" 4165 elif self._match_text_seq("WITH", "QUERY", "EXPANSION"): 4166 modifier = "WITH QUERY EXPANSION" 4167 else: 4168 modifier = None 4169 4170 return self.expression( 4171 exp.MatchAgainst, this=this, expressions=expressions, modifier=modifier 4172 ) 4173 4174 # https://learn.microsoft.com/en-us/sql/t-sql/functions/openjson-transact-sql?view=sql-server-ver16 4175 def _parse_open_json(self) -> exp.OpenJSON: 4176 this = self._parse_bitwise() 4177 path = self._match(TokenType.COMMA) and self._parse_string() 4178 4179 def _parse_open_json_column_def() -> exp.OpenJSONColumnDef: 4180 this = self._parse_field(any_token=True) 4181 kind = self._parse_types() 4182 path = self._parse_string() 4183 as_json = self._match_pair(TokenType.ALIAS, TokenType.JSON) 4184 4185 return self.expression( 4186 exp.OpenJSONColumnDef, this=this, kind=kind, path=path, as_json=as_json 4187 ) 4188 4189 expressions = None 4190 if self._match_pair(TokenType.R_PAREN, TokenType.WITH): 4191 self._match_l_paren() 4192 expressions = self._parse_csv(_parse_open_json_column_def) 4193 4194 return self.expression(exp.OpenJSON, this=this, path=path, expressions=expressions) 4195 4196 def _parse_position(self, haystack_first: bool = False) -> exp.StrPosition: 4197 args = self._parse_csv(self._parse_bitwise) 4198 4199 if self._match(TokenType.IN): 4200 return self.expression( 4201 exp.StrPosition, this=self._parse_bitwise(), substr=seq_get(args, 0) 4202 ) 4203 4204 if haystack_first: 4205 haystack = seq_get(args, 0) 4206 needle = seq_get(args, 1) 4207 else: 4208 needle = seq_get(args, 0) 4209 haystack = seq_get(args, 1) 4210 4211 return self.expression( 4212 exp.StrPosition, this=haystack, substr=needle, position=seq_get(args, 2) 4213 ) 4214 4215 def _parse_join_hint(self, func_name: str) -> exp.JoinHint: 4216 args = self._parse_csv(self._parse_table) 4217 return exp.JoinHint(this=func_name.upper(), expressions=args) 4218 4219 def _parse_substring(self) -> exp.Substring: 4220 # Postgres supports the form: substring(string [from int] [for int]) 4221 # https://www.postgresql.org/docs/9.1/functions-string.html @ Table 9-6 4222 4223 args = t.cast(t.List[t.Optional[exp.Expression]], self._parse_csv(self._parse_bitwise)) 4224 4225 if self._match(TokenType.FROM): 4226 args.append(self._parse_bitwise()) 4227 if self._match(TokenType.FOR): 
4228 args.append(self._parse_bitwise()) 4229 4230 return self.validate_expression(exp.Substring.from_arg_list(args), args) 4231 4232 def _parse_trim(self) -> exp.Trim: 4233 # https://www.w3resource.com/sql/character-functions/trim.php 4234 # https://docs.oracle.com/javadb/10.8.3.0/ref/rreftrimfunc.html 4235 4236 position = None 4237 collation = None 4238 4239 if self._match_texts(self.TRIM_TYPES): 4240 position = self._prev.text.upper() 4241 4242 expression = self._parse_bitwise() 4243 if self._match_set((TokenType.FROM, TokenType.COMMA)): 4244 this = self._parse_bitwise() 4245 else: 4246 this = expression 4247 expression = None 4248 4249 if self._match(TokenType.COLLATE): 4250 collation = self._parse_bitwise() 4251 4252 return self.expression( 4253 exp.Trim, this=this, position=position, expression=expression, collation=collation 4254 ) 4255 4256 def _parse_window_clause(self) -> t.Optional[t.List[exp.Expression]]: 4257 return self._match(TokenType.WINDOW) and self._parse_csv(self._parse_named_window) 4258 4259 def _parse_named_window(self) -> t.Optional[exp.Expression]: 4260 return self._parse_window(self._parse_id_var(), alias=True) 4261 4262 def _parse_respect_or_ignore_nulls( 4263 self, this: t.Optional[exp.Expression] 4264 ) -> t.Optional[exp.Expression]: 4265 if self._match_text_seq("IGNORE", "NULLS"): 4266 return self.expression(exp.IgnoreNulls, this=this) 4267 if self._match_text_seq("RESPECT", "NULLS"): 4268 return self.expression(exp.RespectNulls, this=this) 4269 return this 4270 4271 def _parse_window( 4272 self, this: t.Optional[exp.Expression], alias: bool = False 4273 ) -> t.Optional[exp.Expression]: 4274 if self._match_pair(TokenType.FILTER, TokenType.L_PAREN): 4275 self._match(TokenType.WHERE) 4276 this = self.expression( 4277 exp.Filter, this=this, expression=self._parse_where(skip_where_token=True) 4278 ) 4279 self._match_r_paren() 4280 4281 # T-SQL allows the OVER (...) syntax after WITHIN GROUP. 4282 # https://learn.microsoft.com/en-us/sql/t-sql/functions/percentile-disc-transact-sql?view=sql-server-ver16 4283 if self._match_text_seq("WITHIN", "GROUP"): 4284 order = self._parse_wrapped(self._parse_order) 4285 this = self.expression(exp.WithinGroup, this=this, expression=order) 4286 4287 # SQL spec defines an optional [ { IGNORE | RESPECT } NULLS ] OVER 4288 # Some dialects choose to implement and some do not. 4289 # https://dev.mysql.com/doc/refman/8.0/en/window-function-descriptions.html 4290 4291 # There is some code above in _parse_lambda that handles 4292 # SELECT FIRST_VALUE(TABLE.COLUMN IGNORE|RESPECT NULLS) OVER ... 4293 4294 # The below changes handle 4295 # SELECT FIRST_VALUE(TABLE.COLUMN) IGNORE|RESPECT NULLS OVER ... 4296 4297 # Oracle allows both formats 4298 # (https://docs.oracle.com/en/database/oracle/oracle-database/19/sqlrf/img_text/first_value.html) 4299 # and Snowflake chose to do the same for familiarity 4300 # https://docs.snowflake.com/en/sql-reference/functions/first_value.html#usage-notes 4301 this = self._parse_respect_or_ignore_nulls(this) 4302 4303 # bigquery select from window x AS (partition by ...) 
4304 if alias: 4305 over = None 4306 self._match(TokenType.ALIAS) 4307 elif not self._match_set(self.WINDOW_BEFORE_PAREN_TOKENS): 4308 return this 4309 else: 4310 over = self._prev.text.upper() 4311 4312 if not self._match(TokenType.L_PAREN): 4313 return self.expression( 4314 exp.Window, this=this, alias=self._parse_id_var(False), over=over 4315 ) 4316 4317 window_alias = self._parse_id_var(any_token=False, tokens=self.WINDOW_ALIAS_TOKENS) 4318 4319 first = self._match(TokenType.FIRST) 4320 if self._match_text_seq("LAST"): 4321 first = False 4322 4323 partition, order = self._parse_partition_and_order() 4324 kind = self._match_set((TokenType.ROWS, TokenType.RANGE)) and self._prev.text 4325 4326 if kind: 4327 self._match(TokenType.BETWEEN) 4328 start = self._parse_window_spec() 4329 self._match(TokenType.AND) 4330 end = self._parse_window_spec() 4331 4332 spec = self.expression( 4333 exp.WindowSpec, 4334 kind=kind, 4335 start=start["value"], 4336 start_side=start["side"], 4337 end=end["value"], 4338 end_side=end["side"], 4339 ) 4340 else: 4341 spec = None 4342 4343 self._match_r_paren() 4344 4345 window = self.expression( 4346 exp.Window, 4347 this=this, 4348 partition_by=partition, 4349 order=order, 4350 spec=spec, 4351 alias=window_alias, 4352 over=over, 4353 first=first, 4354 ) 4355 4356 # This covers Oracle's FIRST/LAST syntax: aggregate KEEP (...) OVER (...) 4357 if self._match_set(self.WINDOW_BEFORE_PAREN_TOKENS, advance=False): 4358 return self._parse_window(window, alias=alias) 4359 4360 return window 4361 4362 def _parse_partition_and_order( 4363 self, 4364 ) -> t.Tuple[t.List[exp.Expression], t.Optional[exp.Expression]]: 4365 return self._parse_partition_by(), self._parse_order() 4366 4367 def _parse_window_spec(self) -> t.Dict[str, t.Optional[str | exp.Expression]]: 4368 self._match(TokenType.BETWEEN) 4369 4370 return { 4371 "value": ( 4372 (self._match_text_seq("UNBOUNDED") and "UNBOUNDED") 4373 or (self._match_text_seq("CURRENT", "ROW") and "CURRENT ROW") 4374 or self._parse_bitwise() 4375 ), 4376 "side": self._match_texts(self.WINDOW_SIDES) and self._prev.text, 4377 } 4378 4379 def _parse_alias( 4380 self, this: t.Optional[exp.Expression], explicit: bool = False 4381 ) -> t.Optional[exp.Expression]: 4382 any_token = self._match(TokenType.ALIAS) 4383 4384 if explicit and not any_token: 4385 return this 4386 4387 if self._match(TokenType.L_PAREN): 4388 aliases = self.expression( 4389 exp.Aliases, 4390 this=this, 4391 expressions=self._parse_csv(lambda: self._parse_id_var(any_token)), 4392 ) 4393 self._match_r_paren(aliases) 4394 return aliases 4395 4396 alias = self._parse_id_var(any_token) 4397 4398 if alias: 4399 return self.expression(exp.Alias, this=this, alias=alias) 4400 4401 return this 4402 4403 def _parse_id_var( 4404 self, 4405 any_token: bool = True, 4406 tokens: t.Optional[t.Collection[TokenType]] = None, 4407 ) -> t.Optional[exp.Expression]: 4408 identifier = self._parse_identifier() 4409 4410 if identifier: 4411 return identifier 4412 4413 if (any_token and self._advance_any()) or self._match_set(tokens or self.ID_VAR_TOKENS): 4414 quoted = self._prev.token_type == TokenType.STRING 4415 return exp.Identifier(this=self._prev.text, quoted=quoted) 4416 4417 return None 4418 4419 def _parse_string(self) -> t.Optional[exp.Expression]: 4420 if self._match(TokenType.STRING): 4421 return self.PRIMARY_PARSERS[TokenType.STRING](self, self._prev) 4422 return self._parse_placeholder() 4423 4424 def _parse_string_as_identifier(self) -> t.Optional[exp.Identifier]: 4425 return 
exp.to_identifier(self._match(TokenType.STRING) and self._prev.text, quoted=True) 4426 4427 def _parse_number(self) -> t.Optional[exp.Expression]: 4428 if self._match(TokenType.NUMBER): 4429 return self.PRIMARY_PARSERS[TokenType.NUMBER](self, self._prev) 4430 return self._parse_placeholder() 4431 4432 def _parse_identifier(self) -> t.Optional[exp.Expression]: 4433 if self._match(TokenType.IDENTIFIER): 4434 return self.expression(exp.Identifier, this=self._prev.text, quoted=True) 4435 return self._parse_placeholder() 4436 4437 def _parse_var( 4438 self, any_token: bool = False, tokens: t.Optional[t.Collection[TokenType]] = None 4439 ) -> t.Optional[exp.Expression]: 4440 if ( 4441 (any_token and self._advance_any()) 4442 or self._match(TokenType.VAR) 4443 or (self._match_set(tokens) if tokens else False) 4444 ): 4445 return self.expression(exp.Var, this=self._prev.text) 4446 return self._parse_placeholder() 4447 4448 def _advance_any(self) -> t.Optional[Token]: 4449 if self._curr and self._curr.token_type not in self.RESERVED_KEYWORDS: 4450 self._advance() 4451 return self._prev 4452 return None 4453 4454 def _parse_var_or_string(self) -> t.Optional[exp.Expression]: 4455 return self._parse_var() or self._parse_string() 4456 4457 def _parse_null(self) -> t.Optional[exp.Expression]: 4458 if self._match(TokenType.NULL): 4459 return self.PRIMARY_PARSERS[TokenType.NULL](self, self._prev) 4460 return self._parse_placeholder() 4461 4462 def _parse_boolean(self) -> t.Optional[exp.Expression]: 4463 if self._match(TokenType.TRUE): 4464 return self.PRIMARY_PARSERS[TokenType.TRUE](self, self._prev) 4465 if self._match(TokenType.FALSE): 4466 return self.PRIMARY_PARSERS[TokenType.FALSE](self, self._prev) 4467 return self._parse_placeholder() 4468 4469 def _parse_star(self) -> t.Optional[exp.Expression]: 4470 if self._match(TokenType.STAR): 4471 return self.PRIMARY_PARSERS[TokenType.STAR](self, self._prev) 4472 return self._parse_placeholder() 4473 4474 def _parse_parameter(self) -> exp.Parameter: 4475 wrapped = self._match(TokenType.L_BRACE) 4476 this = self._parse_var() or self._parse_identifier() or self._parse_primary() 4477 self._match(TokenType.R_BRACE) 4478 return self.expression(exp.Parameter, this=this, wrapped=wrapped) 4479 4480 def _parse_placeholder(self) -> t.Optional[exp.Expression]: 4481 if self._match_set(self.PLACEHOLDER_PARSERS): 4482 placeholder = self.PLACEHOLDER_PARSERS[self._prev.token_type](self) 4483 if placeholder: 4484 return placeholder 4485 self._advance(-1) 4486 return None 4487 4488 def _parse_except(self) -> t.Optional[t.List[exp.Expression]]: 4489 if not self._match(TokenType.EXCEPT): 4490 return None 4491 if self._match(TokenType.L_PAREN, advance=False): 4492 return self._parse_wrapped_csv(self._parse_column) 4493 return self._parse_csv(self._parse_column) 4494 4495 def _parse_replace(self) -> t.Optional[t.List[exp.Expression]]: 4496 if not self._match(TokenType.REPLACE): 4497 return None 4498 if self._match(TokenType.L_PAREN, advance=False): 4499 return self._parse_wrapped_csv(self._parse_expression) 4500 return self._parse_expressions() 4501 4502 def _parse_csv( 4503 self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA 4504 ) -> t.List[exp.Expression]: 4505 parse_result = parse_method() 4506 items = [parse_result] if parse_result is not None else [] 4507 4508 while self._match(sep): 4509 self._add_comments(parse_result) 4510 parse_result = parse_method() 4511 if parse_result is not None: 4512 items.append(parse_result) 4513 4514 return items 4515 4516 def 
_parse_tokens( 4517 self, parse_method: t.Callable, expressions: t.Dict 4518 ) -> t.Optional[exp.Expression]: 4519 this = parse_method() 4520 4521 while self._match_set(expressions): 4522 this = self.expression( 4523 expressions[self._prev.token_type], 4524 this=this, 4525 comments=self._prev_comments, 4526 expression=parse_method(), 4527 ) 4528 4529 return this 4530 4531 def _parse_wrapped_id_vars(self, optional: bool = False) -> t.List[exp.Expression]: 4532 return self._parse_wrapped_csv(self._parse_id_var, optional=optional) 4533 4534 def _parse_wrapped_csv( 4535 self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA, optional: bool = False 4536 ) -> t.List[exp.Expression]: 4537 return self._parse_wrapped( 4538 lambda: self._parse_csv(parse_method, sep=sep), optional=optional 4539 ) 4540 4541 def _parse_wrapped(self, parse_method: t.Callable, optional: bool = False) -> t.Any: 4542 wrapped = self._match(TokenType.L_PAREN) 4543 if not wrapped and not optional: 4544 self.raise_error("Expecting (") 4545 parse_result = parse_method() 4546 if wrapped: 4547 self._match_r_paren() 4548 return parse_result 4549 4550 def _parse_expressions(self) -> t.List[exp.Expression]: 4551 return self._parse_csv(self._parse_expression) 4552 4553 def _parse_select_or_expression(self, alias: bool = False) -> t.Optional[exp.Expression]: 4554 return self._parse_select() or self._parse_set_operations( 4555 self._parse_expression() if alias else self._parse_conjunction() 4556 ) 4557 4558 def _parse_ddl_select(self) -> t.Optional[exp.Expression]: 4559 return self._parse_query_modifiers( 4560 self._parse_set_operations(self._parse_select(nested=True, parse_subquery_alias=False)) 4561 ) 4562 4563 def _parse_transaction(self) -> exp.Transaction | exp.Command: 4564 this = None 4565 if self._match_texts(self.TRANSACTION_KIND): 4566 this = self._prev.text 4567 4568 self._match_texts({"TRANSACTION", "WORK"}) 4569 4570 modes = [] 4571 while True: 4572 mode = [] 4573 while self._match(TokenType.VAR): 4574 mode.append(self._prev.text) 4575 4576 if mode: 4577 modes.append(" ".join(mode)) 4578 if not self._match(TokenType.COMMA): 4579 break 4580 4581 return self.expression(exp.Transaction, this=this, modes=modes) 4582 4583 def _parse_commit_or_rollback(self) -> exp.Commit | exp.Rollback: 4584 chain = None 4585 savepoint = None 4586 is_rollback = self._prev.token_type == TokenType.ROLLBACK 4587 4588 self._match_texts({"TRANSACTION", "WORK"}) 4589 4590 if self._match_text_seq("TO"): 4591 self._match_text_seq("SAVEPOINT") 4592 savepoint = self._parse_id_var() 4593 4594 if self._match(TokenType.AND): 4595 chain = not self._match_text_seq("NO") 4596 self._match_text_seq("CHAIN") 4597 4598 if is_rollback: 4599 return self.expression(exp.Rollback, savepoint=savepoint) 4600 4601 return self.expression(exp.Commit, chain=chain) 4602 4603 def _parse_add_column(self) -> t.Optional[exp.Expression]: 4604 if not self._match_text_seq("ADD"): 4605 return None 4606 4607 self._match(TokenType.COLUMN) 4608 exists_column = self._parse_exists(not_=True) 4609 expression = self._parse_field_def() 4610 4611 if expression: 4612 expression.set("exists", exists_column) 4613 4614 # https://docs.databricks.com/delta/update-schema.html#explicitly-update-schema-to-add-columns 4615 if self._match_texts(("FIRST", "AFTER")): 4616 position = self._prev.text 4617 column_position = self.expression( 4618 exp.ColumnPosition, this=self._parse_column(), position=position 4619 ) 4620 expression.set("position", column_position) 4621 4622 return expression 4623 
4624 def _parse_drop_column(self) -> t.Optional[exp.Drop | exp.Command]: 4625 drop = self._match(TokenType.DROP) and self._parse_drop() 4626 if drop and not isinstance(drop, exp.Command): 4627 drop.set("kind", drop.args.get("kind", "COLUMN")) 4628 return drop 4629 4630 # https://docs.aws.amazon.com/athena/latest/ug/alter-table-drop-partition.html 4631 def _parse_drop_partition(self, exists: t.Optional[bool] = None) -> exp.DropPartition: 4632 return self.expression( 4633 exp.DropPartition, expressions=self._parse_csv(self._parse_partition), exists=exists 4634 ) 4635 4636 def _parse_add_constraint(self) -> exp.AddConstraint: 4637 this = None 4638 kind = self._prev.token_type 4639 4640 if kind == TokenType.CONSTRAINT: 4641 this = self._parse_id_var() 4642 4643 if self._match_text_seq("CHECK"): 4644 expression = self._parse_wrapped(self._parse_conjunction) 4645 enforced = self._match_text_seq("ENFORCED") 4646 4647 return self.expression( 4648 exp.AddConstraint, this=this, expression=expression, enforced=enforced 4649 ) 4650 4651 if kind == TokenType.FOREIGN_KEY or self._match(TokenType.FOREIGN_KEY): 4652 expression = self._parse_foreign_key() 4653 elif kind == TokenType.PRIMARY_KEY or self._match(TokenType.PRIMARY_KEY): 4654 expression = self._parse_primary_key() 4655 else: 4656 expression = None 4657 4658 return self.expression(exp.AddConstraint, this=this, expression=expression) 4659 4660 def _parse_alter_table_add(self) -> t.List[exp.Expression]: 4661 index = self._index - 1 4662 4663 if self._match_set(self.ADD_CONSTRAINT_TOKENS): 4664 return self._parse_csv(self._parse_add_constraint) 4665 4666 self._retreat(index) 4667 return self._parse_csv(self._parse_add_column) 4668 4669 def _parse_alter_table_alter(self) -> exp.AlterColumn: 4670 self._match(TokenType.COLUMN) 4671 column = self._parse_field(any_token=True) 4672 4673 if self._match_pair(TokenType.DROP, TokenType.DEFAULT): 4674 return self.expression(exp.AlterColumn, this=column, drop=True) 4675 if self._match_pair(TokenType.SET, TokenType.DEFAULT): 4676 return self.expression(exp.AlterColumn, this=column, default=self._parse_conjunction()) 4677 4678 self._match_text_seq("SET", "DATA") 4679 return self.expression( 4680 exp.AlterColumn, 4681 this=column, 4682 dtype=self._match_text_seq("TYPE") and self._parse_types(), 4683 collate=self._match(TokenType.COLLATE) and self._parse_term(), 4684 using=self._match(TokenType.USING) and self._parse_conjunction(), 4685 ) 4686 4687 def _parse_alter_table_drop(self) -> t.List[exp.Expression]: 4688 index = self._index - 1 4689 4690 partition_exists = self._parse_exists() 4691 if self._match(TokenType.PARTITION, advance=False): 4692 return self._parse_csv(lambda: self._parse_drop_partition(exists=partition_exists)) 4693 4694 self._retreat(index) 4695 return self._parse_csv(self._parse_drop_column) 4696 4697 def _parse_alter_table_rename(self) -> exp.RenameTable: 4698 self._match_text_seq("TO") 4699 return self.expression(exp.RenameTable, this=self._parse_table(schema=True)) 4700 4701 def _parse_alter(self) -> exp.AlterTable | exp.Command: 4702 start = self._prev 4703 4704 if not self._match(TokenType.TABLE): 4705 return self._parse_as_command(start) 4706 4707 exists = self._parse_exists() 4708 this = self._parse_table(schema=True) 4709 4710 if self._next: 4711 self._advance() 4712 4713 parser = self.ALTER_PARSERS.get(self._prev.text.upper()) if self._prev else None 4714 if parser: 4715 actions = ensure_list(parser(self)) 4716 4717 if not self._curr: 4718 return self.expression( 4719 exp.AlterTable, 
4720 this=this, 4721 exists=exists, 4722 actions=actions, 4723 ) 4724 return self._parse_as_command(start) 4725 4726 def _parse_merge(self) -> exp.Merge: 4727 self._match(TokenType.INTO) 4728 target = self._parse_table() 4729 4730 if target and self._match(TokenType.ALIAS, advance=False): 4731 target.set("alias", self._parse_table_alias()) 4732 4733 self._match(TokenType.USING) 4734 using = self._parse_table() 4735 4736 self._match(TokenType.ON) 4737 on = self._parse_conjunction() 4738 4739 whens = [] 4740 while self._match(TokenType.WHEN): 4741 matched = not self._match(TokenType.NOT) 4742 self._match_text_seq("MATCHED") 4743 source = ( 4744 False 4745 if self._match_text_seq("BY", "TARGET") 4746 else self._match_text_seq("BY", "SOURCE") 4747 ) 4748 condition = self._parse_conjunction() if self._match(TokenType.AND) else None 4749 4750 self._match(TokenType.THEN) 4751 4752 if self._match(TokenType.INSERT): 4753 _this = self._parse_star() 4754 if _this: 4755 then: t.Optional[exp.Expression] = self.expression(exp.Insert, this=_this) 4756 else: 4757 then = self.expression( 4758 exp.Insert, 4759 this=self._parse_value(), 4760 expression=self._match(TokenType.VALUES) and self._parse_value(), 4761 ) 4762 elif self._match(TokenType.UPDATE): 4763 expressions = self._parse_star() 4764 if expressions: 4765 then = self.expression(exp.Update, expressions=expressions) 4766 else: 4767 then = self.expression( 4768 exp.Update, 4769 expressions=self._match(TokenType.SET) 4770 and self._parse_csv(self._parse_equality), 4771 ) 4772 elif self._match(TokenType.DELETE): 4773 then = self.expression(exp.Var, this=self._prev.text) 4774 else: 4775 then = None 4776 4777 whens.append( 4778 self.expression( 4779 exp.When, 4780 matched=matched, 4781 source=source, 4782 condition=condition, 4783 then=then, 4784 ) 4785 ) 4786 4787 return self.expression( 4788 exp.Merge, 4789 this=target, 4790 using=using, 4791 on=on, 4792 expressions=whens, 4793 ) 4794 4795 def _parse_show(self) -> t.Optional[exp.Expression]: 4796 parser = self._find_parser(self.SHOW_PARSERS, self.SHOW_TRIE) 4797 if parser: 4798 return parser(self) 4799 self._advance() 4800 return self.expression(exp.Show, this=self._prev.text.upper()) 4801 4802 def _parse_set_item_assignment( 4803 self, kind: t.Optional[str] = None 4804 ) -> t.Optional[exp.Expression]: 4805 index = self._index 4806 4807 if kind in {"GLOBAL", "SESSION"} and self._match_text_seq("TRANSACTION"): 4808 return self._parse_set_transaction(global_=kind == "GLOBAL") 4809 4810 left = self._parse_primary() or self._parse_id_var() 4811 4812 if not self._match_texts(("=", "TO")): 4813 self._retreat(index) 4814 return None 4815 4816 right = self._parse_statement() or self._parse_id_var() 4817 this = self.expression(exp.EQ, this=left, expression=right) 4818 4819 return self.expression(exp.SetItem, this=this, kind=kind) 4820 4821 def _parse_set_transaction(self, global_: bool = False) -> exp.Expression: 4822 self._match_text_seq("TRANSACTION") 4823 characteristics = self._parse_csv( 4824 lambda: self._parse_var_from_options(self.TRANSACTION_CHARACTERISTICS) 4825 ) 4826 return self.expression( 4827 exp.SetItem, 4828 expressions=characteristics, 4829 kind="TRANSACTION", 4830 **{"global": global_}, # type: ignore 4831 ) 4832 4833 def _parse_set_item(self) -> t.Optional[exp.Expression]: 4834 parser = self._find_parser(self.SET_PARSERS, self.SET_TRIE) 4835 return parser(self) if parser else self._parse_set_item_assignment(kind=None) 4836 4837 def _parse_set(self, unset: bool = False, tag: bool = False) -> 
exp.Set | exp.Command: 4838 index = self._index 4839 set_ = self.expression( 4840 exp.Set, expressions=self._parse_csv(self._parse_set_item), unset=unset, tag=tag 4841 ) 4842 4843 if self._curr: 4844 self._retreat(index) 4845 return self._parse_as_command(self._prev) 4846 4847 return set_ 4848 4849 def _parse_var_from_options(self, options: t.Collection[str]) -> t.Optional[exp.Var]: 4850 for option in options: 4851 if self._match_text_seq(*option.split(" ")): 4852 return exp.var(option) 4853 return None 4854 4855 def _parse_as_command(self, start: Token) -> exp.Command: 4856 while self._curr: 4857 self._advance() 4858 text = self._find_sql(start, self._prev) 4859 size = len(start.text) 4860 return exp.Command(this=text[:size], expression=text[size:]) 4861 4862 def _parse_dict_property(self, this: str) -> exp.DictProperty: 4863 settings = [] 4864 4865 self._match_l_paren() 4866 kind = self._parse_id_var() 4867 4868 if self._match(TokenType.L_PAREN): 4869 while True: 4870 key = self._parse_id_var() 4871 value = self._parse_primary() 4872 4873 if not key and value is None: 4874 break 4875 settings.append(self.expression(exp.DictSubProperty, this=key, value=value)) 4876 self._match(TokenType.R_PAREN) 4877 4878 self._match_r_paren() 4879 4880 return self.expression( 4881 exp.DictProperty, 4882 this=this, 4883 kind=kind.this if kind else None, 4884 settings=settings, 4885 ) 4886 4887 def _parse_dict_range(self, this: str) -> exp.DictRange: 4888 self._match_l_paren() 4889 has_min = self._match_text_seq("MIN") 4890 if has_min: 4891 min = self._parse_var() or self._parse_primary() 4892 self._match_text_seq("MAX") 4893 max = self._parse_var() or self._parse_primary() 4894 else: 4895 max = self._parse_var() or self._parse_primary() 4896 min = exp.Literal.number(0) 4897 self._match_r_paren() 4898 return self.expression(exp.DictRange, this=this, min=min, max=max) 4899 4900 def _find_parser( 4901 self, parsers: t.Dict[str, t.Callable], trie: t.Dict 4902 ) -> t.Optional[t.Callable]: 4903 if not self._curr: 4904 return None 4905 4906 index = self._index 4907 this = [] 4908 while True: 4909 # The current token might be multiple words 4910 curr = self._curr.text.upper() 4911 key = curr.split(" ") 4912 this.append(curr) 4913 4914 self._advance() 4915 result, trie = in_trie(trie, key) 4916 if result == TrieResult.FAILED: 4917 break 4918 4919 if result == TrieResult.EXISTS: 4920 subparser = parsers[" ".join(this)] 4921 return subparser 4922 4923 self._retreat(index) 4924 return None 4925 4926 def _match(self, token_type, advance=True, expression=None): 4927 if not self._curr: 4928 return None 4929 4930 if self._curr.token_type == token_type: 4931 if advance: 4932 self._advance() 4933 self._add_comments(expression) 4934 return True 4935 4936 return None 4937 4938 def _match_set(self, types, advance=True): 4939 if not self._curr: 4940 return None 4941 4942 if self._curr.token_type in types: 4943 if advance: 4944 self._advance() 4945 return True 4946 4947 return None 4948 4949 def _match_pair(self, token_type_a, token_type_b, advance=True): 4950 if not self._curr or not self._next: 4951 return None 4952 4953 if self._curr.token_type == token_type_a and self._next.token_type == token_type_b: 4954 if advance: 4955 self._advance(2) 4956 return True 4957 4958 return None 4959 4960 def _match_l_paren(self, expression: t.Optional[exp.Expression] = None) -> None: 4961 if not self._match(TokenType.L_PAREN, expression=expression): 4962 self.raise_error("Expecting (") 4963 4964 def _match_r_paren(self, expression: 
t.Optional[exp.Expression] = None) -> None: 4965 if not self._match(TokenType.R_PAREN, expression=expression): 4966 self.raise_error("Expecting )") 4967 4968 def _match_texts(self, texts, advance=True): 4969 if self._curr and self._curr.text.upper() in texts: 4970 if advance: 4971 self._advance() 4972 return True 4973 return False 4974 4975 def _match_text_seq(self, *texts, advance=True): 4976 index = self._index 4977 for text in texts: 4978 if self._curr and self._curr.text.upper() == text: 4979 self._advance() 4980 else: 4981 self._retreat(index) 4982 return False 4983 4984 if not advance: 4985 self._retreat(index) 4986 4987 return True 4988 4989 @t.overload 4990 def _replace_columns_with_dots(self, this: exp.Expression) -> exp.Expression: 4991 ... 4992 4993 @t.overload 4994 def _replace_columns_with_dots( 4995 self, this: t.Optional[exp.Expression] 4996 ) -> t.Optional[exp.Expression]: 4997 ... 4998 4999 def _replace_columns_with_dots(self, this): 5000 if isinstance(this, exp.Dot): 5001 exp.replace_children(this, self._replace_columns_with_dots) 5002 elif isinstance(this, exp.Column): 5003 exp.replace_children(this, self._replace_columns_with_dots) 5004 table = this.args.get("table") 5005 this = ( 5006 self.expression(exp.Dot, this=table, expression=this.this) if table else this.this 5007 ) 5008 5009 return this 5010 5011 def _replace_lambda( 5012 self, node: t.Optional[exp.Expression], lambda_variables: t.Set[str] 5013 ) -> t.Optional[exp.Expression]: 5014 if not node: 5015 return node 5016 5017 for column in node.find_all(exp.Column): 5018 if column.parts[0].name in lambda_variables: 5019 dot_or_id = column.to_dot() if column.table else column.this 5020 parent = column.parent 5021 5022 while isinstance(parent, exp.Dot): 5023 if not isinstance(parent.parent, exp.Dot): 5024 parent.replace(dot_or_id) 5025 break 5026 parent = parent.parent 5027 else: 5028 if column is node: 5029 node = dot_or_id 5030 else: 5031 column.replace(dot_or_id) 5032 return node
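The listing above ends the parser's source. One behavior worth calling out is the DECODE rewrite documented in `_parse_decode`: the search/result variant is parsed straight into a CASE expression, with NULL searches turned into IS NULL checks. A minimal sketch of observing this through the public API, assuming DECODE is wired to `_parse_decode` in the parser's function table (the exact output formatting may vary between versions):

```python
import sqlglot

# DECODE(expression, search, result, ..., default) is parsed into a CASE
# expression; a NULL search becomes an IS NULL test, per _parse_decode above.
sql = "SELECT DECODE(x, 1, 'one', NULL, 'none', 'other') FROM t"
print(sqlglot.transpile(sql)[0])
# Roughly: SELECT CASE WHEN x = 1 THEN 'one' WHEN x IS NULL THEN 'none'
#          ELSE 'other' END FROM t
```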
Parser consumes a list of tokens produced by the Tokenizer and produces a parsed syntax tree.
Arguments:
- error_level: The desired error level. Default: ErrorLevel.IMMEDIATE
- error_message_context: Determines the amount of context to capture from a query string when displaying the error message (in number of characters). Default: 100
- max_errors: Maximum number of error messages to include in a raised ParseError. This is only relevant if error_level is ErrorLevel.RAISE. Default: 3
```python
def __init__(
    self,
    error_level: t.Optional[ErrorLevel] = None,
    error_message_context: int = 100,
    max_errors: int = 3,
):
    self.error_level = error_level or ErrorLevel.IMMEDIATE
    self.error_message_context = error_message_context
    self.max_errors = max_errors
    self._tokenizer = self.TOKENIZER_CLASS()
    self.reset()
```
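A minimal usage sketch, pairing the parser with the base Tokenizer from sqlglot.tokens (a dialect would normally supply its own TOKENIZER_CLASS):

```python
from sqlglot.errors import ErrorLevel
from sqlglot.parser import Parser
from sqlglot.tokens import Tokenizer

sql = "SELECT a, b FROM t"
tokens = Tokenizer().tokenize(sql)

# Record errors instead of raising on the first one.
parser = Parser(error_level=ErrorLevel.WARN, max_errors=5)
expressions = parser.parse(tokens, sql=sql)
```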
```python
def parse(
    self, raw_tokens: t.List[Token], sql: t.Optional[str] = None
) -> t.List[t.Optional[exp.Expression]]:
    """
    Parses a list of tokens and returns a list of syntax trees, one tree
    per parsed SQL statement.

    Args:
        raw_tokens: The list of tokens.
        sql: The original SQL string, used to produce helpful debug messages.

    Returns:
        The list of the produced syntax trees.
    """
    return self._parse(
        parse_method=self.__class__._parse_statement, raw_tokens=raw_tokens, sql=sql
    )
```
Parses a list of tokens and returns a list of syntax trees, one tree per parsed SQL statement.
Arguments:
- raw_tokens: The list of tokens.
- sql: The original SQL string, used to produce helpful debug messages.
Returns:
The list of the produced syntax trees.
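Since parsing yields one tree per statement, a two-statement script produces a two-element list; a short sketch:

```python
from sqlglot.parser import Parser
from sqlglot.tokens import Tokenizer

sql = "SELECT 1; SELECT 2"
trees = Parser().parse(Tokenizer().tokenize(sql), sql=sql)
assert len(trees) == 2  # one syntax tree per SQL statement
```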
```python
def parse_into(
    self,
    expression_types: exp.IntoType,
    raw_tokens: t.List[Token],
    sql: t.Optional[str] = None,
) -> t.List[t.Optional[exp.Expression]]:
    """
    Parses a list of tokens into a given Expression type. If a collection of Expression
    types is given instead, this method will try to parse the token list into each one
    of them, stopping at the first for which the parsing succeeds.

    Args:
        expression_types: The expression type(s) to try and parse the token list into.
        raw_tokens: The list of tokens.
        sql: The original SQL string, used to produce helpful debug messages.

    Returns:
        The target Expression.
    """
    errors = []
    for expression_type in ensure_list(expression_types):
        parser = self.EXPRESSION_PARSERS.get(expression_type)
        if not parser:
            raise TypeError(f"No parser registered for {expression_type}")

        try:
            return self._parse(parser, raw_tokens, sql)
        except ParseError as e:
            e.errors[0]["into_expression"] = expression_type
            errors.append(e)

    raise ParseError(
        f"Failed to parse '{sql or raw_tokens}' into {expression_types}",
        errors=merge_errors(errors),
    ) from errors[-1]
```
Parses a list of tokens into a given Expression type. If a collection of Expression types is given instead, this method will try to parse the token list into each one of them, stopping at the first for which the parsing succeeds.
Arguments:
- expression_types: The expression type(s) to try and parse the token list into.
- raw_tokens: The list of tokens.
- sql: The original SQL string, used to produce helpful debug messages.
Returns:
The target Expression.
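A sketch of parse_into, assuming exp.Table is among the keys registered in EXPRESSION_PARSERS (if it is not, the TypeError above is raised instead):

```python
from sqlglot import exp
from sqlglot.parser import Parser
from sqlglot.tokens import Tokenizer

sql = "catalog.db.tbl"
table = Parser().parse_into(exp.Table, Tokenizer().tokenize(sql), sql=sql)[0]
assert isinstance(table, exp.Table)
```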
```python
def check_errors(self) -> None:
    """Logs or raises any found errors, depending on the chosen error level setting."""
    if self.error_level == ErrorLevel.WARN:
        for error in self.errors:
            logger.error(str(error))
    elif self.error_level == ErrorLevel.RAISE and self.errors:
        raise ParseError(
            concat_messages(self.errors, self.max_errors),
            errors=merge_errors(self.errors),
        )
```
Logs or raises any found errors, depending on the chosen error level setting.
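Under ErrorLevel.WARN, problems are recorded rather than raised, and check_errors sends each one to the module-level logger; a short sketch using an unbalanced parenthesis (which trips the "Expecting )" error from _match_r_paren):

```python
from sqlglot.errors import ErrorLevel
from sqlglot.parser import Parser
from sqlglot.tokens import Tokenizer

bad_sql = "SELECT * FROM (SELECT 1"  # missing closing paren
parser = Parser(error_level=ErrorLevel.WARN)
parser.parse(Tokenizer().tokenize(bad_sql), sql=bad_sql)
print(parser.errors)   # the recorded ParseError instances
parser.check_errors()  # under WARN, each is logged via logger.error
```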
```python
def raise_error(self, message: str, token: t.Optional[Token] = None) -> None:
    """
    Appends an error in the list of recorded errors or raises it, depending on the chosen
    error level setting.
    """
    token = token or self._curr or self._prev or Token.string("")
    start = token.start
    end = token.end + 1
    start_context = self.sql[max(start - self.error_message_context, 0) : start]
    highlight = self.sql[start:end]
    end_context = self.sql[end : end + self.error_message_context]

    error = ParseError.new(
        f"{message}. Line {token.line}, Col: {token.col}.\n"
        f"  {start_context}\033[4m{highlight}\033[0m{end_context}",
        description=message,
        line=token.line,
        col=token.col,
        start_context=start_context,
        highlight=highlight,
        end_context=end_context,
    )

    if self.error_level == ErrorLevel.IMMEDIATE:
        raise error

    self.errors.append(error)
```
Appends an error to the list of recorded errors, or raises it immediately, depending on the chosen error level setting.
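Because errors are built through ParseError.new with structured fields, a caller under the default IMMEDIATE level can inspect line, column, and the highlighted slice; a sketch:

```python
from sqlglot.errors import ParseError
from sqlglot.parser import Parser
from sqlglot.tokens import Tokenizer

bad_sql = "SELECT * FROM (SELECT 1"  # missing closing paren
try:
    Parser().parse(Tokenizer().tokenize(bad_sql), sql=bad_sql)
except ParseError as e:
    for err in e.errors:
        print(err["line"], err["col"], err["description"], repr(err["highlight"]))
```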
```python
def expression(
    self, exp_class: t.Type[E], comments: t.Optional[t.List[str]] = None, **kwargs
) -> E:
    """
    Creates a new, validated Expression.

    Args:
        exp_class: The expression class to instantiate.
        comments: An optional list of comments to attach to the expression.
        kwargs: The arguments to set for the expression along with their respective values.

    Returns:
        The target expression.
    """
    instance = exp_class(**kwargs)
    instance.add_comments(comments) if comments else self._add_comments(instance)
    return self.validate_expression(instance)
```
Creates a new, validated Expression.
Arguments:
- exp_class: The expression class to instantiate.
- comments: An optional list of comments to attach to the expression.
- kwargs: The arguments to set for the expression along with their respective values.
Returns:
The target expression.
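This factory is what the _parse_* methods above funnel through: it builds the node, attaches any pending comments, and validates it. A hypothetical sketch of a subclass using it (CLAMP and _parse_clamp are invented for illustration and are not part of sqlglot):

```python
from sqlglot import exp
from sqlglot.parser import Parser

class MyParser(Parser):
    def _parse_clamp(self):
        # Hypothetical: parse CLAMP(x, lo, hi) into an anonymous function node,
        # letting self.expression attach comments and validate the result.
        args = self._parse_csv(self._parse_conjunction)
        return self.expression(exp.Anonymous, this="CLAMP", expressions=args)
```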
```python
def validate_expression(self, expression: E, args: t.Optional[t.List] = None) -> E:
    """
    Validates an Expression, making sure that all its mandatory arguments are set.

    Args:
        expression: The expression to validate.
        args: An optional list of items that was used to instantiate the expression, if it's a Func.

    Returns:
        The validated expression.
    """
    if self.error_level != ErrorLevel.IGNORE:
        for error_message in expression.error_messages(args):
            self.raise_error(error_message)

    return expression
```
Validates an Expression, making sure that all its mandatory arguments are set.
Arguments:
- expression: The expression to validate.
- args: An optional list of items that was used to instantiate the expression, if it's a Func.
Returns:
The validated expression.
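A closing sketch of how validation and the error machinery interact: exp.EQ declares both this and expression as mandatory, so validating a half-built node records an error that check_errors then raises under ErrorLevel.RAISE.

```python
from sqlglot import exp
from sqlglot.errors import ErrorLevel
from sqlglot.parser import Parser

parser = Parser(error_level=ErrorLevel.RAISE)
half_built = exp.EQ(this=exp.column("a"))  # mandatory 'expression' is missing
parser.validate_expression(half_built)     # records the validation error
parser.check_errors()                      # raises ParseError (up to max_errors messages)
```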