sqlglot.dialects.hive
1from __future__ import annotations 2 3import typing as t 4 5from sqlglot import exp, generator, parser, tokens, transforms 6from sqlglot.dialects.dialect import ( 7 Dialect, 8 approx_count_distinct_sql, 9 create_with_partitions_sql, 10 format_time_lambda, 11 if_sql, 12 left_to_substring_sql, 13 locate_to_strposition, 14 max_or_greatest, 15 min_or_least, 16 no_ilike_sql, 17 no_recursive_cte_sql, 18 no_safe_divide_sql, 19 no_trycast_sql, 20 regexp_extract_sql, 21 regexp_replace_sql, 22 rename_func, 23 right_to_substring_sql, 24 strposition_to_locate_sql, 25 struct_extract_sql, 26 time_format, 27 timestrtotime_sql, 28 var_map_sql, 29) 30from sqlglot.helper import seq_get 31from sqlglot.parser import parse_var_map 32from sqlglot.tokens import TokenType 33 34# (FuncType, Multiplier) 35DATE_DELTA_INTERVAL = { 36 "YEAR": ("ADD_MONTHS", 12), 37 "MONTH": ("ADD_MONTHS", 1), 38 "QUARTER": ("ADD_MONTHS", 3), 39 "WEEK": ("DATE_ADD", 7), 40 "DAY": ("DATE_ADD", 1), 41} 42 43TIME_DIFF_FACTOR = { 44 "MILLISECOND": " * 1000", 45 "SECOND": "", 46 "MINUTE": " / 60", 47 "HOUR": " / 3600", 48} 49 50DIFF_MONTH_SWITCH = ("YEAR", "QUARTER", "MONTH") 51 52 53def _add_date_sql(self: generator.Generator, expression: exp.DateAdd | exp.DateSub) -> str: 54 unit = expression.text("unit").upper() 55 func, multiplier = DATE_DELTA_INTERVAL.get(unit, ("DATE_ADD", 1)) 56 57 if isinstance(expression, exp.DateSub): 58 multiplier *= -1 59 60 if expression.expression.is_number: 61 modified_increment = exp.Literal.number(int(expression.text("expression")) * multiplier) 62 else: 63 modified_increment = expression.expression.copy() 64 if multiplier != 1: 65 modified_increment = exp.Mul( # type: ignore 66 this=modified_increment, expression=exp.Literal.number(multiplier) 67 ) 68 69 return self.func(func, expression.this, modified_increment) 70 71 72def _date_diff_sql(self: generator.Generator, expression: exp.DateDiff) -> str: 73 unit = expression.text("unit").upper() 74 75 factor = 
TIME_DIFF_FACTOR.get(unit) 76 if factor is not None: 77 left = self.sql(expression, "this") 78 right = self.sql(expression, "expression") 79 sec_diff = f"UNIX_TIMESTAMP({left}) - UNIX_TIMESTAMP({right})" 80 return f"({sec_diff}){factor}" if factor else sec_diff 81 82 sql_func = "MONTHS_BETWEEN" if unit in DIFF_MONTH_SWITCH else "DATEDIFF" 83 _, multiplier = DATE_DELTA_INTERVAL.get(unit, ("", 1)) 84 multiplier_sql = f" / {multiplier}" if multiplier > 1 else "" 85 diff_sql = f"{sql_func}({self.format_args(expression.this, expression.expression)})" 86 87 return f"{diff_sql}{multiplier_sql}" 88 89 90def _json_format_sql(self: generator.Generator, expression: exp.JSONFormat) -> str: 91 this = expression.this 92 if isinstance(this, exp.Cast) and this.is_type("json") and this.this.is_string: 93 # Since FROM_JSON requires a nested type, we always wrap the json string with 94 # an array to ensure that "naked" strings like "'a'" will be handled correctly 95 wrapped_json = exp.Literal.string(f"[{this.this.name}]") 96 97 from_json = self.func("FROM_JSON", wrapped_json, self.func("SCHEMA_OF_JSON", wrapped_json)) 98 to_json = self.func("TO_JSON", from_json) 99 100 # This strips the [, ] delimiters of the dummy array printed by TO_JSON 101 return self.func("REGEXP_EXTRACT", to_json, "'^.(.*).$'", "1") 102 103 return self.func("TO_JSON", this, expression.args.get("options")) 104 105 106def _array_sort_sql(self: generator.Generator, expression: exp.ArraySort) -> str: 107 if expression.expression: 108 self.unsupported("Hive SORT_ARRAY does not support a comparator") 109 return f"SORT_ARRAY({self.sql(expression, 'this')})" 110 111 112def _property_sql(self: generator.Generator, expression: exp.Property) -> str: 113 return f"'{expression.name}'={self.sql(expression, 'value')}" 114 115 116def _str_to_unix_sql(self: generator.Generator, expression: exp.StrToUnix) -> str: 117 return self.func("UNIX_TIMESTAMP", expression.this, time_format("hive")(self, expression)) 118 119 120def 
_str_to_date_sql(self: generator.Generator, expression: exp.StrToDate) -> str: 121 this = self.sql(expression, "this") 122 time_format = self.format_time(expression) 123 if time_format not in (Hive.TIME_FORMAT, Hive.DATE_FORMAT): 124 this = f"FROM_UNIXTIME(UNIX_TIMESTAMP({this}, {time_format}))" 125 return f"CAST({this} AS DATE)" 126 127 128def _str_to_time_sql(self: generator.Generator, expression: exp.StrToTime) -> str: 129 this = self.sql(expression, "this") 130 time_format = self.format_time(expression) 131 if time_format not in (Hive.TIME_FORMAT, Hive.DATE_FORMAT): 132 this = f"FROM_UNIXTIME(UNIX_TIMESTAMP({this}, {time_format}))" 133 return f"CAST({this} AS TIMESTAMP)" 134 135 136def _time_to_str(self: generator.Generator, expression: exp.TimeToStr) -> str: 137 this = self.sql(expression, "this") 138 time_format = self.format_time(expression) 139 return f"DATE_FORMAT({this}, {time_format})" 140 141 142def _to_date_sql(self: generator.Generator, expression: exp.TsOrDsToDate) -> str: 143 this = self.sql(expression, "this") 144 time_format = self.format_time(expression) 145 if time_format and time_format not in (Hive.TIME_FORMAT, Hive.DATE_FORMAT): 146 return f"TO_DATE({this}, {time_format})" 147 return f"TO_DATE({this})" 148 149 150class Hive(Dialect): 151 ALIAS_POST_TABLESAMPLE = True 152 IDENTIFIERS_CAN_START_WITH_DIGIT = True 153 154 # https://spark.apache.org/docs/latest/sql-ref-identifier.html#description 155 RESOLVES_IDENTIFIERS_AS_UPPERCASE = None 156 157 TIME_MAPPING = { 158 "y": "%Y", 159 "Y": "%Y", 160 "YYYY": "%Y", 161 "yyyy": "%Y", 162 "YY": "%y", 163 "yy": "%y", 164 "MMMM": "%B", 165 "MMM": "%b", 166 "MM": "%m", 167 "M": "%-m", 168 "dd": "%d", 169 "d": "%-d", 170 "HH": "%H", 171 "H": "%-H", 172 "hh": "%I", 173 "h": "%-I", 174 "mm": "%M", 175 "m": "%-M", 176 "ss": "%S", 177 "s": "%-S", 178 "SSSSSS": "%f", 179 "a": "%p", 180 "DD": "%j", 181 "D": "%-j", 182 "E": "%a", 183 "EE": "%a", 184 "EEE": "%a", 185 "EEEE": "%A", 186 } 187 188 DATE_FORMAT = 
"'yyyy-MM-dd'" 189 DATEINT_FORMAT = "'yyyyMMdd'" 190 TIME_FORMAT = "'yyyy-MM-dd HH:mm:ss'" 191 192 class Tokenizer(tokens.Tokenizer): 193 QUOTES = ["'", '"'] 194 IDENTIFIERS = ["`"] 195 STRING_ESCAPES = ["\\"] 196 ENCODE = "utf-8" 197 198 KEYWORDS = { 199 **tokens.Tokenizer.KEYWORDS, 200 "ADD ARCHIVE": TokenType.COMMAND, 201 "ADD ARCHIVES": TokenType.COMMAND, 202 "ADD FILE": TokenType.COMMAND, 203 "ADD FILES": TokenType.COMMAND, 204 "ADD JAR": TokenType.COMMAND, 205 "ADD JARS": TokenType.COMMAND, 206 "MSCK REPAIR": TokenType.COMMAND, 207 "REFRESH": TokenType.COMMAND, 208 "WITH SERDEPROPERTIES": TokenType.SERDE_PROPERTIES, 209 } 210 211 NUMERIC_LITERALS = { 212 "L": "BIGINT", 213 "S": "SMALLINT", 214 "Y": "TINYINT", 215 "D": "DOUBLE", 216 "F": "FLOAT", 217 "BD": "DECIMAL", 218 } 219 220 class Parser(parser.Parser): 221 LOG_DEFAULTS_TO_LN = True 222 STRICT_CAST = False 223 SUPPORTS_USER_DEFINED_TYPES = False 224 225 FUNCTIONS = { 226 **parser.Parser.FUNCTIONS, 227 "BASE64": exp.ToBase64.from_arg_list, 228 "COLLECT_LIST": exp.ArrayAgg.from_arg_list, 229 "COLLECT_SET": exp.SetAgg.from_arg_list, 230 "DATE_ADD": lambda args: exp.TsOrDsAdd( 231 this=seq_get(args, 0), expression=seq_get(args, 1), unit=exp.Literal.string("DAY") 232 ), 233 "DATE_FORMAT": lambda args: format_time_lambda(exp.TimeToStr, "hive")( 234 [ 235 exp.TimeStrToTime(this=seq_get(args, 0)), 236 seq_get(args, 1), 237 ] 238 ), 239 "DATE_SUB": lambda args: exp.TsOrDsAdd( 240 this=seq_get(args, 0), 241 expression=exp.Mul(this=seq_get(args, 1), expression=exp.Literal.number(-1)), 242 unit=exp.Literal.string("DAY"), 243 ), 244 "DATEDIFF": lambda args: exp.DateDiff( 245 this=exp.TsOrDsToDate(this=seq_get(args, 0)), 246 expression=exp.TsOrDsToDate(this=seq_get(args, 1)), 247 ), 248 "DAY": lambda args: exp.Day(this=exp.TsOrDsToDate(this=seq_get(args, 0))), 249 "FROM_UNIXTIME": format_time_lambda(exp.UnixToStr, "hive", True), 250 "GET_JSON_OBJECT": exp.JSONExtractScalar.from_arg_list, 251 "LOCATE": 
locate_to_strposition, 252 "MAP": parse_var_map, 253 "MONTH": lambda args: exp.Month(this=exp.TsOrDsToDate.from_arg_list(args)), 254 "PERCENTILE": exp.Quantile.from_arg_list, 255 "PERCENTILE_APPROX": exp.ApproxQuantile.from_arg_list, 256 "REGEXP_EXTRACT": lambda args: exp.RegexpExtract( 257 this=seq_get(args, 0), expression=seq_get(args, 1), group=seq_get(args, 2) 258 ), 259 "SIZE": exp.ArraySize.from_arg_list, 260 "SPLIT": exp.RegexpSplit.from_arg_list, 261 "STR_TO_MAP": lambda args: exp.StrToMap( 262 this=seq_get(args, 0), 263 pair_delim=seq_get(args, 1) or exp.Literal.string(","), 264 key_value_delim=seq_get(args, 2) or exp.Literal.string(":"), 265 ), 266 "TO_DATE": format_time_lambda(exp.TsOrDsToDate, "hive"), 267 "TO_JSON": exp.JSONFormat.from_arg_list, 268 "UNBASE64": exp.FromBase64.from_arg_list, 269 "UNIX_TIMESTAMP": format_time_lambda(exp.StrToUnix, "hive", True), 270 "YEAR": lambda args: exp.Year(this=exp.TsOrDsToDate.from_arg_list(args)), 271 } 272 273 NO_PAREN_FUNCTION_PARSERS = { 274 **parser.Parser.NO_PAREN_FUNCTION_PARSERS, 275 "TRANSFORM": lambda self: self._parse_transform(), 276 } 277 278 PROPERTY_PARSERS = { 279 **parser.Parser.PROPERTY_PARSERS, 280 "WITH SERDEPROPERTIES": lambda self: exp.SerdeProperties( 281 expressions=self._parse_wrapped_csv(self._parse_property) 282 ), 283 } 284 285 def _parse_transform(self) -> t.Optional[exp.Transform | exp.QueryTransform]: 286 if not self._match(TokenType.L_PAREN, advance=False): 287 self._retreat(self._index - 1) 288 return None 289 290 args = self._parse_wrapped_csv(self._parse_lambda) 291 row_format_before = self._parse_row_format(match_row=True) 292 293 record_writer = None 294 if self._match_text_seq("RECORDWRITER"): 295 record_writer = self._parse_string() 296 297 if not self._match(TokenType.USING): 298 return exp.Transform.from_arg_list(args) 299 300 command_script = self._parse_string() 301 302 self._match(TokenType.ALIAS) 303 schema = self._parse_schema() 304 305 row_format_after = 
self._parse_row_format(match_row=True) 306 record_reader = None 307 if self._match_text_seq("RECORDREADER"): 308 record_reader = self._parse_string() 309 310 return self.expression( 311 exp.QueryTransform, 312 expressions=args, 313 command_script=command_script, 314 schema=schema, 315 row_format_before=row_format_before, 316 record_writer=record_writer, 317 row_format_after=row_format_after, 318 record_reader=record_reader, 319 ) 320 321 def _parse_types( 322 self, check_func: bool = False, schema: bool = False, allow_identifiers: bool = True 323 ) -> t.Optional[exp.Expression]: 324 """ 325 Spark (and most likely Hive) treats casts to CHAR(length) and VARCHAR(length) as casts to 326 STRING in all contexts except for schema definitions. For example, this is in Spark v3.4.0: 327 328 spark-sql (default)> select cast(1234 as varchar(2)); 329 23/06/06 15:51:18 WARN CharVarcharUtils: The Spark cast operator does not support 330 char/varchar type and simply treats them as string type. Please use string type 331 directly to avoid confusion. Otherwise, you can set spark.sql.legacy.charVarcharAsString 332 to true, so that Spark treat them as string type as same as Spark 3.0 and earlier 333 334 1234 335 Time taken: 4.265 seconds, Fetched 1 row(s) 336 337 This shows that Spark doesn't truncate the value into '12', which is inconsistent with 338 what other dialects (e.g. postgres) do, so we need to drop the length to transpile correctly. 
339 340 Reference: https://spark.apache.org/docs/latest/sql-ref-datatypes.html 341 """ 342 this = super()._parse_types( 343 check_func=check_func, schema=schema, allow_identifiers=allow_identifiers 344 ) 345 346 if this and not schema: 347 return this.transform( 348 lambda node: node.replace(exp.DataType.build("text")) 349 if isinstance(node, exp.DataType) and node.is_type("char", "varchar") 350 else node, 351 copy=False, 352 ) 353 354 return this 355 356 def _parse_partition_and_order( 357 self, 358 ) -> t.Tuple[t.List[exp.Expression], t.Optional[exp.Expression]]: 359 return ( 360 self._parse_csv(self._parse_conjunction) 361 if self._match_set({TokenType.PARTITION_BY, TokenType.DISTRIBUTE_BY}) 362 else [], 363 super()._parse_order(skip_order_token=self._match(TokenType.SORT_BY)), 364 ) 365 366 class Generator(generator.Generator): 367 LIMIT_FETCH = "LIMIT" 368 TABLESAMPLE_WITH_METHOD = False 369 TABLESAMPLE_SIZE_IS_PERCENT = True 370 JOIN_HINTS = False 371 TABLE_HINTS = False 372 QUERY_HINTS = False 373 INDEX_ON = "ON TABLE" 374 EXTRACT_ALLOWS_QUOTES = False 375 NVL2_SUPPORTED = False 376 377 TYPE_MAPPING = { 378 **generator.Generator.TYPE_MAPPING, 379 exp.DataType.Type.BIT: "BOOLEAN", 380 exp.DataType.Type.DATETIME: "TIMESTAMP", 381 exp.DataType.Type.TEXT: "STRING", 382 exp.DataType.Type.TIME: "TIMESTAMP", 383 exp.DataType.Type.TIMESTAMPTZ: "TIMESTAMP", 384 exp.DataType.Type.VARBINARY: "BINARY", 385 } 386 387 TRANSFORMS = { 388 **generator.Generator.TRANSFORMS, 389 exp.Group: transforms.preprocess([transforms.unalias_group]), 390 exp.Select: transforms.preprocess( 391 [ 392 transforms.eliminate_qualify, 393 transforms.eliminate_distinct_on, 394 transforms.unnest_to_explode, 395 ] 396 ), 397 exp.Property: _property_sql, 398 exp.AnyValue: rename_func("FIRST"), 399 exp.ApproxDistinct: approx_count_distinct_sql, 400 exp.ArrayConcat: rename_func("CONCAT"), 401 exp.ArrayJoin: lambda self, e: self.func("CONCAT_WS", e.expression, e.this), 402 exp.ArraySize: 
rename_func("SIZE"), 403 exp.ArraySort: _array_sort_sql, 404 exp.With: no_recursive_cte_sql, 405 exp.DateAdd: _add_date_sql, 406 exp.DateDiff: _date_diff_sql, 407 exp.DateStrToDate: rename_func("TO_DATE"), 408 exp.DateSub: _add_date_sql, 409 exp.DateToDi: lambda self, e: f"CAST(DATE_FORMAT({self.sql(e, 'this')}, {Hive.DATEINT_FORMAT}) AS INT)", 410 exp.DiToDate: lambda self, e: f"TO_DATE(CAST({self.sql(e, 'this')} AS STRING), {Hive.DATEINT_FORMAT})", 411 exp.FileFormatProperty: lambda self, e: f"STORED AS {self.sql(e, 'this') if isinstance(e.this, exp.InputOutputFormat) else e.name.upper()}", 412 exp.FromBase64: rename_func("UNBASE64"), 413 exp.If: if_sql, 414 exp.ILike: no_ilike_sql, 415 exp.IsNan: rename_func("ISNAN"), 416 exp.JSONExtract: rename_func("GET_JSON_OBJECT"), 417 exp.JSONExtractScalar: rename_func("GET_JSON_OBJECT"), 418 exp.JSONFormat: _json_format_sql, 419 exp.Left: left_to_substring_sql, 420 exp.Map: var_map_sql, 421 exp.Max: max_or_greatest, 422 exp.MD5Digest: lambda self, e: self.func("UNHEX", self.func("MD5", e.this)), 423 exp.Min: min_or_least, 424 exp.MonthsBetween: lambda self, e: self.func("MONTHS_BETWEEN", e.this, e.expression), 425 exp.NotNullColumnConstraint: lambda self, e: "" 426 if e.args.get("allow_null") 427 else "NOT NULL", 428 exp.VarMap: var_map_sql, 429 exp.Create: create_with_partitions_sql, 430 exp.Quantile: rename_func("PERCENTILE"), 431 exp.ApproxQuantile: rename_func("PERCENTILE_APPROX"), 432 exp.RegexpExtract: regexp_extract_sql, 433 exp.RegexpReplace: regexp_replace_sql, 434 exp.RegexpLike: lambda self, e: self.binary(e, "RLIKE"), 435 exp.RegexpSplit: rename_func("SPLIT"), 436 exp.Right: right_to_substring_sql, 437 exp.SafeDivide: no_safe_divide_sql, 438 exp.SchemaCommentProperty: lambda self, e: self.naked_property(e), 439 exp.SetAgg: rename_func("COLLECT_SET"), 440 exp.Split: lambda self, e: f"SPLIT({self.sql(e, 'this')}, CONCAT('\\\\Q', {self.sql(e, 'expression')}))", 441 exp.StrPosition: strposition_to_locate_sql, 442 
exp.StrToDate: _str_to_date_sql, 443 exp.StrToTime: _str_to_time_sql, 444 exp.StrToUnix: _str_to_unix_sql, 445 exp.StructExtract: struct_extract_sql, 446 exp.TimeStrToDate: rename_func("TO_DATE"), 447 exp.TimeStrToTime: timestrtotime_sql, 448 exp.TimeStrToUnix: rename_func("UNIX_TIMESTAMP"), 449 exp.TimeToStr: _time_to_str, 450 exp.TimeToUnix: rename_func("UNIX_TIMESTAMP"), 451 exp.ToBase64: rename_func("BASE64"), 452 exp.TsOrDiToDi: lambda self, e: f"CAST(SUBSTR(REPLACE(CAST({self.sql(e, 'this')} AS STRING), '-', ''), 1, 8) AS INT)", 453 exp.TsOrDsAdd: lambda self, e: f"DATE_ADD({self.sql(e, 'this')}, {self.sql(e, 'expression')})", 454 exp.TsOrDsToDate: _to_date_sql, 455 exp.TryCast: no_trycast_sql, 456 exp.UnixToStr: lambda self, e: self.func( 457 "FROM_UNIXTIME", e.this, time_format("hive")(self, e) 458 ), 459 exp.UnixToTime: rename_func("FROM_UNIXTIME"), 460 exp.UnixToTimeStr: rename_func("FROM_UNIXTIME"), 461 exp.PartitionedByProperty: lambda self, e: f"PARTITIONED BY {self.sql(e, 'this')}", 462 exp.SerdeProperties: lambda self, e: self.properties(e, prefix="WITH SERDEPROPERTIES"), 463 exp.NumberToStr: rename_func("FORMAT_NUMBER"), 464 exp.LastDateOfMonth: rename_func("LAST_DAY"), 465 exp.National: lambda self, e: self.national_sql(e, prefix=""), 466 } 467 468 PROPERTIES_LOCATION = { 469 **generator.Generator.PROPERTIES_LOCATION, 470 exp.FileFormatProperty: exp.Properties.Location.POST_SCHEMA, 471 exp.PartitionedByProperty: exp.Properties.Location.POST_SCHEMA, 472 exp.VolatileProperty: exp.Properties.Location.UNSUPPORTED, 473 } 474 475 def rowformatserdeproperty_sql(self, expression: exp.RowFormatSerdeProperty) -> str: 476 serde_props = self.sql(expression, "serde_properties") 477 serde_props = f" {serde_props}" if serde_props else "" 478 return f"ROW FORMAT SERDE {self.sql(expression, 'this')}{serde_props}" 479 480 def arrayagg_sql(self, expression: exp.ArrayAgg) -> str: 481 return self.func( 482 "COLLECT_LIST", 483 expression.this.this if 
isinstance(expression.this, exp.Order) else expression.this, 484 ) 485 486 def with_properties(self, properties: exp.Properties) -> str: 487 return self.properties(properties, prefix=self.seg("TBLPROPERTIES")) 488 489 def datatype_sql(self, expression: exp.DataType) -> str: 490 if ( 491 expression.this in (exp.DataType.Type.VARCHAR, exp.DataType.Type.NVARCHAR) 492 and not expression.expressions 493 ): 494 expression = exp.DataType.build("text") 495 elif expression.this in exp.DataType.TEMPORAL_TYPES: 496 expression = exp.DataType.build(expression.this) 497 elif expression.is_type("float"): 498 size_expression = expression.find(exp.DataTypeParam) 499 if size_expression: 500 size = int(size_expression.name) 501 expression = ( 502 exp.DataType.build("float") if size <= 32 else exp.DataType.build("double") 503 ) 504 505 return super().datatype_sql(expression)
DATE_DELTA_INTERVAL =
{'YEAR': ('ADD_MONTHS', 12), 'MONTH': ('ADD_MONTHS', 1), 'QUARTER': ('ADD_MONTHS', 3), 'WEEK': ('DATE_ADD', 7), 'DAY': ('DATE_ADD', 1)}
TIME_DIFF_FACTOR =
{'MILLISECOND': ' * 1000', 'SECOND': '', 'MINUTE': ' / 60', 'HOUR': ' / 3600'}
DIFF_MONTH_SWITCH =
('YEAR', 'QUARTER', 'MONTH')
151class Hive(Dialect): 152 ALIAS_POST_TABLESAMPLE = True 153 IDENTIFIERS_CAN_START_WITH_DIGIT = True 154 155 # https://spark.apache.org/docs/latest/sql-ref-identifier.html#description 156 RESOLVES_IDENTIFIERS_AS_UPPERCASE = None 157 158 TIME_MAPPING = { 159 "y": "%Y", 160 "Y": "%Y", 161 "YYYY": "%Y", 162 "yyyy": "%Y", 163 "YY": "%y", 164 "yy": "%y", 165 "MMMM": "%B", 166 "MMM": "%b", 167 "MM": "%m", 168 "M": "%-m", 169 "dd": "%d", 170 "d": "%-d", 171 "HH": "%H", 172 "H": "%-H", 173 "hh": "%I", 174 "h": "%-I", 175 "mm": "%M", 176 "m": "%-M", 177 "ss": "%S", 178 "s": "%-S", 179 "SSSSSS": "%f", 180 "a": "%p", 181 "DD": "%j", 182 "D": "%-j", 183 "E": "%a", 184 "EE": "%a", 185 "EEE": "%a", 186 "EEEE": "%A", 187 } 188 189 DATE_FORMAT = "'yyyy-MM-dd'" 190 DATEINT_FORMAT = "'yyyyMMdd'" 191 TIME_FORMAT = "'yyyy-MM-dd HH:mm:ss'" 192 193 class Tokenizer(tokens.Tokenizer): 194 QUOTES = ["'", '"'] 195 IDENTIFIERS = ["`"] 196 STRING_ESCAPES = ["\\"] 197 ENCODE = "utf-8" 198 199 KEYWORDS = { 200 **tokens.Tokenizer.KEYWORDS, 201 "ADD ARCHIVE": TokenType.COMMAND, 202 "ADD ARCHIVES": TokenType.COMMAND, 203 "ADD FILE": TokenType.COMMAND, 204 "ADD FILES": TokenType.COMMAND, 205 "ADD JAR": TokenType.COMMAND, 206 "ADD JARS": TokenType.COMMAND, 207 "MSCK REPAIR": TokenType.COMMAND, 208 "REFRESH": TokenType.COMMAND, 209 "WITH SERDEPROPERTIES": TokenType.SERDE_PROPERTIES, 210 } 211 212 NUMERIC_LITERALS = { 213 "L": "BIGINT", 214 "S": "SMALLINT", 215 "Y": "TINYINT", 216 "D": "DOUBLE", 217 "F": "FLOAT", 218 "BD": "DECIMAL", 219 } 220 221 class Parser(parser.Parser): 222 LOG_DEFAULTS_TO_LN = True 223 STRICT_CAST = False 224 SUPPORTS_USER_DEFINED_TYPES = False 225 226 FUNCTIONS = { 227 **parser.Parser.FUNCTIONS, 228 "BASE64": exp.ToBase64.from_arg_list, 229 "COLLECT_LIST": exp.ArrayAgg.from_arg_list, 230 "COLLECT_SET": exp.SetAgg.from_arg_list, 231 "DATE_ADD": lambda args: exp.TsOrDsAdd( 232 this=seq_get(args, 0), expression=seq_get(args, 1), unit=exp.Literal.string("DAY") 233 ), 234 
"DATE_FORMAT": lambda args: format_time_lambda(exp.TimeToStr, "hive")( 235 [ 236 exp.TimeStrToTime(this=seq_get(args, 0)), 237 seq_get(args, 1), 238 ] 239 ), 240 "DATE_SUB": lambda args: exp.TsOrDsAdd( 241 this=seq_get(args, 0), 242 expression=exp.Mul(this=seq_get(args, 1), expression=exp.Literal.number(-1)), 243 unit=exp.Literal.string("DAY"), 244 ), 245 "DATEDIFF": lambda args: exp.DateDiff( 246 this=exp.TsOrDsToDate(this=seq_get(args, 0)), 247 expression=exp.TsOrDsToDate(this=seq_get(args, 1)), 248 ), 249 "DAY": lambda args: exp.Day(this=exp.TsOrDsToDate(this=seq_get(args, 0))), 250 "FROM_UNIXTIME": format_time_lambda(exp.UnixToStr, "hive", True), 251 "GET_JSON_OBJECT": exp.JSONExtractScalar.from_arg_list, 252 "LOCATE": locate_to_strposition, 253 "MAP": parse_var_map, 254 "MONTH": lambda args: exp.Month(this=exp.TsOrDsToDate.from_arg_list(args)), 255 "PERCENTILE": exp.Quantile.from_arg_list, 256 "PERCENTILE_APPROX": exp.ApproxQuantile.from_arg_list, 257 "REGEXP_EXTRACT": lambda args: exp.RegexpExtract( 258 this=seq_get(args, 0), expression=seq_get(args, 1), group=seq_get(args, 2) 259 ), 260 "SIZE": exp.ArraySize.from_arg_list, 261 "SPLIT": exp.RegexpSplit.from_arg_list, 262 "STR_TO_MAP": lambda args: exp.StrToMap( 263 this=seq_get(args, 0), 264 pair_delim=seq_get(args, 1) or exp.Literal.string(","), 265 key_value_delim=seq_get(args, 2) or exp.Literal.string(":"), 266 ), 267 "TO_DATE": format_time_lambda(exp.TsOrDsToDate, "hive"), 268 "TO_JSON": exp.JSONFormat.from_arg_list, 269 "UNBASE64": exp.FromBase64.from_arg_list, 270 "UNIX_TIMESTAMP": format_time_lambda(exp.StrToUnix, "hive", True), 271 "YEAR": lambda args: exp.Year(this=exp.TsOrDsToDate.from_arg_list(args)), 272 } 273 274 NO_PAREN_FUNCTION_PARSERS = { 275 **parser.Parser.NO_PAREN_FUNCTION_PARSERS, 276 "TRANSFORM": lambda self: self._parse_transform(), 277 } 278 279 PROPERTY_PARSERS = { 280 **parser.Parser.PROPERTY_PARSERS, 281 "WITH SERDEPROPERTIES": lambda self: exp.SerdeProperties( 282 
expressions=self._parse_wrapped_csv(self._parse_property) 283 ), 284 } 285 286 def _parse_transform(self) -> t.Optional[exp.Transform | exp.QueryTransform]: 287 if not self._match(TokenType.L_PAREN, advance=False): 288 self._retreat(self._index - 1) 289 return None 290 291 args = self._parse_wrapped_csv(self._parse_lambda) 292 row_format_before = self._parse_row_format(match_row=True) 293 294 record_writer = None 295 if self._match_text_seq("RECORDWRITER"): 296 record_writer = self._parse_string() 297 298 if not self._match(TokenType.USING): 299 return exp.Transform.from_arg_list(args) 300 301 command_script = self._parse_string() 302 303 self._match(TokenType.ALIAS) 304 schema = self._parse_schema() 305 306 row_format_after = self._parse_row_format(match_row=True) 307 record_reader = None 308 if self._match_text_seq("RECORDREADER"): 309 record_reader = self._parse_string() 310 311 return self.expression( 312 exp.QueryTransform, 313 expressions=args, 314 command_script=command_script, 315 schema=schema, 316 row_format_before=row_format_before, 317 record_writer=record_writer, 318 row_format_after=row_format_after, 319 record_reader=record_reader, 320 ) 321 322 def _parse_types( 323 self, check_func: bool = False, schema: bool = False, allow_identifiers: bool = True 324 ) -> t.Optional[exp.Expression]: 325 """ 326 Spark (and most likely Hive) treats casts to CHAR(length) and VARCHAR(length) as casts to 327 STRING in all contexts except for schema definitions. For example, this is in Spark v3.4.0: 328 329 spark-sql (default)> select cast(1234 as varchar(2)); 330 23/06/06 15:51:18 WARN CharVarcharUtils: The Spark cast operator does not support 331 char/varchar type and simply treats them as string type. Please use string type 332 directly to avoid confusion. 
Otherwise, you can set spark.sql.legacy.charVarcharAsString 333 to true, so that Spark treat them as string type as same as Spark 3.0 and earlier 334 335 1234 336 Time taken: 4.265 seconds, Fetched 1 row(s) 337 338 This shows that Spark doesn't truncate the value into '12', which is inconsistent with 339 what other dialects (e.g. postgres) do, so we need to drop the length to transpile correctly. 340 341 Reference: https://spark.apache.org/docs/latest/sql-ref-datatypes.html 342 """ 343 this = super()._parse_types( 344 check_func=check_func, schema=schema, allow_identifiers=allow_identifiers 345 ) 346 347 if this and not schema: 348 return this.transform( 349 lambda node: node.replace(exp.DataType.build("text")) 350 if isinstance(node, exp.DataType) and node.is_type("char", "varchar") 351 else node, 352 copy=False, 353 ) 354 355 return this 356 357 def _parse_partition_and_order( 358 self, 359 ) -> t.Tuple[t.List[exp.Expression], t.Optional[exp.Expression]]: 360 return ( 361 self._parse_csv(self._parse_conjunction) 362 if self._match_set({TokenType.PARTITION_BY, TokenType.DISTRIBUTE_BY}) 363 else [], 364 super()._parse_order(skip_order_token=self._match(TokenType.SORT_BY)), 365 ) 366 367 class Generator(generator.Generator): 368 LIMIT_FETCH = "LIMIT" 369 TABLESAMPLE_WITH_METHOD = False 370 TABLESAMPLE_SIZE_IS_PERCENT = True 371 JOIN_HINTS = False 372 TABLE_HINTS = False 373 QUERY_HINTS = False 374 INDEX_ON = "ON TABLE" 375 EXTRACT_ALLOWS_QUOTES = False 376 NVL2_SUPPORTED = False 377 378 TYPE_MAPPING = { 379 **generator.Generator.TYPE_MAPPING, 380 exp.DataType.Type.BIT: "BOOLEAN", 381 exp.DataType.Type.DATETIME: "TIMESTAMP", 382 exp.DataType.Type.TEXT: "STRING", 383 exp.DataType.Type.TIME: "TIMESTAMP", 384 exp.DataType.Type.TIMESTAMPTZ: "TIMESTAMP", 385 exp.DataType.Type.VARBINARY: "BINARY", 386 } 387 388 TRANSFORMS = { 389 **generator.Generator.TRANSFORMS, 390 exp.Group: transforms.preprocess([transforms.unalias_group]), 391 exp.Select: transforms.preprocess( 392 [ 
393 transforms.eliminate_qualify, 394 transforms.eliminate_distinct_on, 395 transforms.unnest_to_explode, 396 ] 397 ), 398 exp.Property: _property_sql, 399 exp.AnyValue: rename_func("FIRST"), 400 exp.ApproxDistinct: approx_count_distinct_sql, 401 exp.ArrayConcat: rename_func("CONCAT"), 402 exp.ArrayJoin: lambda self, e: self.func("CONCAT_WS", e.expression, e.this), 403 exp.ArraySize: rename_func("SIZE"), 404 exp.ArraySort: _array_sort_sql, 405 exp.With: no_recursive_cte_sql, 406 exp.DateAdd: _add_date_sql, 407 exp.DateDiff: _date_diff_sql, 408 exp.DateStrToDate: rename_func("TO_DATE"), 409 exp.DateSub: _add_date_sql, 410 exp.DateToDi: lambda self, e: f"CAST(DATE_FORMAT({self.sql(e, 'this')}, {Hive.DATEINT_FORMAT}) AS INT)", 411 exp.DiToDate: lambda self, e: f"TO_DATE(CAST({self.sql(e, 'this')} AS STRING), {Hive.DATEINT_FORMAT})", 412 exp.FileFormatProperty: lambda self, e: f"STORED AS {self.sql(e, 'this') if isinstance(e.this, exp.InputOutputFormat) else e.name.upper()}", 413 exp.FromBase64: rename_func("UNBASE64"), 414 exp.If: if_sql, 415 exp.ILike: no_ilike_sql, 416 exp.IsNan: rename_func("ISNAN"), 417 exp.JSONExtract: rename_func("GET_JSON_OBJECT"), 418 exp.JSONExtractScalar: rename_func("GET_JSON_OBJECT"), 419 exp.JSONFormat: _json_format_sql, 420 exp.Left: left_to_substring_sql, 421 exp.Map: var_map_sql, 422 exp.Max: max_or_greatest, 423 exp.MD5Digest: lambda self, e: self.func("UNHEX", self.func("MD5", e.this)), 424 exp.Min: min_or_least, 425 exp.MonthsBetween: lambda self, e: self.func("MONTHS_BETWEEN", e.this, e.expression), 426 exp.NotNullColumnConstraint: lambda self, e: "" 427 if e.args.get("allow_null") 428 else "NOT NULL", 429 exp.VarMap: var_map_sql, 430 exp.Create: create_with_partitions_sql, 431 exp.Quantile: rename_func("PERCENTILE"), 432 exp.ApproxQuantile: rename_func("PERCENTILE_APPROX"), 433 exp.RegexpExtract: regexp_extract_sql, 434 exp.RegexpReplace: regexp_replace_sql, 435 exp.RegexpLike: lambda self, e: self.binary(e, "RLIKE"), 436 
exp.RegexpSplit: rename_func("SPLIT"), 437 exp.Right: right_to_substring_sql, 438 exp.SafeDivide: no_safe_divide_sql, 439 exp.SchemaCommentProperty: lambda self, e: self.naked_property(e), 440 exp.SetAgg: rename_func("COLLECT_SET"), 441 exp.Split: lambda self, e: f"SPLIT({self.sql(e, 'this')}, CONCAT('\\\\Q', {self.sql(e, 'expression')}))", 442 exp.StrPosition: strposition_to_locate_sql, 443 exp.StrToDate: _str_to_date_sql, 444 exp.StrToTime: _str_to_time_sql, 445 exp.StrToUnix: _str_to_unix_sql, 446 exp.StructExtract: struct_extract_sql, 447 exp.TimeStrToDate: rename_func("TO_DATE"), 448 exp.TimeStrToTime: timestrtotime_sql, 449 exp.TimeStrToUnix: rename_func("UNIX_TIMESTAMP"), 450 exp.TimeToStr: _time_to_str, 451 exp.TimeToUnix: rename_func("UNIX_TIMESTAMP"), 452 exp.ToBase64: rename_func("BASE64"), 453 exp.TsOrDiToDi: lambda self, e: f"CAST(SUBSTR(REPLACE(CAST({self.sql(e, 'this')} AS STRING), '-', ''), 1, 8) AS INT)", 454 exp.TsOrDsAdd: lambda self, e: f"DATE_ADD({self.sql(e, 'this')}, {self.sql(e, 'expression')})", 455 exp.TsOrDsToDate: _to_date_sql, 456 exp.TryCast: no_trycast_sql, 457 exp.UnixToStr: lambda self, e: self.func( 458 "FROM_UNIXTIME", e.this, time_format("hive")(self, e) 459 ), 460 exp.UnixToTime: rename_func("FROM_UNIXTIME"), 461 exp.UnixToTimeStr: rename_func("FROM_UNIXTIME"), 462 exp.PartitionedByProperty: lambda self, e: f"PARTITIONED BY {self.sql(e, 'this')}", 463 exp.SerdeProperties: lambda self, e: self.properties(e, prefix="WITH SERDEPROPERTIES"), 464 exp.NumberToStr: rename_func("FORMAT_NUMBER"), 465 exp.LastDateOfMonth: rename_func("LAST_DAY"), 466 exp.National: lambda self, e: self.national_sql(e, prefix=""), 467 } 468 469 PROPERTIES_LOCATION = { 470 **generator.Generator.PROPERTIES_LOCATION, 471 exp.FileFormatProperty: exp.Properties.Location.POST_SCHEMA, 472 exp.PartitionedByProperty: exp.Properties.Location.POST_SCHEMA, 473 exp.VolatileProperty: exp.Properties.Location.UNSUPPORTED, 474 } 475 476 def rowformatserdeproperty_sql(self, 
expression: exp.RowFormatSerdeProperty) -> str: 477 serde_props = self.sql(expression, "serde_properties") 478 serde_props = f" {serde_props}" if serde_props else "" 479 return f"ROW FORMAT SERDE {self.sql(expression, 'this')}{serde_props}" 480 481 def arrayagg_sql(self, expression: exp.ArrayAgg) -> str: 482 return self.func( 483 "COLLECT_LIST", 484 expression.this.this if isinstance(expression.this, exp.Order) else expression.this, 485 ) 486 487 def with_properties(self, properties: exp.Properties) -> str: 488 return self.properties(properties, prefix=self.seg("TBLPROPERTIES")) 489 490 def datatype_sql(self, expression: exp.DataType) -> str: 491 if ( 492 expression.this in (exp.DataType.Type.VARCHAR, exp.DataType.Type.NVARCHAR) 493 and not expression.expressions 494 ): 495 expression = exp.DataType.build("text") 496 elif expression.this in exp.DataType.TEMPORAL_TYPES: 497 expression = exp.DataType.build(expression.this) 498 elif expression.is_type("float"): 499 size_expression = expression.find(exp.DataTypeParam) 500 if size_expression: 501 size = int(size_expression.name) 502 expression = ( 503 exp.DataType.build("float") if size <= 32 else exp.DataType.build("double") 504 ) 505 506 return super().datatype_sql(expression)
TIME_MAPPING: Dict[str, str] =
{'y': '%Y', 'Y': '%Y', 'YYYY': '%Y', 'yyyy': '%Y', 'YY': '%y', 'yy': '%y', 'MMMM': '%B', 'MMM': '%b', 'MM': '%m', 'M': '%-m', 'dd': '%d', 'd': '%-d', 'HH': '%H', 'H': '%-H', 'hh': '%I', 'h': '%-I', 'mm': '%M', 'm': '%-M', 'ss': '%S', 's': '%-S', 'SSSSSS': '%f', 'a': '%p', 'DD': '%j', 'D': '%-j', 'E': '%a', 'EE': '%a', 'EEE': '%a', 'EEEE': '%A'}
tokenizer_class =
<class 'sqlglot.dialects.hive.Hive.Tokenizer'>
parser_class =
<class 'sqlglot.dialects.hive.Hive.Parser'>
generator_class =
<class 'sqlglot.dialects.hive.Hive.Generator'>
TIME_TRIE: Dict =
{'y': {0: True, 'y': {'y': {'y': {0: True}}, 0: True}}, 'Y': {0: True, 'Y': {'Y': {'Y': {0: True}}, 0: True}}, 'M': {'M': {'M': {'M': {0: True}, 0: True}, 0: True}, 0: True}, 'd': {'d': {0: True}, 0: True}, 'H': {'H': {0: True}, 0: True}, 'h': {'h': {0: True}, 0: True}, 'm': {'m': {0: True}, 0: True}, 's': {'s': {0: True}, 0: True}, 'S': {'S': {'S': {'S': {'S': {'S': {0: True}}}}}}, 'a': {0: True}, 'D': {'D': {0: True}, 0: True}, 'E': {0: True, 'E': {0: True, 'E': {0: True, 'E': {0: True}}}}}
FORMAT_TRIE: Dict =
{'y': {0: True, 'y': {'y': {'y': {0: True}}, 0: True}}, 'Y': {0: True, 'Y': {'Y': {'Y': {0: True}}, 0: True}}, 'M': {'M': {'M': {'M': {0: True}, 0: True}, 0: True}, 0: True}, 'd': {'d': {0: True}, 0: True}, 'H': {'H': {0: True}, 0: True}, 'h': {'h': {0: True}, 0: True}, 'm': {'m': {0: True}, 0: True}, 's': {'s': {0: True}, 0: True}, 'S': {'S': {'S': {'S': {'S': {'S': {0: True}}}}}}, 'a': {0: True}, 'D': {'D': {0: True}, 0: True}, 'E': {0: True, 'E': {0: True, 'E': {0: True, 'E': {0: True}}}}}
INVERSE_TIME_MAPPING: Dict[str, str] =
{'%Y': 'yyyy', '%y': 'yy', '%B': 'MMMM', '%b': 'MMM', '%m': 'MM', '%-m': 'M', '%d': 'dd', '%-d': 'd', '%H': 'HH', '%-H': 'H', '%I': 'hh', '%-I': 'h', '%M': 'mm', '%-M': 'm', '%S': 'ss', '%-S': 's', '%f': 'SSSSSS', '%p': 'a', '%j': 'DD', '%-j': 'D', '%a': 'EEE', '%A': 'EEEE'}
INVERSE_TIME_TRIE: Dict =
{'%': {'Y': {0: True}, 'y': {0: True}, 'B': {0: True}, 'b': {0: True}, 'm': {0: True}, '-': {'m': {0: True}, 'd': {0: True}, 'H': {0: True}, 'I': {0: True}, 'M': {0: True}, 'S': {0: True}, 'j': {0: True}}, 'd': {0: True}, 'H': {0: True}, 'I': {0: True}, 'M': {0: True}, 'S': {0: True}, 'f': {0: True}, 'p': {0: True}, 'j': {0: True}, 'a': {0: True}, 'A': {0: True}}}
Inherited Members
- sqlglot.dialects.dialect.Dialect
- INDEX_OFFSET
- UNNEST_COLUMN_ONLY
- DPIPE_IS_STRING_CONCAT
- STRICT_STRING_CONCAT
- NORMALIZE_FUNCTIONS
- NULL_ORDERING
- FORMAT_MAPPING
- PSEUDOCOLUMNS
- get_or_raise
- format_time
- normalize_identifier
- case_sensitive
- can_identify
- quote_identifier
- parse
- parse_into
- generate
- transpile
- tokenize
- tokenizer
- parser
- generator
    class Tokenizer(tokens.Tokenizer):
        """Tokenizer settings for HiveQL.

        Hive accepts both single and double quotes for string literals,
        backticks for quoted identifiers, and backslash as the escape
        character inside strings.
        """

        QUOTES = ["'", '"']
        IDENTIFIERS = ["`"]
        STRING_ESCAPES = ["\\"]
        # Encoding applied to string literals when tokenizing
        ENCODE = "utf-8"

        KEYWORDS = {
            **tokens.Tokenizer.KEYWORDS,
            # Administrative statements are tokenized as opaque COMMANDs so the
            # parser passes them through without trying to build a syntax tree.
            "ADD ARCHIVE": TokenType.COMMAND,
            "ADD ARCHIVES": TokenType.COMMAND,
            "ADD FILE": TokenType.COMMAND,
            "ADD FILES": TokenType.COMMAND,
            "ADD JAR": TokenType.COMMAND,
            "ADD JARS": TokenType.COMMAND,
            "MSCK REPAIR": TokenType.COMMAND,
            "REFRESH": TokenType.COMMAND,
            # Parsed specially as table serde properties, not as a command
            "WITH SERDEPROPERTIES": TokenType.SERDE_PROPERTIES,
        }

        # Hive numeric literal suffixes and the type they denote, e.g. 10L is a
        # BIGINT literal and 1.5BD is a DECIMAL literal.
        NUMERIC_LITERALS = {
            "L": "BIGINT",
            "S": "SMALLINT",
            "Y": "TINYINT",
            "D": "DOUBLE",
            "F": "FLOAT",
            "BD": "DECIMAL",
        }
KEYWORDS =
{'{%': <TokenType.BLOCK_START: 'BLOCK_START'>, '{%+': <TokenType.BLOCK_START: 'BLOCK_START'>, '{%-': <TokenType.BLOCK_START: 'BLOCK_START'>, '%}': <TokenType.BLOCK_END: 'BLOCK_END'>, '+%}': <TokenType.BLOCK_END: 'BLOCK_END'>, '-%}': <TokenType.BLOCK_END: 'BLOCK_END'>, '{{+': <TokenType.BLOCK_START: 'BLOCK_START'>, '{{-': <TokenType.BLOCK_START: 'BLOCK_START'>, '+}}': <TokenType.BLOCK_END: 'BLOCK_END'>, '-}}': <TokenType.BLOCK_END: 'BLOCK_END'>, '/*+': <TokenType.HINT: 'HINT'>, '==': <TokenType.EQ: 'EQ'>, '::': <TokenType.DCOLON: 'DCOLON'>, '||': <TokenType.DPIPE: 'DPIPE'>, '>=': <TokenType.GTE: 'GTE'>, '<=': <TokenType.LTE: 'LTE'>, '<>': <TokenType.NEQ: 'NEQ'>, '!=': <TokenType.NEQ: 'NEQ'>, '<=>': <TokenType.NULLSAFE_EQ: 'NULLSAFE_EQ'>, '->': <TokenType.ARROW: 'ARROW'>, '->>': <TokenType.DARROW: 'DARROW'>, '=>': <TokenType.FARROW: 'FARROW'>, '#>': <TokenType.HASH_ARROW: 'HASH_ARROW'>, '#>>': <TokenType.DHASH_ARROW: 'DHASH_ARROW'>, '<->': <TokenType.LR_ARROW: 'LR_ARROW'>, '&&': <TokenType.DAMP: 'DAMP'>, '??': <TokenType.DQMARK: 'DQMARK'>, 'ALL': <TokenType.ALL: 'ALL'>, 'ALWAYS': <TokenType.ALWAYS: 'ALWAYS'>, 'AND': <TokenType.AND: 'AND'>, 'ANTI': <TokenType.ANTI: 'ANTI'>, 'ANY': <TokenType.ANY: 'ANY'>, 'ASC': <TokenType.ASC: 'ASC'>, 'AS': <TokenType.ALIAS: 'ALIAS'>, 'ASOF': <TokenType.ASOF: 'ASOF'>, 'AUTOINCREMENT': <TokenType.AUTO_INCREMENT: 'AUTO_INCREMENT'>, 'AUTO_INCREMENT': <TokenType.AUTO_INCREMENT: 'AUTO_INCREMENT'>, 'BEGIN': <TokenType.BEGIN: 'BEGIN'>, 'BETWEEN': <TokenType.BETWEEN: 'BETWEEN'>, 'CACHE': <TokenType.CACHE: 'CACHE'>, 'UNCACHE': <TokenType.UNCACHE: 'UNCACHE'>, 'CASE': <TokenType.CASE: 'CASE'>, 'CHARACTER SET': <TokenType.CHARACTER_SET: 'CHARACTER_SET'>, 'CLUSTER BY': <TokenType.CLUSTER_BY: 'CLUSTER_BY'>, 'COLLATE': <TokenType.COLLATE: 'COLLATE'>, 'COLUMN': <TokenType.COLUMN: 'COLUMN'>, 'COMMIT': <TokenType.COMMIT: 'COMMIT'>, 'CONNECT BY': <TokenType.CONNECT_BY: 'CONNECT_BY'>, 'CONSTRAINT': <TokenType.CONSTRAINT: 'CONSTRAINT'>, 'CREATE': 
<TokenType.CREATE: 'CREATE'>, 'CROSS': <TokenType.CROSS: 'CROSS'>, 'CUBE': <TokenType.CUBE: 'CUBE'>, 'CURRENT_DATE': <TokenType.CURRENT_DATE: 'CURRENT_DATE'>, 'CURRENT_TIME': <TokenType.CURRENT_TIME: 'CURRENT_TIME'>, 'CURRENT_TIMESTAMP': <TokenType.CURRENT_TIMESTAMP: 'CURRENT_TIMESTAMP'>, 'CURRENT_USER': <TokenType.CURRENT_USER: 'CURRENT_USER'>, 'DATABASE': <TokenType.DATABASE: 'DATABASE'>, 'DEFAULT': <TokenType.DEFAULT: 'DEFAULT'>, 'DELETE': <TokenType.DELETE: 'DELETE'>, 'DESC': <TokenType.DESC: 'DESC'>, 'DESCRIBE': <TokenType.DESCRIBE: 'DESCRIBE'>, 'DISTINCT': <TokenType.DISTINCT: 'DISTINCT'>, 'DISTRIBUTE BY': <TokenType.DISTRIBUTE_BY: 'DISTRIBUTE_BY'>, 'DIV': <TokenType.DIV: 'DIV'>, 'DROP': <TokenType.DROP: 'DROP'>, 'ELSE': <TokenType.ELSE: 'ELSE'>, 'END': <TokenType.END: 'END'>, 'ESCAPE': <TokenType.ESCAPE: 'ESCAPE'>, 'EXCEPT': <TokenType.EXCEPT: 'EXCEPT'>, 'EXECUTE': <TokenType.EXECUTE: 'EXECUTE'>, 'EXISTS': <TokenType.EXISTS: 'EXISTS'>, 'FALSE': <TokenType.FALSE: 'FALSE'>, 'FETCH': <TokenType.FETCH: 'FETCH'>, 'FILTER': <TokenType.FILTER: 'FILTER'>, 'FIRST': <TokenType.FIRST: 'FIRST'>, 'FULL': <TokenType.FULL: 'FULL'>, 'FUNCTION': <TokenType.FUNCTION: 'FUNCTION'>, 'FOR': <TokenType.FOR: 'FOR'>, 'FOREIGN KEY': <TokenType.FOREIGN_KEY: 'FOREIGN_KEY'>, 'FORMAT': <TokenType.FORMAT: 'FORMAT'>, 'FROM': <TokenType.FROM: 'FROM'>, 'GEOGRAPHY': <TokenType.GEOGRAPHY: 'GEOGRAPHY'>, 'GEOMETRY': <TokenType.GEOMETRY: 'GEOMETRY'>, 'GLOB': <TokenType.GLOB: 'GLOB'>, 'GROUP BY': <TokenType.GROUP_BY: 'GROUP_BY'>, 'GROUPING SETS': <TokenType.GROUPING_SETS: 'GROUPING_SETS'>, 'HAVING': <TokenType.HAVING: 'HAVING'>, 'ILIKE': <TokenType.ILIKE: 'ILIKE'>, 'IN': <TokenType.IN: 'IN'>, 'INDEX': <TokenType.INDEX: 'INDEX'>, 'INET': <TokenType.INET: 'INET'>, 'INNER': <TokenType.INNER: 'INNER'>, 'INSERT': <TokenType.INSERT: 'INSERT'>, 'INTERVAL': <TokenType.INTERVAL: 'INTERVAL'>, 'INTERSECT': <TokenType.INTERSECT: 'INTERSECT'>, 'INTO': <TokenType.INTO: 'INTO'>, 'IS': <TokenType.IS: 'IS'>, 
'ISNULL': <TokenType.ISNULL: 'ISNULL'>, 'JOIN': <TokenType.JOIN: 'JOIN'>, 'KEEP': <TokenType.KEEP: 'KEEP'>, 'LATERAL': <TokenType.LATERAL: 'LATERAL'>, 'LEFT': <TokenType.LEFT: 'LEFT'>, 'LIKE': <TokenType.LIKE: 'LIKE'>, 'LIMIT': <TokenType.LIMIT: 'LIMIT'>, 'LOAD': <TokenType.LOAD: 'LOAD'>, 'LOCK': <TokenType.LOCK: 'LOCK'>, 'MERGE': <TokenType.MERGE: 'MERGE'>, 'NATURAL': <TokenType.NATURAL: 'NATURAL'>, 'NEXT': <TokenType.NEXT: 'NEXT'>, 'NOT': <TokenType.NOT: 'NOT'>, 'NOTNULL': <TokenType.NOTNULL: 'NOTNULL'>, 'NULL': <TokenType.NULL: 'NULL'>, 'OBJECT': <TokenType.OBJECT: 'OBJECT'>, 'OFFSET': <TokenType.OFFSET: 'OFFSET'>, 'ON': <TokenType.ON: 'ON'>, 'OR': <TokenType.OR: 'OR'>, 'XOR': <TokenType.XOR: 'XOR'>, 'ORDER BY': <TokenType.ORDER_BY: 'ORDER_BY'>, 'ORDINALITY': <TokenType.ORDINALITY: 'ORDINALITY'>, 'OUTER': <TokenType.OUTER: 'OUTER'>, 'OVER': <TokenType.OVER: 'OVER'>, 'OVERLAPS': <TokenType.OVERLAPS: 'OVERLAPS'>, 'OVERWRITE': <TokenType.OVERWRITE: 'OVERWRITE'>, 'PARTITION': <TokenType.PARTITION: 'PARTITION'>, 'PARTITION BY': <TokenType.PARTITION_BY: 'PARTITION_BY'>, 'PARTITIONED BY': <TokenType.PARTITION_BY: 'PARTITION_BY'>, 'PARTITIONED_BY': <TokenType.PARTITION_BY: 'PARTITION_BY'>, 'PERCENT': <TokenType.PERCENT: 'PERCENT'>, 'PIVOT': <TokenType.PIVOT: 'PIVOT'>, 'PRAGMA': <TokenType.PRAGMA: 'PRAGMA'>, 'PRIMARY KEY': <TokenType.PRIMARY_KEY: 'PRIMARY_KEY'>, 'PROCEDURE': <TokenType.PROCEDURE: 'PROCEDURE'>, 'QUALIFY': <TokenType.QUALIFY: 'QUALIFY'>, 'RANGE': <TokenType.RANGE: 'RANGE'>, 'RECURSIVE': <TokenType.RECURSIVE: 'RECURSIVE'>, 'REGEXP': <TokenType.RLIKE: 'RLIKE'>, 'REPLACE': <TokenType.REPLACE: 'REPLACE'>, 'RETURNING': <TokenType.RETURNING: 'RETURNING'>, 'REFERENCES': <TokenType.REFERENCES: 'REFERENCES'>, 'RIGHT': <TokenType.RIGHT: 'RIGHT'>, 'RLIKE': <TokenType.RLIKE: 'RLIKE'>, 'ROLLBACK': <TokenType.ROLLBACK: 'ROLLBACK'>, 'ROLLUP': <TokenType.ROLLUP: 'ROLLUP'>, 'ROW': <TokenType.ROW: 'ROW'>, 'ROWS': <TokenType.ROWS: 'ROWS'>, 'SCHEMA': <TokenType.SCHEMA: 
'SCHEMA'>, 'SELECT': <TokenType.SELECT: 'SELECT'>, 'SEMI': <TokenType.SEMI: 'SEMI'>, 'SET': <TokenType.SET: 'SET'>, 'SETTINGS': <TokenType.SETTINGS: 'SETTINGS'>, 'SHOW': <TokenType.SHOW: 'SHOW'>, 'SIMILAR TO': <TokenType.SIMILAR_TO: 'SIMILAR_TO'>, 'SOME': <TokenType.SOME: 'SOME'>, 'SORT BY': <TokenType.SORT_BY: 'SORT_BY'>, 'START WITH': <TokenType.START_WITH: 'START_WITH'>, 'TABLE': <TokenType.TABLE: 'TABLE'>, 'TABLESAMPLE': <TokenType.TABLE_SAMPLE: 'TABLE_SAMPLE'>, 'TEMP': <TokenType.TEMPORARY: 'TEMPORARY'>, 'TEMPORARY': <TokenType.TEMPORARY: 'TEMPORARY'>, 'THEN': <TokenType.THEN: 'THEN'>, 'TRUE': <TokenType.TRUE: 'TRUE'>, 'UNION': <TokenType.UNION: 'UNION'>, 'UNKNOWN': <TokenType.UNKNOWN: 'UNKNOWN'>, 'UNNEST': <TokenType.UNNEST: 'UNNEST'>, 'UNPIVOT': <TokenType.UNPIVOT: 'UNPIVOT'>, 'UPDATE': <TokenType.UPDATE: 'UPDATE'>, 'USE': <TokenType.USE: 'USE'>, 'USING': <TokenType.USING: 'USING'>, 'UUID': <TokenType.UUID: 'UUID'>, 'VALUES': <TokenType.VALUES: 'VALUES'>, 'VIEW': <TokenType.VIEW: 'VIEW'>, 'VOLATILE': <TokenType.VOLATILE: 'VOLATILE'>, 'WHEN': <TokenType.WHEN: 'WHEN'>, 'WHERE': <TokenType.WHERE: 'WHERE'>, 'WINDOW': <TokenType.WINDOW: 'WINDOW'>, 'WITH': <TokenType.WITH: 'WITH'>, 'APPLY': <TokenType.APPLY: 'APPLY'>, 'ARRAY': <TokenType.ARRAY: 'ARRAY'>, 'BIT': <TokenType.BIT: 'BIT'>, 'BOOL': <TokenType.BOOLEAN: 'BOOLEAN'>, 'BOOLEAN': <TokenType.BOOLEAN: 'BOOLEAN'>, 'BYTE': <TokenType.TINYINT: 'TINYINT'>, 'TINYINT': <TokenType.TINYINT: 'TINYINT'>, 'SHORT': <TokenType.SMALLINT: 'SMALLINT'>, 'SMALLINT': <TokenType.SMALLINT: 'SMALLINT'>, 'INT128': <TokenType.INT128: 'INT128'>, 'INT2': <TokenType.SMALLINT: 'SMALLINT'>, 'INTEGER': <TokenType.INT: 'INT'>, 'INT': <TokenType.INT: 'INT'>, 'INT4': <TokenType.INT: 'INT'>, 'LONG': <TokenType.BIGINT: 'BIGINT'>, 'BIGINT': <TokenType.BIGINT: 'BIGINT'>, 'INT8': <TokenType.BIGINT: 'BIGINT'>, 'DEC': <TokenType.DECIMAL: 'DECIMAL'>, 'DECIMAL': <TokenType.DECIMAL: 'DECIMAL'>, 'BIGDECIMAL': <TokenType.BIGDECIMAL: 'BIGDECIMAL'>, 
'BIGNUMERIC': <TokenType.BIGDECIMAL: 'BIGDECIMAL'>, 'MAP': <TokenType.MAP: 'MAP'>, 'NULLABLE': <TokenType.NULLABLE: 'NULLABLE'>, 'NUMBER': <TokenType.DECIMAL: 'DECIMAL'>, 'NUMERIC': <TokenType.DECIMAL: 'DECIMAL'>, 'FIXED': <TokenType.DECIMAL: 'DECIMAL'>, 'REAL': <TokenType.FLOAT: 'FLOAT'>, 'FLOAT': <TokenType.FLOAT: 'FLOAT'>, 'FLOAT4': <TokenType.FLOAT: 'FLOAT'>, 'FLOAT8': <TokenType.DOUBLE: 'DOUBLE'>, 'DOUBLE': <TokenType.DOUBLE: 'DOUBLE'>, 'DOUBLE PRECISION': <TokenType.DOUBLE: 'DOUBLE'>, 'JSON': <TokenType.JSON: 'JSON'>, 'CHAR': <TokenType.CHAR: 'CHAR'>, 'CHARACTER': <TokenType.CHAR: 'CHAR'>, 'NCHAR': <TokenType.NCHAR: 'NCHAR'>, 'VARCHAR': <TokenType.VARCHAR: 'VARCHAR'>, 'VARCHAR2': <TokenType.VARCHAR: 'VARCHAR'>, 'NVARCHAR': <TokenType.NVARCHAR: 'NVARCHAR'>, 'NVARCHAR2': <TokenType.NVARCHAR: 'NVARCHAR'>, 'STR': <TokenType.TEXT: 'TEXT'>, 'STRING': <TokenType.TEXT: 'TEXT'>, 'TEXT': <TokenType.TEXT: 'TEXT'>, 'CLOB': <TokenType.TEXT: 'TEXT'>, 'LONGVARCHAR': <TokenType.TEXT: 'TEXT'>, 'BINARY': <TokenType.BINARY: 'BINARY'>, 'BLOB': <TokenType.VARBINARY: 'VARBINARY'>, 'BYTEA': <TokenType.VARBINARY: 'VARBINARY'>, 'VARBINARY': <TokenType.VARBINARY: 'VARBINARY'>, 'TIME': <TokenType.TIME: 'TIME'>, 'TIMETZ': <TokenType.TIMETZ: 'TIMETZ'>, 'TIMESTAMP': <TokenType.TIMESTAMP: 'TIMESTAMP'>, 'TIMESTAMPTZ': <TokenType.TIMESTAMPTZ: 'TIMESTAMPTZ'>, 'TIMESTAMPLTZ': <TokenType.TIMESTAMPLTZ: 'TIMESTAMPLTZ'>, 'DATE': <TokenType.DATE: 'DATE'>, 'DATETIME': <TokenType.DATETIME: 'DATETIME'>, 'INT4RANGE': <TokenType.INT4RANGE: 'INT4RANGE'>, 'INT4MULTIRANGE': <TokenType.INT4MULTIRANGE: 'INT4MULTIRANGE'>, 'INT8RANGE': <TokenType.INT8RANGE: 'INT8RANGE'>, 'INT8MULTIRANGE': <TokenType.INT8MULTIRANGE: 'INT8MULTIRANGE'>, 'NUMRANGE': <TokenType.NUMRANGE: 'NUMRANGE'>, 'NUMMULTIRANGE': <TokenType.NUMMULTIRANGE: 'NUMMULTIRANGE'>, 'TSRANGE': <TokenType.TSRANGE: 'TSRANGE'>, 'TSMULTIRANGE': <TokenType.TSMULTIRANGE: 'TSMULTIRANGE'>, 'TSTZRANGE': <TokenType.TSTZRANGE: 'TSTZRANGE'>, 'TSTZMULTIRANGE': 
<TokenType.TSTZMULTIRANGE: 'TSTZMULTIRANGE'>, 'DATERANGE': <TokenType.DATERANGE: 'DATERANGE'>, 'DATEMULTIRANGE': <TokenType.DATEMULTIRANGE: 'DATEMULTIRANGE'>, 'UNIQUE': <TokenType.UNIQUE: 'UNIQUE'>, 'STRUCT': <TokenType.STRUCT: 'STRUCT'>, 'VARIANT': <TokenType.VARIANT: 'VARIANT'>, 'ALTER': <TokenType.ALTER: 'ALTER'>, 'ANALYZE': <TokenType.COMMAND: 'COMMAND'>, 'CALL': <TokenType.COMMAND: 'COMMAND'>, 'COMMENT': <TokenType.COMMENT: 'COMMENT'>, 'COPY': <TokenType.COMMAND: 'COMMAND'>, 'EXPLAIN': <TokenType.COMMAND: 'COMMAND'>, 'GRANT': <TokenType.COMMAND: 'COMMAND'>, 'OPTIMIZE': <TokenType.COMMAND: 'COMMAND'>, 'PREPARE': <TokenType.COMMAND: 'COMMAND'>, 'TRUNCATE': <TokenType.COMMAND: 'COMMAND'>, 'VACUUM': <TokenType.COMMAND: 'COMMAND'>, 'USER-DEFINED': <TokenType.USERDEFINED: 'USERDEFINED'>, 'ADD ARCHIVE': <TokenType.COMMAND: 'COMMAND'>, 'ADD ARCHIVES': <TokenType.COMMAND: 'COMMAND'>, 'ADD FILE': <TokenType.COMMAND: 'COMMAND'>, 'ADD FILES': <TokenType.COMMAND: 'COMMAND'>, 'ADD JAR': <TokenType.COMMAND: 'COMMAND'>, 'ADD JARS': <TokenType.COMMAND: 'COMMAND'>, 'MSCK REPAIR': <TokenType.COMMAND: 'COMMAND'>, 'REFRESH': <TokenType.COMMAND: 'COMMAND'>, 'WITH SERDEPROPERTIES': <TokenType.SERDE_PROPERTIES: 'SERDE_PROPERTIES'>}
    class Parser(parser.Parser):
        """Parser settings for HiveQL.

        Maps Hive builtin function names onto sqlglot expression nodes and
        parses Hive-only constructs such as ``TRANSFORM ... USING`` and
        ``WITH SERDEPROPERTIES``.
        """

        # Single-argument LOG(...) is treated as the natural logarithm
        LOG_DEFAULTS_TO_LN = True
        # CAST is not treated as strict when transpiling from Hive
        STRICT_CAST = False
        SUPPORTS_USER_DEFINED_TYPES = False

        FUNCTIONS = {
            **parser.Parser.FUNCTIONS,
            "BASE64": exp.ToBase64.from_arg_list,
            "COLLECT_LIST": exp.ArrayAgg.from_arg_list,
            "COLLECT_SET": exp.SetAgg.from_arg_list,
            # Hive's DATE_ADD always operates in DAY units on a date-or-string value
            "DATE_ADD": lambda args: exp.TsOrDsAdd(
                this=seq_get(args, 0), expression=seq_get(args, 1), unit=exp.Literal.string("DAY")
            ),
            # DATE_FORMAT's first argument is coerced to a time value before
            # the Hive format string is translated.
            "DATE_FORMAT": lambda args: format_time_lambda(exp.TimeToStr, "hive")(
                [
                    exp.TimeStrToTime(this=seq_get(args, 0)),
                    seq_get(args, 1),
                ]
            ),
            # DATE_SUB is modeled as DATE_ADD with the increment negated
            "DATE_SUB": lambda args: exp.TsOrDsAdd(
                this=seq_get(args, 0),
                expression=exp.Mul(this=seq_get(args, 1), expression=exp.Literal.number(-1)),
                unit=exp.Literal.string("DAY"),
            ),
            # Both DATEDIFF operands are wrapped so date-or-string inputs compare as dates
            "DATEDIFF": lambda args: exp.DateDiff(
                this=exp.TsOrDsToDate(this=seq_get(args, 0)),
                expression=exp.TsOrDsToDate(this=seq_get(args, 1)),
            ),
            "DAY": lambda args: exp.Day(this=exp.TsOrDsToDate(this=seq_get(args, 0))),
            "FROM_UNIXTIME": format_time_lambda(exp.UnixToStr, "hive", True),
            "GET_JSON_OBJECT": exp.JSONExtractScalar.from_arg_list,
            "LOCATE": locate_to_strposition,
            "MAP": parse_var_map,
            "MONTH": lambda args: exp.Month(this=exp.TsOrDsToDate.from_arg_list(args)),
            "PERCENTILE": exp.Quantile.from_arg_list,
            "PERCENTILE_APPROX": exp.ApproxQuantile.from_arg_list,
            "REGEXP_EXTRACT": lambda args: exp.RegexpExtract(
                this=seq_get(args, 0), expression=seq_get(args, 1), group=seq_get(args, 2)
            ),
            "SIZE": exp.ArraySize.from_arg_list,
            "SPLIT": exp.RegexpSplit.from_arg_list,
            # STR_TO_MAP defaults its delimiters to "," (pairs) and ":" (key/value)
            "STR_TO_MAP": lambda args: exp.StrToMap(
                this=seq_get(args, 0),
                pair_delim=seq_get(args, 1) or exp.Literal.string(","),
                key_value_delim=seq_get(args, 2) or exp.Literal.string(":"),
            ),
            "TO_DATE": format_time_lambda(exp.TsOrDsToDate, "hive"),
            "TO_JSON": exp.JSONFormat.from_arg_list,
            "UNBASE64": exp.FromBase64.from_arg_list,
            "UNIX_TIMESTAMP": format_time_lambda(exp.StrToUnix, "hive", True),
            "YEAR": lambda args: exp.Year(this=exp.TsOrDsToDate.from_arg_list(args)),
        }

        NO_PAREN_FUNCTION_PARSERS = {
            **parser.Parser.NO_PAREN_FUNCTION_PARSERS,
            "TRANSFORM": lambda self: self._parse_transform(),
        }

        PROPERTY_PARSERS = {
            **parser.Parser.PROPERTY_PARSERS,
            "WITH SERDEPROPERTIES": lambda self: exp.SerdeProperties(
                expressions=self._parse_wrapped_csv(self._parse_property)
            ),
        }

        def _parse_transform(self) -> t.Optional[exp.Transform | exp.QueryTransform]:
            """Parse a Hive TRANSFORM clause.

            Returns a plain ``exp.Transform`` for the function-call form
            ``TRANSFORM(args)``, an ``exp.QueryTransform`` when the clause
            continues with ``USING 'script'``, or ``None`` (after rewinding the
            token cursor) when TRANSFORM isn't followed by a parenthesis.
            """
            if not self._match(TokenType.L_PAREN, advance=False):
                # Not the TRANSFORM construct: undo consuming the keyword
                self._retreat(self._index - 1)
                return None

            args = self._parse_wrapped_csv(self._parse_lambda)
            row_format_before = self._parse_row_format(match_row=True)

            record_writer = None
            if self._match_text_seq("RECORDWRITER"):
                record_writer = self._parse_string()

            if not self._match(TokenType.USING):
                # No USING clause: this is just the TRANSFORM(...) function
                return exp.Transform.from_arg_list(args)

            command_script = self._parse_string()

            # Optional "AS (col1, col2, ...)" output schema
            self._match(TokenType.ALIAS)
            schema = self._parse_schema()

            row_format_after = self._parse_row_format(match_row=True)
            record_reader = None
            if self._match_text_seq("RECORDREADER"):
                record_reader = self._parse_string()

            return self.expression(
                exp.QueryTransform,
                expressions=args,
                command_script=command_script,
                schema=schema,
                row_format_before=row_format_before,
                record_writer=record_writer,
                row_format_after=row_format_after,
                record_reader=record_reader,
            )

        def _parse_types(
            self, check_func: bool = False, schema: bool = False, allow_identifiers: bool = True
        ) -> t.Optional[exp.Expression]:
            """
            Spark (and most likely Hive) treats casts to CHAR(length) and VARCHAR(length) as casts to
            STRING in all contexts except for schema definitions. For example, this is in Spark v3.4.0:

                spark-sql (default)> select cast(1234 as varchar(2));
                23/06/06 15:51:18 WARN CharVarcharUtils: The Spark cast operator does not support
                char/varchar type and simply treats them as string type. Please use string type
                directly to avoid confusion. Otherwise, you can set spark.sql.legacy.charVarcharAsString
                to true, so that Spark treat them as string type as same as Spark 3.0 and earlier

                1234
                Time taken: 4.265 seconds, Fetched 1 row(s)

            This shows that Spark doesn't truncate the value into '12', which is inconsistent with
            what other dialects (e.g. postgres) do, so we need to drop the length to transpile correctly.

            Reference: https://spark.apache.org/docs/latest/sql-ref-datatypes.html
            """
            this = super()._parse_types(
                check_func=check_func, schema=schema, allow_identifiers=allow_identifiers
            )

            if this and not schema:
                # Outside schema definitions, rewrite CHAR/VARCHAR (with or
                # without a length) to plain STRING, per the note above.
                return this.transform(
                    lambda node: node.replace(exp.DataType.build("text"))
                    if isinstance(node, exp.DataType) and node.is_type("char", "varchar")
                    else node,
                    copy=False,
                )

            return this

        def _parse_partition_and_order(
            self,
        ) -> t.Tuple[t.List[exp.Expression], t.Optional[exp.Expression]]:
            """Parse window partitioning/ordering, accepting Hive's
            DISTRIBUTE BY and SORT BY as synonyms for PARTITION BY / ORDER BY."""
            return (
                self._parse_csv(self._parse_conjunction)
                if self._match_set({TokenType.PARTITION_BY, TokenType.DISTRIBUTE_BY})
                else [],
                super()._parse_order(skip_order_token=self._match(TokenType.SORT_BY)),
            )
Parser consumes a list of tokens produced by the Tokenizer and produces a parsed syntax tree.
Arguments:
- error_level: The desired error level. Default: ErrorLevel.IMMEDIATE
- error_message_context: Determines the amount of context to capture from a query string when displaying the error message (in number of characters). Default: 100
- max_errors: Maximum number of error messages to include in a raised ParseError. This is only relevant if error_level is ErrorLevel.RAISE. Default: 3
FUNCTIONS =
{'ABS': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Abs'>>, 'ANY_VALUE': <bound method Func.from_arg_list of <class 'sqlglot.expressions.AnyValue'>>, 'APPROX_DISTINCT': <bound method Func.from_arg_list of <class 'sqlglot.expressions.ApproxDistinct'>>, 'APPROX_COUNT_DISTINCT': <bound method Func.from_arg_list of <class 'sqlglot.expressions.ApproxDistinct'>>, 'APPROX_QUANTILE': <bound method Func.from_arg_list of <class 'sqlglot.expressions.ApproxQuantile'>>, 'ARRAY': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Array'>>, 'ARRAY_AGG': <bound method Func.from_arg_list of <class 'sqlglot.expressions.ArrayAgg'>>, 'ARRAY_ALL': <bound method Func.from_arg_list of <class 'sqlglot.expressions.ArrayAll'>>, 'ARRAY_ANY': <bound method Func.from_arg_list of <class 'sqlglot.expressions.ArrayAny'>>, 'ARRAY_CONCAT': <bound method Func.from_arg_list of <class 'sqlglot.expressions.ArrayConcat'>>, 'ARRAY_CAT': <bound method Func.from_arg_list of <class 'sqlglot.expressions.ArrayConcat'>>, 'ARRAY_CONTAINS': <bound method Func.from_arg_list of <class 'sqlglot.expressions.ArrayContains'>>, 'FILTER': <bound method Func.from_arg_list of <class 'sqlglot.expressions.ArrayFilter'>>, 'ARRAY_FILTER': <bound method Func.from_arg_list of <class 'sqlglot.expressions.ArrayFilter'>>, 'ARRAY_JOIN': <bound method Func.from_arg_list of <class 'sqlglot.expressions.ArrayJoin'>>, 'ARRAY_SIZE': <bound method Func.from_arg_list of <class 'sqlglot.expressions.ArraySize'>>, 'ARRAY_SORT': <bound method Func.from_arg_list of <class 'sqlglot.expressions.ArraySort'>>, 'ARRAY_SUM': <bound method Func.from_arg_list of <class 'sqlglot.expressions.ArraySum'>>, 'ARRAY_UNION_AGG': <bound method Func.from_arg_list of <class 'sqlglot.expressions.ArrayUnionAgg'>>, 'AVG': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Avg'>>, 'CASE': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Case'>>, 'CAST': <bound method Func.from_arg_list of <class 
'sqlglot.expressions.Cast'>>, 'CAST_TO_STR_TYPE': <bound method Func.from_arg_list of <class 'sqlglot.expressions.CastToStrType'>>, 'CEIL': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Ceil'>>, 'CEILING': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Ceil'>>, 'COALESCE': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Coalesce'>>, 'IFNULL': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Coalesce'>>, 'NVL': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Coalesce'>>, 'CONCAT': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Concat'>>, 'CONCAT_WS': <bound method Func.from_arg_list of <class 'sqlglot.expressions.ConcatWs'>>, 'COUNT': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Count'>>, 'COUNT_IF': <bound method Func.from_arg_list of <class 'sqlglot.expressions.CountIf'>>, 'CURRENT_DATE': <bound method Func.from_arg_list of <class 'sqlglot.expressions.CurrentDate'>>, 'CURRENT_DATETIME': <bound method Func.from_arg_list of <class 'sqlglot.expressions.CurrentDatetime'>>, 'CURRENT_TIME': <bound method Func.from_arg_list of <class 'sqlglot.expressions.CurrentTime'>>, 'CURRENT_TIMESTAMP': <bound method Func.from_arg_list of <class 'sqlglot.expressions.CurrentTimestamp'>>, 'CURRENT_USER': <bound method Func.from_arg_list of <class 'sqlglot.expressions.CurrentUser'>>, 'DATE': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Date'>>, 'DATE_ADD': <function Hive.Parser.<lambda>>, 'DATEDIFF': <function Hive.Parser.<lambda>>, 'DATE_DIFF': <bound method Func.from_arg_list of <class 'sqlglot.expressions.DateDiff'>>, 'DATEFROMPARTS': <bound method Func.from_arg_list of <class 'sqlglot.expressions.DateFromParts'>>, 'DATE_STR_TO_DATE': <bound method Func.from_arg_list of <class 'sqlglot.expressions.DateStrToDate'>>, 'DATE_SUB': <function Hive.Parser.<lambda>>, 'DATE_TO_DATE_STR': <function Parser.<lambda>>, 'DATE_TO_DI': <bound method 
Func.from_arg_list of <class 'sqlglot.expressions.DateToDi'>>, 'DATE_TRUNC': <bound method Func.from_arg_list of <class 'sqlglot.expressions.DateTrunc'>>, 'DATETIME_ADD': <bound method Func.from_arg_list of <class 'sqlglot.expressions.DatetimeAdd'>>, 'DATETIME_DIFF': <bound method Func.from_arg_list of <class 'sqlglot.expressions.DatetimeDiff'>>, 'DATETIME_SUB': <bound method Func.from_arg_list of <class 'sqlglot.expressions.DatetimeSub'>>, 'DATETIME_TRUNC': <bound method Func.from_arg_list of <class 'sqlglot.expressions.DatetimeTrunc'>>, 'DAY': <function Hive.Parser.<lambda>>, 'DAY_OF_MONTH': <bound method Func.from_arg_list of <class 'sqlglot.expressions.DayOfMonth'>>, 'DAYOFMONTH': <bound method Func.from_arg_list of <class 'sqlglot.expressions.DayOfMonth'>>, 'DAY_OF_WEEK': <bound method Func.from_arg_list of <class 'sqlglot.expressions.DayOfWeek'>>, 'DAYOFWEEK': <bound method Func.from_arg_list of <class 'sqlglot.expressions.DayOfWeek'>>, 'DAY_OF_YEAR': <bound method Func.from_arg_list of <class 'sqlglot.expressions.DayOfYear'>>, 'DAYOFYEAR': <bound method Func.from_arg_list of <class 'sqlglot.expressions.DayOfYear'>>, 'DECODE': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Decode'>>, 'DI_TO_DATE': <bound method Func.from_arg_list of <class 'sqlglot.expressions.DiToDate'>>, 'ENCODE': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Encode'>>, 'EXP': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Exp'>>, 'EXPLODE': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Explode'>>, 'EXTRACT': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Extract'>>, 'FIRST': <bound method Func.from_arg_list of <class 'sqlglot.expressions.First'>>, 'FLOOR': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Floor'>>, 'FROM_BASE': <bound method Func.from_arg_list of <class 'sqlglot.expressions.FromBase'>>, 'FROM_BASE64': <bound method Func.from_arg_list of <class 
'sqlglot.expressions.FromBase64'>>, 'GENERATE_SERIES': <bound method Func.from_arg_list of <class 'sqlglot.expressions.GenerateSeries'>>, 'GREATEST': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Greatest'>>, 'GROUP_CONCAT': <bound method Func.from_arg_list of <class 'sqlglot.expressions.GroupConcat'>>, 'HEX': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Hex'>>, 'HLL': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Hll'>>, 'IF': <bound method Func.from_arg_list of <class 'sqlglot.expressions.If'>>, 'INITCAP': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Initcap'>>, 'IS_NAN': <bound method Func.from_arg_list of <class 'sqlglot.expressions.IsNan'>>, 'ISNAN': <bound method Func.from_arg_list of <class 'sqlglot.expressions.IsNan'>>, 'JSON_ARRAY_CONTAINS': <bound method Func.from_arg_list of <class 'sqlglot.expressions.JSONArrayContains'>>, 'JSONB_EXTRACT': <bound method Func.from_arg_list of <class 'sqlglot.expressions.JSONBExtract'>>, 'JSONB_EXTRACT_SCALAR': <bound method Func.from_arg_list of <class 'sqlglot.expressions.JSONBExtractScalar'>>, 'JSON_EXTRACT': <bound method Func.from_arg_list of <class 'sqlglot.expressions.JSONExtract'>>, 'JSON_EXTRACT_SCALAR': <bound method Func.from_arg_list of <class 'sqlglot.expressions.JSONExtractScalar'>>, 'JSON_FORMAT': <bound method Func.from_arg_list of <class 'sqlglot.expressions.JSONFormat'>>, 'J_S_O_N_OBJECT': <bound method Func.from_arg_list of <class 'sqlglot.expressions.JSONObject'>>, 'LAST': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Last'>>, 'LAST_DATE_OF_MONTH': <bound method Func.from_arg_list of <class 'sqlglot.expressions.LastDateOfMonth'>>, 'LEAST': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Least'>>, 'LEFT': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Left'>>, 'LENGTH': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Length'>>, 'LEN': <bound method 
Func.from_arg_list of <class 'sqlglot.expressions.Length'>>, 'LEVENSHTEIN': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Levenshtein'>>, 'LN': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Ln'>>, 'LOG': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Log'>>, 'LOG10': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Log10'>>, 'LOG2': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Log2'>>, 'LOGICAL_AND': <bound method Func.from_arg_list of <class 'sqlglot.expressions.LogicalAnd'>>, 'BOOL_AND': <bound method Func.from_arg_list of <class 'sqlglot.expressions.LogicalAnd'>>, 'BOOLAND_AGG': <bound method Func.from_arg_list of <class 'sqlglot.expressions.LogicalAnd'>>, 'LOGICAL_OR': <bound method Func.from_arg_list of <class 'sqlglot.expressions.LogicalOr'>>, 'BOOL_OR': <bound method Func.from_arg_list of <class 'sqlglot.expressions.LogicalOr'>>, 'BOOLOR_AGG': <bound method Func.from_arg_list of <class 'sqlglot.expressions.LogicalOr'>>, 'LOWER': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Lower'>>, 'LCASE': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Lower'>>, 'MD5': <bound method Func.from_arg_list of <class 'sqlglot.expressions.MD5'>>, 'MD5_DIGEST': <bound method Func.from_arg_list of <class 'sqlglot.expressions.MD5Digest'>>, 'MAP': <function parse_var_map>, 'MAP_FROM_ENTRIES': <bound method Func.from_arg_list of <class 'sqlglot.expressions.MapFromEntries'>>, 'MATCH_AGAINST': <bound method Func.from_arg_list of <class 'sqlglot.expressions.MatchAgainst'>>, 'MAX': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Max'>>, 'MIN': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Min'>>, 'MONTH': <function Hive.Parser.<lambda>>, 'MONTHS_BETWEEN': <bound method Func.from_arg_list of <class 'sqlglot.expressions.MonthsBetween'>>, 'NEXT_VALUE_FOR': <bound method Func.from_arg_list of <class 'sqlglot.expressions.NextValueFor'>>, 
'NUMBER_TO_STR': <bound method Func.from_arg_list of <class 'sqlglot.expressions.NumberToStr'>>, 'NVL2': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Nvl2'>>, 'OPEN_J_S_O_N': <bound method Func.from_arg_list of <class 'sqlglot.expressions.OpenJSON'>>, 'PARAMETERIZED_AGG': <bound method Func.from_arg_list of <class 'sqlglot.expressions.ParameterizedAgg'>>, 'PERCENTILE_CONT': <bound method Func.from_arg_list of <class 'sqlglot.expressions.PercentileCont'>>, 'PERCENTILE_DISC': <bound method Func.from_arg_list of <class 'sqlglot.expressions.PercentileDisc'>>, 'POSEXPLODE': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Posexplode'>>, 'POWER': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Pow'>>, 'POW': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Pow'>>, 'QUANTILE': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Quantile'>>, 'RANGE_N': <bound method Func.from_arg_list of <class 'sqlglot.expressions.RangeN'>>, 'READ_CSV': <bound method Func.from_arg_list of <class 'sqlglot.expressions.ReadCSV'>>, 'REDUCE': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Reduce'>>, 'REGEXP_EXTRACT': <function Hive.Parser.<lambda>>, 'REGEXP_I_LIKE': <bound method Func.from_arg_list of <class 'sqlglot.expressions.RegexpILike'>>, 'REGEXP_LIKE': <bound method Func.from_arg_list of <class 'sqlglot.expressions.RegexpLike'>>, 'REGEXP_REPLACE': <bound method Func.from_arg_list of <class 'sqlglot.expressions.RegexpReplace'>>, 'REGEXP_SPLIT': <bound method Func.from_arg_list of <class 'sqlglot.expressions.RegexpSplit'>>, 'REPEAT': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Repeat'>>, 'RIGHT': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Right'>>, 'ROUND': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Round'>>, 'ROW_NUMBER': <bound method Func.from_arg_list of <class 'sqlglot.expressions.RowNumber'>>, 'SHA': <bound method 
Func.from_arg_list of <class 'sqlglot.expressions.SHA'>>, 'SHA1': <bound method Func.from_arg_list of <class 'sqlglot.expressions.SHA'>>, 'SHA2': <bound method Func.from_arg_list of <class 'sqlglot.expressions.SHA2'>>, 'SAFE_CONCAT': <bound method Func.from_arg_list of <class 'sqlglot.expressions.SafeConcat'>>, 'SAFE_DIVIDE': <bound method Func.from_arg_list of <class 'sqlglot.expressions.SafeDivide'>>, 'SET_AGG': <bound method Func.from_arg_list of <class 'sqlglot.expressions.SetAgg'>>, 'SORT_ARRAY': <bound method Func.from_arg_list of <class 'sqlglot.expressions.SortArray'>>, 'SPLIT': <bound method Func.from_arg_list of <class 'sqlglot.expressions.RegexpSplit'>>, 'SQRT': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Sqrt'>>, 'STANDARD_HASH': <bound method Func.from_arg_list of <class 'sqlglot.expressions.StandardHash'>>, 'STAR_MAP': <bound method Func.from_arg_list of <class 'sqlglot.expressions.StarMap'>>, 'STARTS_WITH': <bound method Func.from_arg_list of <class 'sqlglot.expressions.StartsWith'>>, 'STARTSWITH': <bound method Func.from_arg_list of <class 'sqlglot.expressions.StartsWith'>>, 'STDDEV': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Stddev'>>, 'STDDEV_POP': <bound method Func.from_arg_list of <class 'sqlglot.expressions.StddevPop'>>, 'STDDEV_SAMP': <bound method Func.from_arg_list of <class 'sqlglot.expressions.StddevSamp'>>, 'STR_POSITION': <bound method Func.from_arg_list of <class 'sqlglot.expressions.StrPosition'>>, 'STR_TO_DATE': <bound method Func.from_arg_list of <class 'sqlglot.expressions.StrToDate'>>, 'STR_TO_MAP': <function Hive.Parser.<lambda>>, 'STR_TO_TIME': <bound method Func.from_arg_list of <class 'sqlglot.expressions.StrToTime'>>, 'STR_TO_UNIX': <bound method Func.from_arg_list of <class 'sqlglot.expressions.StrToUnix'>>, 'STRUCT': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Struct'>>, 'STRUCT_EXTRACT': <bound method Func.from_arg_list of <class 
'sqlglot.expressions.StructExtract'>>, 'STUFF': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Stuff'>>, 'INSERT': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Stuff'>>, 'SUBSTRING': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Substring'>>, 'SUM': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Sum'>>, 'TIME_ADD': <bound method Func.from_arg_list of <class 'sqlglot.expressions.TimeAdd'>>, 'TIME_DIFF': <bound method Func.from_arg_list of <class 'sqlglot.expressions.TimeDiff'>>, 'TIME_STR_TO_DATE': <bound method Func.from_arg_list of <class 'sqlglot.expressions.TimeStrToDate'>>, 'TIME_STR_TO_TIME': <bound method Func.from_arg_list of <class 'sqlglot.expressions.TimeStrToTime'>>, 'TIME_STR_TO_UNIX': <bound method Func.from_arg_list of <class 'sqlglot.expressions.TimeStrToUnix'>>, 'TIME_SUB': <bound method Func.from_arg_list of <class 'sqlglot.expressions.TimeSub'>>, 'TIME_TO_STR': <bound method Func.from_arg_list of <class 'sqlglot.expressions.TimeToStr'>>, 'TIME_TO_TIME_STR': <function Parser.<lambda>>, 'TIME_TO_UNIX': <bound method Func.from_arg_list of <class 'sqlglot.expressions.TimeToUnix'>>, 'TIME_TRUNC': <bound method Func.from_arg_list of <class 'sqlglot.expressions.TimeTrunc'>>, 'TIMESTAMP_ADD': <bound method Func.from_arg_list of <class 'sqlglot.expressions.TimestampAdd'>>, 'TIMESTAMP_DIFF': <bound method Func.from_arg_list of <class 'sqlglot.expressions.TimestampDiff'>>, 'TIMESTAMP_SUB': <bound method Func.from_arg_list of <class 'sqlglot.expressions.TimestampSub'>>, 'TIMESTAMP_TRUNC': <bound method Func.from_arg_list of <class 'sqlglot.expressions.TimestampTrunc'>>, 'TO_BASE64': <bound method Func.from_arg_list of <class 'sqlglot.expressions.ToBase64'>>, 'TO_CHAR': <bound method Func.from_arg_list of <class 'sqlglot.expressions.ToChar'>>, 'TRANSFORM': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Transform'>>, 'TRIM': <bound method Func.from_arg_list of <class 
'sqlglot.expressions.Trim'>>, 'TRY_CAST': <bound method Func.from_arg_list of <class 'sqlglot.expressions.TryCast'>>, 'TS_OR_DI_TO_DI': <bound method Func.from_arg_list of <class 'sqlglot.expressions.TsOrDiToDi'>>, 'TS_OR_DS_ADD': <bound method Func.from_arg_list of <class 'sqlglot.expressions.TsOrDsAdd'>>, 'TS_OR_DS_TO_DATE': <bound method Func.from_arg_list of <class 'sqlglot.expressions.TsOrDsToDate'>>, 'TS_OR_DS_TO_DATE_STR': <function Parser.<lambda>>, 'UNHEX': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Unhex'>>, 'UNIX_TO_STR': <bound method Func.from_arg_list of <class 'sqlglot.expressions.UnixToStr'>>, 'UNIX_TO_TIME': <bound method Func.from_arg_list of <class 'sqlglot.expressions.UnixToTime'>>, 'UNIX_TO_TIME_STR': <bound method Func.from_arg_list of <class 'sqlglot.expressions.UnixToTimeStr'>>, 'UPPER': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Upper'>>, 'UCASE': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Upper'>>, 'VAR_MAP': <function parse_var_map>, 'VARIANCE': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Variance'>>, 'VARIANCE_SAMP': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Variance'>>, 'VAR_SAMP': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Variance'>>, 'VARIANCE_POP': <bound method Func.from_arg_list of <class 'sqlglot.expressions.VariancePop'>>, 'VAR_POP': <bound method Func.from_arg_list of <class 'sqlglot.expressions.VariancePop'>>, 'WEEK': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Week'>>, 'WEEK_OF_YEAR': <bound method Func.from_arg_list of <class 'sqlglot.expressions.WeekOfYear'>>, 'WEEKOFYEAR': <bound method Func.from_arg_list of <class 'sqlglot.expressions.WeekOfYear'>>, 'WHEN': <bound method Func.from_arg_list of <class 'sqlglot.expressions.When'>>, 'X_M_L_TABLE': <bound method Func.from_arg_list of <class 'sqlglot.expressions.XMLTable'>>, 'XOR': <bound method Func.from_arg_list of <class 
'sqlglot.expressions.Xor'>>, 'YEAR': <function Hive.Parser.<lambda>>, 'GLOB': <function Parser.<lambda>>, 'LIKE': <function parse_like>, 'BASE64': <bound method Func.from_arg_list of <class 'sqlglot.expressions.ToBase64'>>, 'COLLECT_LIST': <bound method Func.from_arg_list of <class 'sqlglot.expressions.ArrayAgg'>>, 'COLLECT_SET': <bound method Func.from_arg_list of <class 'sqlglot.expressions.SetAgg'>>, 'DATE_FORMAT': <function Hive.Parser.<lambda>>, 'FROM_UNIXTIME': <function format_time_lambda.<locals>._format_time>, 'GET_JSON_OBJECT': <bound method Func.from_arg_list of <class 'sqlglot.expressions.JSONExtractScalar'>>, 'LOCATE': <function locate_to_strposition>, 'PERCENTILE': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Quantile'>>, 'PERCENTILE_APPROX': <bound method Func.from_arg_list of <class 'sqlglot.expressions.ApproxQuantile'>>, 'SIZE': <bound method Func.from_arg_list of <class 'sqlglot.expressions.ArraySize'>>, 'TO_DATE': <function format_time_lambda.<locals>._format_time>, 'TO_JSON': <bound method Func.from_arg_list of <class 'sqlglot.expressions.JSONFormat'>>, 'UNBASE64': <bound method Func.from_arg_list of <class 'sqlglot.expressions.FromBase64'>>, 'UNIX_TIMESTAMP': <function format_time_lambda.<locals>._format_time>}
NO_PAREN_FUNCTION_PARSERS =
{'ANY': <function Parser.<lambda>>, 'CASE': <function Parser.<lambda>>, 'IF': <function Parser.<lambda>>, 'NEXT': <function Parser.<lambda>>, 'TRANSFORM': <function Hive.Parser.<lambda>>}
PROPERTY_PARSERS =
{'ALGORITHM': <function Parser.<lambda>>, 'AUTO_INCREMENT': <function Parser.<lambda>>, 'BLOCKCOMPRESSION': <function Parser.<lambda>>, 'CHARACTER SET': <function Parser.<lambda>>, 'CHECKSUM': <function Parser.<lambda>>, 'CLUSTER BY': <function Parser.<lambda>>, 'CLUSTERED': <function Parser.<lambda>>, 'COLLATE': <function Parser.<lambda>>, 'COMMENT': <function Parser.<lambda>>, 'COPY': <function Parser.<lambda>>, 'DATABLOCKSIZE': <function Parser.<lambda>>, 'DEFINER': <function Parser.<lambda>>, 'DETERMINISTIC': <function Parser.<lambda>>, 'DISTKEY': <function Parser.<lambda>>, 'DISTSTYLE': <function Parser.<lambda>>, 'ENGINE': <function Parser.<lambda>>, 'EXECUTE': <function Parser.<lambda>>, 'EXTERNAL': <function Parser.<lambda>>, 'FALLBACK': <function Parser.<lambda>>, 'FORMAT': <function Parser.<lambda>>, 'FREESPACE': <function Parser.<lambda>>, 'HEAP': <function Parser.<lambda>>, 'IMMUTABLE': <function Parser.<lambda>>, 'JOURNAL': <function Parser.<lambda>>, 'LANGUAGE': <function Parser.<lambda>>, 'LAYOUT': <function Parser.<lambda>>, 'LIFETIME': <function Parser.<lambda>>, 'LIKE': <function Parser.<lambda>>, 'LOCATION': <function Parser.<lambda>>, 'LOCK': <function Parser.<lambda>>, 'LOCKING': <function Parser.<lambda>>, 'LOG': <function Parser.<lambda>>, 'MATERIALIZED': <function Parser.<lambda>>, 'MERGEBLOCKRATIO': <function Parser.<lambda>>, 'MULTISET': <function Parser.<lambda>>, 'NO': <function Parser.<lambda>>, 'ON': <function Parser.<lambda>>, 'ORDER BY': <function Parser.<lambda>>, 'PARTITION BY': <function Parser.<lambda>>, 'PARTITIONED BY': <function Parser.<lambda>>, 'PARTITIONED_BY': <function Parser.<lambda>>, 'PRIMARY KEY': <function Parser.<lambda>>, 'RANGE': <function Parser.<lambda>>, 'RETURNS': <function Parser.<lambda>>, 'ROW': <function Parser.<lambda>>, 'ROW_FORMAT': <function Parser.<lambda>>, 'SET': <function Parser.<lambda>>, 'SETTINGS': <function Parser.<lambda>>, 'SORTKEY': <function Parser.<lambda>>, 'SOURCE': <function 
Parser.<lambda>>, 'STABLE': <function Parser.<lambda>>, 'STORED': <function Parser.<lambda>>, 'TBLPROPERTIES': <function Parser.<lambda>>, 'TEMP': <function Parser.<lambda>>, 'TEMPORARY': <function Parser.<lambda>>, 'TO': <function Parser.<lambda>>, 'TRANSIENT': <function Parser.<lambda>>, 'TTL': <function Parser.<lambda>>, 'USING': <function Parser.<lambda>>, 'VOLATILE': <function Parser.<lambda>>, 'WITH': <function Parser.<lambda>>, 'WITH SERDEPROPERTIES': <function Hive.Parser.<lambda>>}
SET_TRIE: Dict =
{'GLOBAL': {0: True}, 'LOCAL': {0: True}, 'SESSION': {0: True}, 'TRANSACTION': {0: True}}
FORMAT_TRIE: Dict =
{'y': {0: True, 'y': {'y': {'y': {0: True}}, 0: True}}, 'Y': {0: True, 'Y': {'Y': {'Y': {0: True}}, 0: True}}, 'M': {'M': {'M': {'M': {0: True}, 0: True}, 0: True}, 0: True}, 'd': {'d': {0: True}, 0: True}, 'H': {'H': {0: True}, 0: True}, 'h': {'h': {0: True}, 0: True}, 'm': {'m': {0: True}, 0: True}, 's': {'s': {0: True}, 0: True}, 'S': {'S': {'S': {'S': {'S': {'S': {0: True}}}}}}, 'a': {0: True}, 'D': {'D': {0: True}, 0: True}, 'E': {0: True, 'E': {0: True, 'E': {0: True, 'E': {0: True}}}}}
TIME_MAPPING: Dict[str, str] =
{'y': '%Y', 'Y': '%Y', 'YYYY': '%Y', 'yyyy': '%Y', 'YY': '%y', 'yy': '%y', 'MMMM': '%B', 'MMM': '%b', 'MM': '%m', 'M': '%-m', 'dd': '%d', 'd': '%-d', 'HH': '%H', 'H': '%-H', 'hh': '%I', 'h': '%-I', 'mm': '%M', 'm': '%-M', 'ss': '%S', 's': '%-S', 'SSSSSS': '%f', 'a': '%p', 'DD': '%j', 'D': '%-j', 'E': '%a', 'EE': '%a', 'EEE': '%a', 'EEEE': '%A'}
TIME_TRIE: Dict =
{'y': {0: True, 'y': {'y': {'y': {0: True}}, 0: True}}, 'Y': {0: True, 'Y': {'Y': {'Y': {0: True}}, 0: True}}, 'M': {'M': {'M': {'M': {0: True}, 0: True}, 0: True}, 0: True}, 'd': {'d': {0: True}, 0: True}, 'H': {'H': {0: True}, 0: True}, 'h': {'h': {0: True}, 0: True}, 'm': {'m': {0: True}, 0: True}, 's': {'s': {0: True}, 0: True}, 'S': {'S': {'S': {'S': {'S': {'S': {0: True}}}}}}, 'a': {0: True}, 'D': {'D': {0: True}, 0: True}, 'E': {0: True, 'E': {0: True, 'E': {0: True, 'E': {0: True}}}}}
Inherited Members
- sqlglot.parser.Parser
- Parser
- NO_PAREN_FUNCTIONS
- STRUCT_TYPE_TOKENS
- NESTED_TYPE_TOKENS
- ENUM_TYPE_TOKENS
- TYPE_TOKENS
- SUBQUERY_PREDICATES
- RESERVED_KEYWORDS
- DB_CREATABLES
- CREATABLES
- ID_VAR_TOKENS
- INTERVAL_VARS
- TABLE_ALIAS_TOKENS
- COMMENT_TABLE_ALIAS_TOKENS
- UPDATE_ALIAS_TOKENS
- TRIM_TYPES
- FUNC_TOKENS
- CONJUNCTION
- EQUALITY
- COMPARISON
- BITWISE
- TERM
- FACTOR
- TIMES
- TIMESTAMPS
- SET_OPERATIONS
- JOIN_METHODS
- JOIN_SIDES
- JOIN_KINDS
- JOIN_HINTS
- LAMBDAS
- COLUMN_OPERATORS
- EXPRESSION_PARSERS
- STATEMENT_PARSERS
- UNARY_PARSERS
- PRIMARY_PARSERS
- PLACEHOLDER_PARSERS
- RANGE_PARSERS
- CONSTRAINT_PARSERS
- ALTER_PARSERS
- SCHEMA_UNNAMED_CONSTRAINTS
- INVALID_FUNC_NAME_TOKENS
- FUNCTIONS_WITH_ALIASED_ARGS
- FUNCTION_PARSERS
- QUERY_MODIFIER_PARSERS
- SET_PARSERS
- SHOW_PARSERS
- TYPE_LITERAL_PARSERS
- MODIFIABLES
- DDL_SELECT_TOKENS
- PRE_VOLATILE_TOKENS
- TRANSACTION_KIND
- TRANSACTION_CHARACTERISTICS
- INSERT_ALTERNATIVES
- CLONE_KINDS
- TABLE_INDEX_HINT_TOKENS
- WINDOW_ALIAS_TOKENS
- WINDOW_BEFORE_PAREN_TOKENS
- WINDOW_SIDES
- ADD_CONSTRAINT_TOKENS
- DISTINCT_TOKENS
- CONCAT_NULL_OUTPUTS_STRING
- PREFIXED_PIVOT_COLUMNS
- IDENTIFY_PIVOT_STRINGS
- LOG_BASE_FIRST
- INDEX_OFFSET
- UNNEST_COLUMN_ONLY
- STRICT_STRING_CONCAT
- NORMALIZE_FUNCTIONS
- NULL_ORDERING
- FORMAT_MAPPING
- error_level
- error_message_context
- max_errors
- reset
- parse
- parse_into
- check_errors
- raise_error
- expression
- validate_expression
- errors
- sql
367 class Generator(generator.Generator): 368 LIMIT_FETCH = "LIMIT" 369 TABLESAMPLE_WITH_METHOD = False 370 TABLESAMPLE_SIZE_IS_PERCENT = True 371 JOIN_HINTS = False 372 TABLE_HINTS = False 373 QUERY_HINTS = False 374 INDEX_ON = "ON TABLE" 375 EXTRACT_ALLOWS_QUOTES = False 376 NVL2_SUPPORTED = False 377 378 TYPE_MAPPING = { 379 **generator.Generator.TYPE_MAPPING, 380 exp.DataType.Type.BIT: "BOOLEAN", 381 exp.DataType.Type.DATETIME: "TIMESTAMP", 382 exp.DataType.Type.TEXT: "STRING", 383 exp.DataType.Type.TIME: "TIMESTAMP", 384 exp.DataType.Type.TIMESTAMPTZ: "TIMESTAMP", 385 exp.DataType.Type.VARBINARY: "BINARY", 386 } 387 388 TRANSFORMS = { 389 **generator.Generator.TRANSFORMS, 390 exp.Group: transforms.preprocess([transforms.unalias_group]), 391 exp.Select: transforms.preprocess( 392 [ 393 transforms.eliminate_qualify, 394 transforms.eliminate_distinct_on, 395 transforms.unnest_to_explode, 396 ] 397 ), 398 exp.Property: _property_sql, 399 exp.AnyValue: rename_func("FIRST"), 400 exp.ApproxDistinct: approx_count_distinct_sql, 401 exp.ArrayConcat: rename_func("CONCAT"), 402 exp.ArrayJoin: lambda self, e: self.func("CONCAT_WS", e.expression, e.this), 403 exp.ArraySize: rename_func("SIZE"), 404 exp.ArraySort: _array_sort_sql, 405 exp.With: no_recursive_cte_sql, 406 exp.DateAdd: _add_date_sql, 407 exp.DateDiff: _date_diff_sql, 408 exp.DateStrToDate: rename_func("TO_DATE"), 409 exp.DateSub: _add_date_sql, 410 exp.DateToDi: lambda self, e: f"CAST(DATE_FORMAT({self.sql(e, 'this')}, {Hive.DATEINT_FORMAT}) AS INT)", 411 exp.DiToDate: lambda self, e: f"TO_DATE(CAST({self.sql(e, 'this')} AS STRING), {Hive.DATEINT_FORMAT})", 412 exp.FileFormatProperty: lambda self, e: f"STORED AS {self.sql(e, 'this') if isinstance(e.this, exp.InputOutputFormat) else e.name.upper()}", 413 exp.FromBase64: rename_func("UNBASE64"), 414 exp.If: if_sql, 415 exp.ILike: no_ilike_sql, 416 exp.IsNan: rename_func("ISNAN"), 417 exp.JSONExtract: rename_func("GET_JSON_OBJECT"), 418 exp.JSONExtractScalar: 
rename_func("GET_JSON_OBJECT"), 419 exp.JSONFormat: _json_format_sql, 420 exp.Left: left_to_substring_sql, 421 exp.Map: var_map_sql, 422 exp.Max: max_or_greatest, 423 exp.MD5Digest: lambda self, e: self.func("UNHEX", self.func("MD5", e.this)), 424 exp.Min: min_or_least, 425 exp.MonthsBetween: lambda self, e: self.func("MONTHS_BETWEEN", e.this, e.expression), 426 exp.NotNullColumnConstraint: lambda self, e: "" 427 if e.args.get("allow_null") 428 else "NOT NULL", 429 exp.VarMap: var_map_sql, 430 exp.Create: create_with_partitions_sql, 431 exp.Quantile: rename_func("PERCENTILE"), 432 exp.ApproxQuantile: rename_func("PERCENTILE_APPROX"), 433 exp.RegexpExtract: regexp_extract_sql, 434 exp.RegexpReplace: regexp_replace_sql, 435 exp.RegexpLike: lambda self, e: self.binary(e, "RLIKE"), 436 exp.RegexpSplit: rename_func("SPLIT"), 437 exp.Right: right_to_substring_sql, 438 exp.SafeDivide: no_safe_divide_sql, 439 exp.SchemaCommentProperty: lambda self, e: self.naked_property(e), 440 exp.SetAgg: rename_func("COLLECT_SET"), 441 exp.Split: lambda self, e: f"SPLIT({self.sql(e, 'this')}, CONCAT('\\\\Q', {self.sql(e, 'expression')}))", 442 exp.StrPosition: strposition_to_locate_sql, 443 exp.StrToDate: _str_to_date_sql, 444 exp.StrToTime: _str_to_time_sql, 445 exp.StrToUnix: _str_to_unix_sql, 446 exp.StructExtract: struct_extract_sql, 447 exp.TimeStrToDate: rename_func("TO_DATE"), 448 exp.TimeStrToTime: timestrtotime_sql, 449 exp.TimeStrToUnix: rename_func("UNIX_TIMESTAMP"), 450 exp.TimeToStr: _time_to_str, 451 exp.TimeToUnix: rename_func("UNIX_TIMESTAMP"), 452 exp.ToBase64: rename_func("BASE64"), 453 exp.TsOrDiToDi: lambda self, e: f"CAST(SUBSTR(REPLACE(CAST({self.sql(e, 'this')} AS STRING), '-', ''), 1, 8) AS INT)", 454 exp.TsOrDsAdd: lambda self, e: f"DATE_ADD({self.sql(e, 'this')}, {self.sql(e, 'expression')})", 455 exp.TsOrDsToDate: _to_date_sql, 456 exp.TryCast: no_trycast_sql, 457 exp.UnixToStr: lambda self, e: self.func( 458 "FROM_UNIXTIME", e.this, time_format("hive")(self, 
e) 459 ), 460 exp.UnixToTime: rename_func("FROM_UNIXTIME"), 461 exp.UnixToTimeStr: rename_func("FROM_UNIXTIME"), 462 exp.PartitionedByProperty: lambda self, e: f"PARTITIONED BY {self.sql(e, 'this')}", 463 exp.SerdeProperties: lambda self, e: self.properties(e, prefix="WITH SERDEPROPERTIES"), 464 exp.NumberToStr: rename_func("FORMAT_NUMBER"), 465 exp.LastDateOfMonth: rename_func("LAST_DAY"), 466 exp.National: lambda self, e: self.national_sql(e, prefix=""), 467 } 468 469 PROPERTIES_LOCATION = { 470 **generator.Generator.PROPERTIES_LOCATION, 471 exp.FileFormatProperty: exp.Properties.Location.POST_SCHEMA, 472 exp.PartitionedByProperty: exp.Properties.Location.POST_SCHEMA, 473 exp.VolatileProperty: exp.Properties.Location.UNSUPPORTED, 474 } 475 476 def rowformatserdeproperty_sql(self, expression: exp.RowFormatSerdeProperty) -> str: 477 serde_props = self.sql(expression, "serde_properties") 478 serde_props = f" {serde_props}" if serde_props else "" 479 return f"ROW FORMAT SERDE {self.sql(expression, 'this')}{serde_props}" 480 481 def arrayagg_sql(self, expression: exp.ArrayAgg) -> str: 482 return self.func( 483 "COLLECT_LIST", 484 expression.this.this if isinstance(expression.this, exp.Order) else expression.this, 485 ) 486 487 def with_properties(self, properties: exp.Properties) -> str: 488 return self.properties(properties, prefix=self.seg("TBLPROPERTIES")) 489 490 def datatype_sql(self, expression: exp.DataType) -> str: 491 if ( 492 expression.this in (exp.DataType.Type.VARCHAR, exp.DataType.Type.NVARCHAR) 493 and not expression.expressions 494 ): 495 expression = exp.DataType.build("text") 496 elif expression.this in exp.DataType.TEMPORAL_TYPES: 497 expression = exp.DataType.build(expression.this) 498 elif expression.is_type("float"): 499 size_expression = expression.find(exp.DataTypeParam) 500 if size_expression: 501 size = int(size_expression.name) 502 expression = ( 503 exp.DataType.build("float") if size <= 32 else exp.DataType.build("double") 504 ) 505 506 
return super().datatype_sql(expression)
Generator converts a given syntax tree to the corresponding SQL string.
Arguments:
- pretty: Whether or not to format the produced SQL string. Default: False.
- identify: Determines when an identifier should be quoted. Possible values are: False (default): Never quote, except in cases where it's mandatory by the dialect. True or 'always': Always quote. 'safe': Only quote identifiers that are case insensitive.
- normalize: Whether or not to normalize identifiers to lowercase. Default: False.
- pad: Determines the pad size in a formatted string. Default: 2.
- indent: Determines the indentation size in a formatted string. Default: 2.
- normalize_functions: Whether or not to normalize all function names. Possible values are: "upper" or True (default): Convert names to uppercase. "lower": Convert names to lowercase. False: Disables function name normalization.
- unsupported_level: Determines the generator's behavior when it encounters unsupported expressions. Default ErrorLevel.WARN.
- max_unsupported: Maximum number of unsupported messages to include in a raised UnsupportedError. This is only relevant if unsupported_level is ErrorLevel.RAISE. Default: 3
- leading_comma: Determines whether or not the comma is leading or trailing in select expressions. This is only relevant when generating in pretty mode. Default: False
- max_text_width: The max number of characters in a segment before creating new lines in pretty mode. The default is on the smaller end because the length only represents a segment and not the true line length. Default: 80
- comments: Whether or not to preserve comments in the output SQL code. Default: True
TYPE_MAPPING =
{<Type.NCHAR: 'NCHAR'>: 'CHAR', <Type.NVARCHAR: 'NVARCHAR'>: 'VARCHAR', <Type.MEDIUMTEXT: 'MEDIUMTEXT'>: 'TEXT', <Type.LONGTEXT: 'LONGTEXT'>: 'TEXT', <Type.MEDIUMBLOB: 'MEDIUMBLOB'>: 'BLOB', <Type.LONGBLOB: 'LONGBLOB'>: 'BLOB', <Type.INET: 'INET'>: 'INET', <Type.BIT: 'BIT'>: 'BOOLEAN', <Type.DATETIME: 'DATETIME'>: 'TIMESTAMP', <Type.TEXT: 'TEXT'>: 'STRING', <Type.TIME: 'TIME'>: 'TIMESTAMP', <Type.TIMESTAMPTZ: 'TIMESTAMPTZ'>: 'TIMESTAMP', <Type.VARBINARY: 'VARBINARY'>: 'BINARY'}
TRANSFORMS =
{<class 'sqlglot.expressions.DateAdd'>: <function _add_date_sql>, <class 'sqlglot.expressions.TsOrDsAdd'>: <function Hive.Generator.<lambda>>, <class 'sqlglot.expressions.CaseSpecificColumnConstraint'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.CharacterSetColumnConstraint'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.CharacterSetProperty'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.CheckColumnConstraint'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.ClusteredColumnConstraint'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.CollateColumnConstraint'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.CopyGrantsProperty'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.CommentColumnConstraint'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.DateFormatColumnConstraint'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.DefaultColumnConstraint'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.EncodeColumnConstraint'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.ExecuteAsProperty'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.ExternalProperty'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.HeapProperty'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.InlineLengthColumnConstraint'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.IntervalDayToSecondSpan'>: 'DAY TO SECOND', <class 'sqlglot.expressions.IntervalYearToMonthSpan'>: 'YEAR TO MONTH', <class 'sqlglot.expressions.LanguageProperty'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.LocationProperty'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.LogProperty'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.MaterializedProperty'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.NoPrimaryIndexProperty'>: <function Generator.<lambda>>, <class 
'sqlglot.expressions.NonClusteredColumnConstraint'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.NotForReplicationColumnConstraint'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.OnCommitProperty'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.OnProperty'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.OnUpdateColumnConstraint'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.PathColumnConstraint'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.ReturnsProperty'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.SetProperty'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.SettingsProperty'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.SqlSecurityProperty'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.StabilityProperty'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.TemporaryProperty'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.ToTableProperty'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.TransientProperty'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.TitleColumnConstraint'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.UppercaseColumnConstraint'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.VarMap'>: <function var_map_sql>, <class 'sqlglot.expressions.VolatileProperty'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.WithJournalTableProperty'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.Group'>: <function preprocess.<locals>._to_sql>, <class 'sqlglot.expressions.Select'>: <function preprocess.<locals>._to_sql>, <class 'sqlglot.expressions.Property'>: <function _property_sql>, <class 'sqlglot.expressions.AnyValue'>: <function rename_func.<locals>.<lambda>>, <class 'sqlglot.expressions.ApproxDistinct'>: <function approx_count_distinct_sql>, <class 'sqlglot.expressions.ArrayConcat'>: <function 
rename_func.<locals>.<lambda>>, <class 'sqlglot.expressions.ArrayJoin'>: <function Hive.Generator.<lambda>>, <class 'sqlglot.expressions.ArraySize'>: <function rename_func.<locals>.<lambda>>, <class 'sqlglot.expressions.ArraySort'>: <function _array_sort_sql>, <class 'sqlglot.expressions.With'>: <function no_recursive_cte_sql>, <class 'sqlglot.expressions.DateDiff'>: <function _date_diff_sql>, <class 'sqlglot.expressions.DateStrToDate'>: <function rename_func.<locals>.<lambda>>, <class 'sqlglot.expressions.DateSub'>: <function _add_date_sql>, <class 'sqlglot.expressions.DateToDi'>: <function Hive.Generator.<lambda>>, <class 'sqlglot.expressions.DiToDate'>: <function Hive.Generator.<lambda>>, <class 'sqlglot.expressions.FileFormatProperty'>: <function Hive.Generator.<lambda>>, <class 'sqlglot.expressions.FromBase64'>: <function rename_func.<locals>.<lambda>>, <class 'sqlglot.expressions.If'>: <function if_sql>, <class 'sqlglot.expressions.ILike'>: <function no_ilike_sql>, <class 'sqlglot.expressions.IsNan'>: <function rename_func.<locals>.<lambda>>, <class 'sqlglot.expressions.JSONExtract'>: <function rename_func.<locals>.<lambda>>, <class 'sqlglot.expressions.JSONExtractScalar'>: <function rename_func.<locals>.<lambda>>, <class 'sqlglot.expressions.JSONFormat'>: <function _json_format_sql>, <class 'sqlglot.expressions.Left'>: <function left_to_substring_sql>, <class 'sqlglot.expressions.Map'>: <function var_map_sql>, <class 'sqlglot.expressions.Max'>: <function max_or_greatest>, <class 'sqlglot.expressions.MD5Digest'>: <function Hive.Generator.<lambda>>, <class 'sqlglot.expressions.Min'>: <function min_or_least>, <class 'sqlglot.expressions.MonthsBetween'>: <function Hive.Generator.<lambda>>, <class 'sqlglot.expressions.NotNullColumnConstraint'>: <function Hive.Generator.<lambda>>, <class 'sqlglot.expressions.Create'>: <function create_with_partitions_sql>, <class 'sqlglot.expressions.Quantile'>: <function rename_func.<locals>.<lambda>>, <class 
'sqlglot.expressions.ApproxQuantile'>: <function rename_func.<locals>.<lambda>>, <class 'sqlglot.expressions.RegexpExtract'>: <function regexp_extract_sql>, <class 'sqlglot.expressions.RegexpReplace'>: <function regexp_replace_sql>, <class 'sqlglot.expressions.RegexpLike'>: <function Hive.Generator.<lambda>>, <class 'sqlglot.expressions.RegexpSplit'>: <function rename_func.<locals>.<lambda>>, <class 'sqlglot.expressions.Right'>: <function right_to_substring_sql>, <class 'sqlglot.expressions.SafeDivide'>: <function no_safe_divide_sql>, <class 'sqlglot.expressions.SchemaCommentProperty'>: <function Hive.Generator.<lambda>>, <class 'sqlglot.expressions.SetAgg'>: <function rename_func.<locals>.<lambda>>, <class 'sqlglot.expressions.Split'>: <function Hive.Generator.<lambda>>, <class 'sqlglot.expressions.StrPosition'>: <function strposition_to_locate_sql>, <class 'sqlglot.expressions.StrToDate'>: <function _str_to_date_sql>, <class 'sqlglot.expressions.StrToTime'>: <function _str_to_time_sql>, <class 'sqlglot.expressions.StrToUnix'>: <function _str_to_unix_sql>, <class 'sqlglot.expressions.StructExtract'>: <function struct_extract_sql>, <class 'sqlglot.expressions.TimeStrToDate'>: <function rename_func.<locals>.<lambda>>, <class 'sqlglot.expressions.TimeStrToTime'>: <function timestrtotime_sql>, <class 'sqlglot.expressions.TimeStrToUnix'>: <function rename_func.<locals>.<lambda>>, <class 'sqlglot.expressions.TimeToStr'>: <function _time_to_str>, <class 'sqlglot.expressions.TimeToUnix'>: <function rename_func.<locals>.<lambda>>, <class 'sqlglot.expressions.ToBase64'>: <function rename_func.<locals>.<lambda>>, <class 'sqlglot.expressions.TsOrDiToDi'>: <function Hive.Generator.<lambda>>, <class 'sqlglot.expressions.TsOrDsToDate'>: <function _to_date_sql>, <class 'sqlglot.expressions.TryCast'>: <function no_trycast_sql>, <class 'sqlglot.expressions.UnixToStr'>: <function Hive.Generator.<lambda>>, <class 'sqlglot.expressions.UnixToTime'>: <function 
rename_func.<locals>.<lambda>>, <class 'sqlglot.expressions.UnixToTimeStr'>: <function rename_func.<locals>.<lambda>>, <class 'sqlglot.expressions.PartitionedByProperty'>: <function Hive.Generator.<lambda>>, <class 'sqlglot.expressions.SerdeProperties'>: <function Hive.Generator.<lambda>>, <class 'sqlglot.expressions.NumberToStr'>: <function rename_func.<locals>.<lambda>>, <class 'sqlglot.expressions.LastDateOfMonth'>: <function rename_func.<locals>.<lambda>>, <class 'sqlglot.expressions.National'>: <function Hive.Generator.<lambda>>}
PROPERTIES_LOCATION =
{<class 'sqlglot.expressions.AlgorithmProperty'>: <Location.POST_CREATE: 'POST_CREATE'>, <class 'sqlglot.expressions.AutoIncrementProperty'>: <Location.POST_SCHEMA: 'POST_SCHEMA'>, <class 'sqlglot.expressions.BlockCompressionProperty'>: <Location.POST_NAME: 'POST_NAME'>, <class 'sqlglot.expressions.CharacterSetProperty'>: <Location.POST_SCHEMA: 'POST_SCHEMA'>, <class 'sqlglot.expressions.ChecksumProperty'>: <Location.POST_NAME: 'POST_NAME'>, <class 'sqlglot.expressions.CollateProperty'>: <Location.POST_SCHEMA: 'POST_SCHEMA'>, <class 'sqlglot.expressions.CopyGrantsProperty'>: <Location.POST_SCHEMA: 'POST_SCHEMA'>, <class 'sqlglot.expressions.Cluster'>: <Location.POST_SCHEMA: 'POST_SCHEMA'>, <class 'sqlglot.expressions.ClusteredByProperty'>: <Location.POST_SCHEMA: 'POST_SCHEMA'>, <class 'sqlglot.expressions.DataBlocksizeProperty'>: <Location.POST_NAME: 'POST_NAME'>, <class 'sqlglot.expressions.DefinerProperty'>: <Location.POST_CREATE: 'POST_CREATE'>, <class 'sqlglot.expressions.DictRange'>: <Location.POST_SCHEMA: 'POST_SCHEMA'>, <class 'sqlglot.expressions.DictProperty'>: <Location.POST_SCHEMA: 'POST_SCHEMA'>, <class 'sqlglot.expressions.DistKeyProperty'>: <Location.POST_SCHEMA: 'POST_SCHEMA'>, <class 'sqlglot.expressions.DistStyleProperty'>: <Location.POST_SCHEMA: 'POST_SCHEMA'>, <class 'sqlglot.expressions.EngineProperty'>: <Location.POST_SCHEMA: 'POST_SCHEMA'>, <class 'sqlglot.expressions.ExecuteAsProperty'>: <Location.POST_SCHEMA: 'POST_SCHEMA'>, <class 'sqlglot.expressions.ExternalProperty'>: <Location.POST_CREATE: 'POST_CREATE'>, <class 'sqlglot.expressions.FallbackProperty'>: <Location.POST_NAME: 'POST_NAME'>, <class 'sqlglot.expressions.FileFormatProperty'>: <Location.POST_SCHEMA: 'POST_SCHEMA'>, <class 'sqlglot.expressions.FreespaceProperty'>: <Location.POST_NAME: 'POST_NAME'>, <class 'sqlglot.expressions.HeapProperty'>: <Location.POST_WITH: 'POST_WITH'>, <class 'sqlglot.expressions.IsolatedLoadingProperty'>: <Location.POST_NAME: 'POST_NAME'>, <class 
'sqlglot.expressions.JournalProperty'>: <Location.POST_NAME: 'POST_NAME'>, <class 'sqlglot.expressions.LanguageProperty'>: <Location.POST_SCHEMA: 'POST_SCHEMA'>, <class 'sqlglot.expressions.LikeProperty'>: <Location.POST_SCHEMA: 'POST_SCHEMA'>, <class 'sqlglot.expressions.LocationProperty'>: <Location.POST_SCHEMA: 'POST_SCHEMA'>, <class 'sqlglot.expressions.LockingProperty'>: <Location.POST_ALIAS: 'POST_ALIAS'>, <class 'sqlglot.expressions.LogProperty'>: <Location.POST_NAME: 'POST_NAME'>, <class 'sqlglot.expressions.MaterializedProperty'>: <Location.POST_CREATE: 'POST_CREATE'>, <class 'sqlglot.expressions.MergeBlockRatioProperty'>: <Location.POST_NAME: 'POST_NAME'>, <class 'sqlglot.expressions.NoPrimaryIndexProperty'>: <Location.POST_EXPRESSION: 'POST_EXPRESSION'>, <class 'sqlglot.expressions.OnProperty'>: <Location.POST_SCHEMA: 'POST_SCHEMA'>, <class 'sqlglot.expressions.OnCommitProperty'>: <Location.POST_EXPRESSION: 'POST_EXPRESSION'>, <class 'sqlglot.expressions.Order'>: <Location.POST_SCHEMA: 'POST_SCHEMA'>, <class 'sqlglot.expressions.PartitionedByProperty'>: <Location.POST_SCHEMA: 'POST_SCHEMA'>, <class 'sqlglot.expressions.PrimaryKey'>: <Location.POST_SCHEMA: 'POST_SCHEMA'>, <class 'sqlglot.expressions.Property'>: <Location.POST_WITH: 'POST_WITH'>, <class 'sqlglot.expressions.ReturnsProperty'>: <Location.POST_SCHEMA: 'POST_SCHEMA'>, <class 'sqlglot.expressions.RowFormatProperty'>: <Location.POST_SCHEMA: 'POST_SCHEMA'>, <class 'sqlglot.expressions.RowFormatDelimitedProperty'>: <Location.POST_SCHEMA: 'POST_SCHEMA'>, <class 'sqlglot.expressions.RowFormatSerdeProperty'>: <Location.POST_SCHEMA: 'POST_SCHEMA'>, <class 'sqlglot.expressions.SchemaCommentProperty'>: <Location.POST_SCHEMA: 'POST_SCHEMA'>, <class 'sqlglot.expressions.SerdeProperties'>: <Location.POST_SCHEMA: 'POST_SCHEMA'>, <class 'sqlglot.expressions.Set'>: <Location.POST_SCHEMA: 'POST_SCHEMA'>, <class 'sqlglot.expressions.SettingsProperty'>: <Location.POST_SCHEMA: 'POST_SCHEMA'>, <class 
'sqlglot.expressions.SetProperty'>: <Location.POST_CREATE: 'POST_CREATE'>, <class 'sqlglot.expressions.SortKeyProperty'>: <Location.POST_SCHEMA: 'POST_SCHEMA'>, <class 'sqlglot.expressions.SqlSecurityProperty'>: <Location.POST_CREATE: 'POST_CREATE'>, <class 'sqlglot.expressions.StabilityProperty'>: <Location.POST_SCHEMA: 'POST_SCHEMA'>, <class 'sqlglot.expressions.TemporaryProperty'>: <Location.POST_CREATE: 'POST_CREATE'>, <class 'sqlglot.expressions.ToTableProperty'>: <Location.POST_SCHEMA: 'POST_SCHEMA'>, <class 'sqlglot.expressions.TransientProperty'>: <Location.POST_CREATE: 'POST_CREATE'>, <class 'sqlglot.expressions.MergeTreeTTL'>: <Location.POST_SCHEMA: 'POST_SCHEMA'>, <class 'sqlglot.expressions.VolatileProperty'>: <Location.UNSUPPORTED: 'UNSUPPORTED'>, <class 'sqlglot.expressions.WithDataProperty'>: <Location.POST_EXPRESSION: 'POST_EXPRESSION'>, <class 'sqlglot.expressions.WithJournalTableProperty'>: <Location.POST_NAME: 'POST_NAME'>}
def rowformatserdeproperty_sql(self, expression: sqlglot.expressions.RowFormatSerdeProperty) -> str:
490 def datatype_sql(self, expression: exp.DataType) -> str: 491 if ( 492 expression.this in (exp.DataType.Type.VARCHAR, exp.DataType.Type.NVARCHAR) 493 and not expression.expressions 494 ): 495 expression = exp.DataType.build("text") 496 elif expression.this in exp.DataType.TEMPORAL_TYPES: 497 expression = exp.DataType.build(expression.this) 498 elif expression.is_type("float"): 499 size_expression = expression.find(exp.DataTypeParam) 500 if size_expression: 501 size = int(size_expression.name) 502 expression = ( 503 exp.DataType.build("float") if size <= 32 else exp.DataType.build("double") 504 ) 505 506 return super().datatype_sql(expression)
INVERSE_TIME_MAPPING: Dict[str, str] =
{'%Y': 'yyyy', '%y': 'yy', '%B': 'MMMM', '%b': 'MMM', '%m': 'MM', '%-m': 'M', '%d': 'dd', '%-d': 'd', '%H': 'HH', '%-H': 'H', '%I': 'hh', '%-I': 'h', '%M': 'mm', '%-M': 'm', '%S': 'ss', '%-S': 's', '%f': 'SSSSSS', '%p': 'a', '%j': 'DD', '%-j': 'D', '%a': 'EEE', '%A': 'EEEE'}
INVERSE_TIME_TRIE: Dict =
{'%': {'Y': {0: True}, 'y': {0: True}, 'B': {0: True}, 'b': {0: True}, 'm': {0: True}, '-': {'m': {0: True}, 'd': {0: True}, 'H': {0: True}, 'I': {0: True}, 'M': {0: True}, 'S': {0: True}, 'j': {0: True}}, 'd': {0: True}, 'H': {0: True}, 'I': {0: True}, 'M': {0: True}, 'S': {0: True}, 'f': {0: True}, 'p': {0: True}, 'j': {0: True}, 'a': {0: True}, 'A': {0: True}}}
@classmethod
def can_identify(cls, text: str, identify: str | bool = "safe") -> bool:
    """Checks if text can be identified given an identify option.

    Args:
        text: The text to check.
        identify:
            "always" or `True`: Always returns true.
            "safe": True if the identifier is case-insensitive.

    Returns:
        Whether or not the given text can be identified.
    """
    if identify == "always" or identify is True:
        return True

    if identify == "safe":
        return not cls.case_sensitive(text)

    return False
TOKENIZER_CLASS =
<class 'sqlglot.dialects.hive.Hive.Tokenizer'>
Inherited Members
- sqlglot.generator.Generator
- Generator
- NULL_ORDERING_SUPPORTED
- LOCKING_READS_SUPPORTED
- EXPLICIT_UNION
- WRAP_DERIVED_VALUES
- CREATE_FUNCTION_RETURN_AS
- MATCHED_BY_SOURCE
- SINGLE_STRING_INTERVAL
- INTERVAL_ALLOWS_PLURAL_FORM
- RENAME_TABLE_WITH_DB
- GROUPINGS_SEP
- QUERY_HINT_SEP
- IS_BOOL_ALLOWED
- DUPLICATE_KEY_UPDATE_WITH_SET
- LIMIT_IS_TOP
- RETURNING_END
- COLUMN_JOIN_MARKS_SUPPORTED
- TZ_TO_WITH_TIME_ZONE
- VALUES_AS_TABLE
- STAR_MAPPING
- TIME_PART_SINGULARS
- TOKEN_MAPPING
- STRUCT_DELIMITER
- PARAMETER_TOKEN
- RESERVED_KEYWORDS
- WITH_SEPARATED_COMMENTS
- UNWRAPPED_INTERVAL_VALUES
- SENTINEL_LINE_BREAK
- INDEX_OFFSET
- UNNEST_COLUMN_ONLY
- STRICT_STRING_CONCAT
- NORMALIZE_FUNCTIONS
- NULL_ORDERING
- ESCAPE_LINE_BREAK
- pretty
- identify
- normalize
- pad
- unsupported_level
- max_unsupported
- leading_comma
- max_text_width
- comments
- normalize_functions
- unsupported_messages
- generate
- unsupported
- sep
- seg
- pad_comment
- maybe_comment
- wrap
- no_identify
- normalize_func
- indent
- sql
- uncache_sql
- cache_sql
- characterset_sql
- column_sql
- columnposition_sql
- columndef_sql
- columnconstraint_sql
- computedcolumnconstraint_sql
- autoincrementcolumnconstraint_sql
- compresscolumnconstraint_sql
- generatedasidentitycolumnconstraint_sql
- notnullcolumnconstraint_sql
- primarykeycolumnconstraint_sql
- uniquecolumnconstraint_sql
- createable_sql
- create_sql
- clone_sql
- describe_sql
- prepend_ctes
- with_sql
- cte_sql
- tablealias_sql
- bitstring_sql
- hexstring_sql
- bytestring_sql
- rawstring_sql
- datatypeparam_sql
- directory_sql
- delete_sql
- drop_sql
- except_sql
- except_op
- fetch_sql
- filter_sql
- hint_sql
- index_sql
- identifier_sql
- inputoutputformat_sql
- national_sql
- partition_sql
- properties_sql
- root_properties
- properties
- locate_properties
- property_sql
- likeproperty_sql
- fallbackproperty_sql
- journalproperty_sql
- freespaceproperty_sql
- checksumproperty_sql
- mergeblockratioproperty_sql
- datablocksizeproperty_sql
- blockcompressionproperty_sql
- isolatedloadingproperty_sql
- lockingproperty_sql
- withdataproperty_sql
- insert_sql
- intersect_sql
- intersect_op
- introducer_sql
- pseudotype_sql
- onconflict_sql
- returning_sql
- rowformatdelimitedproperty_sql
- withtablehint_sql
- indextablehint_sql
- table_sql
- tablesample_sql
- pivot_sql
- tuple_sql
- update_sql
- values_sql
- var_sql
- into_sql
- from_sql
- group_sql
- having_sql
- connect_sql
- prior_sql
- join_sql
- lambda_sql
- lateral_sql
- limit_sql
- offset_sql
- setitem_sql
- set_sql
- pragma_sql
- lock_sql
- literal_sql
- escape_str
- loaddata_sql
- null_sql
- boolean_sql
- order_sql
- cluster_sql
- distribute_sql
- sort_sql
- ordered_sql
- matchrecognize_sql
- query_modifiers
- offset_limit_modifiers
- after_having_modifiers
- after_limit_modifiers
- select_sql
- schema_sql
- schema_columns_sql
- star_sql
- parameter_sql
- sessionparameter_sql
- placeholder_sql
- subquery_sql
- qualify_sql
- union_sql
- union_op
- unnest_sql
- where_sql
- window_sql
- partition_by_sql
- windowspec_sql
- withingroup_sql
- between_sql
- bracket_sql
- safebracket_sql
- all_sql
- any_sql
- exists_sql
- case_sql
- constraint_sql
- nextvaluefor_sql
- extract_sql
- trim_sql
- safeconcat_sql
- check_sql
- foreignkey_sql
- primarykey_sql
- if_sql
- matchagainst_sql
- jsonkeyvalue_sql
- jsonobject_sql
- openjsoncolumndef_sql
- openjson_sql
- in_sql
- in_unnest_op
- interval_sql
- return_sql
- reference_sql
- anonymous_sql
- paren_sql
- neg_sql
- not_sql
- alias_sql
- aliases_sql
- attimezone_sql
- add_sql
- and_sql
- xor_sql
- connector_sql
- bitwiseand_sql
- bitwiseleftshift_sql
- bitwisenot_sql
- bitwiseor_sql
- bitwiserightshift_sql
- bitwisexor_sql
- cast_sql
- currentdate_sql
- collate_sql
- command_sql
- comment_sql
- mergetreettlaction_sql
- mergetreettl_sql
- transaction_sql
- commit_sql
- rollback_sql
- altercolumn_sql
- renametable_sql
- altertable_sql
- droppartition_sql
- addconstraint_sql
- distinct_sql
- ignorenulls_sql
- respectnulls_sql
- intdiv_sql
- dpipe_sql
- safedpipe_sql
- div_sql
- overlaps_sql
- distance_sql
- dot_sql
- eq_sql
- escape_sql
- glob_sql
- gt_sql
- gte_sql
- ilike_sql
- ilikeany_sql
- is_sql
- like_sql
- likeany_sql
- similarto_sql
- lt_sql
- lte_sql
- mod_sql
- mul_sql
- neq_sql
- nullsafeeq_sql
- nullsafeneq_sql
- or_sql
- slice_sql
- sub_sql
- trycast_sql
- use_sql
- binary
- function_fallback_sql
- func
- format_args
- text_width
- format_time
- expressions
- op_expressions
- naked_property
- set_operation
- tag_sql
- token_sql
- userdefinedfunction_sql
- joinhint_sql
- kwarg_sql
- when_sql
- merge_sql
- tochar_sql
- dictproperty_sql
- dictrange_sql
- dictsubproperty_sql
- oncluster_sql
- clusteredbyproperty_sql
- anyvalue_sql
- querytransform_sql
- indexconstraintoption_sql
- indexcolumnconstraint_sql
- nvl2_sql