1# bigquery_view_parser.py 2# 3# A parser to extract table names from BigQuery view definitions. 4# This is based on the `select_parser.py` sample in pyparsing: 5# https://github.com/pyparsing/pyparsing/blob/master/examples/select_parser.py 6# 7# Michael Smedberg 8# 9 10from pyparsing import ParserElement, Suppress, Forward, CaselessKeyword 11from pyparsing import MatchFirst, alphas, alphanums, Combine, Word 12from pyparsing import QuotedString, CharsNotIn, Optional, Group, ZeroOrMore 13from pyparsing import oneOf, delimitedList, restOfLine, cStyleComment 14from pyparsing import infixNotation, opAssoc, Regex, nums 15 16ParserElement.enablePackrat() 17 18 19class BigQueryViewParser: 20 """Parser to extract table info from BigQuery view definitions""" 21 22 _parser = None 23 _table_identifiers = set() 24 _with_aliases = set() 25 26 def get_table_names(self, sql_stmt): 27 table_identifiers, with_aliases = self._parse(sql_stmt) 28 29 # Table names and alias names might differ by case, but that's not 30 # relevant- aliases are not case sensitive 31 lower_aliases = BigQueryViewParser.lowercase_set_of_tuples(with_aliases) 32 tables = { 33 x 34 for x in table_identifiers 35 if not BigQueryViewParser.lowercase_of_tuple(x) in lower_aliases 36 } 37 38 # Table names ARE case sensitive as described at 39 # https://cloud.google.com/bigquery/docs/reference/standard-sql/lexical#case_sensitivity 40 return tables 41 42 def _parse(self, sql_stmt): 43 BigQueryViewParser._table_identifiers.clear() 44 BigQueryViewParser._with_aliases.clear() 45 BigQueryViewParser._get_parser().parseString(sql_stmt) 46 47 return (BigQueryViewParser._table_identifiers, BigQueryViewParser._with_aliases) 48 49 @classmethod 50 def lowercase_of_tuple(cls, tuple_to_lowercase): 51 return tuple(x.lower() if x else None for x in tuple_to_lowercase) 52 53 @classmethod 54 def lowercase_set_of_tuples(cls, set_of_tuples): 55 return {BigQueryViewParser.lowercase_of_tuple(x) for x in set_of_tuples} 56 57 @classmethod 58 def _get_parser(cls): 59 if cls._parser is not None: 60 return cls._parser 61 62 ParserElement.enablePackrat() 63 64 LPAR, RPAR, COMMA, LBRACKET, RBRACKET, LT, GT = map(Suppress, "(),[]<>") 65 ungrouped_select_stmt = Forward().setName("select statement") 66 67 # keywords 68 ( 69 UNION, 70 ALL, 71 AND, 72 INTERSECT, 73 EXCEPT, 74 COLLATE, 75 ASC, 76 DESC, 77 ON, 78 USING, 79 NATURAL, 80 INNER, 81 CROSS, 82 LEFT, 83 RIGHT, 84 OUTER, 85 FULL, 86 JOIN, 87 AS, 88 INDEXED, 89 NOT, 90 SELECT, 91 DISTINCT, 92 FROM, 93 WHERE, 94 GROUP, 95 BY, 96 HAVING, 97 ORDER, 98 BY, 99 LIMIT, 100 OFFSET, 101 OR, 102 CAST, 103 ISNULL, 104 NOTNULL, 105 NULL, 106 IS, 107 BETWEEN, 108 ELSE, 109 END, 110 CASE, 111 WHEN, 112 THEN, 113 EXISTS, 114 COLLATE, 115 IN, 116 LIKE, 117 GLOB, 118 REGEXP, 119 MATCH, 120 ESCAPE, 121 CURRENT_TIME, 122 CURRENT_DATE, 123 CURRENT_TIMESTAMP, 124 WITH, 125 EXTRACT, 126 PARTITION, 127 ROWS, 128 RANGE, 129 UNBOUNDED, 130 PRECEDING, 131 CURRENT, 132 ROW, 133 FOLLOWING, 134 OVER, 135 INTERVAL, 136 DATE_ADD, 137 DATE_SUB, 138 ADDDATE, 139 SUBDATE, 140 REGEXP_EXTRACT, 141 SPLIT, 142 ORDINAL, 143 FIRST_VALUE, 144 LAST_VALUE, 145 NTH_VALUE, 146 LEAD, 147 LAG, 148 PERCENTILE_CONT, 149 PRECENTILE_DISC, 150 RANK, 151 DENSE_RANK, 152 PERCENT_RANK, 153 CUME_DIST, 154 NTILE, 155 ROW_NUMBER, 156 DATE, 157 TIME, 158 DATETIME, 159 TIMESTAMP, 160 UNNEST, 161 INT64, 162 NUMERIC, 163 FLOAT64, 164 BOOL, 165 BYTES, 166 GEOGRAPHY, 167 ARRAY, 168 STRUCT, 169 SAFE_CAST, 170 ANY_VALUE, 171 ARRAY_AGG, 172 ARRAY_CONCAT_AGG, 173 AVG, 174 BIT_AND, 175 BIT_OR, 176 BIT_XOR, 177 COUNT, 178 COUNTIF, 179 LOGICAL_AND, 180 LOGICAL_OR, 181 MAX, 182 MIN, 183 STRING_AGG, 184 SUM, 185 CORR, 186 COVAR_POP, 187 COVAR_SAMP, 188 STDDEV_POP, 189 STDDEV_SAMP, 190 STDDEV, 191 VAR_POP, 192 VAR_SAMP, 193 VARIANCE, 194 TIMESTAMP_ADD, 195 TIMESTAMP_SUB, 196 GENERATE_ARRAY, 197 GENERATE_DATE_ARRAY, 198 GENERATE_TIMESTAMP_ARRAY, 199 FOR, 200 SYSTEMTIME, 201 AS, 202 OF, 203 WINDOW, 204 RESPECT, 205 IGNORE, 206 NULLS, 207 ) = map( 208 CaselessKeyword, 209 """ 210 UNION, ALL, AND, INTERSECT, EXCEPT, COLLATE, ASC, DESC, ON, USING, 211 NATURAL, INNER, CROSS, LEFT, RIGHT, OUTER, FULL, JOIN, AS, INDEXED, 212 NOT, SELECT, DISTINCT, FROM, WHERE, GROUP, BY, HAVING, ORDER, BY, 213 LIMIT, OFFSET, OR, CAST, ISNULL, NOTNULL, NULL, IS, BETWEEN, ELSE, 214 END, CASE, WHEN, THEN, EXISTS, COLLATE, IN, LIKE, GLOB, REGEXP, 215 MATCH, ESCAPE, CURRENT_TIME, CURRENT_DATE, CURRENT_TIMESTAMP, WITH, 216 EXTRACT, PARTITION, ROWS, RANGE, UNBOUNDED, PRECEDING, CURRENT, 217 ROW, FOLLOWING, OVER, INTERVAL, DATE_ADD, DATE_SUB, ADDDATE, 218 SUBDATE, REGEXP_EXTRACT, SPLIT, ORDINAL, FIRST_VALUE, LAST_VALUE, 219 NTH_VALUE, LEAD, LAG, PERCENTILE_CONT, PRECENTILE_DISC, RANK, 220 DENSE_RANK, PERCENT_RANK, CUME_DIST, NTILE, ROW_NUMBER, DATE, TIME, 221 DATETIME, TIMESTAMP, UNNEST, INT64, NUMERIC, FLOAT64, BOOL, BYTES, 222 GEOGRAPHY, ARRAY, STRUCT, SAFE_CAST, ANY_VALUE, ARRAY_AGG, 223 ARRAY_CONCAT_AGG, AVG, BIT_AND, BIT_OR, BIT_XOR, COUNT, COUNTIF, 224 LOGICAL_AND, LOGICAL_OR, MAX, MIN, STRING_AGG, SUM, CORR, 225 COVAR_POP, COVAR_SAMP, STDDEV_POP, STDDEV_SAMP, STDDEV, VAR_POP, 226 VAR_SAMP, VARIANCE, TIMESTAMP_ADD, TIMESTAMP_SUB, GENERATE_ARRAY, 227 GENERATE_DATE_ARRAY, GENERATE_TIMESTAMP_ARRAY, FOR, SYSTEMTIME, AS, 228 OF, WINDOW, RESPECT, IGNORE, NULLS 229 """.replace( 230 ",", "" 231 ).split(), 232 ) 233 234 keyword_nonfunctions = MatchFirst( 235 ( 236 UNION, 237 ALL, 238 INTERSECT, 239 EXCEPT, 240 COLLATE, 241 ASC, 242 DESC, 243 ON, 244 USING, 245 NATURAL, 246 INNER, 247 CROSS, 248 LEFT, 249 RIGHT, 250 OUTER, 251 FULL, 252 JOIN, 253 AS, 254 INDEXED, 255 NOT, 256 SELECT, 257 DISTINCT, 258 FROM, 259 WHERE, 260 GROUP, 261 BY, 262 HAVING, 263 ORDER, 264 BY, 265 LIMIT, 266 OFFSET, 267 CAST, 268 ISNULL, 269 NOTNULL, 270 NULL, 271 IS, 272 BETWEEN, 273 ELSE, 274 END, 275 CASE, 276 WHEN, 277 THEN, 278 EXISTS, 279 COLLATE, 280 IN, 281 LIKE, 282 GLOB, 283 REGEXP, 284 MATCH, 285 STRUCT, 286 WINDOW, 287 ) 288 ) 289 290 keyword = keyword_nonfunctions | MatchFirst( 291 ( 292 ESCAPE, 293 CURRENT_TIME, 294 CURRENT_DATE, 295 CURRENT_TIMESTAMP, 296 DATE_ADD, 297 DATE_SUB, 298 ADDDATE, 299 SUBDATE, 300 INTERVAL, 301 STRING_AGG, 302 REGEXP_EXTRACT, 303 SPLIT, 304 ORDINAL, 305 UNNEST, 306 SAFE_CAST, 307 PARTITION, 308 TIMESTAMP_ADD, 309 TIMESTAMP_SUB, 310 ARRAY, 311 GENERATE_ARRAY, 312 GENERATE_DATE_ARRAY, 313 GENERATE_TIMESTAMP_ARRAY, 314 ) 315 ) 316 317 identifier_word = Word(alphas + "_@#", alphanums + "@$#_") 318 identifier = ~keyword + identifier_word.copy() 319 collation_name = identifier.copy() 320 # NOTE: Column names can be keywords. Doc says they cannot, but in practice it seems to work. 321 column_name = identifier_word.copy() 322 qualified_column_name = Combine( 323 column_name 324 + (ZeroOrMore(" ") + "." + ZeroOrMore(" ") + column_name) * (0, 6) 325 ) 326 # NOTE: As with column names, column aliases can be keywords, e.g. functions like `current_time`. Other 327 # keywords, e.g. `from` make parsing pretty difficult (e.g. "SELECT a from from b" is confusing.) 328 column_alias = ~keyword_nonfunctions + column_name.copy() 329 table_name = identifier.copy() 330 table_alias = identifier.copy() 331 index_name = identifier.copy() 332 function_name = identifier.copy() 333 parameter_name = identifier.copy() 334 # NOTE: The expression in a CASE statement can be an integer. E.g. this is valid SQL: 335 # select CASE 1 WHEN 1 THEN -1 ELSE -2 END from test_table 336 unquoted_case_identifier = ~keyword + Word(alphanums + "$_") 337 quoted_case_identifier = ~keyword + ( 338 QuotedString('"') ^ Suppress("`") + CharsNotIn("`") + Suppress("`") 339 ) 340 case_identifier = quoted_case_identifier | unquoted_case_identifier 341 case_expr = ( 342 Optional(case_identifier + Suppress(".")) 343 + Optional(case_identifier + Suppress(".")) 344 + case_identifier 345 ) 346 347 # expression 348 expr = Forward().setName("expression") 349 350 integer = Regex(r"[+-]?\d+") 351 numeric_literal = Regex(r"[+-]?\d*\.?\d+([eE][+-]?\d+)?") 352 string_literal = QuotedString("'") | QuotedString('"') | QuotedString("`") 353 regex_literal = "r" + string_literal 354 blob_literal = Regex(r"[xX]'[0-9A-Fa-f]+'") 355 date_or_time_literal = (DATE | TIME | DATETIME | TIMESTAMP) + string_literal 356 literal_value = ( 357 numeric_literal 358 | string_literal 359 | regex_literal 360 | blob_literal 361 | date_or_time_literal 362 | NULL 363 | CURRENT_TIME + Optional(LPAR + Optional(string_literal) + RPAR) 364 | CURRENT_DATE + Optional(LPAR + Optional(string_literal) + RPAR) 365 | CURRENT_TIMESTAMP + Optional(LPAR + Optional(string_literal) + RPAR) 366 ) 367 bind_parameter = Word("?", nums) | Combine(oneOf(": @ $") + parameter_name) 368 type_name = oneOf( 369 """TEXT REAL INTEGER BLOB NULL TIMESTAMP STRING DATE 370 INT64 NUMERIC FLOAT64 BOOL BYTES DATETIME GEOGRAPHY TIME ARRAY 371 STRUCT""", 372 caseless=True, 373 ) 374 date_part = oneOf( 375 """DAY DAY_HOUR DAY_MICROSECOND DAY_MINUTE DAY_SECOND 376 HOUR HOUR_MICROSECOND HOUR_MINUTE HOUR_SECOND MICROSECOND MINUTE 377 MINUTE_MICROSECOND MINUTE_SECOND MONTH QUARTER SECOND 378 SECOND_MICROSECOND WEEK YEAR YEAR_MONTH""", 379 caseless=True, 380 ) 381 datetime_operators = ( 382 DATE_ADD | DATE_SUB | ADDDATE | SUBDATE | TIMESTAMP_ADD | TIMESTAMP_SUB 383 ) 384 385 grouping_term = expr.copy() 386 ordering_term = Group( 387 expr("order_key") 388 + Optional(COLLATE + collation_name("collate")) 389 + Optional(ASC | DESC)("direction") 390 )("ordering_term") 391 392 function_arg = expr.copy()("function_arg") 393 function_args = Optional( 394 "*" 395 | Optional(DISTINCT) 396 + delimitedList(function_arg) 397 + Optional((RESPECT | IGNORE) + NULLS) 398 )("function_args") 399 function_call = ( 400 (function_name | keyword)("function_name") 401 + LPAR 402 + Group(function_args)("function_args_group") 403 + RPAR 404 ) 405 406 navigation_function_name = ( 407 FIRST_VALUE 408 | LAST_VALUE 409 | NTH_VALUE 410 | LEAD 411 | LAG 412 | PERCENTILE_CONT 413 | PRECENTILE_DISC 414 ) 415 aggregate_function_name = ( 416 ANY_VALUE 417 | ARRAY_AGG 418 | ARRAY_CONCAT_AGG 419 | AVG 420 | BIT_AND 421 | BIT_OR 422 | BIT_XOR 423 | COUNT 424 | COUNTIF 425 | LOGICAL_AND 426 | LOGICAL_OR 427 | MAX 428 | MIN 429 | STRING_AGG 430 | SUM 431 ) 432 statistical_aggregate_function_name = ( 433 CORR 434 | COVAR_POP 435 | COVAR_SAMP 436 | STDDEV_POP 437 | STDDEV_SAMP 438 | STDDEV 439 | VAR_POP 440 | VAR_SAMP 441 | VARIANCE 442 ) 443 numbering_function_name = ( 444 RANK | DENSE_RANK | PERCENT_RANK | CUME_DIST | NTILE | ROW_NUMBER 445 ) 446 analytic_function_name = ( 447 navigation_function_name 448 | aggregate_function_name 449 | statistical_aggregate_function_name 450 | numbering_function_name 451 )("analytic_function_name") 452 partition_expression_list = delimitedList(grouping_term)( 453 "partition_expression_list" 454 ) 455 window_frame_boundary_start = ( 456 UNBOUNDED + PRECEDING 457 | numeric_literal + (PRECEDING | FOLLOWING) 458 | CURRENT + ROW 459 ) 460 window_frame_boundary_end = ( 461 UNBOUNDED + FOLLOWING 462 | numeric_literal + (PRECEDING | FOLLOWING) 463 | CURRENT + ROW 464 ) 465 window_frame_clause = (ROWS | RANGE) + ( 466 ((UNBOUNDED + PRECEDING) | (numeric_literal + PRECEDING) | (CURRENT + ROW)) 467 | (BETWEEN + window_frame_boundary_start + AND + window_frame_boundary_end) 468 ) 469 window_name = identifier.copy()("window_name") 470 window_specification = ( 471 Optional(window_name) 472 + Optional(PARTITION + BY + partition_expression_list) 473 + Optional(ORDER + BY + delimitedList(ordering_term)) 474 + Optional(window_frame_clause)("window_specification") 475 ) 476 analytic_function = ( 477 analytic_function_name 478 + LPAR 479 + function_args 480 + RPAR 481 + OVER 482 + (window_name | LPAR + Optional(window_specification) + RPAR) 483 )("analytic_function") 484 485 string_agg_term = ( 486 STRING_AGG 487 + LPAR 488 + Optional(DISTINCT) 489 + expr 490 + Optional(COMMA + string_literal) 491 + Optional( 492 ORDER + BY + expr + Optional(ASC | DESC) + Optional(LIMIT + integer) 493 ) 494 + RPAR 495 )("string_agg") 496 array_literal = ( 497 Optional(ARRAY + Optional(LT + delimitedList(type_name) + GT)) 498 + LBRACKET 499 + delimitedList(expr) 500 + RBRACKET 501 ) 502 interval = INTERVAL + expr + date_part 503 array_generator = ( 504 GENERATE_ARRAY 505 + LPAR 506 + numeric_literal 507 + COMMA 508 + numeric_literal 509 + COMMA 510 + numeric_literal 511 + RPAR 512 ) 513 date_array_generator = ( 514 (GENERATE_DATE_ARRAY | GENERATE_TIMESTAMP_ARRAY) 515 + LPAR 516 + expr("start_date") 517 + COMMA 518 + expr("end_date") 519 + Optional(COMMA + interval) 520 + RPAR 521 ) 522 523 explicit_struct = ( 524 STRUCT 525 + Optional(LT + delimitedList(type_name) + GT) 526 + LPAR 527 + Optional(delimitedList(expr + Optional(AS + identifier))) 528 + RPAR 529 ) 530 531 case_when = WHEN + expr.copy()("when") 532 case_then = THEN + expr.copy()("then") 533 case_clauses = Group(ZeroOrMore(case_when + case_then)) 534 case_else = ELSE + expr.copy()("else") 535 case_stmt = ( 536 CASE 537 + Optional(case_expr.copy()) 538 + case_clauses("case_clauses") 539 + Optional(case_else) 540 + END 541 )("case") 542 543 expr_term = ( 544 (analytic_function)("analytic_function") 545 | (CAST + LPAR + expr + AS + type_name + RPAR)("cast") 546 | (SAFE_CAST + LPAR + expr + AS + type_name + RPAR)("safe_cast") 547 | (Optional(EXISTS) + LPAR + ungrouped_select_stmt + RPAR)("subselect") 548 | (literal_value)("literal") 549 | (bind_parameter)("bind_parameter") 550 | (EXTRACT + LPAR + expr + FROM + expr + RPAR)("extract") 551 | case_stmt 552 | (datetime_operators + LPAR + expr + COMMA + interval + RPAR)( 553 "date_operation" 554 ) 555 | string_agg_term("string_agg_term") 556 | array_literal("array_literal") 557 | array_generator("array_generator") 558 | date_array_generator("date_array_generator") 559 | explicit_struct("explicit_struct") 560 | function_call("function_call") 561 | qualified_column_name("column") 562 ) + Optional(LBRACKET + (OFFSET | ORDINAL) + LPAR + expr + RPAR + RBRACKET)( 563 "offset_ordinal" 564 ) 565 566 struct_term = LPAR + delimitedList(expr_term) + RPAR 567 568 UNARY, BINARY, TERNARY = 1, 2, 3 569 expr << infixNotation( 570 (expr_term | struct_term), 571 [ 572 (oneOf("- + ~") | NOT, UNARY, opAssoc.RIGHT), 573 (ISNULL | NOTNULL | NOT + NULL, UNARY, opAssoc.LEFT), 574 ("||", BINARY, opAssoc.LEFT), 575 (oneOf("* / %"), BINARY, opAssoc.LEFT), 576 (oneOf("+ -"), BINARY, opAssoc.LEFT), 577 (oneOf("<< >> & |"), BINARY, opAssoc.LEFT), 578 (oneOf("= > < >= <= <> != !< !>"), BINARY, opAssoc.LEFT), 579 ( 580 IS + Optional(NOT) 581 | Optional(NOT) + IN 582 | Optional(NOT) + LIKE 583 | GLOB 584 | MATCH 585 | REGEXP, 586 BINARY, 587 opAssoc.LEFT, 588 ), 589 ((BETWEEN, AND), TERNARY, opAssoc.LEFT), 590 ( 591 Optional(NOT) 592 + IN 593 + LPAR 594 + Group(ungrouped_select_stmt | delimitedList(expr)) 595 + RPAR, 596 UNARY, 597 opAssoc.LEFT, 598 ), 599 (AND, BINARY, opAssoc.LEFT), 600 (OR, BINARY, opAssoc.LEFT), 601 ], 602 ) 603 quoted_expr = ( 604 expr 605 ^ Suppress('"') + expr + Suppress('"') 606 ^ Suppress("'") + expr + Suppress("'") 607 ^ Suppress("`") + expr + Suppress("`") 608 )("quoted_expr") 609 610 compound_operator = ( 611 UNION + Optional(ALL | DISTINCT) 612 | INTERSECT + DISTINCT 613 | EXCEPT + DISTINCT 614 | INTERSECT 615 | EXCEPT 616 )("compound_operator") 617 618 join_constraint = Group( 619 Optional( 620 ON + expr 621 | USING + LPAR + Group(delimitedList(qualified_column_name)) + RPAR 622 ) 623 )("join_constraint") 624 625 join_op = ( 626 COMMA 627 | Group( 628 Optional(NATURAL) 629 + Optional( 630 INNER 631 | CROSS 632 | LEFT + OUTER 633 | LEFT 634 | RIGHT + OUTER 635 | RIGHT 636 | FULL + OUTER 637 | OUTER 638 | FULL 639 ) 640 + JOIN 641 ) 642 )("join_op") 643 644 join_source = Forward() 645 646 # We support three kinds of table identifiers. 647 # 648 # First, dot delimited info like project.dataset.table, where 649 # each component follows the rules described in the BigQuery 650 # docs, namely: 651 # Contain letters (upper or lower case), numbers, and underscores 652 # 653 # Second, a dot delimited quoted string. Since it's quoted, we'll be 654 # liberal w.r.t. what characters we allow. E.g.: 655 # `project.dataset.name-with-dashes` 656 # 657 # Third, a series of quoted strings, delimited by dots, e.g.: 658 # `project`.`dataset`.`name-with-dashes` 659 # 660 # We also support combinations, like: 661 # project.dataset.`name-with-dashes` 662 # `project`.`dataset.name-with-dashes` 663 664 def record_table_identifier(t): 665 identifier_list = t.asList() 666 padded_list = [None] * (3 - len(identifier_list)) + identifier_list 667 cls._table_identifiers.add(tuple(padded_list)) 668 669 standard_table_part = ~keyword + Word(alphanums + "_") 670 quoted_project_part = ( 671 Suppress('"') + CharsNotIn('"') + Suppress('"') 672 | Suppress("'") + CharsNotIn("'") + Suppress("'") 673 | Suppress("`") + CharsNotIn("`") + Suppress("`") 674 ) 675 quoted_table_part = ( 676 Suppress('"') + CharsNotIn('".') + Suppress('"') 677 | Suppress("'") + CharsNotIn("'.") + Suppress("'") 678 | Suppress("`") + CharsNotIn("`.") + Suppress("`") 679 ) 680 quoted_table_parts_identifier = ( 681 Optional( 682 (quoted_project_part("project") | standard_table_part("project")) 683 + Suppress(".") 684 ) 685 + Optional( 686 (quoted_table_part("dataset") | standard_table_part("dataset")) 687 + Suppress(".") 688 ) 689 + (quoted_table_part("table") | standard_table_part("table")) 690 ).setParseAction(record_table_identifier) 691 692 def record_quoted_table_identifier(t): 693 identifier_list = t.asList()[0].split(".") 694 first = ".".join(identifier_list[0:-2]) or None 695 second = identifier_list[-2] 696 third = identifier_list[-1] 697 identifier_list = [first, second, third] 698 padded_list = [None] * (3 - len(identifier_list)) + identifier_list 699 cls._table_identifiers.add(tuple(padded_list)) 700 701 quotable_table_parts_identifier = ( 702 Suppress('"') + CharsNotIn('"') + Suppress('"') 703 | Suppress("'") + CharsNotIn("'") + Suppress("'") 704 | Suppress("`") + CharsNotIn("`") + Suppress("`") 705 ).setParseAction(record_quoted_table_identifier) 706 707 table_identifier = ( 708 quoted_table_parts_identifier | quotable_table_parts_identifier 709 ) 710 single_source = ( 711 ( 712 table_identifier 713 + Optional(Optional(AS) + table_alias("table_alias*")) 714 + Optional(FOR + SYSTEMTIME + AS + OF + string_literal) 715 + Optional(INDEXED + BY + index_name("name") | NOT + INDEXED) 716 )("index") 717 | (LPAR + ungrouped_select_stmt + RPAR) 718 | (LPAR + join_source + RPAR) 719 | (UNNEST + LPAR + expr + RPAR) 720 ) + Optional(Optional(AS) + table_alias) 721 722 join_source << single_source + ZeroOrMore( 723 join_op + single_source + join_constraint 724 ) 725 726 over_partition = (PARTITION + BY + delimitedList(partition_expression_list))( 727 "over_partition" 728 ) 729 over_order = ORDER + BY + delimitedList(ordering_term) 730 over_unsigned_value_specification = expr 731 over_window_frame_preceding = ( 732 UNBOUNDED + PRECEDING 733 | over_unsigned_value_specification + PRECEDING 734 | CURRENT + ROW 735 ) 736 over_window_frame_following = ( 737 UNBOUNDED + FOLLOWING 738 | over_unsigned_value_specification + FOLLOWING 739 | CURRENT + ROW 740 ) 741 over_window_frame_bound = ( 742 over_window_frame_preceding | over_window_frame_following 743 ) 744 over_window_frame_between = ( 745 BETWEEN + over_window_frame_bound + AND + over_window_frame_bound 746 ) 747 over_window_frame_extent = ( 748 over_window_frame_preceding | over_window_frame_between 749 ) 750 over_row_or_range = (ROWS | RANGE) + over_window_frame_extent 751 over = ( 752 OVER 753 + LPAR 754 + Optional(over_partition) 755 + Optional(over_order) 756 + Optional(over_row_or_range) 757 + RPAR 758 )("over") 759 760 result_column = Optional(table_name + ".") + "*" + Optional( 761 EXCEPT + LPAR + delimitedList(column_name) + RPAR 762 ) | Group(quoted_expr + Optional(over) + Optional(Optional(AS) + column_alias)) 763 764 window_select_clause = ( 765 WINDOW + identifier + AS + LPAR + window_specification + RPAR 766 ) 767 768 with_stmt = Forward().setName("with statement") 769 ungrouped_select_no_with = ( 770 SELECT 771 + Optional(DISTINCT | ALL) 772 + Group(delimitedList(result_column))("columns") 773 + Optional(FROM + join_source("from*")) 774 + Optional(WHERE + expr) 775 + Optional( 776 GROUP + BY + Group(delimitedList(grouping_term))("group_by_terms") 777 ) 778 + Optional(HAVING + expr("having_expr")) 779 + Optional( 780 ORDER + BY + Group(delimitedList(ordering_term))("order_by_terms") 781 ) 782 + Optional(delimitedList(window_select_clause)) 783 ) 784 select_no_with = ungrouped_select_no_with | ( 785 LPAR + ungrouped_select_no_with + RPAR 786 ) 787 select_core = Optional(with_stmt) + select_no_with 788 grouped_select_core = select_core | (LPAR + select_core + RPAR) 789 790 ungrouped_select_stmt << ( 791 grouped_select_core 792 + ZeroOrMore(compound_operator + grouped_select_core) 793 + Optional( 794 LIMIT 795 + (Group(expr + OFFSET + expr) | Group(expr + COMMA + expr) | expr)( 796 "limit" 797 ) 798 ) 799 )("select") 800 select_stmt = ungrouped_select_stmt | (LPAR + ungrouped_select_stmt + RPAR) 801 802 # define comment format, and ignore them 803 sql_comment = oneOf("-- #") + restOfLine | cStyleComment 804 select_stmt.ignore(sql_comment) 805 806 def record_with_alias(t): 807 identifier_list = t.asList() 808 padded_list = [None] * (3 - len(identifier_list)) + identifier_list 809 cls._with_aliases.add(tuple(padded_list)) 810 811 with_clause = Group( 812 identifier.setParseAction(record_with_alias) 813 + AS 814 + LPAR 815 + select_stmt 816 + RPAR 817 ) 818 with_stmt << (WITH + delimitedList(with_clause)) 819 with_stmt.ignore(sql_comment) 820 821 cls._parser = select_stmt 822 return cls._parser 823 824 def test(self, sql_stmt, expected_tables, verbose=False): 825 def print_(*args): 826 if verbose: 827 print(*args) 828 829 print_(sql_stmt.strip()) 830 found_tables = self.get_table_names(sql_stmt) 831 print_(found_tables) 832 expected_tables_set = set(expected_tables) 833 834 if expected_tables_set != found_tables: 835 raise Exception( 836 f"Test {test_index} failed- expected {expected_tables_set} but got {found_tables}" 837 ) 838 print_() 839 840 841if __name__ == "__main__": 842 TEST_CASES = [ 843 [ 844 """ 845 SELECT x FROM y.a, b 846 """, 847 [ 848 (None, "y", "a"), 849 ( 850 None, 851 None, 852 "b", 853 ), 854 ], 855 ], 856 [ 857 """ 858 SELECT x FROM y.a JOIN b 859 """, 860 [ 861 (None, "y", "a"), 862 (None, None, "b"), 863 ], 864 ], 865 [ 866 """ 867 select * from xyzzy where z > 100 868 """, 869 [ 870 (None, None, "xyzzy"), 871 ], 872 ], 873 [ 874 """ 875 select * from xyzzy where z > 100 order by zz 876 """, 877 [ 878 (None, None, "xyzzy"), 879 ], 880 ], 881 [ 882 """ 883 select * from xyzzy 884 """, 885 [ 886 ( 887 None, 888 None, 889 "xyzzy", 890 ), 891 ], 892 ], 893 [ 894 """ 895 select z.* from xyzzy 896 """, 897 [ 898 ( 899 None, 900 None, 901 "xyzzy", 902 ), 903 ], 904 ], 905 [ 906 """ 907 select a, b from test_table where 1=1 and b='yes' 908 """, 909 [ 910 (None, None, "test_table"), 911 ], 912 ], 913 [ 914 """ 915 select a, b from test_table where 1=1 and b in (select bb from foo) 916 """, 917 [ 918 (None, None, "test_table"), 919 (None, None, "foo"), 920 ], 921 ], 922 [ 923 """ 924 select z.a, b from test_table where 1=1 and b in (select bb from foo) 925 """, 926 [ 927 (None, None, "test_table"), 928 (None, None, "foo"), 929 ], 930 ], 931 [ 932 """ 933 select z.a, b from test_table where 1=1 and b in (select bb from foo) order by b,c desc,d 934 """, 935 [ 936 (None, None, "test_table"), 937 (None, None, "foo"), 938 ], 939 ], 940 [ 941 """ 942 select z.a, b from test_table left join test2_table where 1=1 and b in (select bb from foo) 943 """, 944 [ 945 (None, None, "test_table"), 946 (None, None, "test2_table"), 947 (None, None, "foo"), 948 ], 949 ], 950 [ 951 """ 952 select a, db.table.b as BBB from db.table where 1=1 and BBB='yes' 953 """, 954 [ 955 (None, "db", "table"), 956 ], 957 ], 958 [ 959 """ 960 select a, db.table.b as BBB from test_table,db.table where 1=1 and BBB='yes' 961 """, 962 [ 963 (None, None, "test_table"), 964 (None, "db", "table"), 965 ], 966 ], 967 [ 968 """ 969 select a, db.table.b as BBB from test_table,db.table where 1=1 and BBB='yes' limit 50 970 """, 971 [ 972 (None, None, "test_table"), 973 (None, "db", "table"), 974 ], 975 ], 976 [ 977 """ 978 select a, b from test_table where (1=1 or 2=3) and b='yes' group by zx having b=2 order by 1 979 """, 980 [ 981 (None, None, "test_table"), 982 ], 983 ], 984 [ 985 """ 986 select 987 a, 988 b 989 # this is a comment 990 from 991 test_table 992 # another comment 993 where (1=1 or 2=3) and b='yes' 994 #yup, a comment 995 group by zx having b=2 order by 1 996 """, 997 [ 998 (None, None, "test_table"), 999 ], 1000 ], 1001 [ 1002 """ 1003 SELECT COUNT(DISTINCT foo) FROM bar JOIN baz ON bar.baz_id = baz.id 1004 """, 1005 [ 1006 (None, None, "bar"), 1007 (None, None, "baz"), 1008 ], 1009 ], 1010 [ 1011 """ 1012 SELECT COUNT(DISTINCT foo) FROM bar, baz WHERE bar.baz_id = baz.id 1013 """, 1014 [ 1015 (None, None, "bar"), 1016 (None, None, "baz"), 1017 ], 1018 ], 1019 [ 1020 """ 1021 WITH one AS (SELECT id FROM foo) SELECT one.id 1022 """, 1023 [ 1024 (None, None, "foo"), 1025 ], 1026 ], 1027 [ 1028 """ 1029 WITH one AS (SELECT id FROM foo), two AS (select id FROM bar) SELECT one.id, two.id 1030 """, 1031 [ 1032 (None, None, "foo"), 1033 (None, None, "bar"), 1034 ], 1035 ], 1036 [ 1037 """ 1038 SELECT x, 1039 RANK() OVER (ORDER BY x ASC) AS rank, 1040 DENSE_RANK() OVER (ORDER BY x ASC) AS dense_rank, 1041 ROW_NUMBER() OVER (PARTITION BY x ORDER BY y) AS row_num 1042 FROM a 1043 """, 1044 [ 1045 ( 1046 None, 1047 None, 1048 "a", 1049 ), 1050 ], 1051 ], 1052 [ 1053 """ 1054 SELECT x, COUNT(*) OVER ( ORDER BY x 1055 RANGE BETWEEN 2 PRECEDING AND 2 FOLLOWING ) AS count_x 1056 FROM T 1057 """, 1058 [ 1059 ( 1060 None, 1061 None, 1062 "T", 1063 ), 1064 ], 1065 ], 1066 [ 1067 """ 1068 SELECT firstname, department, startdate, 1069 RANK() OVER ( PARTITION BY department ORDER BY startdate ) AS rank 1070 FROM Employees 1071 """, 1072 [ 1073 (None, None, "Employees"), 1074 ], 1075 ], 1076 # A fragment from https://cloud.google.com/bigquery/docs/reference/standard-sql/navigation_functions 1077 [ 1078 """ 1079 SELECT 'Sophia Liu' as name, 1080 TIMESTAMP '2016-10-18 2:51:45' as finish_time, 1081 'F30-34' as division 1082 UNION ALL SELECT 'Lisa Stelzner', TIMESTAMP '2016-10-18 2:54:11', 'F35-39' 1083 UNION ALL SELECT 'Nikki Leith', TIMESTAMP '2016-10-18 2:59:01', 'F30-34' 1084 UNION ALL SELECT 'Lauren Matthews', TIMESTAMP '2016-10-18 3:01:17', 'F35-39' 1085 UNION ALL SELECT 'Desiree Berry', TIMESTAMP '2016-10-18 3:05:42', 'F35-39' 1086 UNION ALL SELECT 'Suzy Slane', TIMESTAMP '2016-10-18 3:06:24', 'F35-39' 1087 UNION ALL SELECT 'Jen Edwards', TIMESTAMP '2016-10-18 3:06:36', 'F30-34' 1088 UNION ALL SELECT 'Meghan Lederer', TIMESTAMP '2016-10-18 3:07:41', 'F30-34' 1089 UNION ALL SELECT 'Carly Forte', TIMESTAMP '2016-10-18 3:08:58', 'F25-29' 1090 UNION ALL SELECT 'Lauren Reasoner', TIMESTAMP '2016-10-18 3:10:14', 'F30-34' 1091 """, 1092 [], 1093 ], 1094 # From https://cloud.google.com/bigquery/docs/reference/standard-sql/navigation_functions 1095 [ 1096 """ 1097 WITH finishers AS 1098 (SELECT 'Sophia Liu' as name, 1099 TIMESTAMP '2016-10-18 2:51:45' as finish_time, 1100 'F30-34' as division 1101 UNION ALL SELECT 'Lisa Stelzner', TIMESTAMP '2016-10-18 2:54:11', 'F35-39' 1102 UNION ALL SELECT 'Nikki Leith', TIMESTAMP '2016-10-18 2:59:01', 'F30-34' 1103 UNION ALL SELECT 'Lauren Matthews', TIMESTAMP '2016-10-18 3:01:17', 'F35-39' 1104 UNION ALL SELECT 'Desiree Berry', TIMESTAMP '2016-10-18 3:05:42', 'F35-39' 1105 UNION ALL SELECT 'Suzy Slane', TIMESTAMP '2016-10-18 3:06:24', 'F35-39' 1106 UNION ALL SELECT 'Jen Edwards', TIMESTAMP '2016-10-18 3:06:36', 'F30-34' 1107 UNION ALL SELECT 'Meghan Lederer', TIMESTAMP '2016-10-18 3:07:41', 'F30-34' 1108 UNION ALL SELECT 'Carly Forte', TIMESTAMP '2016-10-18 3:08:58', 'F25-29' 1109 UNION ALL SELECT 'Lauren Reasoner', TIMESTAMP '2016-10-18 3:10:14', 'F30-34') 1110 SELECT name, 1111 FORMAT_TIMESTAMP('%X', finish_time) AS finish_time, 1112 division, 1113 FORMAT_TIMESTAMP('%X', fastest_time) AS fastest_time, 1114 TIMESTAMP_DIFF(finish_time, fastest_time, SECOND) AS delta_in_seconds 1115 FROM ( 1116 SELECT name, 1117 finish_time, 1118 division, 1119 FIRST_VALUE(finish_time) 1120 OVER (PARTITION BY division ORDER BY finish_time ASC 1121 ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) AS fastest_time 1122 FROM finishers) 1123 """, 1124 [], 1125 ], 1126 # From https://cloud.google.com/bigquery/docs/reference/standard-sql/navigation_functions 1127 [ 1128 """ 1129 WITH finishers AS 1130 (SELECT 'Sophia Liu' as name, 1131 TIMESTAMP '2016-10-18 2:51:45' as finish_time, 1132 'F30-34' as division 1133 UNION ALL SELECT 'Lisa Stelzner', TIMESTAMP '2016-10-18 2:54:11', 'F35-39' 1134 UNION ALL SELECT 'Nikki Leith', TIMESTAMP '2016-10-18 2:59:01', 'F30-34' 1135 UNION ALL SELECT 'Lauren Matthews', TIMESTAMP '2016-10-18 3:01:17', 'F35-39' 1136 UNION ALL SELECT 'Desiree Berry', TIMESTAMP '2016-10-18 3:05:42', 'F35-39' 1137 UNION ALL SELECT 'Suzy Slane', TIMESTAMP '2016-10-18 3:06:24', 'F35-39' 1138 UNION ALL SELECT 'Jen Edwards', TIMESTAMP '2016-10-18 3:06:36', 'F30-34' 1139 UNION ALL SELECT 'Meghan Lederer', TIMESTAMP '2016-10-18 3:07:41', 'F30-34' 1140 UNION ALL SELECT 'Carly Forte', TIMESTAMP '2016-10-18 3:08:58', 'F25-29' 1141 UNION ALL SELECT 'Lauren Reasoner', TIMESTAMP '2016-10-18 3:10:14', 'F30-34') 1142 SELECT name, 1143 FORMAT_TIMESTAMP('%X', finish_time) AS finish_time, 1144 division, 1145 FORMAT_TIMESTAMP('%X', slowest_time) AS slowest_time, 1146 TIMESTAMP_DIFF(slowest_time, finish_time, SECOND) AS delta_in_seconds 1147 FROM ( 1148 SELECT name, 1149 finish_time, 1150 division, 1151 LAST_VALUE(finish_time) 1152 OVER (PARTITION BY division ORDER BY finish_time ASC 1153 ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) AS slowest_time 1154 FROM finishers) 1155 """, 1156 [], 1157 ], 1158 # From https://cloud.google.com/bigquery/docs/reference/standard-sql/navigation_functions 1159 [ 1160 """ 1161 WITH finishers AS 1162 (SELECT 'Sophia Liu' as name, 1163 TIMESTAMP '2016-10-18 2:51:45' as finish_time, 1164 'F30-34' as division 1165 UNION ALL SELECT 'Lisa Stelzner', TIMESTAMP '2016-10-18 2:54:11', 'F35-39' 1166 UNION ALL SELECT 'Nikki Leith', TIMESTAMP '2016-10-18 2:59:01', 'F30-34' 1167 UNION ALL SELECT 'Lauren Matthews', TIMESTAMP '2016-10-18 3:01:17', 'F35-39' 1168 UNION ALL SELECT 'Desiree Berry', TIMESTAMP '2016-10-18 3:05:42', 'F35-39' 1169 UNION ALL SELECT 'Suzy Slane', TIMESTAMP '2016-10-18 3:06:24', 'F35-39' 1170 UNION ALL SELECT 'Jen Edwards', TIMESTAMP '2016-10-18 3:06:36', 'F30-34' 1171 UNION ALL SELECT 'Meghan Lederer', TIMESTAMP '2016-10-18 3:07:41', 'F30-34' 1172 UNION ALL SELECT 'Carly Forte', TIMESTAMP '2016-10-18 3:08:58', 'F25-29' 1173 UNION ALL SELECT 'Lauren Reasoner', TIMESTAMP '2016-10-18 3:10:14', 'F30-34') 1174 SELECT name, 1175 FORMAT_TIMESTAMP('%X', finish_time) AS finish_time, 1176 division, 1177 FORMAT_TIMESTAMP('%X', fastest_time) AS fastest_time, 1178 FORMAT_TIMESTAMP('%X', second_fastest) AS second_fastest 1179 FROM ( 1180 SELECT name, 1181 finish_time, 1182 division,finishers, 1183 FIRST_VALUE(finish_time) 1184 OVER w1 AS fastest_time, 1185 NTH_VALUE(finish_time, 2) 1186 OVER w1 as second_fastest 1187 FROM finishers 1188 WINDOW w1 AS ( 1189 PARTITION BY division ORDER BY finish_time ASC 1190 ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING)) 1191 """, 1192 [], 1193 ], 1194 # From https://cloud.google.com/bigquery/docs/reference/standard-sql/navigation_functions 1195 [ 1196 """ 1197 WITH finishers AS 1198 (SELECT 'Sophia Liu' as name, 1199 TIMESTAMP '2016-10-18 2:51:45' as finish_time, 1200 'F30-34' as division 1201 UNION ALL SELECT 'Lisa Stelzner', TIMESTAMP '2016-10-18 2:54:11', 'F35-39' 1202 UNION ALL SELECT 'Nikki Leith', TIMESTAMP '2016-10-18 2:59:01', 'F30-34' 1203 UNION ALL SELECT 'Lauren Matthews', TIMESTAMP '2016-10-18 3:01:17', 'F35-39' 1204 UNION ALL SELECT 'Desiree Berry', TIMESTAMP '2016-10-18 3:05:42', 'F35-39' 1205 UNION ALL SELECT 'Suzy Slane', TIMESTAMP '2016-10-18 3:06:24', 'F35-39' 1206 UNION ALL SELECT 'Jen Edwards', TIMESTAMP '2016-10-18 3:06:36', 'F30-34' 1207 UNION ALL SELECT 'Meghan Lederer', TIMESTAMP '2016-10-18 3:07:41', 'F30-34' 1208 UNION ALL SELECT 'Carly Forte', TIMESTAMP '2016-10-18 3:08:58', 'F25-29' 1209 UNION ALL SELECT 'Lauren Reasoner', TIMESTAMP '2016-10-18 3:10:14', 'F30-34') 1210 SELECT name, 1211 finish_time, 1212 division, 1213 LEAD(name) 1214 OVER (PARTITION BY division ORDER BY finish_time ASC) AS followed_by 1215 FROM finishers 1216 """, 1217 [], 1218 ], 1219 # From https://cloud.google.com/bigquery/docs/reference/standard-sql/navigation_functions 1220 [ 1221 """ 1222 WITH finishers AS 1223 (SELECT 'Sophia Liu' as name, 1224 TIMESTAMP '2016-10-18 2:51:45' as finish_time, 1225 'F30-34' as division 1226 UNION ALL SELECT 'Lisa Stelzner', TIMESTAMP '2016-10-18 2:54:11', 'F35-39' 1227 UNION ALL SELECT 'Nikki Leith', TIMESTAMP '2016-10-18 2:59:01', 'F30-34' 1228 UNION ALL SELECT 'Lauren Matthews', TIMESTAMP '2016-10-18 3:01:17', 'F35-39' 1229 UNION ALL SELECT 'Desiree Berry', TIMESTAMP '2016-10-18 3:05:42', 'F35-39' 1230 UNION ALL SELECT 'Suzy Slane', TIMESTAMP '2016-10-18 3:06:24', 'F35-39' 1231 UNION ALL SELECT 'Jen Edwards', TIMESTAMP '2016-10-18 3:06:36', 'F30-34' 1232 UNION ALL SELECT 'Meghan Lederer', TIMESTAMP '2016-10-18 3:07:41', 'F30-34' 1233 UNION ALL SELECT 'Carly Forte', TIMESTAMP '2016-10-18 3:08:58', 'F25-29' 1234 UNION ALL SELECT 'Lauren Reasoner', TIMESTAMP '2016-10-18 3:10:14', 'F30-34') 1235 SELECT name, 1236 finish_time, 1237 division, 1238 LEAD(name, 2) 1239 OVER (PARTITION BY division ORDER BY finish_time ASC) AS two_runners_back 1240 FROM finishers 1241 """, 1242 [], 1243 ], 1244 # From https://cloud.google.com/bigquery/docs/reference/standard-sql/navigation_functions 1245 [ 1246 """ 1247 WITH finishers AS 1248 (SELECT 'Sophia Liu' as name, 1249 TIMESTAMP '2016-10-18 2:51:45' as finish_time, 1250 'F30-34' as division 1251 UNION ALL SELECT 'Lisa Stelzner', TIMESTAMP '2016-10-18 2:54:11', 'F35-39' 1252 UNION ALL SELECT 'Nikki Leith', TIMESTAMP '2016-10-18 2:59:01', 'F30-34' 1253 UNION ALL SELECT 'Lauren Matthews', TIMESTAMP '2016-10-18 3:01:17', 'F35-39' 1254 UNION ALL SELECT 'Desiree Berry', TIMESTAMP '2016-10-18 3:05:42', 'F35-39' 1255 UNION ALL SELECT 'Suzy Slane', TIMESTAMP '2016-10-18 3:06:24', 'F35-39' 1256 UNION ALL SELECT 'Jen Edwards', TIMESTAMP '2016-10-18 3:06:36', 'F30-34' 1257 UNION ALL SELECT 'Meghan Lederer', TIMESTAMP '2016-10-18 3:07:41', 'F30-34' 1258 UNION ALL SELECT 'Carly Forte', TIMESTAMP '2016-10-18 3:08:58', 'F25-29' 1259 UNION ALL SELECT 'Lauren Reasoner', TIMESTAMP '2016-10-18 3:10:14', 'F30-34') 1260 SELECT name, 1261 finish_time, 1262 division, 1263 LAG(name) 1264 OVER (PARTITION BY division ORDER BY finish_time ASC) AS preceding_runner 1265 FROM finishers 1266 """, 1267 [], 1268 ], 1269 # From https://cloud.google.com/bigquery/docs/reference/standard-sql/navigation_functions 1270 [ 1271 """ 1272 SELECT 1273 PERCENTILE_CONT(x, 0) OVER() AS min, 1274 PERCENTILE_CONT(x, 0.01) OVER() AS percentile1, 1275 PERCENTILE_CONT(x, 0.5) OVER() AS median, 1276 PERCENTILE_CONT(x, 0.9) OVER() AS percentile90, 1277 PERCENTILE_CONT(x, 1) OVER() AS max 1278 FROM UNNEST([0, 3, NULL, 1, 2]) AS x LIMIT 1 1279 """, 1280 [], 1281 ], 1282 # From https://cloud.google.com/bigquery/docs/reference/standard-sql/navigation_functions 1283 [ 1284 """ 1285 SELECT 1286 x, 1287 PERCENTILE_DISC(x, 0) OVER() AS min, 1288 PERCENTILE_DISC(x, 0.5) OVER() AS median, 1289 PERCENTILE_DISC(x, 1) OVER() AS max 1290 FROM UNNEST(['c', NULL, 'b', 'a']) AS x 1291 """, 1292 [], 1293 ], 1294 # From https://cloud.google.com/bigquery/docs/reference/standard-sql/timestamp_functions 1295 [ 1296 """ 1297 SELECT 1298 TIMESTAMP "2008-12-25 15:30:00 UTC" as original, 1299 TIMESTAMP_ADD(TIMESTAMP "2008-12-25 15:30:00 UTC", INTERVAL 10 MINUTE) AS later 1300 """, 1301 [], 1302 ], 1303 # Previously hosted on https://cloud.google.com/bigquery/docs/reference/standard-sql/timestamp_functions, but 1304 # appears to no longer be there 1305 [ 1306 """ 1307 WITH date_hour_slots AS ( 1308 SELECT 1309 [ 1310 STRUCT( 1311 " 00:00:00 UTC" as hrs, 1312 GENERATE_DATE_ARRAY('2016-01-01', current_date(), INTERVAL 1 DAY) as dt_range), 1313 STRUCT( 1314 " 01:00:00 UTC" as hrs, 1315 GENERATE_DATE_ARRAY('2016-01-01',current_date(), INTERVAL 1 DAY) as dt_range), 1316 STRUCT( 1317 " 02:00:00 UTC" as hrs, 1318 GENERATE_DATE_ARRAY('2016-01-01', current_date(), INTERVAL 1 DAY) as dt_range), 1319 STRUCT( 1320 " 03:00:00 UTC" as hrs, 1321 GENERATE_DATE_ARRAY('2016-01-01', current_date(), INTERVAL 1 DAY) as dt_range), 1322 STRUCT( 1323 " 04:00:00 UTC" as hrs, 1324 GENERATE_DATE_ARRAY('2016-01-01', current_date(), INTERVAL 1 DAY) as dt_range), 1325 STRUCT( 1326 " 05:00:00 UTC" as hrs, 1327 GENERATE_DATE_ARRAY('2016-01-01', current_date(), INTERVAL 1 DAY) as dt_range), 1328 STRUCT( 1329 " 06:00:00 UTC" as hrs, 1330 GENERATE_DATE_ARRAY('2016-01-01', current_date(), INTERVAL 1 DAY) as dt_range), 1331 STRUCT( 1332 " 07:00:00 UTC" as hrs, 1333 GENERATE_DATE_ARRAY('2016-01-01', current_date(), INTERVAL 1 DAY) as dt_range), 1334 STRUCT( 1335 " 08:00:00 UTC" as hrs, 1336 GENERATE_DATE_ARRAY('2016-01-01', current_date(), INTERVAL 1 DAY ) as dt_range), 1337 STRUCT( 1338 " 09:00:00 UTC" as hrs, 1339 GENERATE_DATE_ARRAY('2016-01-01', current_date(), INTERVAL 1 DAY) as dt_range), 1340 STRUCT( 1341 " 10:00:00 UTC" as hrs, 1342 GENERATE_DATE_ARRAY('2016-01-01',current_date(), INTERVAL 1 DAY) as dt_range), 1343 STRUCT( 1344 " 11:00:00 UTC" as hrs, 1345 GENERATE_DATE_ARRAY('2016-01-01',current_date(), INTERVAL 1 DAY) as dt_range), 1346 STRUCT( 1347 " 12:00:00 UTC" as hrs, 1348 GENERATE_DATE_ARRAY('2016-01-01',current_date(), INTERVAL 1 DAY) as dt_range), 1349 STRUCT( 1350 " 13:00:00 UTC" as hrs, 1351 GENERATE_DATE_ARRAY('2016-01-01',current_date(), INTERVAL 1 DAY) as dt_range), 1352 STRUCT( 1353 " 14:00:00 UTC" as hrs, 1354 GENERATE_DATE_ARRAY('2016-01-01',current_date(), INTERVAL 1 DAY) as dt_range), 1355 STRUCT( 1356 " 15:00:00 UTC" as hrs, 1357 GENERATE_DATE_ARRAY('2016-01-01',current_date(), INTERVAL 1 DAY) as dt_range), 1358 STRUCT( 1359 " 16:00:00 UTC" as hrs, 1360 GENERATE_DATE_ARRAY('2016-01-01',current_date(), INTERVAL 1 DAY) as dt_range), 1361 STRUCT( 1362 " 17:00:00 UTC" as hrs, 1363 GENERATE_DATE_ARRAY('2016-01-01',current_date(), INTERVAL 1 DAY) as dt_range), 1364 STRUCT( 1365 " 18:00:00 UTC" as hrs, 1366 GENERATE_DATE_ARRAY('2016-01-01',current_date(), INTERVAL 1 DAY) as dt_range), 1367 STRUCT( 1368 " 19:00:00 UTC" as hrs, 1369 GENERATE_DATE_ARRAY('2016-01-01',current_date(), INTERVAL 1 DAY) as dt_range), 1370 STRUCT( 1371 " 20:00:00 UTC" as hrs, 1372 GENERATE_DATE_ARRAY('2016-01-01',current_date(), INTERVAL 1 DAY) as dt_range), 1373 STRUCT( 1374 " 21:00:00 UTC" as hrs, 1375 GENERATE_DATE_ARRAY('2016-01-01',current_date(), INTERVAL 1 DAY) as dt_range), 1376 STRUCT( 1377 " 22:00:00 UTC" as hrs, 1378 GENERATE_DATE_ARRAY('2016-01-01',current_date(), INTERVAL 1 DAY) as dt_range), 1379 STRUCT( 1380 " 23:00:00 UTC" as hrs, 1381 GENERATE_DATE_ARRAY('2016-01-01',current_date(), INTERVAL 1 DAY) as dt_range) 1382 ] 1383 AS full_timestamps) 1384 SELECT 1385 dt AS dates, hrs, CAST(CONCAT( CAST(dt as STRING), CAST(hrs as STRING)) as TIMESTAMP) as timestamp_value 1386 FROM `date_hour_slots`, date_hour_slots.full_timestamps LEFT JOIN full_timestamps.dt_range as dt 1387 """, 1388 [ 1389 (None, "date_hour_slots", "full_timestamps"), 1390 (None, "full_timestamps", "dt_range"), 1391 ], 1392 ], 1393 [ 1394 """ 1395 SELECT 1396 [foo], 1397 ARRAY[foo], 1398 ARRAY<int64, STRING>[foo, bar], 1399 STRUCT(1, 3), 1400 STRUCT<int64, STRING>(2, 'foo'), 1401 current_date(), 1402 GENERATE_ARRAY(5, NULL, 1), 1403 GENERATE_DATE_ARRAY('2016-10-05', '2016-10-01', INTERVAL 1 DAY), 1404 GENERATE_DATE_ARRAY('2016-10-05', NULL), 1405 GENERATE_DATE_ARRAY('2016-01-01', '2016-12-31', INTERVAL 2 MONTH), 1406 GENERATE_DATE_ARRAY('2000-02-01',current_date(), INTERVAL 1 DAY), 1407 GENERATE_TIMESTAMP_ARRAY('2016-10-05 00:00:00', '2016-10-05 00:00:02', INTERVAL 1 SECOND) 1408 FROM 1409 bar 1410 """, 1411 [ 1412 (None, None, "bar"), 1413 ], 1414 ], 1415 [ 1416 """ 1417 SELECT GENERATE_ARRAY(start, 5) AS example_array 1418 FROM UNNEST([3, 4, 5]) AS start 1419 """, 1420 [], 1421 ], 1422 [ 1423 """ 1424 WITH StartsAndEnds AS ( 1425 SELECT DATE '2016-01-01' AS date_start, DATE '2016-01-31' AS date_end 1426 UNION ALL SELECT DATE "2016-04-01", DATE "2016-04-30" 1427 UNION ALL SELECT DATE "2016-07-01", DATE "2016-07-31" 1428 UNION ALL SELECT DATE "2016-10-01", DATE "2016-10-31" 1429 ) 1430 SELECT GENERATE_DATE_ARRAY(date_start, date_end, INTERVAL 1 WEEK) AS date_range 1431 FROM StartsAndEnds 1432 """, 1433 [], 1434 ], 1435 [ 1436 """ 1437 SELECT GENERATE_TIMESTAMP_ARRAY(start_timestamp, end_timestamp, INTERVAL 1 HOUR) 1438 AS timestamp_array 1439 FROM 1440 (SELECT 1441 TIMESTAMP '2016-10-05 00:00:00' AS start_timestamp, 1442 TIMESTAMP '2016-10-05 02:00:00' AS end_timestamp 1443 UNION ALL 1444 SELECT 1445 TIMESTAMP '2016-10-05 12:00:00' AS start_timestamp, 1446 TIMESTAMP '2016-10-05 14:00:00' AS end_timestamp 1447 UNION ALL 1448 SELECT 1449 TIMESTAMP '2016-10-05 23:59:00' AS start_timestamp, 1450 TIMESTAMP '2016-10-06 01:59:00' AS end_timestamp) 1451 """, 1452 [], 1453 ], 1454 [ 1455 """ 1456 SELECT DATE_SUB(current_date("-08:00")), INTERVAL 2 DAY) 1457 """, 1458 [], 1459 ], 1460 [ 1461 """ 1462 SELECT 1463 case when (a) then b else c end 1464 FROM d 1465 """, 1466 [ 1467 ( 1468 None, 1469 None, 1470 "d", 1471 ), 1472 ], 1473 ], 1474 [ 1475 """ 1476 SELECT 1477 e, 1478 case when (f) then g else h end 1479 FROM i 1480 """, 1481 [ 1482 ( 1483 None, 1484 None, 1485 "i", 1486 ), 1487 ], 1488 ], 1489 [ 1490 """ 1491 SELECT 1492 case when j then k else l end 1493 FROM m 1494 """, 1495 [ 1496 ( 1497 None, 1498 None, 1499 "m", 1500 ), 1501 ], 1502 ], 1503 [ 1504 """ 1505 SELECT 1506 n, 1507 case when o then p else q end 1508 FROM r 1509 """, 1510 [ 1511 ( 1512 None, 1513 None, 1514 "r", 1515 ), 1516 ], 1517 ], 1518 [ 1519 """ 1520 SELECT 1521 case s when (t) then u else v end 1522 FROM w 1523 """, 1524 [ 1525 ( 1526 None, 1527 None, 1528 "w", 1529 ), 1530 ], 1531 ], 1532 [ 1533 """ 1534 SELECT 1535 x, 1536 case y when (z) then aa else ab end 1537 FROM ac 1538 """, 1539 [ 1540 ( 1541 None, 1542 None, 1543 "ac", 1544 ), 1545 ], 1546 ], 1547 [ 1548 """ 1549 SELECT 1550 case ad when ae then af else ag end 1551 FROM ah 1552 """, 1553 [ 1554 ( 1555 None, 1556 None, 1557 "ah", 1558 ), 1559 ], 1560 ], 1561 [ 1562 """ 1563 SELECT 1564 ai, 1565 case aj when ak then al else am end 1566 FROM an 1567 """, 1568 [ 1569 ( 1570 None, 1571 None, 1572 "an", 1573 ), 1574 ], 1575 ], 1576 [ 1577 """ 1578 WITH 1579 ONE AS (SELECT x FROM y), 1580 TWO AS (select a FROM b) 1581 SELECT y FROM onE JOIN TWo 1582 """, 1583 [ 1584 ( 1585 None, 1586 None, 1587 "y", 1588 ), 1589 ( 1590 None, 1591 None, 1592 "b", 1593 ), 1594 ], 1595 ], 1596 [ 1597 """ 1598 SELECT 1599 a, 1600 (SELECT b FROM oNE) 1601 FROM OnE 1602 """, 1603 [ 1604 ( 1605 None, 1606 None, 1607 "oNE", 1608 ), 1609 ( 1610 None, 1611 None, 1612 "OnE", 1613 ), 1614 ], 1615 ], 1616 [ 1617 """ 1618 SELECT * FROM `a.b.c` 1619 """, 1620 [ 1621 ("a", "b", "c"), 1622 ], 1623 ], 1624 [ 1625 """ 1626 SELECT * FROM `b.c` 1627 """, 1628 [ 1629 (None, "b", "c"), 1630 ], 1631 ], 1632 [ 1633 """ 1634 SELECT * FROM `c` 1635 """, 1636 [ 1637 (None, None, "c"), 1638 ], 1639 ], 1640 [ 1641 """ 1642 SELECT * FROM a.b.c 1643 """, 1644 [ 1645 ("a", "b", "c"), 1646 ], 1647 ], 1648 [ 1649 """ 1650 SELECT * FROM "a"."b"."c" 1651 """, 1652 [ 1653 ("a", "b", "c"), 1654 ], 1655 ], 1656 [ 1657 """ 1658 SELECT * FROM 'a'.'b'.'c' 1659 """, 1660 [ 1661 ("a", "b", "c"), 1662 ], 1663 ], 1664 [ 1665 """ 1666 SELECT * FROM `a`.`b`.`c` 1667 """, 1668 [ 1669 ("a", "b", "c"), 1670 ], 1671 ], 1672 [ 1673 """ 1674 SELECT * FROM "a.b.c" 1675 """, 1676 [ 1677 ("a", "b", "c"), 1678 ], 1679 ], 1680 [ 1681 """ 1682 SELECT * FROM 'a.b.c' 1683 """, 1684 [ 1685 ("a", "b", "c"), 1686 ], 1687 ], 1688 [ 1689 """ 1690 SELECT * FROM `a.b.c` 1691 """, 1692 [ 1693 ("a", "b", "c"), 1694 ], 1695 ], 1696 [ 1697 """ 1698 SELECT * 1699 FROM t1 1700 WHERE t1.a IN (SELECT t2.a 1701 FROM t2 ) FOR SYSTEM_TIME AS OF t1.timestamp_column) 1702 """, 1703 [ 1704 (None, None, "t1"), 1705 (None, None, "t2"), 1706 ], 1707 ], 1708 [ 1709 """ 1710 WITH a AS (SELECT b FROM c) 1711 SELECT d FROM A JOIN e ON f = g JOIN E ON h = i 1712 """, 1713 [ 1714 (None, None, "c"), 1715 (None, None, "e"), 1716 (None, None, "E"), 1717 ], 1718 ], 1719 [ 1720 """ 1721 with 1722 a as ( 1723 ( 1724 select b from 1725 ( 1726 select c from d 1727 ) 1728 Union all 1729 ( 1730 select e from f 1731 ) 1732 ) 1733 ) 1734 1735 select g from h 1736 """, 1737 [ 1738 (None, None, "d"), 1739 (None, None, "f"), 1740 (None, None, "h"), 1741 ], 1742 ], 1743 [ 1744 """ 1745 select 1746 a AS ESCAPE, 1747 b AS CURRENT_TIME, 1748 c AS CURRENT_DATE, 1749 d AS CURRENT_TIMESTAMP, 1750 e AS DATE_ADD 1751 FROM x 1752 """, 1753 [ 1754 (None, None, "x"), 1755 ], 1756 ], 1757 [ 1758 """ 1759 WITH x AS ( 1760 SELECT a 1761 FROM b 1762 WINDOW w as (PARTITION BY a) 1763 ) 1764 SELECT y FROM z 1765 """, 1766 [(None, None, "b"), (None, None, "z")], 1767 ], 1768 [ 1769 """ 1770 SELECT DISTINCT 1771 FIRST_VALUE(x IGNORE NULLS) OVER (PARTITION BY y) 1772 FROM z 1773 """, 1774 [(None, None, "z")], 1775 ], 1776 [ 1777 """ 1778 SELECT a . b . c 1779 FROM d 1780 """, 1781 [(None, None, "d")], 1782 ], 1783 [ 1784 """ 1785 WITH a AS ( 1786 SELECT b FROM c 1787 UNION ALL 1788 ( 1789 WITH d AS ( 1790 SELECT e FROM f 1791 ) 1792 SELECT g FROM d 1793 ) 1794 ) 1795 SELECT h FROM a 1796 """, 1797 [(None, None, "c"), (None, None, "f")], 1798 ], 1799 [ 1800 """ 1801 WITH a AS ( 1802 SELECT b FROM c 1803 UNION ALL 1804 ( 1805 WITH d AS ( 1806 SELECT e FROM f 1807 ) 1808 SELECT g FROM d 1809 ) 1810 ) 1811 (SELECT h FROM a) 1812 """, 1813 [(None, None, "c"), (None, None, "f")], 1814 ], 1815 [ 1816 """ 1817 SELECT * FROM a.b.`c` 1818 """, 1819 [("a", "b", "c")], 1820 ], 1821 [ 1822 """ 1823 SELECT * FROM 'a'.b.`c` 1824 """, 1825 [("a", "b", "c")], 1826 ], 1827 ] 1828 1829 parser = BigQueryViewParser() 1830 for test_index, test_case in enumerate(TEST_CASES): 1831 sql, expected = test_case 1832 parser.test(sql_stmt=sql, expected_tables=expected, verbose=True) 1833