1# bigquery_view_parser.py
2#
3# A parser to extract table names from BigQuery view definitions.
4# This is based on the `select_parser.py` sample in pyparsing:
5# https://github.com/pyparsing/pyparsing/blob/master/examples/select_parser.py
6#
7# Michael Smedberg
8#
9
10from pyparsing import ParserElement, Suppress, Forward, CaselessKeyword
11from pyparsing import MatchFirst, alphas, alphanums, Combine, Word
12from pyparsing import QuotedString, CharsNotIn, Optional, Group, ZeroOrMore
13from pyparsing import oneOf, delimitedList, restOfLine, cStyleComment
14from pyparsing import infixNotation, opAssoc, Regex, nums
15
16ParserElement.enablePackrat()
17
18
19class BigQueryViewParser:
20    """Parser to extract table info from BigQuery view definitions"""
21
22    _parser = None
23    _table_identifiers = set()
24    _with_aliases = set()
25
26    def get_table_names(self, sql_stmt):
27        table_identifiers, with_aliases = self._parse(sql_stmt)
28
29        # Table names and alias names might differ by case, but that's not
30        # relevant- aliases are not case sensitive
31        lower_aliases = BigQueryViewParser.lowercase_set_of_tuples(with_aliases)
32        tables = {
33            x
34            for x in table_identifiers
35            if not BigQueryViewParser.lowercase_of_tuple(x) in lower_aliases
36        }
37
38        # Table names ARE case sensitive as described at
39        # https://cloud.google.com/bigquery/docs/reference/standard-sql/lexical#case_sensitivity
40        return tables
41
42    def _parse(self, sql_stmt):
43        BigQueryViewParser._table_identifiers.clear()
44        BigQueryViewParser._with_aliases.clear()
45        BigQueryViewParser._get_parser().parseString(sql_stmt)
46
47        return (BigQueryViewParser._table_identifiers, BigQueryViewParser._with_aliases)
48
49    @classmethod
50    def lowercase_of_tuple(cls, tuple_to_lowercase):
51        return tuple(x.lower() if x else None for x in tuple_to_lowercase)
52
53    @classmethod
54    def lowercase_set_of_tuples(cls, set_of_tuples):
55        return {BigQueryViewParser.lowercase_of_tuple(x) for x in set_of_tuples}
56
57    @classmethod
58    def _get_parser(cls):
59        if cls._parser is not None:
60            return cls._parser
61
62        ParserElement.enablePackrat()
63
64        LPAR, RPAR, COMMA, LBRACKET, RBRACKET, LT, GT = map(Suppress, "(),[]<>")
65        ungrouped_select_stmt = Forward().setName("select statement")
66
67        # keywords
68        (
69            UNION,
70            ALL,
71            AND,
72            INTERSECT,
73            EXCEPT,
74            COLLATE,
75            ASC,
76            DESC,
77            ON,
78            USING,
79            NATURAL,
80            INNER,
81            CROSS,
82            LEFT,
83            RIGHT,
84            OUTER,
85            FULL,
86            JOIN,
87            AS,
88            INDEXED,
89            NOT,
90            SELECT,
91            DISTINCT,
92            FROM,
93            WHERE,
94            GROUP,
95            BY,
96            HAVING,
97            ORDER,
98            BY,
99            LIMIT,
100            OFFSET,
101            OR,
102            CAST,
103            ISNULL,
104            NOTNULL,
105            NULL,
106            IS,
107            BETWEEN,
108            ELSE,
109            END,
110            CASE,
111            WHEN,
112            THEN,
113            EXISTS,
114            COLLATE,
115            IN,
116            LIKE,
117            GLOB,
118            REGEXP,
119            MATCH,
120            ESCAPE,
121            CURRENT_TIME,
122            CURRENT_DATE,
123            CURRENT_TIMESTAMP,
124            WITH,
125            EXTRACT,
126            PARTITION,
127            ROWS,
128            RANGE,
129            UNBOUNDED,
130            PRECEDING,
131            CURRENT,
132            ROW,
133            FOLLOWING,
134            OVER,
135            INTERVAL,
136            DATE_ADD,
137            DATE_SUB,
138            ADDDATE,
139            SUBDATE,
140            REGEXP_EXTRACT,
141            SPLIT,
142            ORDINAL,
143            FIRST_VALUE,
144            LAST_VALUE,
145            NTH_VALUE,
146            LEAD,
147            LAG,
148            PERCENTILE_CONT,
149            PRECENTILE_DISC,
150            RANK,
151            DENSE_RANK,
152            PERCENT_RANK,
153            CUME_DIST,
154            NTILE,
155            ROW_NUMBER,
156            DATE,
157            TIME,
158            DATETIME,
159            TIMESTAMP,
160            UNNEST,
161            INT64,
162            NUMERIC,
163            FLOAT64,
164            BOOL,
165            BYTES,
166            GEOGRAPHY,
167            ARRAY,
168            STRUCT,
169            SAFE_CAST,
170            ANY_VALUE,
171            ARRAY_AGG,
172            ARRAY_CONCAT_AGG,
173            AVG,
174            BIT_AND,
175            BIT_OR,
176            BIT_XOR,
177            COUNT,
178            COUNTIF,
179            LOGICAL_AND,
180            LOGICAL_OR,
181            MAX,
182            MIN,
183            STRING_AGG,
184            SUM,
185            CORR,
186            COVAR_POP,
187            COVAR_SAMP,
188            STDDEV_POP,
189            STDDEV_SAMP,
190            STDDEV,
191            VAR_POP,
192            VAR_SAMP,
193            VARIANCE,
194            TIMESTAMP_ADD,
195            TIMESTAMP_SUB,
196            GENERATE_ARRAY,
197            GENERATE_DATE_ARRAY,
198            GENERATE_TIMESTAMP_ARRAY,
199            FOR,
200            SYSTEMTIME,
201            AS,
202            OF,
203            WINDOW,
204            RESPECT,
205            IGNORE,
206            NULLS,
207        ) = map(
208            CaselessKeyword,
209            """
210            UNION, ALL, AND, INTERSECT, EXCEPT, COLLATE, ASC, DESC, ON, USING,
211            NATURAL, INNER, CROSS, LEFT, RIGHT, OUTER, FULL, JOIN, AS, INDEXED,
212            NOT, SELECT, DISTINCT, FROM, WHERE, GROUP, BY, HAVING, ORDER, BY,
213            LIMIT, OFFSET, OR, CAST, ISNULL, NOTNULL, NULL, IS, BETWEEN, ELSE,
214            END, CASE, WHEN, THEN, EXISTS, COLLATE, IN, LIKE, GLOB, REGEXP,
215            MATCH, ESCAPE, CURRENT_TIME, CURRENT_DATE, CURRENT_TIMESTAMP, WITH,
216            EXTRACT, PARTITION, ROWS, RANGE, UNBOUNDED, PRECEDING, CURRENT,
217            ROW, FOLLOWING, OVER, INTERVAL, DATE_ADD, DATE_SUB, ADDDATE,
218            SUBDATE, REGEXP_EXTRACT, SPLIT, ORDINAL, FIRST_VALUE, LAST_VALUE,
219            NTH_VALUE, LEAD, LAG, PERCENTILE_CONT, PRECENTILE_DISC, RANK,
220            DENSE_RANK, PERCENT_RANK, CUME_DIST, NTILE, ROW_NUMBER, DATE, TIME,
221            DATETIME, TIMESTAMP, UNNEST, INT64, NUMERIC, FLOAT64, BOOL, BYTES,
222            GEOGRAPHY, ARRAY, STRUCT, SAFE_CAST, ANY_VALUE, ARRAY_AGG,
223            ARRAY_CONCAT_AGG, AVG, BIT_AND, BIT_OR, BIT_XOR, COUNT, COUNTIF,
224            LOGICAL_AND, LOGICAL_OR, MAX, MIN, STRING_AGG, SUM, CORR,
225            COVAR_POP, COVAR_SAMP, STDDEV_POP, STDDEV_SAMP, STDDEV, VAR_POP,
226            VAR_SAMP, VARIANCE, TIMESTAMP_ADD, TIMESTAMP_SUB, GENERATE_ARRAY,
227            GENERATE_DATE_ARRAY, GENERATE_TIMESTAMP_ARRAY, FOR, SYSTEMTIME, AS,
228            OF, WINDOW, RESPECT, IGNORE, NULLS
229                 """.replace(
230                ",", ""
231            ).split(),
232        )
233
234        keyword_nonfunctions = MatchFirst(
235            (
236                UNION,
237                ALL,
238                INTERSECT,
239                EXCEPT,
240                COLLATE,
241                ASC,
242                DESC,
243                ON,
244                USING,
245                NATURAL,
246                INNER,
247                CROSS,
248                LEFT,
249                RIGHT,
250                OUTER,
251                FULL,
252                JOIN,
253                AS,
254                INDEXED,
255                NOT,
256                SELECT,
257                DISTINCT,
258                FROM,
259                WHERE,
260                GROUP,
261                BY,
262                HAVING,
263                ORDER,
264                BY,
265                LIMIT,
266                OFFSET,
267                CAST,
268                ISNULL,
269                NOTNULL,
270                NULL,
271                IS,
272                BETWEEN,
273                ELSE,
274                END,
275                CASE,
276                WHEN,
277                THEN,
278                EXISTS,
279                COLLATE,
280                IN,
281                LIKE,
282                GLOB,
283                REGEXP,
284                MATCH,
285                STRUCT,
286                WINDOW,
287            )
288        )
289
290        keyword = keyword_nonfunctions | MatchFirst(
291            (
292                ESCAPE,
293                CURRENT_TIME,
294                CURRENT_DATE,
295                CURRENT_TIMESTAMP,
296                DATE_ADD,
297                DATE_SUB,
298                ADDDATE,
299                SUBDATE,
300                INTERVAL,
301                STRING_AGG,
302                REGEXP_EXTRACT,
303                SPLIT,
304                ORDINAL,
305                UNNEST,
306                SAFE_CAST,
307                PARTITION,
308                TIMESTAMP_ADD,
309                TIMESTAMP_SUB,
310                ARRAY,
311                GENERATE_ARRAY,
312                GENERATE_DATE_ARRAY,
313                GENERATE_TIMESTAMP_ARRAY,
314            )
315        )
316
317        identifier_word = Word(alphas + "_@#", alphanums + "@$#_")
318        identifier = ~keyword + identifier_word.copy()
319        collation_name = identifier.copy()
320        # NOTE: Column names can be keywords.  Doc says they cannot, but in practice it seems to work.
321        column_name = identifier_word.copy()
322        qualified_column_name = Combine(
323            column_name
324            + (ZeroOrMore(" ") + "." + ZeroOrMore(" ") + column_name) * (0, 6)
325        )
326        # NOTE: As with column names, column aliases can be keywords, e.g. functions like `current_time`.  Other
327        # keywords, e.g. `from` make parsing pretty difficult (e.g. "SELECT a from from b" is confusing.)
328        column_alias = ~keyword_nonfunctions + column_name.copy()
329        table_name = identifier.copy()
330        table_alias = identifier.copy()
331        index_name = identifier.copy()
332        function_name = identifier.copy()
333        parameter_name = identifier.copy()
334        # NOTE: The expression in a CASE statement can be an integer.  E.g. this is valid SQL:
335        # select CASE 1 WHEN 1 THEN -1 ELSE -2 END from test_table
336        unquoted_case_identifier = ~keyword + Word(alphanums + "$_")
337        quoted_case_identifier = ~keyword + (
338            QuotedString('"') ^ Suppress("`") + CharsNotIn("`") + Suppress("`")
339        )
340        case_identifier = quoted_case_identifier | unquoted_case_identifier
341        case_expr = (
342            Optional(case_identifier + Suppress("."))
343            + Optional(case_identifier + Suppress("."))
344            + case_identifier
345        )
346
347        # expression
348        expr = Forward().setName("expression")
349
350        integer = Regex(r"[+-]?\d+")
351        numeric_literal = Regex(r"[+-]?\d*\.?\d+([eE][+-]?\d+)?")
352        string_literal = QuotedString("'") | QuotedString('"') | QuotedString("`")
353        regex_literal = "r" + string_literal
354        blob_literal = Regex(r"[xX]'[0-9A-Fa-f]+'")
355        date_or_time_literal = (DATE | TIME | DATETIME | TIMESTAMP) + string_literal
356        literal_value = (
357            numeric_literal
358            | string_literal
359            | regex_literal
360            | blob_literal
361            | date_or_time_literal
362            | NULL
363            | CURRENT_TIME + Optional(LPAR + Optional(string_literal) + RPAR)
364            | CURRENT_DATE + Optional(LPAR + Optional(string_literal) + RPAR)
365            | CURRENT_TIMESTAMP + Optional(LPAR + Optional(string_literal) + RPAR)
366        )
367        bind_parameter = Word("?", nums) | Combine(oneOf(": @ $") + parameter_name)
368        type_name = oneOf(
369            """TEXT REAL INTEGER BLOB NULL TIMESTAMP STRING DATE
370            INT64 NUMERIC FLOAT64 BOOL BYTES DATETIME GEOGRAPHY TIME ARRAY
371            STRUCT""",
372            caseless=True,
373        )
374        date_part = oneOf(
375            """DAY DAY_HOUR DAY_MICROSECOND DAY_MINUTE DAY_SECOND
376            HOUR HOUR_MICROSECOND HOUR_MINUTE HOUR_SECOND MICROSECOND MINUTE
377            MINUTE_MICROSECOND MINUTE_SECOND MONTH QUARTER SECOND
378            SECOND_MICROSECOND WEEK YEAR YEAR_MONTH""",
379            caseless=True,
380        )
381        datetime_operators = (
382            DATE_ADD | DATE_SUB | ADDDATE | SUBDATE | TIMESTAMP_ADD | TIMESTAMP_SUB
383        )
384
385        grouping_term = expr.copy()
386        ordering_term = Group(
387            expr("order_key")
388            + Optional(COLLATE + collation_name("collate"))
389            + Optional(ASC | DESC)("direction")
390        )("ordering_term")
391
392        function_arg = expr.copy()("function_arg")
393        function_args = Optional(
394            "*"
395            | Optional(DISTINCT)
396            + delimitedList(function_arg)
397            + Optional((RESPECT | IGNORE) + NULLS)
398        )("function_args")
399        function_call = (
400            (function_name | keyword)("function_name")
401            + LPAR
402            + Group(function_args)("function_args_group")
403            + RPAR
404        )
405
406        navigation_function_name = (
407            FIRST_VALUE
408            | LAST_VALUE
409            | NTH_VALUE
410            | LEAD
411            | LAG
412            | PERCENTILE_CONT
413            | PRECENTILE_DISC
414        )
415        aggregate_function_name = (
416            ANY_VALUE
417            | ARRAY_AGG
418            | ARRAY_CONCAT_AGG
419            | AVG
420            | BIT_AND
421            | BIT_OR
422            | BIT_XOR
423            | COUNT
424            | COUNTIF
425            | LOGICAL_AND
426            | LOGICAL_OR
427            | MAX
428            | MIN
429            | STRING_AGG
430            | SUM
431        )
432        statistical_aggregate_function_name = (
433            CORR
434            | COVAR_POP
435            | COVAR_SAMP
436            | STDDEV_POP
437            | STDDEV_SAMP
438            | STDDEV
439            | VAR_POP
440            | VAR_SAMP
441            | VARIANCE
442        )
443        numbering_function_name = (
444            RANK | DENSE_RANK | PERCENT_RANK | CUME_DIST | NTILE | ROW_NUMBER
445        )
446        analytic_function_name = (
447            navigation_function_name
448            | aggregate_function_name
449            | statistical_aggregate_function_name
450            | numbering_function_name
451        )("analytic_function_name")
452        partition_expression_list = delimitedList(grouping_term)(
453            "partition_expression_list"
454        )
455        window_frame_boundary_start = (
456            UNBOUNDED + PRECEDING
457            | numeric_literal + (PRECEDING | FOLLOWING)
458            | CURRENT + ROW
459        )
460        window_frame_boundary_end = (
461            UNBOUNDED + FOLLOWING
462            | numeric_literal + (PRECEDING | FOLLOWING)
463            | CURRENT + ROW
464        )
465        window_frame_clause = (ROWS | RANGE) + (
466            ((UNBOUNDED + PRECEDING) | (numeric_literal + PRECEDING) | (CURRENT + ROW))
467            | (BETWEEN + window_frame_boundary_start + AND + window_frame_boundary_end)
468        )
469        window_name = identifier.copy()("window_name")
470        window_specification = (
471            Optional(window_name)
472            + Optional(PARTITION + BY + partition_expression_list)
473            + Optional(ORDER + BY + delimitedList(ordering_term))
474            + Optional(window_frame_clause)("window_specification")
475        )
476        analytic_function = (
477            analytic_function_name
478            + LPAR
479            + function_args
480            + RPAR
481            + OVER
482            + (window_name | LPAR + Optional(window_specification) + RPAR)
483        )("analytic_function")
484
485        string_agg_term = (
486            STRING_AGG
487            + LPAR
488            + Optional(DISTINCT)
489            + expr
490            + Optional(COMMA + string_literal)
491            + Optional(
492                ORDER + BY + expr + Optional(ASC | DESC) + Optional(LIMIT + integer)
493            )
494            + RPAR
495        )("string_agg")
496        array_literal = (
497            Optional(ARRAY + Optional(LT + delimitedList(type_name) + GT))
498            + LBRACKET
499            + delimitedList(expr)
500            + RBRACKET
501        )
502        interval = INTERVAL + expr + date_part
503        array_generator = (
504            GENERATE_ARRAY
505            + LPAR
506            + numeric_literal
507            + COMMA
508            + numeric_literal
509            + COMMA
510            + numeric_literal
511            + RPAR
512        )
513        date_array_generator = (
514            (GENERATE_DATE_ARRAY | GENERATE_TIMESTAMP_ARRAY)
515            + LPAR
516            + expr("start_date")
517            + COMMA
518            + expr("end_date")
519            + Optional(COMMA + interval)
520            + RPAR
521        )
522
523        explicit_struct = (
524            STRUCT
525            + Optional(LT + delimitedList(type_name) + GT)
526            + LPAR
527            + Optional(delimitedList(expr + Optional(AS + identifier)))
528            + RPAR
529        )
530
531        case_when = WHEN + expr.copy()("when")
532        case_then = THEN + expr.copy()("then")
533        case_clauses = Group(ZeroOrMore(case_when + case_then))
534        case_else = ELSE + expr.copy()("else")
535        case_stmt = (
536            CASE
537            + Optional(case_expr.copy())
538            + case_clauses("case_clauses")
539            + Optional(case_else)
540            + END
541        )("case")
542
543        expr_term = (
544            (analytic_function)("analytic_function")
545            | (CAST + LPAR + expr + AS + type_name + RPAR)("cast")
546            | (SAFE_CAST + LPAR + expr + AS + type_name + RPAR)("safe_cast")
547            | (Optional(EXISTS) + LPAR + ungrouped_select_stmt + RPAR)("subselect")
548            | (literal_value)("literal")
549            | (bind_parameter)("bind_parameter")
550            | (EXTRACT + LPAR + expr + FROM + expr + RPAR)("extract")
551            | case_stmt
552            | (datetime_operators + LPAR + expr + COMMA + interval + RPAR)(
553                "date_operation"
554            )
555            | string_agg_term("string_agg_term")
556            | array_literal("array_literal")
557            | array_generator("array_generator")
558            | date_array_generator("date_array_generator")
559            | explicit_struct("explicit_struct")
560            | function_call("function_call")
561            | qualified_column_name("column")
562        ) + Optional(LBRACKET + (OFFSET | ORDINAL) + LPAR + expr + RPAR + RBRACKET)(
563            "offset_ordinal"
564        )
565
566        struct_term = LPAR + delimitedList(expr_term) + RPAR
567
568        UNARY, BINARY, TERNARY = 1, 2, 3
569        expr << infixNotation(
570            (expr_term | struct_term),
571            [
572                (oneOf("- + ~") | NOT, UNARY, opAssoc.RIGHT),
573                (ISNULL | NOTNULL | NOT + NULL, UNARY, opAssoc.LEFT),
574                ("||", BINARY, opAssoc.LEFT),
575                (oneOf("* / %"), BINARY, opAssoc.LEFT),
576                (oneOf("+ -"), BINARY, opAssoc.LEFT),
577                (oneOf("<< >> & |"), BINARY, opAssoc.LEFT),
578                (oneOf("= > < >= <= <> != !< !>"), BINARY, opAssoc.LEFT),
579                (
580                    IS + Optional(NOT)
581                    | Optional(NOT) + IN
582                    | Optional(NOT) + LIKE
583                    | GLOB
584                    | MATCH
585                    | REGEXP,
586                    BINARY,
587                    opAssoc.LEFT,
588                ),
589                ((BETWEEN, AND), TERNARY, opAssoc.LEFT),
590                (
591                    Optional(NOT)
592                    + IN
593                    + LPAR
594                    + Group(ungrouped_select_stmt | delimitedList(expr))
595                    + RPAR,
596                    UNARY,
597                    opAssoc.LEFT,
598                ),
599                (AND, BINARY, opAssoc.LEFT),
600                (OR, BINARY, opAssoc.LEFT),
601            ],
602        )
603        quoted_expr = (
604            expr
605            ^ Suppress('"') + expr + Suppress('"')
606            ^ Suppress("'") + expr + Suppress("'")
607            ^ Suppress("`") + expr + Suppress("`")
608        )("quoted_expr")
609
610        compound_operator = (
611            UNION + Optional(ALL | DISTINCT)
612            | INTERSECT + DISTINCT
613            | EXCEPT + DISTINCT
614            | INTERSECT
615            | EXCEPT
616        )("compound_operator")
617
618        join_constraint = Group(
619            Optional(
620                ON + expr
621                | USING + LPAR + Group(delimitedList(qualified_column_name)) + RPAR
622            )
623        )("join_constraint")
624
625        join_op = (
626            COMMA
627            | Group(
628                Optional(NATURAL)
629                + Optional(
630                    INNER
631                    | CROSS
632                    | LEFT + OUTER
633                    | LEFT
634                    | RIGHT + OUTER
635                    | RIGHT
636                    | FULL + OUTER
637                    | OUTER
638                    | FULL
639                )
640                + JOIN
641            )
642        )("join_op")
643
644        join_source = Forward()
645
646        # We support three kinds of table identifiers.
647        #
648        # First, dot delimited info like project.dataset.table, where
649        # each component follows the rules described in the BigQuery
650        # docs, namely:
651        #  Contain letters (upper or lower case), numbers, and underscores
652        #
653        # Second, a dot delimited quoted string.  Since it's quoted, we'll be
654        # liberal w.r.t. what characters we allow.  E.g.:
655        #  `project.dataset.name-with-dashes`
656        #
657        # Third, a series of quoted strings, delimited by dots, e.g.:
658        #  `project`.`dataset`.`name-with-dashes`
659        #
660        # We also support combinations, like:
661        #  project.dataset.`name-with-dashes`
662        #  `project`.`dataset.name-with-dashes`
663
664        def record_table_identifier(t):
665            identifier_list = t.asList()
666            padded_list = [None] * (3 - len(identifier_list)) + identifier_list
667            cls._table_identifiers.add(tuple(padded_list))
668
669        standard_table_part = ~keyword + Word(alphanums + "_")
670        quoted_project_part = (
671            Suppress('"') + CharsNotIn('"') + Suppress('"')
672            | Suppress("'") + CharsNotIn("'") + Suppress("'")
673            | Suppress("`") + CharsNotIn("`") + Suppress("`")
674        )
675        quoted_table_part = (
676            Suppress('"') + CharsNotIn('".') + Suppress('"')
677            | Suppress("'") + CharsNotIn("'.") + Suppress("'")
678            | Suppress("`") + CharsNotIn("`.") + Suppress("`")
679        )
680        quoted_table_parts_identifier = (
681            Optional(
682                (quoted_project_part("project") | standard_table_part("project"))
683                + Suppress(".")
684            )
685            + Optional(
686                (quoted_table_part("dataset") | standard_table_part("dataset"))
687                + Suppress(".")
688            )
689            + (quoted_table_part("table") | standard_table_part("table"))
690        ).setParseAction(record_table_identifier)
691
692        def record_quoted_table_identifier(t):
693            identifier_list = t.asList()[0].split(".")
694            first = ".".join(identifier_list[0:-2]) or None
695            second = identifier_list[-2]
696            third = identifier_list[-1]
697            identifier_list = [first, second, third]
698            padded_list = [None] * (3 - len(identifier_list)) + identifier_list
699            cls._table_identifiers.add(tuple(padded_list))
700
701        quotable_table_parts_identifier = (
702            Suppress('"') + CharsNotIn('"') + Suppress('"')
703            | Suppress("'") + CharsNotIn("'") + Suppress("'")
704            | Suppress("`") + CharsNotIn("`") + Suppress("`")
705        ).setParseAction(record_quoted_table_identifier)
706
707        table_identifier = (
708            quoted_table_parts_identifier | quotable_table_parts_identifier
709        )
710        single_source = (
711            (
712                table_identifier
713                + Optional(Optional(AS) + table_alias("table_alias*"))
714                + Optional(FOR + SYSTEMTIME + AS + OF + string_literal)
715                + Optional(INDEXED + BY + index_name("name") | NOT + INDEXED)
716            )("index")
717            | (LPAR + ungrouped_select_stmt + RPAR)
718            | (LPAR + join_source + RPAR)
719            | (UNNEST + LPAR + expr + RPAR)
720        ) + Optional(Optional(AS) + table_alias)
721
722        join_source << single_source + ZeroOrMore(
723            join_op + single_source + join_constraint
724        )
725
726        over_partition = (PARTITION + BY + delimitedList(partition_expression_list))(
727            "over_partition"
728        )
729        over_order = ORDER + BY + delimitedList(ordering_term)
730        over_unsigned_value_specification = expr
731        over_window_frame_preceding = (
732            UNBOUNDED + PRECEDING
733            | over_unsigned_value_specification + PRECEDING
734            | CURRENT + ROW
735        )
736        over_window_frame_following = (
737            UNBOUNDED + FOLLOWING
738            | over_unsigned_value_specification + FOLLOWING
739            | CURRENT + ROW
740        )
741        over_window_frame_bound = (
742            over_window_frame_preceding | over_window_frame_following
743        )
744        over_window_frame_between = (
745            BETWEEN + over_window_frame_bound + AND + over_window_frame_bound
746        )
747        over_window_frame_extent = (
748            over_window_frame_preceding | over_window_frame_between
749        )
750        over_row_or_range = (ROWS | RANGE) + over_window_frame_extent
751        over = (
752            OVER
753            + LPAR
754            + Optional(over_partition)
755            + Optional(over_order)
756            + Optional(over_row_or_range)
757            + RPAR
758        )("over")
759
760        result_column = Optional(table_name + ".") + "*" + Optional(
761            EXCEPT + LPAR + delimitedList(column_name) + RPAR
762        ) | Group(quoted_expr + Optional(over) + Optional(Optional(AS) + column_alias))
763
764        window_select_clause = (
765            WINDOW + identifier + AS + LPAR + window_specification + RPAR
766        )
767
768        with_stmt = Forward().setName("with statement")
769        ungrouped_select_no_with = (
770            SELECT
771            + Optional(DISTINCT | ALL)
772            + Group(delimitedList(result_column))("columns")
773            + Optional(FROM + join_source("from*"))
774            + Optional(WHERE + expr)
775            + Optional(
776                GROUP + BY + Group(delimitedList(grouping_term))("group_by_terms")
777            )
778            + Optional(HAVING + expr("having_expr"))
779            + Optional(
780                ORDER + BY + Group(delimitedList(ordering_term))("order_by_terms")
781            )
782            + Optional(delimitedList(window_select_clause))
783        )
784        select_no_with = ungrouped_select_no_with | (
785            LPAR + ungrouped_select_no_with + RPAR
786        )
787        select_core = Optional(with_stmt) + select_no_with
788        grouped_select_core = select_core | (LPAR + select_core + RPAR)
789
790        ungrouped_select_stmt << (
791            grouped_select_core
792            + ZeroOrMore(compound_operator + grouped_select_core)
793            + Optional(
794                LIMIT
795                + (Group(expr + OFFSET + expr) | Group(expr + COMMA + expr) | expr)(
796                    "limit"
797                )
798            )
799        )("select")
800        select_stmt = ungrouped_select_stmt | (LPAR + ungrouped_select_stmt + RPAR)
801
802        # define comment format, and ignore them
803        sql_comment = oneOf("-- #") + restOfLine | cStyleComment
804        select_stmt.ignore(sql_comment)
805
806        def record_with_alias(t):
807            identifier_list = t.asList()
808            padded_list = [None] * (3 - len(identifier_list)) + identifier_list
809            cls._with_aliases.add(tuple(padded_list))
810
811        with_clause = Group(
812            identifier.setParseAction(record_with_alias)
813            + AS
814            + LPAR
815            + select_stmt
816            + RPAR
817        )
818        with_stmt << (WITH + delimitedList(with_clause))
819        with_stmt.ignore(sql_comment)
820
821        cls._parser = select_stmt
822        return cls._parser
823
824    def test(self, sql_stmt, expected_tables, verbose=False):
825        def print_(*args):
826            if verbose:
827                print(*args)
828
829        print_(sql_stmt.strip())
830        found_tables = self.get_table_names(sql_stmt)
831        print_(found_tables)
832        expected_tables_set = set(expected_tables)
833
834        if expected_tables_set != found_tables:
835            raise Exception(
836                f"Test {test_index} failed- expected {expected_tables_set} but got {found_tables}"
837            )
838        print_()
839
840
841if __name__ == "__main__":
842    TEST_CASES = [
843        [
844            """
845            SELECT x FROM y.a, b
846            """,
847            [
848                (None, "y", "a"),
849                (
850                    None,
851                    None,
852                    "b",
853                ),
854            ],
855        ],
856        [
857            """
858            SELECT x FROM y.a JOIN b
859            """,
860            [
861                (None, "y", "a"),
862                (None, None, "b"),
863            ],
864        ],
865        [
866            """
867            select * from xyzzy where z > 100
868            """,
869            [
870                (None, None, "xyzzy"),
871            ],
872        ],
873        [
874            """
875            select * from xyzzy where z > 100 order by zz
876            """,
877            [
878                (None, None, "xyzzy"),
879            ],
880        ],
881        [
882            """
883            select * from xyzzy
884            """,
885            [
886                (
887                    None,
888                    None,
889                    "xyzzy",
890                ),
891            ],
892        ],
893        [
894            """
895            select z.* from xyzzy
896            """,
897            [
898                (
899                    None,
900                    None,
901                    "xyzzy",
902                ),
903            ],
904        ],
905        [
906            """
907            select a, b from test_table where 1=1 and b='yes'
908            """,
909            [
910                (None, None, "test_table"),
911            ],
912        ],
913        [
914            """
915            select a, b from test_table where 1=1 and b in (select bb from foo)
916            """,
917            [
918                (None, None, "test_table"),
919                (None, None, "foo"),
920            ],
921        ],
922        [
923            """
924            select z.a, b from test_table where 1=1 and b in (select bb from foo)
925            """,
926            [
927                (None, None, "test_table"),
928                (None, None, "foo"),
929            ],
930        ],
931        [
932            """
933            select z.a, b from test_table where 1=1 and b in (select bb from foo) order by b,c desc,d
934            """,
935            [
936                (None, None, "test_table"),
937                (None, None, "foo"),
938            ],
939        ],
940        [
941            """
942            select z.a, b from test_table left join test2_table where 1=1 and b in (select bb from foo)
943            """,
944            [
945                (None, None, "test_table"),
946                (None, None, "test2_table"),
947                (None, None, "foo"),
948            ],
949        ],
950        [
951            """
952            select a, db.table.b as BBB from db.table where 1=1 and BBB='yes'
953            """,
954            [
955                (None, "db", "table"),
956            ],
957        ],
958        [
959            """
960            select a, db.table.b as BBB from test_table,db.table where 1=1 and BBB='yes'
961            """,
962            [
963                (None, None, "test_table"),
964                (None, "db", "table"),
965            ],
966        ],
967        [
968            """
969            select a, db.table.b as BBB from test_table,db.table where 1=1 and BBB='yes' limit 50
970            """,
971            [
972                (None, None, "test_table"),
973                (None, "db", "table"),
974            ],
975        ],
976        [
977            """
978            select a, b from test_table where (1=1 or 2=3) and b='yes' group by zx having b=2 order by 1
979            """,
980            [
981                (None, None, "test_table"),
982            ],
983        ],
984        [
985            """
986            select
987                a,
988                b
989                # this is a comment
990            from
991                test_table
992                # another comment
993            where (1=1 or 2=3) and b='yes'
994            #yup, a comment
995            group by zx having b=2 order by 1
996            """,
997            [
998                (None, None, "test_table"),
999            ],
1000        ],
1001        [
1002            """
1003            SELECT COUNT(DISTINCT foo) FROM bar JOIN baz ON bar.baz_id = baz.id
1004            """,
1005            [
1006                (None, None, "bar"),
1007                (None, None, "baz"),
1008            ],
1009        ],
1010        [
1011            """
1012            SELECT COUNT(DISTINCT foo) FROM bar, baz WHERE bar.baz_id = baz.id
1013            """,
1014            [
1015                (None, None, "bar"),
1016                (None, None, "baz"),
1017            ],
1018        ],
1019        [
1020            """
1021            WITH one AS (SELECT id FROM foo) SELECT one.id
1022            """,
1023            [
1024                (None, None, "foo"),
1025            ],
1026        ],
1027        [
1028            """
1029            WITH one AS (SELECT id FROM foo), two AS (select id FROM bar) SELECT one.id, two.id
1030            """,
1031            [
1032                (None, None, "foo"),
1033                (None, None, "bar"),
1034            ],
1035        ],
1036        [
1037            """
1038            SELECT x,
1039              RANK() OVER (ORDER BY x ASC) AS rank,
1040              DENSE_RANK() OVER (ORDER BY x ASC) AS dense_rank,
1041              ROW_NUMBER() OVER (PARTITION BY x ORDER BY y) AS row_num
1042            FROM a
1043            """,
1044            [
1045                (
1046                    None,
1047                    None,
1048                    "a",
1049                ),
1050            ],
1051        ],
1052        [
1053            """
1054            SELECT x, COUNT(*) OVER ( ORDER BY x
1055              RANGE BETWEEN 2 PRECEDING AND 2 FOLLOWING ) AS count_x
1056            FROM T
1057            """,
1058            [
1059                (
1060                    None,
1061                    None,
1062                    "T",
1063                ),
1064            ],
1065        ],
1066        [
1067            """
1068            SELECT firstname, department, startdate,
1069              RANK() OVER ( PARTITION BY department ORDER BY startdate ) AS rank
1070            FROM Employees
1071            """,
1072            [
1073                (None, None, "Employees"),
1074            ],
1075        ],
1076        # A fragment from https://cloud.google.com/bigquery/docs/reference/standard-sql/navigation_functions
1077        [
1078            """
1079            SELECT 'Sophia Liu' as name,
1080              TIMESTAMP '2016-10-18 2:51:45' as finish_time,
1081              'F30-34' as division
1082              UNION ALL SELECT 'Lisa Stelzner', TIMESTAMP '2016-10-18 2:54:11', 'F35-39'
1083              UNION ALL SELECT 'Nikki Leith', TIMESTAMP '2016-10-18 2:59:01', 'F30-34'
1084              UNION ALL SELECT 'Lauren Matthews', TIMESTAMP '2016-10-18 3:01:17', 'F35-39'
1085              UNION ALL SELECT 'Desiree Berry', TIMESTAMP '2016-10-18 3:05:42', 'F35-39'
1086              UNION ALL SELECT 'Suzy Slane', TIMESTAMP '2016-10-18 3:06:24', 'F35-39'
1087              UNION ALL SELECT 'Jen Edwards', TIMESTAMP '2016-10-18 3:06:36', 'F30-34'
1088              UNION ALL SELECT 'Meghan Lederer', TIMESTAMP '2016-10-18 3:07:41', 'F30-34'
1089              UNION ALL SELECT 'Carly Forte', TIMESTAMP '2016-10-18 3:08:58', 'F25-29'
1090              UNION ALL SELECT 'Lauren Reasoner', TIMESTAMP '2016-10-18 3:10:14', 'F30-34'
1091            """,
1092            [],
1093        ],
1094        # From https://cloud.google.com/bigquery/docs/reference/standard-sql/navigation_functions
1095        [
1096            """
1097            WITH finishers AS
1098             (SELECT 'Sophia Liu' as name,
1099              TIMESTAMP '2016-10-18 2:51:45' as finish_time,
1100              'F30-34' as division
1101              UNION ALL SELECT 'Lisa Stelzner', TIMESTAMP '2016-10-18 2:54:11', 'F35-39'
1102              UNION ALL SELECT 'Nikki Leith', TIMESTAMP '2016-10-18 2:59:01', 'F30-34'
1103              UNION ALL SELECT 'Lauren Matthews', TIMESTAMP '2016-10-18 3:01:17', 'F35-39'
1104              UNION ALL SELECT 'Desiree Berry', TIMESTAMP '2016-10-18 3:05:42', 'F35-39'
1105              UNION ALL SELECT 'Suzy Slane', TIMESTAMP '2016-10-18 3:06:24', 'F35-39'
1106              UNION ALL SELECT 'Jen Edwards', TIMESTAMP '2016-10-18 3:06:36', 'F30-34'
1107              UNION ALL SELECT 'Meghan Lederer', TIMESTAMP '2016-10-18 3:07:41', 'F30-34'
1108              UNION ALL SELECT 'Carly Forte', TIMESTAMP '2016-10-18 3:08:58', 'F25-29'
1109              UNION ALL SELECT 'Lauren Reasoner', TIMESTAMP '2016-10-18 3:10:14', 'F30-34')
1110            SELECT name,
1111              FORMAT_TIMESTAMP('%X', finish_time) AS finish_time,
1112              division,
1113              FORMAT_TIMESTAMP('%X', fastest_time) AS fastest_time,
1114              TIMESTAMP_DIFF(finish_time, fastest_time, SECOND) AS delta_in_seconds
1115            FROM (
1116              SELECT name,
1117              finish_time,
1118              division,
1119              FIRST_VALUE(finish_time)
1120                OVER (PARTITION BY division ORDER BY finish_time ASC
1121                ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) AS fastest_time
1122              FROM finishers)
1123            """,
1124            [],
1125        ],
1126        # From https://cloud.google.com/bigquery/docs/reference/standard-sql/navigation_functions
1127        [
1128            """
1129                WITH finishers AS
1130                 (SELECT 'Sophia Liu' as name,
1131                  TIMESTAMP '2016-10-18 2:51:45' as finish_time,
1132                  'F30-34' as division
1133                  UNION ALL SELECT 'Lisa Stelzner', TIMESTAMP '2016-10-18 2:54:11', 'F35-39'
1134                  UNION ALL SELECT 'Nikki Leith', TIMESTAMP '2016-10-18 2:59:01', 'F30-34'
1135                  UNION ALL SELECT 'Lauren Matthews', TIMESTAMP '2016-10-18 3:01:17', 'F35-39'
1136                  UNION ALL SELECT 'Desiree Berry', TIMESTAMP '2016-10-18 3:05:42', 'F35-39'
1137                  UNION ALL SELECT 'Suzy Slane', TIMESTAMP '2016-10-18 3:06:24', 'F35-39'
1138                  UNION ALL SELECT 'Jen Edwards', TIMESTAMP '2016-10-18 3:06:36', 'F30-34'
1139                  UNION ALL SELECT 'Meghan Lederer', TIMESTAMP '2016-10-18 3:07:41', 'F30-34'
1140                  UNION ALL SELECT 'Carly Forte', TIMESTAMP '2016-10-18 3:08:58', 'F25-29'
1141                  UNION ALL SELECT 'Lauren Reasoner', TIMESTAMP '2016-10-18 3:10:14', 'F30-34')
1142                SELECT name,
1143                  FORMAT_TIMESTAMP('%X', finish_time) AS finish_time,
1144                  division,
1145                  FORMAT_TIMESTAMP('%X', slowest_time) AS slowest_time,
1146                  TIMESTAMP_DIFF(slowest_time, finish_time, SECOND) AS delta_in_seconds
1147                FROM (
1148                  SELECT name,
1149                  finish_time,
1150                  division,
1151                  LAST_VALUE(finish_time)
1152                    OVER (PARTITION BY division ORDER BY finish_time ASC
1153                    ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) AS slowest_time
1154                  FROM finishers)
1155            """,
1156            [],
1157        ],
1158        # From https://cloud.google.com/bigquery/docs/reference/standard-sql/navigation_functions
1159        [
1160            """
1161            WITH finishers AS
1162             (SELECT 'Sophia Liu' as name,
1163              TIMESTAMP '2016-10-18 2:51:45' as finish_time,
1164              'F30-34' as division
1165              UNION ALL SELECT 'Lisa Stelzner', TIMESTAMP '2016-10-18 2:54:11', 'F35-39'
1166              UNION ALL SELECT 'Nikki Leith', TIMESTAMP '2016-10-18 2:59:01', 'F30-34'
1167              UNION ALL SELECT 'Lauren Matthews', TIMESTAMP '2016-10-18 3:01:17', 'F35-39'
1168              UNION ALL SELECT 'Desiree Berry', TIMESTAMP '2016-10-18 3:05:42', 'F35-39'
1169              UNION ALL SELECT 'Suzy Slane', TIMESTAMP '2016-10-18 3:06:24', 'F35-39'
1170              UNION ALL SELECT 'Jen Edwards', TIMESTAMP '2016-10-18 3:06:36', 'F30-34'
1171              UNION ALL SELECT 'Meghan Lederer', TIMESTAMP '2016-10-18 3:07:41', 'F30-34'
1172              UNION ALL SELECT 'Carly Forte', TIMESTAMP '2016-10-18 3:08:58', 'F25-29'
1173              UNION ALL SELECT 'Lauren Reasoner', TIMESTAMP '2016-10-18 3:10:14', 'F30-34')
1174            SELECT name,
1175              FORMAT_TIMESTAMP('%X', finish_time) AS finish_time,
1176              division,
1177              FORMAT_TIMESTAMP('%X', fastest_time) AS fastest_time,
1178              FORMAT_TIMESTAMP('%X', second_fastest) AS second_fastest
1179            FROM (
1180              SELECT name,
1181              finish_time,
1182              division,finishers,
1183              FIRST_VALUE(finish_time)
1184                OVER w1 AS fastest_time,
1185              NTH_VALUE(finish_time, 2)
1186                OVER w1 as second_fastest
1187              FROM finishers
1188              WINDOW w1 AS (
1189                PARTITION BY division ORDER BY finish_time ASC
1190                ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING))
1191            """,
1192            [],
1193        ],
1194        # From https://cloud.google.com/bigquery/docs/reference/standard-sql/navigation_functions
1195        [
1196            """
1197            WITH finishers AS
1198             (SELECT 'Sophia Liu' as name,
1199              TIMESTAMP '2016-10-18 2:51:45' as finish_time,
1200              'F30-34' as division
1201              UNION ALL SELECT 'Lisa Stelzner', TIMESTAMP '2016-10-18 2:54:11', 'F35-39'
1202              UNION ALL SELECT 'Nikki Leith', TIMESTAMP '2016-10-18 2:59:01', 'F30-34'
1203              UNION ALL SELECT 'Lauren Matthews', TIMESTAMP '2016-10-18 3:01:17', 'F35-39'
1204              UNION ALL SELECT 'Desiree Berry', TIMESTAMP '2016-10-18 3:05:42', 'F35-39'
1205              UNION ALL SELECT 'Suzy Slane', TIMESTAMP '2016-10-18 3:06:24', 'F35-39'
1206              UNION ALL SELECT 'Jen Edwards', TIMESTAMP '2016-10-18 3:06:36', 'F30-34'
1207              UNION ALL SELECT 'Meghan Lederer', TIMESTAMP '2016-10-18 3:07:41', 'F30-34'
1208              UNION ALL SELECT 'Carly Forte', TIMESTAMP '2016-10-18 3:08:58', 'F25-29'
1209              UNION ALL SELECT 'Lauren Reasoner', TIMESTAMP '2016-10-18 3:10:14', 'F30-34')
1210            SELECT name,
1211              finish_time,
1212              division,
1213              LEAD(name)
1214                OVER (PARTITION BY division ORDER BY finish_time ASC) AS followed_by
1215            FROM finishers
1216            """,
1217            [],
1218        ],
1219        # From https://cloud.google.com/bigquery/docs/reference/standard-sql/navigation_functions
1220        [
1221            """
1222            WITH finishers AS
1223             (SELECT 'Sophia Liu' as name,
1224              TIMESTAMP '2016-10-18 2:51:45' as finish_time,
1225              'F30-34' as division
1226              UNION ALL SELECT 'Lisa Stelzner', TIMESTAMP '2016-10-18 2:54:11', 'F35-39'
1227              UNION ALL SELECT 'Nikki Leith', TIMESTAMP '2016-10-18 2:59:01', 'F30-34'
1228              UNION ALL SELECT 'Lauren Matthews', TIMESTAMP '2016-10-18 3:01:17', 'F35-39'
1229              UNION ALL SELECT 'Desiree Berry', TIMESTAMP '2016-10-18 3:05:42', 'F35-39'
1230              UNION ALL SELECT 'Suzy Slane', TIMESTAMP '2016-10-18 3:06:24', 'F35-39'
1231              UNION ALL SELECT 'Jen Edwards', TIMESTAMP '2016-10-18 3:06:36', 'F30-34'
1232              UNION ALL SELECT 'Meghan Lederer', TIMESTAMP '2016-10-18 3:07:41', 'F30-34'
1233              UNION ALL SELECT 'Carly Forte', TIMESTAMP '2016-10-18 3:08:58', 'F25-29'
1234              UNION ALL SELECT 'Lauren Reasoner', TIMESTAMP '2016-10-18 3:10:14', 'F30-34')
1235            SELECT name,
1236              finish_time,
1237              division,
1238              LEAD(name, 2)
1239                OVER (PARTITION BY division ORDER BY finish_time ASC) AS two_runners_back
1240            FROM finishers
1241            """,
1242            [],
1243        ],
1244        # From https://cloud.google.com/bigquery/docs/reference/standard-sql/navigation_functions
1245        [
1246            """
1247            WITH finishers AS
1248             (SELECT 'Sophia Liu' as name,
1249              TIMESTAMP '2016-10-18 2:51:45' as finish_time,
1250              'F30-34' as division
1251              UNION ALL SELECT 'Lisa Stelzner', TIMESTAMP '2016-10-18 2:54:11', 'F35-39'
1252              UNION ALL SELECT 'Nikki Leith', TIMESTAMP '2016-10-18 2:59:01', 'F30-34'
1253              UNION ALL SELECT 'Lauren Matthews', TIMESTAMP '2016-10-18 3:01:17', 'F35-39'
1254              UNION ALL SELECT 'Desiree Berry', TIMESTAMP '2016-10-18 3:05:42', 'F35-39'
1255              UNION ALL SELECT 'Suzy Slane', TIMESTAMP '2016-10-18 3:06:24', 'F35-39'
1256              UNION ALL SELECT 'Jen Edwards', TIMESTAMP '2016-10-18 3:06:36', 'F30-34'
1257              UNION ALL SELECT 'Meghan Lederer', TIMESTAMP '2016-10-18 3:07:41', 'F30-34'
1258              UNION ALL SELECT 'Carly Forte', TIMESTAMP '2016-10-18 3:08:58', 'F25-29'
1259              UNION ALL SELECT 'Lauren Reasoner', TIMESTAMP '2016-10-18 3:10:14', 'F30-34')
1260            SELECT name,
1261              finish_time,
1262              division,
1263              LAG(name)
1264                OVER (PARTITION BY division ORDER BY finish_time ASC) AS preceding_runner
1265            FROM finishers
1266            """,
1267            [],
1268        ],
1269        # From https://cloud.google.com/bigquery/docs/reference/standard-sql/navigation_functions
1270        [
1271            """
1272            SELECT
1273              PERCENTILE_CONT(x, 0) OVER() AS min,
1274              PERCENTILE_CONT(x, 0.01) OVER() AS percentile1,
1275              PERCENTILE_CONT(x, 0.5) OVER() AS median,
1276              PERCENTILE_CONT(x, 0.9) OVER() AS percentile90,
1277              PERCENTILE_CONT(x, 1) OVER() AS max
1278            FROM UNNEST([0, 3, NULL, 1, 2]) AS x LIMIT 1
1279            """,
1280            [],
1281        ],
1282        # From https://cloud.google.com/bigquery/docs/reference/standard-sql/navigation_functions
1283        [
1284            """
1285            SELECT
1286              x,
1287              PERCENTILE_DISC(x, 0) OVER() AS min,
1288              PERCENTILE_DISC(x, 0.5) OVER() AS median,
1289              PERCENTILE_DISC(x, 1) OVER() AS max
1290            FROM UNNEST(['c', NULL, 'b', 'a']) AS x
1291            """,
1292            [],
1293        ],
1294        # From https://cloud.google.com/bigquery/docs/reference/standard-sql/timestamp_functions
1295        [
1296            """
1297            SELECT
1298              TIMESTAMP "2008-12-25 15:30:00 UTC" as original,
1299              TIMESTAMP_ADD(TIMESTAMP "2008-12-25 15:30:00 UTC", INTERVAL 10 MINUTE) AS later
1300            """,
1301            [],
1302        ],
1303        # Previously hosted on https://cloud.google.com/bigquery/docs/reference/standard-sql/timestamp_functions, but
1304        # appears to no longer be there
1305        [
1306            """
1307            WITH date_hour_slots AS (
1308             SELECT
1309               [
1310                    STRUCT(
1311                        " 00:00:00 UTC" as hrs,
1312                        GENERATE_DATE_ARRAY('2016-01-01', current_date(), INTERVAL 1 DAY) as dt_range),
1313                    STRUCT(
1314                        " 01:00:00 UTC" as hrs,
1315                        GENERATE_DATE_ARRAY('2016-01-01',current_date(), INTERVAL 1 DAY) as dt_range),
1316                    STRUCT(
1317                        " 02:00:00 UTC" as hrs,
1318                        GENERATE_DATE_ARRAY('2016-01-01', current_date(), INTERVAL 1 DAY) as dt_range),
1319                    STRUCT(
1320                        " 03:00:00 UTC" as hrs,
1321                        GENERATE_DATE_ARRAY('2016-01-01', current_date(), INTERVAL 1 DAY) as dt_range),
1322                    STRUCT(
1323                        " 04:00:00 UTC" as hrs,
1324                        GENERATE_DATE_ARRAY('2016-01-01', current_date(), INTERVAL 1 DAY) as dt_range),
1325                    STRUCT(
1326                        " 05:00:00 UTC" as hrs,
1327                        GENERATE_DATE_ARRAY('2016-01-01', current_date(), INTERVAL 1 DAY) as dt_range),
1328                    STRUCT(
1329                        " 06:00:00 UTC" as hrs,
1330                        GENERATE_DATE_ARRAY('2016-01-01', current_date(), INTERVAL 1 DAY) as dt_range),
1331                    STRUCT(
1332                        " 07:00:00 UTC" as hrs,
1333                        GENERATE_DATE_ARRAY('2016-01-01', current_date(), INTERVAL 1 DAY) as dt_range),
1334                    STRUCT(
1335                        " 08:00:00 UTC" as hrs,
1336                        GENERATE_DATE_ARRAY('2016-01-01', current_date(), INTERVAL 1 DAY ) as dt_range),
1337                    STRUCT(
1338                        " 09:00:00 UTC" as hrs,
1339                        GENERATE_DATE_ARRAY('2016-01-01', current_date(), INTERVAL 1 DAY) as dt_range),
1340                    STRUCT(
1341                        " 10:00:00 UTC" as hrs,
1342                        GENERATE_DATE_ARRAY('2016-01-01',current_date(), INTERVAL 1 DAY) as dt_range),
1343                    STRUCT(
1344                        " 11:00:00 UTC" as hrs,
1345                        GENERATE_DATE_ARRAY('2016-01-01',current_date(), INTERVAL 1 DAY) as dt_range),
1346                    STRUCT(
1347                        " 12:00:00 UTC" as hrs,
1348                        GENERATE_DATE_ARRAY('2016-01-01',current_date(), INTERVAL 1 DAY) as dt_range),
1349                    STRUCT(
1350                        " 13:00:00 UTC" as hrs,
1351                        GENERATE_DATE_ARRAY('2016-01-01',current_date(), INTERVAL 1 DAY) as dt_range),
1352                    STRUCT(
1353                        " 14:00:00 UTC" as hrs,
1354                        GENERATE_DATE_ARRAY('2016-01-01',current_date(), INTERVAL 1 DAY) as dt_range),
1355                    STRUCT(
1356                        " 15:00:00 UTC" as hrs,
1357                        GENERATE_DATE_ARRAY('2016-01-01',current_date(), INTERVAL 1 DAY) as dt_range),
1358                    STRUCT(
1359                        " 16:00:00 UTC" as hrs,
1360                        GENERATE_DATE_ARRAY('2016-01-01',current_date(), INTERVAL 1 DAY) as dt_range),
1361                    STRUCT(
1362                        " 17:00:00 UTC" as hrs,
1363                        GENERATE_DATE_ARRAY('2016-01-01',current_date(), INTERVAL 1 DAY) as dt_range),
1364                    STRUCT(
1365                        " 18:00:00 UTC" as hrs,
1366                        GENERATE_DATE_ARRAY('2016-01-01',current_date(), INTERVAL 1 DAY) as dt_range),
1367                    STRUCT(
1368                        " 19:00:00 UTC" as hrs,
1369                        GENERATE_DATE_ARRAY('2016-01-01',current_date(), INTERVAL 1 DAY) as dt_range),
1370                    STRUCT(
1371                        " 20:00:00 UTC" as hrs,
1372                        GENERATE_DATE_ARRAY('2016-01-01',current_date(), INTERVAL 1 DAY) as dt_range),
1373                    STRUCT(
1374                        " 21:00:00 UTC" as hrs,
1375                        GENERATE_DATE_ARRAY('2016-01-01',current_date(), INTERVAL 1 DAY) as dt_range),
1376                    STRUCT(
1377                        " 22:00:00 UTC" as hrs,
1378                        GENERATE_DATE_ARRAY('2016-01-01',current_date(), INTERVAL 1 DAY) as dt_range),
1379                    STRUCT(
1380                        " 23:00:00 UTC" as hrs,
1381                        GENERATE_DATE_ARRAY('2016-01-01',current_date(), INTERVAL 1 DAY) as dt_range)
1382                ]
1383                AS full_timestamps)
1384                SELECT
1385              dt AS dates, hrs, CAST(CONCAT( CAST(dt as STRING), CAST(hrs as STRING)) as TIMESTAMP) as timestamp_value
1386              FROM `date_hour_slots`, date_hour_slots.full_timestamps LEFT JOIN full_timestamps.dt_range as dt
1387            """,
1388            [
1389                (None, "date_hour_slots", "full_timestamps"),
1390                (None, "full_timestamps", "dt_range"),
1391            ],
1392        ],
1393        [
1394            """
1395            SELECT
1396                [foo],
1397                ARRAY[foo],
1398                ARRAY<int64, STRING>[foo, bar],
1399                STRUCT(1, 3),
1400                STRUCT<int64, STRING>(2, 'foo'),
1401                current_date(),
1402                GENERATE_ARRAY(5, NULL, 1),
1403                GENERATE_DATE_ARRAY('2016-10-05', '2016-10-01', INTERVAL 1 DAY),
1404                GENERATE_DATE_ARRAY('2016-10-05', NULL),
1405                GENERATE_DATE_ARRAY('2016-01-01', '2016-12-31', INTERVAL 2 MONTH),
1406                GENERATE_DATE_ARRAY('2000-02-01',current_date(), INTERVAL 1 DAY),
1407                GENERATE_TIMESTAMP_ARRAY('2016-10-05 00:00:00', '2016-10-05 00:00:02', INTERVAL 1 SECOND)
1408            FROM
1409                bar
1410            """,
1411            [
1412                (None, None, "bar"),
1413            ],
1414        ],
1415        [
1416            """
1417            SELECT GENERATE_ARRAY(start, 5) AS example_array
1418            FROM UNNEST([3, 4, 5]) AS start
1419            """,
1420            [],
1421        ],
1422        [
1423            """
1424            WITH StartsAndEnds AS (
1425              SELECT DATE '2016-01-01' AS date_start, DATE '2016-01-31' AS date_end
1426              UNION ALL SELECT DATE "2016-04-01", DATE "2016-04-30"
1427              UNION ALL SELECT DATE "2016-07-01", DATE "2016-07-31"
1428              UNION ALL SELECT DATE "2016-10-01", DATE "2016-10-31"
1429            )
1430            SELECT GENERATE_DATE_ARRAY(date_start, date_end, INTERVAL 1 WEEK) AS date_range
1431            FROM StartsAndEnds
1432            """,
1433            [],
1434        ],
1435        [
1436            """
1437            SELECT GENERATE_TIMESTAMP_ARRAY(start_timestamp, end_timestamp, INTERVAL 1 HOUR)
1438              AS timestamp_array
1439            FROM
1440              (SELECT
1441                TIMESTAMP '2016-10-05 00:00:00' AS start_timestamp,
1442                TIMESTAMP '2016-10-05 02:00:00' AS end_timestamp
1443               UNION ALL
1444               SELECT
1445                TIMESTAMP '2016-10-05 12:00:00' AS start_timestamp,
1446                TIMESTAMP '2016-10-05 14:00:00' AS end_timestamp
1447               UNION ALL
1448               SELECT
1449                TIMESTAMP '2016-10-05 23:59:00' AS start_timestamp,
1450                TIMESTAMP '2016-10-06 01:59:00' AS end_timestamp)
1451            """,
1452            [],
1453        ],
1454        [
1455            """
1456            SELECT DATE_SUB(current_date("-08:00")), INTERVAL 2 DAY)
1457            """,
1458            [],
1459        ],
1460        [
1461            """
1462            SELECT
1463                case when (a) then b else c end
1464            FROM d
1465            """,
1466            [
1467                (
1468                    None,
1469                    None,
1470                    "d",
1471                ),
1472            ],
1473        ],
1474        [
1475            """
1476            SELECT
1477                e,
1478                case when (f) then g else h end
1479            FROM i
1480            """,
1481            [
1482                (
1483                    None,
1484                    None,
1485                    "i",
1486                ),
1487            ],
1488        ],
1489        [
1490            """
1491            SELECT
1492                case when j then k else l end
1493            FROM m
1494            """,
1495            [
1496                (
1497                    None,
1498                    None,
1499                    "m",
1500                ),
1501            ],
1502        ],
1503        [
1504            """
1505            SELECT
1506                n,
1507                case when o then p else q end
1508            FROM r
1509            """,
1510            [
1511                (
1512                    None,
1513                    None,
1514                    "r",
1515                ),
1516            ],
1517        ],
1518        [
1519            """
1520            SELECT
1521                case s when (t) then u else v end
1522            FROM w
1523            """,
1524            [
1525                (
1526                    None,
1527                    None,
1528                    "w",
1529                ),
1530            ],
1531        ],
1532        [
1533            """
1534            SELECT
1535                x,
1536                case y when (z) then aa else ab end
1537            FROM ac
1538            """,
1539            [
1540                (
1541                    None,
1542                    None,
1543                    "ac",
1544                ),
1545            ],
1546        ],
1547        [
1548            """
1549            SELECT
1550                case ad when ae then af else ag end
1551            FROM ah
1552            """,
1553            [
1554                (
1555                    None,
1556                    None,
1557                    "ah",
1558                ),
1559            ],
1560        ],
1561        [
1562            """
1563            SELECT
1564                ai,
1565                case aj when ak then al else am end
1566            FROM an
1567            """,
1568            [
1569                (
1570                    None,
1571                    None,
1572                    "an",
1573                ),
1574            ],
1575        ],
1576        [
1577            """
1578            WITH
1579                ONE AS (SELECT x FROM y),
1580                TWO AS (select a FROM b)
1581            SELECT y FROM onE JOIN TWo
1582            """,
1583            [
1584                (
1585                    None,
1586                    None,
1587                    "y",
1588                ),
1589                (
1590                    None,
1591                    None,
1592                    "b",
1593                ),
1594            ],
1595        ],
1596        [
1597            """
1598            SELECT
1599                a,
1600                (SELECT b FROM oNE)
1601            FROM OnE
1602            """,
1603            [
1604                (
1605                    None,
1606                    None,
1607                    "oNE",
1608                ),
1609                (
1610                    None,
1611                    None,
1612                    "OnE",
1613                ),
1614            ],
1615        ],
1616        [
1617            """
1618            SELECT * FROM `a.b.c`
1619            """,
1620            [
1621                ("a", "b", "c"),
1622            ],
1623        ],
1624        [
1625            """
1626            SELECT * FROM `b.c`
1627            """,
1628            [
1629                (None, "b", "c"),
1630            ],
1631        ],
1632        [
1633            """
1634            SELECT * FROM `c`
1635            """,
1636            [
1637                (None, None, "c"),
1638            ],
1639        ],
1640        [
1641            """
1642            SELECT * FROM a.b.c
1643            """,
1644            [
1645                ("a", "b", "c"),
1646            ],
1647        ],
1648        [
1649            """
1650            SELECT * FROM "a"."b"."c"
1651            """,
1652            [
1653                ("a", "b", "c"),
1654            ],
1655        ],
1656        [
1657            """
1658            SELECT * FROM 'a'.'b'.'c'
1659            """,
1660            [
1661                ("a", "b", "c"),
1662            ],
1663        ],
1664        [
1665            """
1666            SELECT * FROM `a`.`b`.`c`
1667            """,
1668            [
1669                ("a", "b", "c"),
1670            ],
1671        ],
1672        [
1673            """
1674            SELECT * FROM "a.b.c"
1675            """,
1676            [
1677                ("a", "b", "c"),
1678            ],
1679        ],
1680        [
1681            """
1682            SELECT * FROM 'a.b.c'
1683            """,
1684            [
1685                ("a", "b", "c"),
1686            ],
1687        ],
1688        [
1689            """
1690            SELECT * FROM `a.b.c`
1691            """,
1692            [
1693                ("a", "b", "c"),
1694            ],
1695        ],
1696        [
1697            """
1698            SELECT *
1699            FROM t1
1700            WHERE t1.a IN (SELECT t2.a
1701                           FROM t2 ) FOR SYSTEM_TIME AS OF t1.timestamp_column)
1702            """,
1703            [
1704                (None, None, "t1"),
1705                (None, None, "t2"),
1706            ],
1707        ],
1708        [
1709            """
1710            WITH a AS (SELECT b FROM c)
1711            SELECT d FROM A JOIN e ON f = g JOIN E ON h = i
1712            """,
1713            [
1714                (None, None, "c"),
1715                (None, None, "e"),
1716                (None, None, "E"),
1717            ],
1718        ],
1719        [
1720            """
1721            with
1722            a as (
1723                (
1724                    select b from
1725                    (
1726                        select c from d
1727                    )
1728                    Union all
1729                    (
1730                        select e from f
1731                    )
1732                )
1733            )
1734
1735            select g from h
1736            """,
1737            [
1738                (None, None, "d"),
1739                (None, None, "f"),
1740                (None, None, "h"),
1741            ],
1742        ],
1743        [
1744            """
1745            select
1746                a AS ESCAPE,
1747                b AS CURRENT_TIME,
1748                c AS CURRENT_DATE,
1749                d AS CURRENT_TIMESTAMP,
1750                e AS DATE_ADD
1751            FROM x
1752            """,
1753            [
1754                (None, None, "x"),
1755            ],
1756        ],
1757        [
1758            """
1759            WITH x AS (
1760                SELECT a
1761                FROM b
1762                WINDOW w as (PARTITION BY a)
1763            )
1764            SELECT y FROM z
1765            """,
1766            [(None, None, "b"), (None, None, "z")],
1767        ],
1768        [
1769            """
1770            SELECT DISTINCT
1771                FIRST_VALUE(x IGNORE NULLS) OVER (PARTITION BY y)
1772            FROM z
1773            """,
1774            [(None, None, "z")],
1775        ],
1776        [
1777            """
1778            SELECT a . b .   c
1779            FROM d
1780            """,
1781            [(None, None, "d")],
1782        ],
1783        [
1784            """
1785            WITH a AS (
1786                SELECT b FROM c
1787                UNION ALL
1788                (
1789                    WITH d AS (
1790                        SELECT e FROM f
1791                    )
1792                    SELECT g FROM d
1793                )
1794            )
1795            SELECT h FROM a
1796            """,
1797            [(None, None, "c"), (None, None, "f")],
1798        ],
1799        [
1800            """
1801            WITH a AS (
1802                SELECT b FROM c
1803                UNION ALL
1804                (
1805                    WITH d AS (
1806                        SELECT e FROM f
1807                    )
1808                    SELECT g FROM d
1809                )
1810            )
1811            (SELECT h FROM a)
1812            """,
1813            [(None, None, "c"), (None, None, "f")],
1814        ],
1815        [
1816            """
1817            SELECT * FROM a.b.`c`
1818            """,
1819            [("a", "b", "c")],
1820        ],
1821        [
1822            """
1823            SELECT * FROM 'a'.b.`c`
1824            """,
1825            [("a", "b", "c")],
1826        ],
1827    ]
1828
1829    parser = BigQueryViewParser()
1830    for test_index, test_case in enumerate(TEST_CASES):
1831        sql, expected = test_case
1832        parser.test(sql_stmt=sql, expected_tables=expected, verbose=True)
1833