1"""
2    pygments.lexers._postgres_builtins
3    ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
4
5    Self-updating data files for PostgreSQL lexer.
6
7    :copyright: Copyright 2006-2021 by the Pygments team, see AUTHORS.
8    :license: BSD, see LICENSE for details.
9"""
10
11
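# Autogenerated: KEYWORDS, DATATYPES and PSEUDO_TYPES are rewritten whenever
# this module is run as a script, so please edit them only if you like
# wasting your time.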
13
# PostgreSQL keywords, upper-cased and sorted.
#
# Regenerated by update_myself() (see the bottom of this file) from the
# PG_KEYWORD("...") entries of src/include/parser/kwlist.h.  update_consts()
# locates this definition by its ``KEYWORDS = (`` opening line and the
# closing ``)`` line, so keep that layout intact.
KEYWORDS = (
    'ABORT',
    'ABSOLUTE',
    'ACCESS',
    'ACTION',
    'ADD',
    'ADMIN',
    'AFTER',
    'AGGREGATE',
    'ALL',
    'ALSO',
    'ALTER',
    'ALWAYS',
    'ANALYSE',
    'ANALYZE',
    'AND',
    'ANY',
    'ARRAY',
    'AS',
    'ASC',
    'ASSERTION',
    'ASSIGNMENT',
    'ASYMMETRIC',
    'AT',
    'ATTACH',
    'ATTRIBUTE',
    'AUTHORIZATION',
    'BACKWARD',
    'BEFORE',
    'BEGIN',
    'BETWEEN',
    'BIGINT',
    'BINARY',
    'BIT',
    'BOOLEAN',
    'BOTH',
    'BY',
    'CACHE',
    'CALL',
    'CALLED',
    'CASCADE',
    'CASCADED',
    'CASE',
    'CAST',
    'CATALOG',
    'CHAIN',
    'CHAR',
    'CHARACTER',
    'CHARACTERISTICS',
    'CHECK',
    'CHECKPOINT',
    'CLASS',
    'CLOSE',
    'CLUSTER',
    'COALESCE',
    'COLLATE',
    'COLLATION',
    'COLUMN',
    'COLUMNS',
    'COMMENT',
    'COMMENTS',
    'COMMIT',
    'COMMITTED',
    'CONCURRENTLY',
    'CONFIGURATION',
    'CONFLICT',
    'CONNECTION',
    'CONSTRAINT',
    'CONSTRAINTS',
    'CONTENT',
    'CONTINUE',
    'CONVERSION',
    'COPY',
    'COST',
    'CREATE',
    'CROSS',
    'CSV',
    'CUBE',
    'CURRENT',
    'CURRENT_CATALOG',
    'CURRENT_DATE',
    'CURRENT_ROLE',
    'CURRENT_SCHEMA',
    'CURRENT_TIME',
    'CURRENT_TIMESTAMP',
    'CURRENT_USER',
    'CURSOR',
    'CYCLE',
    'DATA',
    'DATABASE',
    'DAY',
    'DEALLOCATE',
    'DEC',
    'DECIMAL',
    'DECLARE',
    'DEFAULT',
    'DEFAULTS',
    'DEFERRABLE',
    'DEFERRED',
    'DEFINER',
    'DELETE',
    'DELIMITER',
    'DELIMITERS',
    'DEPENDS',
    'DESC',
    'DETACH',
    'DICTIONARY',
    'DISABLE',
    'DISCARD',
    'DISTINCT',
    'DO',
    'DOCUMENT',
    'DOMAIN',
    'DOUBLE',
    'DROP',
    'EACH',
    'ELSE',
    'ENABLE',
    'ENCODING',
    'ENCRYPTED',
    'END',
    'ENUM',
    'ESCAPE',
    'EVENT',
    'EXCEPT',
    'EXCLUDE',
    'EXCLUDING',
    'EXCLUSIVE',
    'EXECUTE',
    'EXISTS',
    'EXPLAIN',
    'EXPRESSION',
    'EXTENSION',
    'EXTERNAL',
    'EXTRACT',
    'FALSE',
    'FAMILY',
    'FETCH',
    'FILTER',
    'FIRST',
    'FLOAT',
    'FOLLOWING',
    'FOR',
    'FORCE',
    'FOREIGN',
    'FORWARD',
    'FREEZE',
    'FROM',
    'FULL',
    'FUNCTION',
    'FUNCTIONS',
    'GENERATED',
    'GLOBAL',
    'GRANT',
    'GRANTED',
    'GREATEST',
    'GROUP',
    'GROUPING',
    'GROUPS',
    'HANDLER',
    'HAVING',
    'HEADER',
    'HOLD',
    'HOUR',
    'IDENTITY',
    'IF',
    'ILIKE',
    'IMMEDIATE',
    'IMMUTABLE',
    'IMPLICIT',
    'IMPORT',
    'IN',
    'INCLUDE',
    'INCLUDING',
    'INCREMENT',
    'INDEX',
    'INDEXES',
    'INHERIT',
    'INHERITS',
    'INITIALLY',
    'INLINE',
    'INNER',
    'INOUT',
    'INPUT',
    'INSENSITIVE',
    'INSERT',
    'INSTEAD',
    'INT',
    'INTEGER',
    'INTERSECT',
    'INTERVAL',
    'INTO',
    'INVOKER',
    'IS',
    'ISNULL',
    'ISOLATION',
    'JOIN',
    'KEY',
    'LABEL',
    'LANGUAGE',
    'LARGE',
    'LAST',
    'LATERAL',
    'LEADING',
    'LEAKPROOF',
    'LEAST',
    'LEFT',
    'LEVEL',
    'LIKE',
    'LIMIT',
    'LISTEN',
    'LOAD',
    'LOCAL',
    'LOCALTIME',
    'LOCALTIMESTAMP',
    'LOCATION',
    'LOCK',
    'LOCKED',
    'LOGGED',
    'MAPPING',
    'MATCH',
    'MATERIALIZED',
    'MAXVALUE',
    'METHOD',
    'MINUTE',
    'MINVALUE',
    'MODE',
    'MONTH',
    'MOVE',
    'NAME',
    'NAMES',
    'NATIONAL',
    'NATURAL',
    'NCHAR',
    'NEW',
    'NEXT',
    'NFC',
    'NFD',
    'NFKC',
    'NFKD',
    'NO',
    'NONE',
    'NORMALIZE',
    'NORMALIZED',
    'NOT',
    'NOTHING',
    'NOTIFY',
    'NOTNULL',
    'NOWAIT',
    'NULL',
    'NULLIF',
    'NULLS',
    'NUMERIC',
    'OBJECT',
    'OF',
    'OFF',
    'OFFSET',
    'OIDS',
    'OLD',
    'ON',
    'ONLY',
    'OPERATOR',
    'OPTION',
    'OPTIONS',
    'OR',
    'ORDER',
    'ORDINALITY',
    'OTHERS',
    'OUT',
    'OUTER',
    'OVER',
    'OVERLAPS',
    'OVERLAY',
    'OVERRIDING',
    'OWNED',
    'OWNER',
    'PARALLEL',
    'PARSER',
    'PARTIAL',
    'PARTITION',
    'PASSING',
    'PASSWORD',
    'PLACING',
    'PLANS',
    'POLICY',
    'POSITION',
    'PRECEDING',
    'PRECISION',
    'PREPARE',
    'PREPARED',
    'PRESERVE',
    'PRIMARY',
    'PRIOR',
    'PRIVILEGES',
    'PROCEDURAL',
    'PROCEDURE',
    'PROCEDURES',
    'PROGRAM',
    'PUBLICATION',
    'QUOTE',
    'RANGE',
    'READ',
    'REAL',
    'REASSIGN',
    'RECHECK',
    'RECURSIVE',
    'REF',
    'REFERENCES',
    'REFERENCING',
    'REFRESH',
    'REINDEX',
    'RELATIVE',
    'RELEASE',
    'RENAME',
    'REPEATABLE',
    'REPLACE',
    'REPLICA',
    'RESET',
    'RESTART',
    'RESTRICT',
    'RETURNING',
    'RETURNS',
    'REVOKE',
    'RIGHT',
    'ROLE',
    'ROLLBACK',
    'ROLLUP',
    'ROUTINE',
    'ROUTINES',
    'ROW',
    'ROWS',
    'RULE',
    'SAVEPOINT',
    'SCHEMA',
    'SCHEMAS',
    'SCROLL',
    'SEARCH',
    'SECOND',
    'SECURITY',
    'SELECT',
    'SEQUENCE',
    'SEQUENCES',
    'SERIALIZABLE',
    'SERVER',
    'SESSION',
    'SESSION_USER',
    'SET',
    'SETOF',
    'SETS',
    'SHARE',
    'SHOW',
    'SIMILAR',
    'SIMPLE',
    'SKIP',
    'SMALLINT',
    'SNAPSHOT',
    'SOME',
    'SQL',
    'STABLE',
    'STANDALONE',
    'START',
    'STATEMENT',
    'STATISTICS',
    'STDIN',
    'STDOUT',
    'STORAGE',
    'STORED',
    'STRICT',
    'STRIP',
    'SUBSCRIPTION',
    'SUBSTRING',
    'SUPPORT',
    'SYMMETRIC',
    'SYSID',
    'SYSTEM',
    'TABLE',
    'TABLES',
    'TABLESAMPLE',
    'TABLESPACE',
    'TEMP',
    'TEMPLATE',
    'TEMPORARY',
    'TEXT',
    'THEN',
    'TIES',
    'TIME',
    'TIMESTAMP',
    'TO',
    'TRAILING',
    'TRANSACTION',
    'TRANSFORM',
    'TREAT',
    'TRIGGER',
    'TRIM',
    'TRUE',
    'TRUNCATE',
    'TRUSTED',
    'TYPE',
    'TYPES',
    'UESCAPE',
    'UNBOUNDED',
    'UNCOMMITTED',
    'UNENCRYPTED',
    'UNION',
    'UNIQUE',
    'UNKNOWN',
    'UNLISTEN',
    'UNLOGGED',
    'UNTIL',
    'UPDATE',
    'USER',
    'USING',
    'VACUUM',
    'VALID',
    'VALIDATE',
    'VALIDATOR',
    'VALUE',
    'VALUES',
    'VARCHAR',
    'VARIADIC',
    'VARYING',
    'VERBOSE',
    'VERSION',
    'VIEW',
    'VIEWS',
    'VOLATILE',
    'WHEN',
    'WHERE',
    'WHITESPACE',
    'WINDOW',
    'WITH',
    'WITHIN',
    'WITHOUT',
    'WORK',
    'WRAPPER',
    'WRITE',
    'XML',
    'XMLATTRIBUTES',
    'XMLCONCAT',
    'XMLELEMENT',
    'XMLEXISTS',
    'XMLFOREST',
    'XMLNAMESPACES',
    'XMLPARSE',
    'XMLPI',
    'XMLROOT',
    'XMLSERIALIZE',
    'XMLTABLE',
    'YEAR',
    'YES',
    'ZONE',
)
466
# Data type names, sorted.
#
# Regenerated by update_myself() from doc/src/sgml/datatype.sgml via
# parse_datatypes().  Optional parts of a type declaration end up as separate
# entries, which is why fragments such as 'with time zone' appear here.
# update_consts() locates this definition by its ``DATATYPES = (`` opening
# line and the closing ``)`` line, so keep that layout intact.
DATATYPES = (
    'bigint',
    'bigserial',
    'bit',
    'bit varying',
    'bool',
    'boolean',
    'box',
    'bytea',
    'char',
    'character',
    'character varying',
    'cidr',
    'circle',
    'date',
    'decimal',
    'double precision',
    'float4',
    'float8',
    'inet',
    'int',
    'int2',
    'int4',
    'int8',
    'integer',
    'interval',
    'json',
    'jsonb',
    'line',
    'lseg',
    'macaddr',
    'macaddr8',
    'money',
    'numeric',
    'path',
    'pg_lsn',
    'pg_snapshot',
    'point',
    'polygon',
    'real',
    'serial',
    'serial2',
    'serial4',
    'serial8',
    'smallint',
    'smallserial',
    'text',
    'time',
    'timestamp',
    'timestamptz',
    'timetz',
    'tsquery',
    'tsvector',
    'txid_snapshot',
    'uuid',
    'varbit',
    'varchar',
    'with time zone',
    'without time zone',
    'xml',
)
528
# Pseudo-type names, sorted.
#
# Regenerated by update_myself() via parse_pseudos(), which reads the
# "datatype-pseudotypes-table" table of doc/src/sgml/datatype.sgml.
# update_consts() locates this definition by its ``PSEUDO_TYPES = (`` opening
# line and the closing ``)`` line, so keep that layout intact.
PSEUDO_TYPES = (
    'any',
    'anyarray',
    'anycompatible',
    'anycompatiblearray',
    'anycompatiblenonarray',
    'anycompatiblerange',
    'anyelement',
    'anyenum',
    'anynonarray',
    'anyrange',
    'cstring',
    'event_trigger',
    'fdw_handler',
    'index_am_handler',
    'internal',
    'language_handler',
    'pg_ddl_command',
    'record',
    'table_am_handler',
    'trigger',
    'tsm_handler',
    'unknown',
    'void',
)
554
# Drop every pseudo-type whose name is also a keyword (keywords are compared
# in lower case) -- 'trigger' is one such name -- and keep the remainder as a
# sorted tuple.
PSEUDO_TYPES = tuple(sorted(
    set(PSEUDO_TYPES).difference(keyword.lower() for keyword in KEYWORDS)
))
557
# Keywords for PL/pgSQL (going by the name -- usage is outside this file).
# Unlike the tables above, update_myself() does not regenerate this tuple, so
# it is maintained by hand.
PLPGSQL_KEYWORDS = (
    'ALIAS',
    'CONSTANT',
    'DIAGNOSTICS',
    'ELSIF',
    'EXCEPTION',
    'EXIT',
    'FOREACH',
    'GET',
    'LOOP',
    'NOTICE',
    'OPEN',
    'PERFORM',
    'QUERY',
    'RAISE',
    'RETURN',
    'REVERSE',
    'SQLSTATE',
    'WHILE',
)
563
564
565if __name__ == '__main__':  # pragma: no cover
566    import re
567    try:
568        from urllib import urlopen
569    except ImportError:
570        from urllib.request import urlopen
571
572    from pygments.util import format_lines
573
    # One man's constant is another man's variable.
    # Base URL of the raw files on the master branch of the postgres/postgres
    # repository on GitHub; the two URLs below are built from it.
    SOURCE_URL = 'https://github.com/postgres/postgres/raw/master'
    # Keyword list header; update_myself() passes its text to parse_keywords().
    KEYWORDS_URL = SOURCE_URL + '/src/include/parser/kwlist.h'
    # Data type documentation source; update_myself() passes its lines to
    # parse_datatypes() and parse_pseudos().
    DATATYPES_URL = SOURCE_URL + '/doc/src/sgml/datatype.sgml'
578
579    def update_myself():
580        content = urlopen(DATATYPES_URL).read().decode('utf-8', errors='ignore')
581        data_file = list(content.splitlines())
582        datatypes = parse_datatypes(data_file)
583        pseudos = parse_pseudos(data_file)
584
585        content = urlopen(KEYWORDS_URL).read().decode('utf-8', errors='ignore')
586        keywords = parse_keywords(content)
587
588        update_consts(__file__, 'DATATYPES', datatypes)
589        update_consts(__file__, 'PSEUDO_TYPES', pseudos)
590        update_consts(__file__, 'KEYWORDS', keywords)
591
592    def parse_keywords(f):
593        kw = []
594        for m in re.finditer(r'PG_KEYWORD\("(.+?)"', f):
595            kw.append(m.group(1).upper())
596
597        if not kw:
598            raise ValueError('no keyword found')
599
600        kw.sort()
601        return kw
602
603    def parse_datatypes(f):
604        dt = set()
605        for line in f:
606            if '<sect1' in line:
607                break
608            if '<entry><type>' not in line:
609                continue
610
611            # Parse a string such as
612            # time [ (<replaceable>p</replaceable>) ] [ without time zone ]
613            # into types "time" and "without time zone"
614
615            # remove all the tags
616            line = re.sub("<replaceable>[^<]+</replaceable>", "", line)
617            line = re.sub("<[^>]+>", "", line)
618
619            # Drop the parts containing braces
620            for tmp in [t for tmp in line.split('[')
621                        for t in tmp.split(']') if "(" not in t]:
622                for t in tmp.split(','):
623                    t = t.strip()
624                    if not t: continue
625                    dt.add(" ".join(t.split()))
626
627        dt = list(dt)
628        dt.sort()
629        return dt
630
631    def parse_pseudos(f):
632        dt = []
633        re_start = re.compile(r'\s*<table id="datatype-pseudotypes-table">')
634        re_entry = re.compile(r'\s*<entry><type>(.+?)</type></entry>')
635        re_end = re.compile(r'\s*</table>')
636
637        f = iter(f)
638        for line in f:
639            if re_start.match(line) is not None:
640                break
641        else:
642            raise ValueError('pseudo datatypes table not found')
643
644        for line in f:
645            m = re_entry.match(line)
646            if m is not None:
647                dt.append(m.group(1))
648
649            if re_end.match(line) is not None:
650                break
651        else:
652            raise ValueError('end of pseudo datatypes table not found')
653
654        if not dt:
655            raise ValueError('pseudo datatypes not found')
656
657        dt.sort()
658        return dt
659
660    def update_consts(filename, constname, content):
661        with open(filename) as f:
662            data = f.read()
663
664        # Line to start/end inserting
665        re_match = re.compile(r'^%s\s*=\s*\($.*?^\s*\)$' % constname, re.M | re.S)
666        m = re_match.search(data)
667        if not m:
668            raise ValueError('Could not find existing definition for %s' %
669                             (constname,))
670
671        new_block = format_lines(constname, content)
672        data = data[:m.start()] + new_block + data[m.end():]
673
674        with open(filename, 'w', newline='\n') as f:
675            f.write(data)
676
    # Script entry point: refresh the generated tables of this file in place.
    update_myself()
678