1# -*- coding: utf-8 -*-
2"""
3    pygments.lexers.sas
4    ~~~~~~~~~~~~~~~~~~~
5
6    Lexer for SAS.
7
8    :copyright: Copyright 2006-2020 by the Pygments team, see AUTHORS.
9    :license: BSD, see LICENSE for details.
10"""
11
import re

from pygments.lexer import RegexLexer, default, include, words
from pygments.token import Comment, Keyword, Name, Number, String, Text, \
    Other, Generic
16
# Names exported by a star-import of this module.
__all__ = ['SASLexer']
18
19
class SASLexer(RegexLexer):
    """
    For `SAS <http://www.sas.com/>`_ files.

    .. versionadded:: 2.2
    """
    # Syntax from syntax/sas.vim by James Kidd <james.kidd@covance.com>

    name = 'SAS'
    aliases = ['sas']
    filenames = ['*.SAS', '*.sas']
    mimetypes = ['text/x-sas', 'text/sas', 'application/x-sas']
    # Every pattern below is matched case-insensitively, and ``^`` / ``$``
    # anchor on individual lines rather than on the whole input.
    flags = re.IGNORECASE | re.MULTILINE

    # Macro-language names; they are matched with a leading ``%`` (see the
    # 'keywords' state).
    builtins_macros = (
        "bquote", "nrbquote", "cmpres", "qcmpres", "compstor", "datatyp",
        "display", "do", "else", "end", "eval", "global", "goto", "if",
        "index", "input", "keydef", "label", "left", "length", "let",
        "local", "lowcase", "macro", "mend", "nrquote",
        "nrstr", "put", "qleft", "qlowcase", "qscan",
        "qsubstr", "qsysfunc", "qtrim", "quote", "qupcase", "scan",
        "str", "substr", "superq", "syscall", "sysevalf", "sysexec",
        "sysfunc", "sysget", "syslput", "sysprod", "sysrc", "sysrput",
        "then", "to", "trim", "unquote", "until", "upcase", "verify",
        "while", "window"
    )

    # Control-flow words, matched as whole words.
    builtins_conditionals = (
        "do", "if", "then", "else", "end", "until", "while"
    )

    # Statement names, matched as whole words.
    builtins_statements = (
        "abort", "array", "attrib", "by", "call", "cards", "cards4",
        "catname", "continue", "datalines", "datalines4", "delete", "delim",
        "delimiter", "display", "dm", "drop", "endsas", "error", "file",
        "filename", "footnote", "format", "goto", "in", "infile", "informat",
        "input", "keep", "label", "leave", "length", "libname", "link",
        "list", "lostcard", "merge", "missing", "modify", "options", "output",
        "out", "page", "put", "redirect", "remove", "rename", "replace",
        "retain", "return", "select", "set", "skip", "startsas", "stop",
        "title", "update", "waitsas", "where", "window", "x", "systask"
    )

    # SQL words, matched as whole words.
    builtins_sql = (
        "add", "and", "alter", "as", "cascade", "check", "create",
        "delete", "describe", "distinct", "drop", "foreign", "from",
        "group", "having", "index", "insert", "into", "in", "key", "like",
        "message", "modify", "msgtype", "not", "null", "on", "or",
        "order", "primary", "references", "reset", "restrict", "select",
        "set", "table", "unique", "update", "validate", "view", "where"
    )

    # Function names; they are only highlighted when immediately followed by
    # an opening parenthesis (see the 'keywords' state).
    builtins_functions = (
        "abs", "addr", "airy", "arcos", "arsin", "atan", "attrc",
        "attrn", "band", "betainv", "blshift", "bnot", "bor",
        "brshift", "bxor", "byte", "cdf", "ceil", "cexist", "cinv",
        "close", "cnonct", "collate", "compbl", "compound",
        "compress", "cos", "cosh", "css", "curobs", "cv", "daccdb",
        "daccdbsl", "daccsl", "daccsyd", "dacctab", "dairy", "date",
        "datejul", "datepart", "datetime", "day", "dclose", "depdb",
        "depdbsl", "depsl", "depsyd",
        "deptab", "dequote", "dhms", "dif", "digamma",
        "dim", "dinfo", "dnum", "dopen", "doptname", "doptnum",
        "dread", "dropnote", "dsname", "erf", "erfc", "exist", "exp",
        "fappend", "fclose", "fcol", "fdelete", "fetch", "fetchobs",
        "fexist", "fget", "fileexist", "filename", "fileref",
        "finfo", "finv", "fipname", "fipnamel", "fipstate", "floor",
        "fnonct", "fnote", "fopen", "foptname", "foptnum", "fpoint",
        "fpos", "fput", "fread", "frewind", "frlen", "fsep", "fuzz",
        "fwrite", "gaminv", "gamma", "getoption", "getvarc", "getvarn",
        "hbound", "hms", "hosthelp", "hour", "ibessel", "index",
        "indexc", "indexw", "input", "inputc", "inputn", "int",
        "intck", "intnx", "intrr", "irr", "jbessel", "juldate",
        "kurtosis", "lag", "lbound", "left", "length", "lgamma",
        "libname", "libref", "log", "log10", "log2", "logpdf", "logpmf",
        "logsdf", "lowcase", "max", "mdy", "mean", "min", "minute",
        "mod", "month", "mopen", "mort", "n", "netpv", "nmiss",
        "normal", "note", "npv", "open", "ordinal", "pathname",
        "pdf", "peek", "peekc", "pmf", "point", "poisson", "poke",
        "probbeta", "probbnml", "probchi", "probf", "probgam",
        "probhypr", "probit", "probnegb", "probnorm", "probt",
        "put", "putc", "putn", "qtr", "quote", "ranbin", "rancau",
        "ranexp", "rangam", "range", "rank", "rannor", "ranpoi",
        "rantbl", "rantri", "ranuni", "repeat", "resolve", "reverse",
        "rewind", "right", "round", "saving", "scan", "sdf", "second",
        "sign", "sin", "sinh", "skewness", "soundex", "spedis",
        "sqrt", "std", "stderr", "stfips", "stname", "stnamel",
        "substr", "sum", "symget", "sysget", "sysmsg", "sysprod",
        "sysrc", "system", "tan", "tanh", "time", "timepart", "tinv",
        "tnonct", "today", "translate", "tranwrd", "trigamma",
        "trim", "trimn", "trunc", "uniform", "upcase", "uss", "var",
        "varfmt", "varinfmt", "varlabel", "varlen", "varname",
        "varnum", "varray", "varrayx", "vartype", "verify", "vformat",
        "vformatd", "vformatdx", "vformatn", "vformatnx", "vformatw",
        "vformatwx", "vformatx", "vinarray", "vinarrayx", "vinformat",
        "vinformatd", "vinformatdx", "vinformatn", "vinformatnx",
        "vinformatw", "vinformatwx", "vinformatx", "vlabel",
        "vlabelx", "vlength", "vlengthx", "vname", "vnamex", "vtype",
        "vtypex", "weekday", "year", "yyq", "zipfips", "zipname",
        "zipnamel", "zipstate"
    )

    tokens = {
        # Rule groups are tried in this order; any character that nothing
        # else claims is emitted as plain text.
        'root': [
            include('comments'),
            include('proc-data'),
            include('cards-datalines'),
            include('logs'),
            include('general'),
            (r'.', Text),
        ],
        # SAS is multi-line regardless, but * is ended by ;
        'comments': [
            # Single-line forms first, then the forms that may span lines.
            (r'^\s*\*.*?;', Comment),
            (r'/\*.*?\*/', Comment),
            (r'^\s*\*(.|\n)*?;', Comment.Multiline),
            (r'/[*](.|\n)*?[*]/', Comment.Multiline),
        ],
        # Special highlight for proc, data, quit, run
        'proc-data': [
            (r'(^|;)\s*(proc \w+|data|run|quit)[\s;]',
             Keyword.Reserved),
        ],
        # Special highlight cards and datalines
        'cards-datalines': [
            (r'^\s*(datalines|cards)\s*;\s*$', Keyword, 'data'),
        ],
        # Raw in-stream data, ended by a line that holds only a semicolon.
        'data': [
            # The body must be non-greedy: a greedy match would run to the
            # *last* lone ';' line in the input and swallow every statement
            # that follows the first data block.
            (r'(.|\n)*?^\s*;\s*$', Other, '#pop'),
        ],
        # Special highlight for put NOTE|ERROR|WARNING (order matters)
        'logs': [
            (r'\n?^\s*%?put ', Keyword, 'log-messages'),
        ],
        'log-messages': [
            (r'NOTE(:|-).*', Generic, '#pop'),
            (r'WARNING(:|-).*', Generic.Emph, '#pop'),
            (r'ERROR(:|-).*', Generic.Error, '#pop'),
            include('general'),
            # Skip blanks between arguments so that a message prefix later
            # on the same line is still recognised.
            (r'[^\S\n]+', Text),
            # An ordinary ``put`` argument list: hand control back to the
            # parent state instead of emitting Error tokens for every
            # character this state has no rule for.
            default('#pop'),
        ],
        'general': [
            include('keywords'),
            include('vars-strings'),
            include('special'),
            include('numbers'),
        ],
        # Keywords, statements, functions, macros
        'keywords': [
            (words(builtins_statements,
                   prefix=r'\b',
                   suffix=r'\b'),
             Keyword),
            (words(builtins_sql,
                   prefix=r'\b',
                   suffix=r'\b'),
             Keyword),
            (words(builtins_conditionals,
                   prefix=r'\b',
                   suffix=r'\b'),
             Keyword),
            (words(builtins_macros,
                   prefix=r'%',
                   suffix=r'\b'),
             Name.Builtin),
            # The opening parenthesis is part of the match (and the token).
            (words(builtins_functions,
                   prefix=r'\b',
                   suffix=r'\('),
             Name.Builtin),
        ],
        # Strings and user-defined variables and macros (order matters)
        'vars-strings': [
            (r'&[a-z_]\w{0,31}\.?', Name.Variable),
            (r'%[a-z_]\w{0,31}', Name.Function),
            (r'\'', String, 'string_squote'),
            (r'"', String, 'string_dquote'),
        ],
        'string_squote': [
            ('\'', String, '#pop'),
            (r'\\\\|\\"|\\\n', String.Escape),
            # AFAIK, macro variables are not evaluated in single quotes,
            # so there is deliberately no '&' rule in this state.
            (r'[^$\'\\]+', String),
            (r'[$\'\\]', String),
        ],
        'string_dquote': [
            (r'"', String, '#pop'),
            (r'\\\\|\\"|\\\n', String.Escape),
            # '&' starts a macro variable reference inside double quotes.
            (r'&', Name.Variable, 'validvar'),
            (r'[^$&"\\]+', String),
            (r'[$"\\]', String),
        ],
        # Name part of a macro variable reference, entered after '&'.
        'validvar': [
            (r'[a-z_]\w{0,31}\.?', Name.Variable, '#pop'),
        ],
        # SAS numbers and special variables
        'numbers': [
            (r'\b[+-]?([0-9]+(\.[0-9]+)?|\.[0-9]+|\.)(E[+-]?[0-9]+)?i?\b',
             Number),
        ],
        'special': [
            # Anchored on word boundaries so that identifiers which merely
            # contain one of these names (e.g. ``nullify``) are left alone.
            # 'null' and 'missing' are also in the keyword lists, whose
            # rules run first in 'general'.
            (r'\b(null|missing|_all_|_automatic_|_character_|_n_|'
             r'_infile_|_name_|_null_|_numeric_|_user_|_webout_)\b',
             Keyword.Constant),
        ],
        # 'operators': [
        #     (r'(-|=|<=|>=|<|>|<>|&|!=|'
        #      r'\||\*|\+|\^|/|!|~|~=)', Operator)
        # ],
    }
229