# -*- coding: utf-8 -*-
"""
    pygments.lexers.sas
    ~~~~~~~~~~~~~~~~~~~

    Lexer for SAS.

    :copyright: Copyright 2006-2020 by the Pygments team, see AUTHORS.
    :license: BSD, see LICENSE for details.
"""

import re

from pygments.lexer import RegexLexer, default, include, words
from pygments.token import Comment, Keyword, Name, Number, String, Text, \
    Other, Generic

__all__ = ['SASLexer']


class SASLexer(RegexLexer):
    """
    For `SAS <http://www.sas.com/>`_ files.

    The lexer is purely regex driven.  The ``root`` state tries, in order,
    comments, ``proc``/``data``/``run``/``quit`` step markers, in-stream data
    introduced by ``datalines;``/``cards;``, ``put`` log messages and the
    general vocabulary (keywords, variables/strings, special names and
    numbers); any character none of these rules claim is emitted as plain
    ``Text``.

    All patterns are compiled with ``re.IGNORECASE | re.MULTILINE``.

    .. versionadded:: 2.2
    """
    # Syntax from syntax/sas.vim by James Kidd <james.kidd@covance.com>

    name = 'SAS'
    aliases = ['sas']
    filenames = ['*.SAS', '*.sas']
    mimetypes = ['text/x-sas', 'text/sas', 'application/x-sas']
    flags = re.IGNORECASE | re.MULTILINE

    # Macro-language names; matched only when prefixed with ``%``.
    builtins_macros = (
        "bquote", "nrbquote", "cmpres", "qcmpres", "compstor", "datatyp",
        "display", "do", "else", "end", "eval", "global", "goto", "if",
        "index", "input", "keydef", "label", "left", "length", "let",
        "local", "lowcase", "macro", "mend", "nrquote",
        "nrstr", "put", "qleft", "qlowcase", "qscan",
        "qsubstr", "qsysfunc", "qtrim", "quote", "qupcase", "scan",
        "str", "substr", "superq", "syscall", "sysevalf", "sysexec",
        "sysfunc", "sysget", "syslput", "sysprod", "sysrc", "sysrput",
        "then", "to", "trim", "unquote", "until", "upcase", "verify",
        "while", "window"
    )

    # Control-flow words; matched as whole words.
    builtins_conditionals = (
        "do", "if", "then", "else", "end", "until", "while"
    )

    # Statement names; matched as whole words.
    builtins_statements = (
        "abort", "array", "attrib", "by", "call", "cards", "cards4",
        "catname", "continue", "datalines", "datalines4", "delete", "delim",
        "delimiter", "display", "dm", "drop", "endsas", "error", "file",
        "filename", "footnote", "format", "goto", "in", "infile", "informat",
        "input", "keep", "label", "leave", "length", "libname", "link",
        "list", "lostcard", "merge", "missing", "modify", "options", "output",
        "out", "page", "put", "redirect", "remove", "rename", "replace",
        "retain", "return", "select", "set", "skip", "startsas", "stop",
        "title", "update", "waitsas", "where", "window", "x", "systask"
    )

    # SQL words; matched as whole words.
    builtins_sql = (
        "add", "and", "alter", "as", "cascade", "check", "create",
        "delete", "describe", "distinct", "drop", "foreign", "from",
        "group", "having", "index", "insert", "into", "in", "key", "like",
        "message", "modify", "msgtype", "not", "null", "on", "or",
        "order", "primary", "references", "reset", "restrict", "select",
        "set", "table", "unique", "update", "validate", "view", "where"
    )

    # Function names; matched only when directly followed by ``(``.
    builtins_functions = (
        "abs", "addr", "airy", "arcos", "arsin", "atan", "attrc",
        "attrn", "band", "betainv", "blshift", "bnot", "bor",
        "brshift", "bxor", "byte", "cdf", "ceil", "cexist", "cinv",
        "close", "cnonct", "collate", "compbl", "compound",
        "compress", "cos", "cosh", "css", "curobs", "cv", "daccdb",
        "daccdbsl", "daccsl", "daccsyd", "dacctab", "dairy", "date",
        "datejul", "datepart", "datetime", "day", "dclose", "depdb",
        "depdbsl", "depsl", "depsyd",
        "deptab", "dequote", "dhms", "dif", "digamma",
        "dim", "dinfo", "dnum", "dopen", "doptname", "doptnum",
        "dread", "dropnote", "dsname", "erf", "erfc", "exist", "exp",
        "fappend", "fclose", "fcol", "fdelete", "fetch", "fetchobs",
        "fexist", "fget", "fileexist", "filename", "fileref",
        "finfo", "finv", "fipname", "fipnamel", "fipstate", "floor",
        "fnonct", "fnote", "fopen", "foptname", "foptnum", "fpoint",
        "fpos", "fput", "fread", "frewind", "frlen", "fsep", "fuzz",
        "fwrite", "gaminv", "gamma", "getoption", "getvarc", "getvarn",
        "hbound", "hms", "hosthelp", "hour", "ibessel", "index",
        "indexc", "indexw", "input", "inputc", "inputn", "int",
        "intck", "intnx", "intrr", "irr", "jbessel", "juldate",
        "kurtosis", "lag", "lbound", "left", "length", "lgamma",
        "libname", "libref", "log", "log10", "log2", "logpdf", "logpmf",
        "logsdf", "lowcase", "max", "mdy", "mean", "min", "minute",
        "mod", "month", "mopen", "mort", "n", "netpv", "nmiss",
        "normal", "note", "npv", "open", "ordinal", "pathname",
        "pdf", "peek", "peekc", "pmf", "point", "poisson", "poke",
        "probbeta", "probbnml", "probchi", "probf", "probgam",
        "probhypr", "probit", "probnegb", "probnorm", "probt",
        "put", "putc", "putn", "qtr", "quote", "ranbin", "rancau",
        "ranexp", "rangam", "range", "rank", "rannor", "ranpoi",
        "rantbl", "rantri", "ranuni", "repeat", "resolve", "reverse",
        "rewind", "right", "round", "saving", "scan", "sdf", "second",
        "sign", "sin", "sinh", "skewness", "soundex", "spedis",
        "sqrt", "std", "stderr", "stfips", "stname", "stnamel",
        "substr", "sum", "symget", "sysget", "sysmsg", "sysprod",
        "sysrc", "system", "tan", "tanh", "time", "timepart", "tinv",
        "tnonct", "today", "translate", "tranwrd", "trigamma",
        "trim", "trimn", "trunc", "uniform", "upcase", "uss", "var",
        "varfmt", "varinfmt", "varlabel", "varlen", "varname",
        "varnum", "varray", "varrayx", "vartype", "verify", "vformat",
        "vformatd", "vformatdx", "vformatn", "vformatnx", "vformatw",
        "vformatwx", "vformatx", "vinarray", "vinarrayx", "vinformat",
        "vinformatd", "vinformatdx", "vinformatn", "vinformatnx",
        "vinformatw", "vinformatwx", "vinformatx", "vlabel",
        "vlabelx", "vlength", "vlengthx", "vname", "vnamex", "vtype",
        "vtypex", "weekday", "year", "yyq", "zipfips", "zipname",
        "zipnamel", "zipstate"
    )

    tokens = {
        'root': [
            include('comments'),
            include('proc-data'),
            include('cards-datalines'),
            include('logs'),
            include('general'),
            # Fallback: anything not claimed above is plain text.
            (r'.', Text),
        ],
        # SAS is multi-line regardless, but * is ended by ;
        # The single-line forms are tried first; ``[\s\S]`` (any character,
        # newlines included) handles the forms that span lines.
        'comments': [
            (r'^\s*\*.*?;', Comment),
            (r'/\*.*?\*/', Comment),
            (r'^\s*\*[\s\S]*?;', Comment.Multiline),
            (r'/[*][\s\S]*?[*]/', Comment.Multiline),
        ],
        # Special highlight for proc, data, quit, run
        'proc-data': [
            (r'(^|;)\s*(proc \w+|data|run|quit)[\s;]',
             Keyword.Reserved),
        ],
        # Special highlight cards and datalines
        'cards-datalines': [
            (r'^\s*(datalines|cards)\s*;\s*$', Keyword, 'data'),
        ],
        # In-stream data: everything up to and including the FIRST line that
        # holds nothing but a semicolon.  The quantifier must be lazy; a
        # greedy match would run on to the last such line in the file and
        # swallow all code in between.
        'data': [
            (r'[\s\S]*?^\s*;\s*$', Other, '#pop'),
        ],
        # Special highlight for put NOTE|ERROR|WARNING (order matters)
        'logs': [
            (r'\n?^\s*%?put ', Keyword, 'log-messages'),
        ],
        'log-messages': [
            (r'NOTE(:|-).*', Generic, '#pop'),
            (r'WARNING(:|-).*', Generic.Emph, '#pop'),
            (r'ERROR(:|-).*', Generic.Error, '#pop'),
            include('general'),
            # Nothing above matched (whitespace, ';', operators, ...): hand
            # control back to the enclosing state instead of letting the
            # lexer emit Error tokens for ordinary ``put`` arguments.
            default('#pop'),
        ],
        'general': [
            include('keywords'),
            include('vars-strings'),
            include('special'),
            include('numbers'),
        ],
        # Keywords, statements, functions, macros
        'keywords': [
            (words(builtins_statements,
                   prefix=r'\b',
                   suffix=r'\b'),
             Keyword),
            (words(builtins_sql,
                   prefix=r'\b',
                   suffix=r'\b'),
             Keyword),
            (words(builtins_conditionals,
                   prefix=r'\b',
                   suffix=r'\b'),
             Keyword),
            (words(builtins_macros,
                   prefix=r'%',
                   suffix=r'\b'),
             Name.Builtin),
            # NOTE: the suffix consumes the opening parenthesis, so it is
            # emitted as part of the Name.Builtin token.
            (words(builtins_functions,
                   prefix=r'\b',
                   suffix=r'\('),
             Name.Builtin),
        ],
        # Strings and user-defined variables and macros (order matters)
        'vars-strings': [
            (r'&[a-z_]\w{0,31}\.?', Name.Variable),
            (r'%[a-z_]\w{0,31}', Name.Function),
            (r'\'', String, 'string_squote'),
            (r'"', String, 'string_dquote'),
        ],
        'string_squote': [
            ('\'', String, '#pop'),
            (r'\\\\|\\"|\\\n', String.Escape),
            # AFAIK, macro variables are not evaluated in single quotes
            # (r'&', Name.Variable, 'validvar'),
            (r'[^$\'\\]+', String),
            (r'[$\'\\]', String),
        ],
        'string_dquote': [
            (r'"', String, '#pop'),
            (r'\\\\|\\"|\\\n', String.Escape),
            (r'&', Name.Variable, 'validvar'),
            (r'[^$&"\\]+', String),
            (r'[$"\\]', String),
        ],
        # Entered right after an ``&`` inside a double-quoted string.
        'validvar': [
            (r'[a-z_]\w{0,31}\.?', Name.Variable, '#pop'),
            # A bare ``&`` that is not followed by a variable name (as in
            # "Tom & Jerry"): return to the string instead of producing
            # Error tokens.
            default('#pop'),
        ],
        # SAS numbers and special variables
        'numbers': [
            (r'\b[+-]?([0-9]+(\.[0-9]+)?|\.[0-9]+|\.)(E[+-]?[0-9]+)?i?\b',
             Number),
        ],
        # Anchored with \b so that e.g. ``null`` is not picked out of the
        # middle of a longer identifier such as ``nullify``.
        'special': [
            (r'\b(null|missing|_all_|_automatic_|_character_|_n_|'
             r'_infile_|_name_|_null_|_numeric_|_user_|_webout_)\b',
             Keyword.Constant),
        ],
        # 'operators': [
        #     (r'(-|=|<=|>=|<|>|<>|&|!=|'
        #      r'\||\*|\+|\^|/|!|~|~=)', Operator)
        # ],
    }