# -*- coding: utf-8 -*-
"""
    pygments.lexers.stata
    ~~~~~~~~~~~~~~~~~~~~~

    Lexer for Stata

    :copyright: Copyright 2006-2020 by the Pygments team, see AUTHORS.
    :license: BSD, see LICENSE for details.
"""

import re
from pygments.lexer import RegexLexer, default, include, words
from pygments.token import Comment, Keyword, Name, Number, \
    String, Text, Operator

from pygments.lexers._stata_builtins import builtins_base, builtins_functions

__all__ = ['StataLexer']


class StataLexer(RegexLexer):
    """
    For `Stata <http://www.stata.com/>`_ do files.

    The ``root`` state tries, in order: comments, strings, macros, numbers,
    keywords, operators and display formats; any other character is emitted
    as plain ``Text``. Comments, compound strings and macros can nest, so
    each of them is handled by dedicated states that push and pop
    themselves.

    .. versionadded:: 2.2
    """
    # Syntax based on
    # - http://fmwww.bc.edu/RePEc/bocode/s/synlightlist.ado
    # - https://github.com/isagalaev/highlight.js/blob/master/src/languages/stata.js
    # - https://github.com/jpitblado/vim-stata/blob/master/syntax/stata.vim

    name = 'Stata'
    aliases = ['stata', 'do']
    filenames = ['*.do', '*.ado']
    mimetypes = ['text/x-stata', 'text/stata', 'application/x-stata']
    # MULTILINE so that ``^`` anchors at every line start; DOTALL so that
    # the ``.`` fallbacks also consume newlines.
    flags = re.MULTILINE | re.DOTALL

    tokens = {
        'root': [
            include('comments'),
            include('strings'),
            include('macros'),
            include('numbers'),
            include('keywords'),
            include('operators'),
            include('format'),
            (r'.', Text),
        ],
        # Comments are a complicated beast in Stata because they can be
        # nested and there are a few corner cases with that. See:
        # - github.com/kylebarron/language-stata/issues/90
        # - statalist.org/forums/forum/general-stata-discussion/general/1448244
        'comments': [
            (r'(^//|(?<=\s)//)(?!/)', Comment.Single, 'comments-double-slash'),
            (r'^\s*\*', Comment.Single, 'comments-star'),
            (r'/\*', Comment.Multiline, 'comments-block'),
            (r'(^///|(?<=\s)///)', Comment.Special, 'comments-triple-slash')
        ],
        'comments-block': [
            (r'/\*', Comment.Multiline, '#push'),
            # this ends and restarts a comment block. but need to catch this
            # so that it doesn't start _another_ level of comment blocks
            (r'\*/\*', Comment.Multiline),
            (r'(\*/\s+\*(?!/)[^\n]*)|(\*/)', Comment.Multiline, '#pop'),
            # Match anything else as a character inside the comment
            (r'.', Comment.Multiline),
        ],
        'comments-star': [
            (r'///.*?\n', Comment.Single,
             ('#pop', 'comments-triple-slash')),
            (r'(^//|(?<=\s)//)(?!/)', Comment.Single,
             ('#pop', 'comments-double-slash')),
            (r'/\*', Comment.Multiline, 'comments-block'),
            # The last character before the newline closes the star comment.
            (r'.(?=\n)', Comment.Single, '#pop'),
            (r'.', Comment.Single),
        ],
        'comments-triple-slash': [
            (r'\n', Comment.Special, '#pop'),
            # A // breaks out of a comment for the rest of the line
            (r'//.*?(?=\n)', Comment.Single, '#pop'),
            (r'.', Comment.Special),
        ],
        'comments-double-slash': [
            (r'\n', Text, '#pop'),
            (r'.', Comment.Single),
        ],
        # `"compound string"' and regular "string"; note the former are
        # nested.
        'strings': [
            (r'`"', String, 'string-compound'),
            (r'(?<!`)"', String, 'string-regular'),
        ],
        'string-compound': [
            (r'`"', String, '#push'),
            (r'"\'', String, '#pop'),
            (r'\\\\|\\"|\\\$|\\`|\\\n', String.Escape),
            include('macros'),
            (r'.', String)
        ],
        'string-regular': [
            # A regular string ends at its closing quote or, unterminated,
            # at the end of the line.
            (r'(")(?!\')|(?=\n)', String, '#pop'),
            (r'\\\\|\\"|\\\$|\\`|\\\n', String.Escape),
            include('macros'),
            (r'.', String)
        ],
        # A local is usually
        #     `\w{0,31}'
        #     `:extended macro'
        #     `=expression'
        #     `[rsen](results)'
        #     `(++--)scalar(++--)'
        #
        # However, there are all sorts of weird rules wrt edge
        # cases. Instead of writing 27 exceptions, anything inside
        # `' is a local.
        #
        # A global is more restricted, so we do follow rules. Note only
        # locals explicitly enclosed ${} can be nested.
        'macros': [
            (r'\$(\{|(?=[$`]))', Name.Variable.Global, 'macro-global-nested'),
            (r'\$', Name.Variable.Global, 'macro-global-name'),
            (r'`', Name.Variable, 'macro-local'),
        ],
        'macro-local': [
            (r'`', Name.Variable, '#push'),
            (r"'", Name.Variable, '#pop'),
            (r'\$(\{|(?=[$`]))', Name.Variable.Global, 'macro-global-nested'),
            (r'\$', Name.Variable.Global, 'macro-global-name'),
            (r'.', Name.Variable),  # fallback
        ],
        'macro-global-nested': [
            (r'\$(\{|(?=[$`]))', Name.Variable.Global, '#push'),
            (r'\}', Name.Variable.Global, '#pop'),
            (r'\$', Name.Variable.Global, 'macro-global-name'),
            (r'`', Name.Variable, 'macro-local'),
            (r'\w', Name.Variable.Global),  # fallback
            default('#pop'),
        ],
        'macro-global-name': [
            # A rule is (regex, token, new_state); to leave this state and
            # enter another one, new_state must be a single tuple. A fourth
            # tuple element would be silently ignored by RegexLexer, leaving
            # this state on the stack.
            (r'\$(\{|(?=[$`]))', Name.Variable.Global,
             ('#pop', 'macro-global-nested')),
            (r'\$', Name.Variable.Global, ('#pop', 'macro-global-name')),
            (r'`', Name.Variable, ('#pop', 'macro-local')),
            (r'\w{1,32}', Name.Variable.Global, '#pop'),
            # A ``$`` that is not followed by a name (e.g. the end-of-line
            # anchor in a regex string such as "foo$") is not a macro: leave
            # the state without consuming anything instead of emitting Error
            # tokens until the next word character.
            default('#pop'),
        ],
        # Built in functions and statements
        'keywords': [
            # Functions are only recognised when directly followed by ``(``.
            (words(builtins_functions, prefix=r'\b', suffix=r'(?=\()'),
             Name.Function),
            # Commands must start a line or follow whitespace.
            (words(builtins_base, prefix=r'(^\s*|\s)', suffix=r'\b'),
             Keyword),
        ],
        # http://www.stata.com/help.cgi?operators
        'operators': [
            # Regex alternation is ordered, so every two-character operator
            # has to be tried before its one-character prefix; otherwise
            # ``~=`` would be split into ``~`` and a stray ``=``.
            (r'==|!=|~=|<=|>=|-|<|>|&', Operator),
            (r'\*|\+|\^|/|!|~', Operator)
        ],
        # Stata numbers
        'numbers': [
            # decimal number
            (r'\b[+-]?([0-9]+(\.[0-9]+)?|\.[0-9]+|\.)([eE][+-]?[0-9]+)?[i]?\b',
             Number),
        ],
        # Stata formats
        'format': [
            (r'%-?\d{1,2}(\.\d{1,2})?[gfe]c?', Name.Other),
            (r'%(21x|16H|16L|8H|8L)', Name.Other),
            (r'%-?(tc|tC|td|tw|tm|tq|th|ty|tg)\S{0,32}', Name.Other),
            (r'%[-~]?\d{1,4}s', Name.Other),
        ]
    }