# -*- coding: utf-8 -*-
"""
    pygments.lexers.r
    ~~~~~~~~~~~~~~~~~

    Lexers for the R/S languages.

    :copyright: Copyright 2006-2020 by the Pygments team, see AUTHORS.
    :license: BSD, see LICENSE for details.
"""

import re

from pygments.lexer import Lexer, RegexLexer, include, do_insertions, bygroups
from pygments.token import Text, Comment, Operator, Keyword, Name, String, \
    Number, Punctuation, Generic

__all__ = ['RConsoleLexer', 'SLexer', 'RdLexer']


line_re = re.compile('.*?\n')


class RConsoleLexer(Lexer):
    """
    For R console transcripts or R CMD BATCH output files.
    """

    name = 'RConsole'
    aliases = ['rconsole', 'rout']
    filenames = ['*.Rout']

    def get_tokens_unprocessed(self, text):
        slexer = SLexer(**self.options)

        code_buffer = ''    # R source accumulated from consecutive prompt lines
        prompt_tokens = []  # (offset-into-code_buffer, prompt token list) pairs

        for match in line_re.finditer(text):
            line = match.group()
            if line.startswith('>') or line.startswith('+'):
                # Prompt line: emit the two-character prompt ("> " / "+ ")
                # as Generic.Prompt and queue the remainder as R code.
                prompt_tokens.append((len(code_buffer),
                                      [(0, Generic.Prompt, line[:2])]))
                code_buffer += line[2:]
                continue
            # Non-prompt line reached.  Any buffered code must be flushed
            # first: highlight it with the S lexer and weave the stored
            # prompt tokens back in at their recorded offsets.
            if code_buffer:
                yield from do_insertions(
                    prompt_tokens, slexer.get_tokens_unprocessed(code_buffer))
                code_buffer = ''
                prompt_tokens = []
            # The line itself is output produced by R.
            yield match.start(), Generic.Output, line

        # The transcript may end in the middle of a code block with no
        # output after it; flush that trailing block as well.
        if code_buffer:
            yield from do_insertions(
                prompt_tokens, slexer.get_tokens_unprocessed(code_buffer))


class SLexer(RegexLexer):
    """
    For S, S-plus, and R source code.

    .. versionadded:: 0.10
    """

    name = 'S'
    aliases = ['splus', 's', 'r']
    filenames = ['*.S', '*.R', '.Rhistory', '.Rprofile', '.Renviron']
    mimetypes = ['text/S-plus', 'text/S', 'text/x-r-source', 'text/x-r',
                 'text/x-R', 'text/x-r-history', 'text/x-r-profile']

    # A valid R name: backquoted (with escapes), or a letter / dot-prefixed
    # identifier, or a lone dot.
    valid_name = r'`[^`\\]*(?:\\.[^`\\]*)*`|(?:[a-zA-Z]|\.[A-Za-z_.])[\w.]*|\.'

    tokens = {
        'comments': [
            (r'#.*$', Comment.Single),
        ],
        'valid_name': [
            (valid_name, Name),
        ],
        'punctuation': [
            (r'\[{1,2}|\]{1,2}|\(|\)|;|,', Punctuation),
        ],
        'keywords': [
            (r'(if|else|for|while|repeat|in|next|break|return|switch|function)'
             r'(?![\w.])',
             Keyword.Reserved),
        ],
        'operators': [
            (r'<<?-|->>?|-|==|<=|>=|<|>|&&?|!=|\|\|?|\?', Operator),
            (r'\*|\+|\^|/|!|%[^%]*%|=|~|\$|@|:{1,3}', Operator),
        ],
        'builtin_symbols': [
            (r'(NULL|NA(_(integer|real|complex|character)_)?|'
             r'letters|LETTERS|Inf|TRUE|FALSE|NaN|pi|\.\.(\.|[0-9]+))'
             r'(?![\w.])',
             Keyword.Constant),
            (r'(T|F)\b', Name.Builtin.Pseudo),
        ],
        'numbers': [
            # hexadecimal literal, optionally with binary exponent / L / i
            (r'0[xX][a-fA-F0-9]+([pP][0-9]+)?[Li]?', Number.Hex),
            # decimal literal (integer, float, or leading-dot form)
            (r'[+-]?([0-9]+(\.[0-9]+)?|\.[0-9]+|\.)([eE][+-]?[0-9]+)?[Li]?',
             Number),
        ],
        'statements': [
            include('comments'),
            # whitespace
            (r'\s+', Text),
            (r'\'', String, 'string_squote'),
            (r'\"', String, 'string_dquote'),
            include('builtin_symbols'),
            include('valid_name'),
            include('numbers'),
            include('keywords'),
            include('punctuation'),
            include('operators'),
        ],
        'root': [
            # a name immediately followed by '(' is a call
            (r'(%s)\s*(?=\()' % valid_name, Name.Function),
            include('statements'),
            # braces delimit blocks
            (r'\{|\}', Punctuation),
            # anything left over
            (r'.', Text),
        ],
        'string_squote': [
            (r'([^\'\\]|\\.)*\'', String, '#pop'),
        ],
        'string_dquote': [
            (r'([^"\\]|\\.)*"', String, '#pop'),
        ],
    }

    def analyse_text(text):
        # The left-assignment arrow (but not '<<-' matched as '<' + '<-')
        # is a strong hint of R source.
        if re.search(r'[a-z0-9_\])\s]<-(?!-)', text):
            return 0.11


class RdLexer(RegexLexer):
    """
    Pygments Lexer for R documentation (Rd) files

    This is a very minimal implementation, highlighting little more
    than the macros. A description of Rd syntax is found in `Writing R
    Extensions <http://cran.r-project.org/doc/manuals/R-exts.html>`_
    and `Parsing Rd files <http://developer.r-project.org/parseRd.pdf>`_.

    .. versionadded:: 1.6
    """
    name = 'Rd'
    aliases = ['rd']
    filenames = ['*.Rd']
    mimetypes = ['text/x-r-doc']

    # Properly distinguishing the verbatim, LaTeX-like, and R-like regions
    # of an Rd file would require real parsing; this lexer does not attempt it.
    tokens = {
        'root': [
            # escaped brackets and percent sign
            (r'\\[\\{}%]', String.Escape),
            # comments run from % to end of line
            (r'%.*$', Comment),
            # argument-less special macros
            (r'\\(?:cr|l?dots|R|tab)\b', Keyword.Constant),
            # ordinary macros
            (r'\\[a-zA-Z]+\b', Keyword),
            # preprocessor-style conditional macros
            (r'^\s*#(?:ifn?def|endif).*\b', Comment.Preproc),
            # bare (non-escaped) brackets
            (r'[{}]', Name.Builtin),
            # everything else
            (r'[^\\%\n{}]+', Text),
            (r'.', Text),
        ]
    }