# -*- coding: utf-8 -*-
"""
    pygments.lexers.r
    ~~~~~~~~~~~~~~~~~

    Lexers for the R/S languages.

    :copyright: Copyright 2006-2020 by the Pygments team, see AUTHORS.
    :license: BSD, see LICENSE for details.
"""

import re

from pygments.lexer import Lexer, RegexLexer, include, do_insertions, bygroups
from pygments.token import Text, Comment, Operator, Keyword, Name, String, \
    Number, Punctuation, Generic

__all__ = ['RConsoleLexer', 'SLexer', 'RdLexer']


line_re = re.compile('.*?\n')


class RConsoleLexer(Lexer):
    """
    For R console transcripts or R CMD BATCH output files.
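
    Lines starting with the ``>`` or ``+`` prompt are highlighted as R
    source, everything else as plain output, e.g. in this purely
    illustrative transcript::

        > x <- c(1, 2,
        +        3)
        > mean(x)
        [1] 2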
27    """
28
29    name = 'RConsole'
30    aliases = ['rconsole', 'rout']
31    filenames = ['*.Rout']
32
33    def get_tokens_unprocessed(self, text):
34        slexer = SLexer(**self.options)
35
36        current_code_block = ''
37        insertions = []
38
39        for match in line_re.finditer(text):
40            line = match.group()
41            if line.startswith('>') or line.startswith('+'):
42                # Colorize the prompt as such,
43                # then put rest of line into current_code_block
44                insertions.append((len(current_code_block),
45                                   [(0, Generic.Prompt, line[:2])]))
46                current_code_block += line[2:]
47            else:
48                # We have reached a non-prompt line!
49                # If we have stored prompt lines, need to process them first.
50                if current_code_block:
51                    # Weave together the prompts and highlight code.
52                    yield from do_insertions(
53                        insertions, slexer.get_tokens_unprocessed(current_code_block))
54                    # Reset vars for next code block.
55                    current_code_block = ''
56                    insertions = []
57                # Now process the actual line itself, this is output from R.
58                yield match.start(), Generic.Output, line
59
60        # If we happen to end on a code block with nothing after it, need to
61        # process the last code block. This is neither elegant nor DRY so
62        # should be changed.
63        if current_code_block:
64            yield from do_insertions(
65                insertions, slexer.get_tokens_unprocessed(current_code_block))
66
67
68class SLexer(RegexLexer):
69    """
70    For S, S-plus, and R source code.
71
72    .. versionadded:: 0.10
73    """
74
75    name = 'S'
76    aliases = ['splus', 's', 'r']
77    filenames = ['*.S', '*.R', '.Rhistory', '.Rprofile', '.Renviron']
78    mimetypes = ['text/S-plus', 'text/S', 'text/x-r-source', 'text/x-r',
79                 'text/x-R', 'text/x-r-history', 'text/x-r-profile']
80
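    # A valid name is a backtick-quoted string (backslash escapes allowed),
    # an identifier starting with a letter or with a dot not followed by a
    # digit, or a single dot.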
    valid_name = r'`[^`\\]*(?:\\.[^`\\]*)*`|(?:[a-zA-Z]|\.[A-Za-z_.])[\w.]*|\.'
    tokens = {
        'comments': [
            (r'#.*$', Comment.Single),
        ],
        'valid_name': [
            (valid_name, Name),
        ],
        'punctuation': [
            (r'\[{1,2}|\]{1,2}|\(|\)|;|,', Punctuation),
        ],
        'keywords': [
            (r'(if|else|for|while|repeat|in|next|break|return|switch|function)'
             r'(?![\w.])',
             Keyword.Reserved),
        ],
        'operators': [
            (r'<<?-|->>?|-|==|<=|>=|<|>|&&?|!=|\|\|?|\?', Operator),
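            # %[^%]*% covers user-defined infix operators such as %in% or %o%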
            (r'\*|\+|\^|/|!|%[^%]*%|=|~|\$|@|:{1,3}', Operator),
        ],
        'builtin_symbols': [
            (r'(NULL|NA(_(integer|real|complex|character)_)?|'
             r'letters|LETTERS|Inf|TRUE|FALSE|NaN|pi|\.\.(\.|[0-9]+))'
             r'(?![\w.])',
             Keyword.Constant),
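            # T and F are ordinary variables predefined as TRUE/FALSE and may
            # be reassigned, hence pseudo builtins rather than constants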
            (r'(T|F)\b', Name.Builtin.Pseudo),
        ],
        'numbers': [
            # hex number
            (r'0[xX][a-fA-F0-9]+([pP][0-9]+)?[Li]?', Number.Hex),
            # decimal number
            (r'[+-]?([0-9]+(\.[0-9]+)?|\.[0-9]+|\.)([eE][+-]?[0-9]+)?[Li]?',
             Number),
        ],
        'statements': [
            include('comments'),
            # whitespaces
            (r'\s+', Text),
            (r'\'', String, 'string_squote'),
            (r'\"', String, 'string_dquote'),
            include('builtin_symbols'),
            include('valid_name'),
            include('numbers'),
            include('keywords'),
            include('punctuation'),
            include('operators'),
        ],
        'root': [
            # calls:
            (r'(%s)\s*(?=\()' % valid_name, Name.Function),
            include('statements'),
            # blocks:
            (r'\{|\}', Punctuation),
            # (r'\{', Punctuation, 'block'),
            (r'.', Text),
        ],
        # 'block': [
        #    include('statements'),
        #    ('\{', Punctuation, '#push'),
        #    ('\}', Punctuation, '#pop')
        # ],
        'string_squote': [
            (r'([^\'\\]|\\.)*\'', String, '#pop'),
        ],
        'string_dquote': [
            (r'([^"\\]|\\.)*"', String, '#pop'),
        ],
    }

    def analyse_text(text):
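        # A bare '<-' assignment (but not '<--') preceded by a name, closing
        # bracket or whitespace is a reasonable hint that this is R/S source.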
        if re.search(r'[a-z0-9_\])\s]<-(?!-)', text):
            return 0.11


class RdLexer(RegexLexer):
    """
    Pygments Lexer for R documentation (Rd) files

    This is a very minimal implementation, highlighting little more
    than the macros. A description of Rd syntax is found in `Writing R
    Extensions <http://cran.r-project.org/doc/manuals/R-exts.html>`_
    and `Parsing Rd files <http://developer.r-project.org/parseRd.pdf>`_.

    .. versionadded:: 1.6
    """
    name = 'Rd'
    aliases = ['rd']
    filenames = ['*.Rd']
    mimetypes = ['text/x-r-doc']

    # To account for verbatim / LaTeX-like / and R-like areas
    # would require parsing.
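    #
    # A typical Rd fragment (purely illustrative) looks like:
    #
    #   % A comment line
    #   \name{mean}
    #   \title{Arithmetic Mean}
    #   \usage{mean(x, \dots)}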
    tokens = {
        'root': [
            # catch escaped brackets and percent sign
            (r'\\[\\{}%]', String.Escape),
            # comments
            (r'%.*$', Comment),
            # special macros with no arguments
            (r'\\(?:cr|l?dots|R|tab)\b', Keyword.Constant),
            # macros
            (r'\\[a-zA-Z]+\b', Keyword),
            # special preprocessor macros
            (r'^\s*#(?:ifn?def|endif).*\b', Comment.Preproc),
            # non-escaped brackets
            (r'[{}]', Name.Builtin),
            # everything else
            (r'[^\\%\n{}]+', Text),
            (r'.', Text),
        ]
    }