"""
    pygments.lexers.robotframework
    ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

    Lexer for Robot Framework.

    :copyright: Copyright 2006-2021 by the Pygments team, see AUTHORS.
    :license: BSD, see LICENSE for details.
"""

#  Copyright 2012 Nokia Siemens Networks Oyj
#
#  Licensed under the Apache License, Version 2.0 (the "License");
#  you may not use this file except in compliance with the License.
#  You may obtain a copy of the License at
#
#      http://www.apache.org/licenses/LICENSE-2.0
#
#  Unless required by applicable law or agreed to in writing, software
#  distributed under the License is distributed on an "AS IS" BASIS,
#  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
#  See the License for the specific language governing permissions and
#  limitations under the License.

import re

from pygments.lexer import Lexer
from pygments.token import Token

__all__ = ['RobotFrameworkLexer']


HEADING = Token.Generic.Heading
SETTING = Token.Keyword.Namespace
IMPORT = Token.Name.Namespace
TC_KW_NAME = Token.Generic.Subheading
KEYWORD = Token.Name.Function
ARGUMENT = Token.String
VARIABLE = Token.Name.Variable
COMMENT = Token.Comment
SEPARATOR = Token.Punctuation
SYNTAX = Token.Punctuation
GHERKIN = Token.Generic.Emph
ERROR = Token.Error


def normalize(string, remove=''):
    string = string.lower()
    for char in remove + ' ':
        if char in string:
            string = string.replace(char, '')
    return string


class RobotFrameworkLexer(Lexer):
    """
    For `Robot Framework <http://robotframework.org>`_ test data.

    Supports both space and pipe separated plain text formats.

    .. versionadded:: 1.6
    """
    name = 'RobotFramework'
    aliases = ['robotframework']
    filenames = ['*.robot']
    mimetypes = ['text/x-robotframework']

    def __init__(self, **options):
        options['tabsize'] = 2
        options['encoding'] = 'UTF-8'
        Lexer.__init__(self, **options)

    def get_tokens_unprocessed(self, text):
        row_tokenizer = RowTokenizer()
        var_tokenizer = VariableTokenizer()
        index = 0
        for row in text.splitlines():
            for value, token in row_tokenizer.tokenize(row):
                for value, token in var_tokenizer.tokenize(value, token):
                    if value:
                        yield index, token, str(value)
                        index += len(value)


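# Splits a single cell into variable and non-variable parts so that
# ${scalar}, @{list}, &{dict} and %{env} variables get their own tokens.
# Nested variables and item access (e.g. @{list}[0]) are handled recursively.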
class VariableTokenizer:

    def tokenize(self, string, token):
        var = VariableSplitter(string, identifiers='$@%&')
        if var.start < 0 or token in (COMMENT, ERROR):
            yield string, token
            return
        for value, token in self._tokenize(var, string, token):
            if value:
                yield value, token

    def _tokenize(self, var, string, orig_token):
        before = string[:var.start]
        yield before, orig_token
        yield var.identifier + '{', SYNTAX
        yield from self.tokenize(var.base, VARIABLE)
        yield '}', SYNTAX
        if var.index:
            yield '[', SYNTAX
            yield from self.tokenize(var.index, VARIABLE)
            yield ']', SYNTAX
        yield from self.tokenize(string[var.end:], orig_token)


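# Tokenizes rows one at a time and tracks the active table (settings,
# variables, test cases/tasks or keywords), since the same cell gets
# different tokens depending on the table it belongs to.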
class RowTokenizer:

    def __init__(self):
        self._table = UnknownTable()
        self._splitter = RowSplitter()
        testcases = TestCaseTable()
        settings = SettingTable(testcases.set_default_template)
        variables = VariableTable()
        keywords = KeywordTable()
        self._tables = {'settings': settings, 'setting': settings,
                        'metadata': settings,
                        'variables': variables, 'variable': variables,
                        'testcases': testcases, 'testcase': testcases,
                        'tasks': testcases, 'task': testcases,
                        'keywords': keywords, 'keyword': keywords,
                        'userkeywords': keywords, 'userkeyword': keywords}

    def tokenize(self, row):
        commented = False
        heading = False
        for index, value in enumerate(self._splitter.split(row)):
            # First value, and every second after that, is a separator.
            index, separator = divmod(index-1, 2)
            if value.startswith('#'):
                commented = True
            elif index == 0 and value.startswith('*'):
                self._table = self._start_table(value)
                heading = True
            yield from self._tokenize(value, index, commented,
                                      separator, heading)
        self._table.end_row()

    def _start_table(self, header):
        name = normalize(header, remove='*')
        return self._tables.get(name, UnknownTable())

    def _tokenize(self, value, index, commented, separator, heading):
        if commented:
            yield value, COMMENT
        elif separator:
            yield value, SEPARATOR
        elif heading:
            yield value, HEADING
        else:
            yield from self._table.tokenize(value, index)


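# Splits a row into alternating separator and cell values. Rows starting
# with '| ' are treated as pipe separated; otherwise runs of two or more
# spaces act as the separator.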
class RowSplitter:
    _space_splitter = re.compile('( {2,})')
    _pipe_splitter = re.compile(r'((?:^| +)\|(?: +|$))')

    def split(self, row):
        splitter = (row.startswith('| ') and self._split_from_pipes
                    or self._split_from_spaces)
        yield from splitter(row)
        yield '\n'

    def _split_from_spaces(self, row):
        yield ''  # Start with (pseudo)separator similarly as with pipes
        yield from self._space_splitter.split(row)

    def _split_from_pipes(self, row):
        _, separator, rest = self._pipe_splitter.split(row, 1)
        yield separator
        while self._pipe_splitter.search(rest):
            cell, separator, rest = self._pipe_splitter.split(rest, 1)
            yield cell
            yield separator
        yield rest


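# Base class for cell tokenizers. Subclasses list the tokens for consecutive
# cells in _tokens; the last entry is reused for any remaining cells.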
class Tokenizer:
    _tokens = None

    def __init__(self):
        self._index = 0

    def tokenize(self, value):
        values_and_tokens = self._tokenize(value, self._index)
        self._index += 1
        if isinstance(values_and_tokens, type(Token)):
            values_and_tokens = [(value, values_and_tokens)]
        return values_and_tokens

    def _tokenize(self, value, index):
        index = min(index, len(self._tokens) - 1)
        return self._tokens[index]

    def _is_assign(self, value):
        if value.endswith('='):
            value = value[:-1].strip()
        var = VariableSplitter(value, identifiers='$@&')
        return var.start == 0 and var.end == len(value)


class Comment(Tokenizer):
    _tokens = (COMMENT,)


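# Tokenizes setting table rows. Settings whose values are keywords or
# imports hand the remaining cells to KeywordCall or ImportSetting, and
# unknown setting names are flagged as errors.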
class Setting(Tokenizer):
    _tokens = (SETTING, ARGUMENT)
    _keyword_settings = ('suitesetup', 'suiteprecondition', 'suiteteardown',
                         'suitepostcondition', 'testsetup', 'tasksetup',
                         'testprecondition', 'testteardown', 'taskteardown',
                         'testpostcondition', 'testtemplate', 'tasktemplate')
    _import_settings = ('library', 'resource', 'variables')
    _other_settings = ('documentation', 'metadata', 'forcetags', 'defaulttags',
                       'testtimeout', 'tasktimeout')
    _custom_tokenizer = None

    def __init__(self, template_setter=None):
        Tokenizer.__init__(self)
        self._template_setter = template_setter

    def _tokenize(self, value, index):
        if index == 1 and self._template_setter:
            self._template_setter(value)
        if index == 0:
            normalized = normalize(value)
            if normalized in self._keyword_settings:
                self._custom_tokenizer = KeywordCall(support_assign=False)
            elif normalized in self._import_settings:
                self._custom_tokenizer = ImportSetting()
            elif normalized not in self._other_settings:
                return ERROR
        elif self._custom_tokenizer:
            return self._custom_tokenizer.tokenize(value)
        return Tokenizer._tokenize(self, value, index)


class ImportSetting(Tokenizer):
    _tokens = (IMPORT, ARGUMENT)


class TestCaseSetting(Setting):
    _keyword_settings = ('setup', 'precondition', 'teardown', 'postcondition',
                         'template')
    _import_settings = ()
    _other_settings = ('documentation', 'tags', 'timeout')

    def _tokenize(self, value, index):
        if index == 0:
            type = Setting._tokenize(self, value[1:-1], index)
            return [('[', SYNTAX), (value[1:-1], type), (']', SYNTAX)]
        return Setting._tokenize(self, value, index)


class KeywordSetting(TestCaseSetting):
    _keyword_settings = ('teardown',)
    _other_settings = ('documentation', 'arguments', 'return', 'timeout', 'tags')


class Variable(Tokenizer):
    _tokens = (SYNTAX, ARGUMENT)

    def _tokenize(self, value, index):
        if index == 0 and not self._is_assign(value):
            return ERROR
        return Tokenizer._tokenize(self, value, index)


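# Tokenizes a keyword call row: optional variable assignments first, then
# the keyword name (possibly with a Gherkin prefix), then its arguments.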
class KeywordCall(Tokenizer):
    _tokens = (KEYWORD, ARGUMENT)

    def __init__(self, support_assign=True):
        Tokenizer.__init__(self)
        self._keyword_found = not support_assign
        self._assigns = 0

    def _tokenize(self, value, index):
        if not self._keyword_found and self._is_assign(value):
            self._assigns += 1
            return SYNTAX  # VariableTokenizer tokenizes this later.
        if self._keyword_found:
            return Tokenizer._tokenize(self, value, index - self._assigns)
        self._keyword_found = True
        return GherkinTokenizer().tokenize(value, KEYWORD)


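# Separates a leading Given/When/Then/And prefix from the keyword name so
# it can be emphasized.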
class GherkinTokenizer:
    _gherkin_prefix = re.compile('^(Given|When|Then|And) ', re.IGNORECASE)

    def tokenize(self, value, token):
        match = self._gherkin_prefix.match(value)
        if not match:
            return [(value, token)]
        end = match.end()
        return [(value[:end], GHERKIN), (value[end:], token)]


class TemplatedKeywordCall(Tokenizer):
    _tokens = (ARGUMENT,)


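# Tokenizes old-style ':FOR' loop rows: cells up to and including the
# 'IN'/'IN RANGE' marker are syntax, the cells after it are arguments.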
class ForLoop(Tokenizer):

    def __init__(self):
        Tokenizer.__init__(self)
        self._in_arguments = False

    def _tokenize(self, value, index):
        token = self._in_arguments and ARGUMENT or SYNTAX
        if value.upper() in ('IN', 'IN RANGE'):
            self._in_arguments = True
        return token


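# Base class for table-specific tokenization. Handles row continuation
# with '...' and resets the cell tokenizer at the end of every row.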
class _Table:
    _tokenizer_class = None

    def __init__(self, prev_tokenizer=None):
        self._tokenizer = self._tokenizer_class()
        self._prev_tokenizer = prev_tokenizer
        self._prev_values_on_row = []

    def tokenize(self, value, index):
        if self._continues(value, index):
            self._tokenizer = self._prev_tokenizer
            yield value, SYNTAX
        else:
            yield from self._tokenize(value, index)
        self._prev_values_on_row.append(value)

    def _continues(self, value, index):
        return value == '...' and all(self._is_empty(t)
                                      for t in self._prev_values_on_row)

    def _is_empty(self, value):
        return value in ('', '\\')

    def _tokenize(self, value, index):
        return self._tokenizer.tokenize(value)

    def end_row(self):
        self.__init__(prev_tokenizer=self._tokenizer)


class UnknownTable(_Table):
    _tokenizer_class = Comment

    def _continues(self, value, index):
        return False


class VariableTable(_Table):
    _tokenizer_class = Variable


class SettingTable(_Table):
    _tokenizer_class = Setting

    def __init__(self, template_setter, prev_tokenizer=None):
        _Table.__init__(self, prev_tokenizer)
        self._template_setter = template_setter

    def _tokenize(self, value, index):
        if index == 0 and normalize(value) == 'testtemplate':
            self._tokenizer = Setting(self._template_setter)
        return _Table._tokenize(self, value, index)

    def end_row(self):
        self.__init__(self._template_setter, prev_tokenizer=self._tokenizer)


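# Test case (and task) table. The first cell on a row is the test name;
# later cells are settings, for loops, or keyword calls. When a template
# is in effect, data rows are tokenized as plain arguments.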
class TestCaseTable(_Table):
    _setting_class = TestCaseSetting
    _test_template = None
    _default_template = None

    @property
    def _tokenizer_class(self):
        if self._test_template or (self._default_template and
                                   self._test_template is not False):
            return TemplatedKeywordCall
        return KeywordCall

    def _continues(self, value, index):
        return index > 0 and _Table._continues(self, value, index)

    def _tokenize(self, value, index):
        if index == 0:
            if value:
                self._test_template = None
            return GherkinTokenizer().tokenize(value, TC_KW_NAME)
        if index == 1 and self._is_setting(value):
            if self._is_template(value):
                self._test_template = False
                self._tokenizer = self._setting_class(self.set_test_template)
            else:
                self._tokenizer = self._setting_class()
        if index == 1 and self._is_for_loop(value):
            self._tokenizer = ForLoop()
        if index == 1 and self._is_empty(value):
            return [(value, SYNTAX)]
        return _Table._tokenize(self, value, index)

    def _is_setting(self, value):
        return value.startswith('[') and value.endswith(']')

    def _is_template(self, value):
        return normalize(value) == '[template]'

    def _is_for_loop(self, value):
        return value.startswith(':') and normalize(value, remove=':') == 'for'

    def set_test_template(self, template):
        self._test_template = self._is_template_set(template)

    def set_default_template(self, template):
        self._default_template = self._is_template_set(template)

    def _is_template_set(self, template):
        return normalize(template) not in ('', '\\', 'none', '${empty}')


class KeywordTable(TestCaseTable):
    _tokenizer_class = KeywordCall
    _setting_class = KeywordSetting

    def _is_template(self, value):
        return False


# Following code copied directly from Robot Framework 2.7.5.

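# Locates the first variable in a string and records its identifier, base
# name, optional item index, and start/end offsets (start is -1 when the
# string contains no variable).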
class VariableSplitter:

    def __init__(self, string, identifiers):
        self.identifier = None
        self.base = None
        self.index = None
        self.start = -1
        self.end = -1
        self._identifiers = identifiers
        self._may_have_internal_variables = False
        try:
            self._split(string)
        except ValueError:
            pass
        else:
            self._finalize()

    def get_replaced_base(self, variables):
        if self._may_have_internal_variables:
            return variables.replace_string(self.base)
        return self.base

    def _finalize(self):
        self.identifier = self._variable_chars[0]
        self.base = ''.join(self._variable_chars[2:-1])
        self.end = self.start + len(self._variable_chars)
        if self._has_list_or_dict_variable_index():
            self.index = ''.join(self._list_and_dict_variable_index_chars[1:-1])
            self.end += len(self._list_and_dict_variable_index_chars)

    def _has_list_or_dict_variable_index(self):
        return self._list_and_dict_variable_index_chars\
        and self._list_and_dict_variable_index_chars[-1] == ']'

    def _split(self, string):
        start_index, max_index = self._find_variable(string)
        self.start = start_index
        self._open_curly = 1
        self._state = self._variable_state
        self._variable_chars = [string[start_index], '{']
        self._list_and_dict_variable_index_chars = []
        self._string = string
        start_index += 2
        for index, char in enumerate(string[start_index:]):
            index += start_index  # Giving start to enumerate only in Py 2.6+
            try:
                self._state(char, index)
            except StopIteration:
                return
            if index == max_index and not self._scanning_list_variable_index():
                return

    def _scanning_list_variable_index(self):
        return self._state in [self._waiting_list_variable_index_state,
                               self._list_variable_index_state]

    def _find_variable(self, string):
        max_end_index = string.rfind('}')
        if max_end_index == -1:
            raise ValueError('No variable end found')
        if self._is_escaped(string, max_end_index):
            return self._find_variable(string[:max_end_index])
        start_index = self._find_start_index(string, 1, max_end_index)
        if start_index == -1:
            raise ValueError('No variable start found')
        return start_index, max_end_index

    def _find_start_index(self, string, start, end):
        index = string.find('{', start, end) - 1
        if index < 0:
            return -1
        if self._start_index_is_ok(string, index):
            return index
        return self._find_start_index(string, index+2, end)

    def _start_index_is_ok(self, string, index):
        return string[index] in self._identifiers\
        and not self._is_escaped(string, index)

    def _is_escaped(self, string, index):
        escaped = False
        while index > 0 and string[index-1] == '\\':
            index -= 1
            escaped = not escaped
        return escaped

    def _variable_state(self, char, index):
        self._variable_chars.append(char)
        if char == '}' and not self._is_escaped(self._string, index):
            self._open_curly -= 1
            if self._open_curly == 0:
                if not self._is_list_or_dict_variable():
                    raise StopIteration
                self._state = self._waiting_list_variable_index_state
        elif char in self._identifiers:
            self._state = self._internal_variable_start_state

    def _is_list_or_dict_variable(self):
        return self._variable_chars[0] in ('@', '&')

    def _internal_variable_start_state(self, char, index):
        self._state = self._variable_state
        if char == '{':
            self._variable_chars.append(char)
            self._open_curly += 1
            self._may_have_internal_variables = True
        else:
            self._variable_state(char, index)

    def _waiting_list_variable_index_state(self, char, index):
        if char != '[':
            raise StopIteration
        self._list_and_dict_variable_index_chars.append(char)
        self._state = self._list_variable_index_state

    def _list_variable_index_state(self, char, index):
        self._list_and_dict_variable_index_chars.append(char)
        if char == ']':
            raise StopIteration