1""" 2 pygments.lexers.robotframework 3 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 4 5 Lexer for Robot Framework. 6 7 :copyright: Copyright 2006-2021 by the Pygments team, see AUTHORS. 8 :license: BSD, see LICENSE for details. 9""" 10 11# Copyright 2012 Nokia Siemens Networks Oyj 12# 13# Licensed under the Apache License, Version 2.0 (the "License"); 14# you may not use this file except in compliance with the License. 15# You may obtain a copy of the License at 16# 17# http://www.apache.org/licenses/LICENSE-2.0 18# 19# Unless required by applicable law or agreed to in writing, software 20# distributed under the License is distributed on an "AS IS" BASIS, 21# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 22# See the License for the specific language governing permissions and 23# limitations under the License. 24 25import re 26 27from pygments.lexer import Lexer 28from pygments.token import Token 29 30__all__ = ['RobotFrameworkLexer'] 31 32 33HEADING = Token.Generic.Heading 34SETTING = Token.Keyword.Namespace 35IMPORT = Token.Name.Namespace 36TC_KW_NAME = Token.Generic.Subheading 37KEYWORD = Token.Name.Function 38ARGUMENT = Token.String 39VARIABLE = Token.Name.Variable 40COMMENT = Token.Comment 41SEPARATOR = Token.Punctuation 42SYNTAX = Token.Punctuation 43GHERKIN = Token.Generic.Emph 44ERROR = Token.Error 45 46 47def normalize(string, remove=''): 48 string = string.lower() 49 for char in remove + ' ': 50 if char in string: 51 string = string.replace(char, '') 52 return string 53 54 55class RobotFrameworkLexer(Lexer): 56 """ 57 For `Robot Framework <http://robotframework.org>`_ test data. 58 59 Supports both space and pipe separated plain text formats. 60 61 .. versionadded:: 1.6 62 """ 63 name = 'RobotFramework' 64 aliases = ['robotframework'] 65 filenames = ['*.robot'] 66 mimetypes = ['text/x-robotframework'] 67 68 def __init__(self, **options): 69 options['tabsize'] = 2 70 options['encoding'] = 'UTF-8' 71 Lexer.__init__(self, **options) 72 73 def get_tokens_unprocessed(self, text): 74 row_tokenizer = RowTokenizer() 75 var_tokenizer = VariableTokenizer() 76 index = 0 77 for row in text.splitlines(): 78 for value, token in row_tokenizer.tokenize(row): 79 for value, token in var_tokenizer.tokenize(value, token): 80 if value: 81 yield index, token, str(value) 82 index += len(value) 83 84 85class VariableTokenizer: 86 87 def tokenize(self, string, token): 88 var = VariableSplitter(string, identifiers='$@%&') 89 if var.start < 0 or token in (COMMENT, ERROR): 90 yield string, token 91 return 92 for value, token in self._tokenize(var, string, token): 93 if value: 94 yield value, token 95 96 def _tokenize(self, var, string, orig_token): 97 before = string[:var.start] 98 yield before, orig_token 99 yield var.identifier + '{', SYNTAX 100 yield from self.tokenize(var.base, VARIABLE) 101 yield '}', SYNTAX 102 if var.index: 103 yield '[', SYNTAX 104 yield from self.tokenize(var.index, VARIABLE) 105 yield ']', SYNTAX 106 yield from self.tokenize(string[var.end:], orig_token) 107 108 109class RowTokenizer: 110 111 def __init__(self): 112 self._table = UnknownTable() 113 self._splitter = RowSplitter() 114 testcases = TestCaseTable() 115 settings = SettingTable(testcases.set_default_template) 116 variables = VariableTable() 117 keywords = KeywordTable() 118 self._tables = {'settings': settings, 'setting': settings, 119 'metadata': settings, 120 'variables': variables, 'variable': variables, 121 'testcases': testcases, 'testcase': testcases, 122 'tasks': testcases, 'task': testcases, 123 
class RowTokenizer:

    def __init__(self):
        self._table = UnknownTable()
        self._splitter = RowSplitter()
        testcases = TestCaseTable()
        settings = SettingTable(testcases.set_default_template)
        variables = VariableTable()
        keywords = KeywordTable()
        self._tables = {'settings': settings, 'setting': settings,
                        'metadata': settings,
                        'variables': variables, 'variable': variables,
                        'testcases': testcases, 'testcase': testcases,
                        'tasks': testcases, 'task': testcases,
                        'keywords': keywords, 'keyword': keywords,
                        'userkeywords': keywords, 'userkeyword': keywords}

    def tokenize(self, row):
        commented = False
        heading = False
        for index, value in enumerate(self._splitter.split(row)):
            # First value, and every second after that, is a separator.
            index, separator = divmod(index - 1, 2)
            if value.startswith('#'):
                commented = True
            elif index == 0 and value.startswith('*'):
                self._table = self._start_table(value)
                heading = True
            yield from self._tokenize(value, index, commented,
                                      separator, heading)
        self._table.end_row()

    def _start_table(self, header):
        name = normalize(header, remove='*')
        return self._tables.get(name, UnknownTable())

    def _tokenize(self, value, index, commented, separator, heading):
        if commented:
            yield value, COMMENT
        elif separator:
            yield value, SEPARATOR
        elif heading:
            yield value, HEADING
        else:
            yield from self._table.tokenize(value, index)


class RowSplitter:
    _space_splitter = re.compile('( {2,})')
    _pipe_splitter = re.compile(r'((?:^| +)\|(?: +|$))')

    def split(self, row):
        splitter = (self._split_from_pipes if row.startswith('| ')
                    else self._split_from_spaces)
        yield from splitter(row)
        yield '\n'

    def _split_from_spaces(self, row):
        yield ''  # Start with (pseudo)separator similarly as with pipes
        yield from self._space_splitter.split(row)

    def _split_from_pipes(self, row):
        _, separator, rest = self._pipe_splitter.split(row, 1)
        yield separator
        while self._pipe_splitter.search(rest):
            cell, separator, rest = self._pipe_splitter.split(rest, 1)
            yield cell
            yield separator
        yield rest
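
# A worked illustration (assumption: output derived by tracing the regexes
# above; this comment is not part of the upstream module). Cells and
# separators alternate, and every row ends with a '\n' pseudo-cell:
#
#   list(RowSplitter().split('| Keyword | arg |'))
#   # -> ['| ', 'Keyword', ' | ', 'arg', ' |', '', '\n']
#
#   list(RowSplitter().split('    Log    Hello'))
#   # -> ['', '', '    ', 'Log', '    ', 'Hello', '\n']
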
class Tokenizer:
    _tokens = None

    def __init__(self):
        self._index = 0

    def tokenize(self, value):
        values_and_tokens = self._tokenize(value, self._index)
        self._index += 1
        if isinstance(values_and_tokens, type(Token)):
            values_and_tokens = [(value, values_and_tokens)]
        return values_and_tokens

    def _tokenize(self, value, index):
        index = min(index, len(self._tokens) - 1)
        return self._tokens[index]

    def _is_assign(self, value):
        if value.endswith('='):
            value = value[:-1].strip()
        var = VariableSplitter(value, identifiers='$@&')
        return var.start == 0 and var.end == len(value)


class Comment(Tokenizer):
    _tokens = (COMMENT,)


class Setting(Tokenizer):
    _tokens = (SETTING, ARGUMENT)
    _keyword_settings = ('suitesetup', 'suiteprecondition', 'suiteteardown',
                         'suitepostcondition', 'testsetup', 'tasksetup',
                         'testprecondition', 'testteardown', 'taskteardown',
                         'testpostcondition', 'testtemplate', 'tasktemplate')
    _import_settings = ('library', 'resource', 'variables')
    _other_settings = ('documentation', 'metadata', 'forcetags', 'defaulttags',
                       'testtimeout', 'tasktimeout')
    _custom_tokenizer = None

    def __init__(self, template_setter=None):
        Tokenizer.__init__(self)
        self._template_setter = template_setter

    def _tokenize(self, value, index):
        if index == 1 and self._template_setter:
            self._template_setter(value)
        if index == 0:
            normalized = normalize(value)
            if normalized in self._keyword_settings:
                self._custom_tokenizer = KeywordCall(support_assign=False)
            elif normalized in self._import_settings:
                self._custom_tokenizer = ImportSetting()
            elif normalized not in self._other_settings:
                return ERROR
        elif self._custom_tokenizer:
            return self._custom_tokenizer.tokenize(value)
        return Tokenizer._tokenize(self, value, index)


class ImportSetting(Tokenizer):
    _tokens = (IMPORT, ARGUMENT)


class TestCaseSetting(Setting):
    _keyword_settings = ('setup', 'precondition', 'teardown', 'postcondition',
                         'template')
    _import_settings = ()
    _other_settings = ('documentation', 'tags', 'timeout')

    def _tokenize(self, value, index):
        if index == 0:
            type = Setting._tokenize(self, value[1:-1], index)
            return [('[', SYNTAX), (value[1:-1], type), (']', SYNTAX)]
        return Setting._tokenize(self, value, index)


class KeywordSetting(TestCaseSetting):
    _keyword_settings = ('teardown',)
    _other_settings = ('documentation', 'arguments', 'return', 'timeout',
                       'tags')


class Variable(Tokenizer):
    _tokens = (SYNTAX, ARGUMENT)

    def _tokenize(self, value, index):
        if index == 0 and not self._is_assign(value):
            return ERROR
        return Tokenizer._tokenize(self, value, index)


class KeywordCall(Tokenizer):
    _tokens = (KEYWORD, ARGUMENT)

    def __init__(self, support_assign=True):
        Tokenizer.__init__(self)
        self._keyword_found = not support_assign
        self._assigns = 0

    def _tokenize(self, value, index):
        if not self._keyword_found and self._is_assign(value):
            self._assigns += 1
            return SYNTAX  # VariableTokenizer tokenizes this later.
        if self._keyword_found:
            return Tokenizer._tokenize(self, value, index - self._assigns)
        self._keyword_found = True
        return GherkinTokenizer().tokenize(value, KEYWORD)


class GherkinTokenizer:
    _gherkin_prefix = re.compile('^(Given|When|Then|And) ', re.IGNORECASE)

    def tokenize(self, value, token):
        match = self._gherkin_prefix.match(value)
        if not match:
            return [(value, token)]
        end = match.end()
        return [(value[:end], GHERKIN), (value[end:], token)]
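
# A worked illustration (assumption: derived from the regex above; this
# comment is not part of the upstream module). A Gherkin prefix is
# highlighted separately from the keyword it precedes:
#
#   GherkinTokenizer().tokenize('Given user logs in', KEYWORD)
#   # -> [('Given ', Token.Generic.Emph),
#   #     ('user logs in', Token.Name.Function)]
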
class TemplatedKeywordCall(Tokenizer):
    _tokens = (ARGUMENT,)


class ForLoop(Tokenizer):

    def __init__(self):
        Tokenizer.__init__(self)
        self._in_arguments = False

    def _tokenize(self, value, index):
        token = ARGUMENT if self._in_arguments else SYNTAX
        if value.upper() in ('IN', 'IN RANGE'):
            self._in_arguments = True
        return token


class _Table:
    _tokenizer_class = None

    def __init__(self, prev_tokenizer=None):
        self._tokenizer = self._tokenizer_class()
        self._prev_tokenizer = prev_tokenizer
        self._prev_values_on_row = []

    def tokenize(self, value, index):
        if self._continues(value, index):
            self._tokenizer = self._prev_tokenizer
            yield value, SYNTAX
        else:
            yield from self._tokenize(value, index)
        self._prev_values_on_row.append(value)

    def _continues(self, value, index):
        return value == '...' and all(self._is_empty(t)
                                      for t in self._prev_values_on_row)

    def _is_empty(self, value):
        return value in ('', '\\')

    def _tokenize(self, value, index):
        return self._tokenizer.tokenize(value)

    def end_row(self):
        self.__init__(prev_tokenizer=self._tokenizer)


class UnknownTable(_Table):
    _tokenizer_class = Comment

    def _continues(self, value, index):
        return False


class VariableTable(_Table):
    _tokenizer_class = Variable


class SettingTable(_Table):
    _tokenizer_class = Setting

    def __init__(self, template_setter, prev_tokenizer=None):
        _Table.__init__(self, prev_tokenizer)
        self._template_setter = template_setter

    def _tokenize(self, value, index):
        if index == 0 and normalize(value) == 'testtemplate':
            self._tokenizer = Setting(self._template_setter)
        return _Table._tokenize(self, value, index)

    def end_row(self):
        self.__init__(self._template_setter, prev_tokenizer=self._tokenizer)


class TestCaseTable(_Table):
    _setting_class = TestCaseSetting
    _test_template = None
    _default_template = None

    @property
    def _tokenizer_class(self):
        if self._test_template or (self._default_template and
                                   self._test_template is not False):
            return TemplatedKeywordCall
        return KeywordCall

    def _continues(self, value, index):
        return index > 0 and _Table._continues(self, value, index)

    def _tokenize(self, value, index):
        if index == 0:
            if value:
                self._test_template = None
            return GherkinTokenizer().tokenize(value, TC_KW_NAME)
        if index == 1 and self._is_setting(value):
            if self._is_template(value):
                self._test_template = False
                self._tokenizer = self._setting_class(self.set_test_template)
            else:
                self._tokenizer = self._setting_class()
        if index == 1 and self._is_for_loop(value):
            self._tokenizer = ForLoop()
        if index == 1 and self._is_empty(value):
            return [(value, SYNTAX)]
        return _Table._tokenize(self, value, index)

    def _is_setting(self, value):
        return value.startswith('[') and value.endswith(']')

    def _is_template(self, value):
        return normalize(value) == '[template]'

    def _is_for_loop(self, value):
        return value.startswith(':') and normalize(value, remove=':') == 'for'

    def set_test_template(self, template):
        self._test_template = self._is_template_set(template)

    def set_default_template(self, template):
        self._default_template = self._is_template_set(template)

    def _is_template_set(self, template):
        return normalize(template) not in ('', '\\', 'none', '${empty}')


class KeywordTable(TestCaseTable):
    _tokenizer_class = KeywordCall
    _setting_class = KeywordSetting

    def _is_template(self, value):
        return False


# Following code copied directly from Robot Framework 2.7.5.
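
# A worked illustration of the splitter below (assumption: values derived by
# tracing the code; this comment is not part of the upstream module). For a
# list variable with an item access, the base and the index are captured
# separately:
#
#   var = VariableSplitter('@{items}[0]', identifiers='$@%&')
#   # var.identifier == '@', var.base == 'items', var.index == '0',
#   # var.start == 0, var.end == 11
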

class VariableSplitter:

    def __init__(self, string, identifiers):
        self.identifier = None
        self.base = None
        self.index = None
        self.start = -1
        self.end = -1
        self._identifiers = identifiers
        self._may_have_internal_variables = False
        try:
            self._split(string)
        except ValueError:
            pass
        else:
            self._finalize()

    def get_replaced_base(self, variables):
        if self._may_have_internal_variables:
            return variables.replace_string(self.base)
        return self.base

    def _finalize(self):
        self.identifier = self._variable_chars[0]
        self.base = ''.join(self._variable_chars[2:-1])
        self.end = self.start + len(self._variable_chars)
        if self._has_list_or_dict_variable_index():
            self.index = ''.join(
                self._list_and_dict_variable_index_chars[1:-1])
            self.end += len(self._list_and_dict_variable_index_chars)

    def _has_list_or_dict_variable_index(self):
        return self._list_and_dict_variable_index_chars \
            and self._list_and_dict_variable_index_chars[-1] == ']'

    def _split(self, string):
        start_index, max_index = self._find_variable(string)
        self.start = start_index
        self._open_curly = 1
        self._state = self._variable_state
        self._variable_chars = [string[start_index], '{']
        self._list_and_dict_variable_index_chars = []
        self._string = string
        start_index += 2
        for index, char in enumerate(string[start_index:]):
            index += start_index  # Giving start to enumerate only in Py 2.6+
            try:
                self._state(char, index)
            except StopIteration:
                return
            if index == max_index and not self._scanning_list_variable_index():
                return

    def _scanning_list_variable_index(self):
        return self._state in [self._waiting_list_variable_index_state,
                               self._list_variable_index_state]

    def _find_variable(self, string):
        max_end_index = string.rfind('}')
        if max_end_index == -1:
            raise ValueError('No variable end found')
        if self._is_escaped(string, max_end_index):
            return self._find_variable(string[:max_end_index])
        start_index = self._find_start_index(string, 1, max_end_index)
        if start_index == -1:
            raise ValueError('No variable start found')
        return start_index, max_end_index

    def _find_start_index(self, string, start, end):
        index = string.find('{', start, end) - 1
        if index < 0:
            return -1
        if self._start_index_is_ok(string, index):
            return index
        return self._find_start_index(string, index + 2, end)

    def _start_index_is_ok(self, string, index):
        return string[index] in self._identifiers \
            and not self._is_escaped(string, index)

    def _is_escaped(self, string, index):
        escaped = False
        while index > 0 and string[index - 1] == '\\':
            index -= 1
            escaped = not escaped
        return escaped

    def _variable_state(self, char, index):
        self._variable_chars.append(char)
        if char == '}' and not self._is_escaped(self._string, index):
            self._open_curly -= 1
            if self._open_curly == 0:
                if not self._is_list_or_dict_variable():
                    raise StopIteration
                self._state = self._waiting_list_variable_index_state
        elif char in self._identifiers:
            self._state = self._internal_variable_start_state

    def _is_list_or_dict_variable(self):
        return self._variable_chars[0] in ('@', '&')

    def _internal_variable_start_state(self, char, index):
        self._state = self._variable_state
        if char == '{':
            self._variable_chars.append(char)
            self._open_curly += 1
            self._may_have_internal_variables = True
        else:
            self._variable_state(char, index)

    def _waiting_list_variable_index_state(self, char, index):
        if char != '[':
            raise StopIteration
        self._list_and_dict_variable_index_chars.append(char)
        self._state = self._list_variable_index_state

    def _list_variable_index_state(self, char, index):
        self._list_and_dict_variable_index_chars.append(char)
        if char == ']':
            raise StopIteration
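
# A minimal self-test sketch (assumption: this block is not part of the
# upstream module; the sample test data is invented for illustration). It
# runs a small space-separated Robot Framework snippet through the lexer and
# prints the raw token stream.
if __name__ == '__main__':
    SAMPLE = (
        '*** Test Cases ***\n'
        'Example Test\n'
        '    Log    Hello, ${name}!\n'
    )
    lexer = RobotFrameworkLexer()
    for offset, token, value in lexer.get_tokens_unprocessed(SAMPLE):
        print(offset, token, repr(value))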