#
# -*- coding: utf-8 -*-
"""Statement parsing classes for cmd2"""

import re
import shlex
from typing import (
    Any,
    Dict,
    Iterable,
    List,
    Optional,
    Tuple,
    Union,
)

import attr

from . import (
    constants,
    utils,
)
from .exceptions import (
    Cmd2ShlexError,
)


def shlex_split(str_to_split: str) -> List[str]:
    """
    A wrapper around shlex.split() that uses cmd2's preferred arguments.
    This allows other classes to easily call split() the same way StatementParser does.

    :param str_to_split: the string being split
    :return: A list of tokens
    """
    return shlex.split(str_to_split, comments=False, posix=False)


@attr.s(auto_attribs=True, frozen=True)
class MacroArg:
    """
    Information used to replace or unescape arguments in a macro value when the macro is resolved
    Normal argument syntax:    {5}
    Escaped argument syntax:  {{5}}
    """

    # The starting index of this argument in the macro value
    start_index: int = attr.ib(validator=attr.validators.instance_of(int))

    # The number string that appears between the braces
    # This is a string instead of an int because we support unicode digits and must be able
    # to reproduce this string later
    number_str: str = attr.ib(validator=attr.validators.instance_of(str))

    # Tells if this argument is escaped and therefore needs to be unescaped
    is_escaped: bool = attr.ib(validator=attr.validators.instance_of(bool))

    # Pattern used to find normal argument
    # Digits surrounded by exactly 1 brace on a side and 1 or more braces on the opposite side
    # Match strings like: {5}, {{{{{4}, {2}}}}}
    macro_normal_arg_pattern = re.compile(r'(?<!{){\d+}|{\d+}(?!})')

    # Pattern used to find escaped arguments
    # Digits surrounded by 2 or more braces on both sides
    # Match strings like: {{5}}, {{{{{4}}, {{2}}}}}
    macro_escaped_arg_pattern = re.compile(r'{{2}\d+}{2}')

    # Finds a string of digits
    digit_pattern = re.compile(r'\d+')


@attr.s(auto_attribs=True, frozen=True)
class Macro:
    """Defines a cmd2 macro"""

    # Name of the macro
    name: str = attr.ib(validator=attr.validators.instance_of(str))

    # The string the macro resolves to
    value: str = attr.ib(validator=attr.validators.instance_of(str))

    # The minimum number of args the user has to pass to this macro
    minimum_arg_count: int = attr.ib(validator=attr.validators.instance_of(int))

    # Used to fill in argument placeholders in the macro
    arg_list: List[MacroArg] = attr.ib(default=attr.Factory(list), validator=attr.validators.instance_of(list))


@attr.s(auto_attribs=True, frozen=True)
class Statement(str):  # type: ignore[override]
    """String subclass with additional attributes to store the results of parsing.

    The ``cmd`` module in the standard library passes commands around as a
    string. To retain backwards compatibility, ``cmd2`` does the same. However,
    we need a place to capture the additional output of the command parsing, so
    we add our own attributes to this subclass.

    Instances of this class should not be created by anything other than the
    :meth:`cmd2.parsing.StatementParser.parse` method, nor should any of the
    attributes be modified once the object is created.

    The string portion of the class contains the arguments, but not the
    command, nor the output redirection clauses.

    Tips:

    1. `argparse <https://docs.python.org/3/library/argparse.html>`_ is your
       friend for anything complex. ``cmd2`` has the decorator
       (:func:`~cmd2.decorators.with_argparser`) which you can
       use to make your command method receive a namespace of parsed arguments,
       whether positional or denoted with switches.

    2. For commands with simple positional arguments, use
       :attr:`~cmd2.Statement.args` or :attr:`~cmd2.Statement.arg_list`

    3. If you don't want to have to worry about quoted arguments, see
       :attr:`argv` for a trick which strips quotes off for you.
    """

    # the arguments, but not the command, nor the output redirection clauses.
    args: str = attr.ib(default='', validator=attr.validators.instance_of(str))

    # string containing exactly what was input by the user
    raw: str = attr.ib(default='', validator=attr.validators.instance_of(str))

    # the command, i.e. the first whitespace delimited word
    command: str = attr.ib(default='', validator=attr.validators.instance_of(str))

    # list of arguments to the command, not including any output redirection or terminators; quoted args remain quoted
    arg_list: List[str] = attr.ib(default=attr.Factory(list), validator=attr.validators.instance_of(list))

    # if the command is a multiline command, the name of the command, otherwise empty
    multiline_command: str = attr.ib(default='', validator=attr.validators.instance_of(str))

    # the character which terminated the multiline command, if there was one
    terminator: str = attr.ib(default='', validator=attr.validators.instance_of(str))

    # characters appearing after the terminator but before output redirection, if any
    suffix: str = attr.ib(default='', validator=attr.validators.instance_of(str))

    # if output was piped to a shell command, the shell command as a string
    pipe_to: str = attr.ib(default='', validator=attr.validators.instance_of(str))

    # if output was redirected, the redirection token, i.e. '>>'
    output: str = attr.ib(default='', validator=attr.validators.instance_of(str))

    # if output was redirected, the destination file token (quotes preserved)
    output_to: str = attr.ib(default='', validator=attr.validators.instance_of(str))

    # Used in JSON dictionaries
    _args_field = 'args'

    def __new__(cls, value: object, *pos_args: Any, **kw_args: Any) -> 'Statement':
        """Create a new instance of Statement.

        We must override __new__ because we are subclassing `str` which is
        immutable and takes a different number of arguments than Statement.

        NOTE:  attrs takes care of initializing other members in the __init__ it
        generates.

        :param value: the value used for the string portion of the Statement
        :param pos_args: ignored here; accepted so the signature matches the generated __init__
        :param kw_args: ignored here; accepted so the signature matches the generated __init__
        :return: the new Statement instance
        """
        stmt = super().__new__(cls, value)
        return stmt

    @property
    def command_and_args(self) -> str:
        """Combine command and args with a space separating them.

        Quoted arguments remain quoted. Output redirection and piping are
        excluded, as are any command terminators.

        :return: ``command``, followed by a space and ``args`` when there are
                 args; an empty string when there is no command
        """
        if self.command and self.args:
            rtn = f'{self.command} {self.args}'
        elif self.command:
            # there were no arguments to the command
            rtn = self.command
        else:
            rtn = ''
        return rtn

    @property
    def post_command(self) -> str:
        """A string containing any ending terminator, suffix, and redirection chars"""
        rtn = ''
        if self.terminator:
            rtn += self.terminator

        if self.suffix:
            rtn += ' ' + self.suffix

        if self.pipe_to:
            rtn += ' | ' + self.pipe_to

        if self.output:
            rtn += ' ' + self.output
            # the destination is only meaningful when there is a redirection token
            if self.output_to:
                rtn += ' ' + self.output_to

        return rtn

    @property
    def expanded_command_line(self) -> str:
        """Concatenate :meth:`~cmd2.Statement.command_and_args`
        and :meth:`~cmd2.Statement.post_command`"""
        return self.command_and_args + self.post_command

    @property
    def argv(self) -> List[str]:
        """a list of arguments a-la ``sys.argv``.

        The first element of the list is the command after shortcut and macro
        expansion. Subsequent elements of the list contain any additional
        arguments, with quotes removed, just like bash would. This is very
        useful if you are going to use ``argparse.parse_args()``.

        If you want to strip quotes from the input, you can use ``argv[1:]``.

        :return: the quote-stripped command followed by the quote-stripped
                 entries of ``arg_list``, or an empty list when there is no command
        """
        if not self.command:
            return []

        rtn = [utils.strip_quotes(self.command)]
        rtn.extend(utils.strip_quotes(cur_token) for cur_token in self.arg_list)
        return rtn

    def to_dict(self) -> Dict[str, Any]:
        """Utility method to convert this Statement into a dictionary for use in persistent JSON history files

        :return: a shallow copy of this instance's attribute dictionary
        """
        return self.__dict__.copy()

    @staticmethod
    def from_dict(source_dict: Dict[str, Any]) -> 'Statement':
        """
        Utility method to restore a Statement from a dictionary

        :param source_dict: source data dictionary (generated using to_dict())
        :return: Statement object
        :raises KeyError: if source_dict is missing required elements
        """
        # value needs to be passed as a positional argument. It corresponds to the args field.
        try:
            value = source_dict[Statement._args_field]
        except KeyError as ex:
            # The new message already names the missing key, so suppress the redundant original traceback
            raise KeyError(f"Statement dictionary is missing {ex} field") from None

        # Pass the rest as kwargs (minus args)
        kwargs = source_dict.copy()
        del kwargs[Statement._args_field]

        return Statement(value, **kwargs)


class StatementParser:
    """Parse user input as a string into discrete command components."""

    def __init__(
        self,
        terminators: Optional[Iterable[str]] = None,
        multiline_commands: Optional[Iterable[str]] = None,
        aliases: Optional[Dict[str, str]] = None,
        shortcuts: Optional[Dict[str, str]] = None,
    ) -> None:
        """Initialize an instance of StatementParser.

        The following will get converted to an immutable tuple before storing internally:
        terminators, multiline commands, and shortcuts.

        :param terminators: iterable containing strings which should terminate commands
        :param multiline_commands: iterable containing the names of commands that accept multiline input
        :param aliases: dictionary containing aliases
        :param shortcuts: dictionary containing shortcuts
        """
        self.terminators: Tuple[str, ...]
        if terminators is None:
            self.terminators = (constants.MULTILINE_TERMINATOR,)
        else:
            self.terminators = tuple(terminators)
        self.multiline_commands: Tuple[str, ...] = tuple(multiline_commands) if multiline_commands is not None else ()
        # NOTE: the caller's dictionary is stored as-is (not copied)
        self.aliases: Dict[str, str] = aliases if aliases is not None else {}

        if shortcuts is None:
            shortcuts = constants.DEFAULT_SHORTCUTS

        # Sort the shortcuts in descending order by name length because the longest match
        # should take precedence. (e.g., @@file should match '@@' and not '@'.)
        self.shortcuts = tuple(sorted(shortcuts.items(), key=lambda x: len(x[0]), reverse=True))

        # commands have to be a word, so make a regular expression
        # that matches the first word in the line. This regex has three
        # parts:
        #     - the '\A\s*' matches the beginning of the string (even
        #       if contains multiple lines) and gobbles up any leading
        #       whitespace
        #     - the first parenthesis enclosed group matches zero
        #       or more non-whitespace characters with a non-greedy match
        #       (that's what the '*?' part does). The non-greedy match
        #       ensures that this first group doesn't include anything
        #       matched by the second group
        #     - the second parenthesis group must be dynamically created
        #       because it needs to match either whitespace, a quote,
        #       something in REDIRECTION_CHARS, one of the terminators,
        #       or the end of the string (\Z matches the end of the
        #       string even if it contains multiple lines)
        #
        invalid_command_chars = []
        invalid_command_chars.extend(constants.QUOTES)
        invalid_command_chars.extend(constants.REDIRECTION_CHARS)
        invalid_command_chars.extend(self.terminators)
        # escape each item so it will for sure get treated as a literal
        second_group_items = [re.escape(x) for x in invalid_command_chars]
        # add the whitespace and end of string, not escaped because they
        # are not literals
        second_group_items.extend([r'\s', r'\Z'])
        # join them up with a pipe
        second_group = '|'.join(second_group_items)
        # build the regular expression
        expr = rf'\A\s*(\S*?)({second_group})'
        self._command_pattern = re.compile(expr)

    def is_valid_command(self, word: str, *, is_subcommand: bool = False) -> Tuple[bool, str]:
        """Determine whether a word is a valid name for a command.

        Commands cannot include redirection characters, whitespace,
        or termination characters. They also cannot start with a
        shortcut.

        :param word: the word to check as a command
        :param is_subcommand: Flag whether this command name is a subcommand name
        :return: a tuple of a boolean and an error string

        If word is not a valid command, return ``False`` and an error string
        suitable for inclusion in an error message of your choice::

            checkit = '>'
            valid, errmsg = statement_parser.is_valid_command(checkit)
            if not valid:
                errmsg = f"alias: {errmsg}"
        """
        valid = False

        if not isinstance(word, str):
            return False, f'must be a string. Received {str(type(word))} instead'  # type: ignore[unreachable]

        if not word:
            return False, 'cannot be an empty string'

        if word.startswith(constants.COMMENT_CHAR):
            return False, 'cannot start with the comment character'

        # The shortcut restriction is skipped for subcommand names
        if not is_subcommand:
            for (shortcut, _) in self.shortcuts:
                if word.startswith(shortcut):
                    # Build an error string with all shortcuts listed
                    errmsg = 'cannot start with a shortcut: '
                    errmsg += ', '.join(shortcut for (shortcut, _) in self.shortcuts)
                    return False, errmsg

        # Assume failure and prepare the message; it is cleared below if the word is valid
        errmsg = 'cannot contain: whitespace, quotes, '
        errchars = []
        errchars.extend(constants.REDIRECTION_CHARS)
        errchars.extend(self.terminators)
        errmsg += ', '.join([shlex.quote(x) for x in errchars])

        # The word is valid only if the command pattern consumes all of it as the command
        match = self._command_pattern.search(word)
        if match and word == match.group(1):
            valid = True
            errmsg = ''
        return valid, errmsg

    def tokenize(self, line: str) -> List[str]:
        """
        Lex a string into a list of tokens. Shortcuts and aliases are expanded and
        comments are removed.

        :param line: the command line being lexed
        :return: A list of tokens
        :raises: Cmd2ShlexError if a shlex error occurs (e.g. No closing quotation)
        """

        # expand shortcuts and aliases
        line = self._expand(line)

        # check if this line is a comment
        if line.lstrip().startswith(constants.COMMENT_CHAR):
            return []

        # split on whitespace
        try:
            tokens = shlex_split(line)
        except ValueError as ex:
            # chain explicitly so the underlying shlex error is kept as the cause
            raise Cmd2ShlexError(ex) from ex

        # custom lexing
        tokens = self.split_on_punctuation(tokens)
        return tokens

    def parse(self, line: str) -> Statement:
        """
        Tokenize the input and parse it into a :class:`~cmd2.Statement` object,
        stripping comments, expanding aliases and shortcuts, and extracting output
        redirection directives.

        :param line: the command line being parsed
        :return: a new :class:`~cmd2.Statement` object
        :raises: Cmd2ShlexError if a shlex error occurs (e.g. No closing quotation)
        """

        # handle the special case/hardcoded terminator of a blank line
        # we have to do this before we tokenize because tokenizing
        # destroys all unquoted whitespace in the input
        terminator = ''
        if line[-1:] == constants.LINE_FEED:
            terminator = constants.LINE_FEED

        command = ''
        args = ''
        arg_list = []

        # lex the input into a list of tokens
        tokens = self.tokenize(line)

        # of the valid terminators, find the first one to occur in the input
        terminator_pos = len(tokens) + 1
        for pos, cur_token in enumerate(tokens):
            for test_terminator in self.terminators:
                if cur_token.startswith(test_terminator):
                    terminator_pos = pos
                    terminator = test_terminator
                    # break the inner loop, and we want to break the
                    # outer loop too
                    break
            else:
                # this else clause is only run if the inner loop
                # didn't execute a break. If it didn't, then
                # continue to the next iteration of the outer loop
                continue
            # inner loop was broken, break the outer
            break

        if terminator:
            if terminator == constants.LINE_FEED:
                # a line feed terminator has no token of its own, so point past the end
                terminator_pos = len(tokens) + 1

            # everything before the first terminator is the command and the args
            (command, args) = self._command_and_args(tokens[:terminator_pos])
            arg_list = tokens[1:terminator_pos]
            # we will set the suffix later
            # remove all the tokens before and including the terminator
            tokens = tokens[terminator_pos + 1 :]
        else:
            (testcommand, testargs) = self._command_and_args(tokens)
            if testcommand in self.multiline_commands:
                # no terminator on this line but we have a multiline command
                # everything else on the line is part of the args
                # because redirectors can only be after a terminator
                command = testcommand
                args = testargs
                arg_list = tokens[1:]
                tokens = []

        pipe_to = ''
        output = ''
        output_to = ''

        # Find which redirector character appears first in the command
        # (a redirector which is not present gets an index of len(tokens))
        try:
            pipe_index = tokens.index(constants.REDIRECTION_PIPE)
        except ValueError:
            pipe_index = len(tokens)

        try:
            redir_index = tokens.index(constants.REDIRECTION_OUTPUT)
        except ValueError:
            redir_index = len(tokens)

        try:
            append_index = tokens.index(constants.REDIRECTION_APPEND)
        except ValueError:
            append_index = len(tokens)

        # Check if output should be piped to a shell command
        if pipe_index < redir_index and pipe_index < append_index:

            # Get the tokens for the pipe command and expand ~ where needed
            pipe_to_tokens = tokens[pipe_index + 1 :]
            utils.expand_user_in_tokens(pipe_to_tokens)

            # Build the pipe command line string
            pipe_to = ' '.join(pipe_to_tokens)

            # remove all the tokens after the pipe
            tokens = tokens[:pipe_index]

        # Check for output redirect/append
        # (the indexes are only equal when neither redirector is present)
        elif redir_index != append_index:
            if redir_index < append_index:
                output = constants.REDIRECTION_OUTPUT
                output_index = redir_index
            else:
                output = constants.REDIRECTION_APPEND
                output_index = append_index

            # Check if we are redirecting to a file
            if len(tokens) > output_index + 1:
                unquoted_path = utils.strip_quotes(tokens[output_index + 1])
                if unquoted_path:
                    output_to = utils.expand_user(tokens[output_index + 1])

            # remove all the tokens after the output redirect
            tokens = tokens[:output_index]

        if terminator:
            # whatever is left is the suffix
            suffix = ' '.join(tokens)
        else:
            # no terminator, so whatever is left is the command and the args
            suffix = ''
            if not command:
                # command could already have been set, if so, don't set it again
                (command, args) = self._command_and_args(tokens)
                arg_list = tokens[1:]

        # set multiline
        if command in self.multiline_commands:
            multiline_command = command
        else:
            multiline_command = ''

        # build the statement
        statement = Statement(
            args,
            raw=line,
            command=command,
            arg_list=arg_list,
            multiline_command=multiline_command,
            terminator=terminator,
            suffix=suffix,
            pipe_to=pipe_to,
            output=output,
            output_to=output_to,
        )
        return statement

    def parse_command_only(self, rawinput: str) -> Statement:
        """Partially parse input into a :class:`~cmd2.Statement` object.

        The command is identified, and shortcuts and aliases are expanded.
        Multiline commands are identified, but terminators and output
        redirection are not parsed.

        This method is used by tab completion code and therefore must not
        generate an exception if there are unclosed quotes.

        The :class:`~cmd2.Statement` object returned by this method can at most
        contain values in the following attributes:
        :attr:`~cmd2.Statement.args`, :attr:`~cmd2.Statement.raw`,
        :attr:`~cmd2.Statement.command`,
        :attr:`~cmd2.Statement.multiline_command`

        :attr:`~cmd2.Statement.args` will include all output redirection
        clauses and command terminators.

        Different from :meth:`~cmd2.parsing.StatementParser.parse` this method
        does not remove redundant whitespace within args. However, it does
        ensure args has no leading or trailing whitespace.

        :param rawinput: the command line as entered by the user
        :return: a new :class:`~cmd2.Statement` object
        """
        # expand shortcuts and aliases
        line = self._expand(rawinput)

        command = ''
        args = ''
        match = self._command_pattern.search(line)
        if match:
            # we got a match, extract the command
            command = match.group(1)

            # take everything from the end of the first match group to
            # the end of the line as the arguments (stripping leading
            # and trailing spaces)
            args = line[match.end(1) :].strip()
            # if the command is empty that means the input was either empty
            # or something weird like '>'. args should be empty if we couldn't
            # parse a command
            if not command or not args:
                args = ''

        # set multiline
        if command in self.multiline_commands:
            multiline_command = command
        else:
            multiline_command = ''

        # build the statement
        statement = Statement(args, raw=rawinput, command=command, multiline_command=multiline_command)
        return statement

    def get_command_arg_list(
        self, command_name: str, to_parse: Union[Statement, str], preserve_quotes: bool
    ) -> Tuple[Statement, List[str]]:
        """
        Convenience method used by the argument parsing decorators.

        Retrieves just the arguments being passed to their ``do_*`` methods as a list.

        :param command_name: name of the command being run
        :param to_parse: what is being passed to the ``do_*`` method. It can be one of two types:

                             1. An already parsed :class:`~cmd2.Statement`
                             2. An argument string in cases where a ``do_*`` method is
                                explicitly called. Calling ``do_help('alias create')`` would
                                cause ``to_parse`` to be 'alias create'.

                                In this case, the string will be converted to a
                                :class:`~cmd2.Statement` and returned along with
                                the argument list.

        :param preserve_quotes: if ``True``, then quotes will not be stripped from
                                the arguments
        :return: A tuple containing the :class:`~cmd2.Statement` and a list of
                 strings representing the arguments
        """
        # Check if to_parse needs to be converted to a Statement
        if not isinstance(to_parse, Statement):
            to_parse = self.parse(command_name + ' ' + to_parse)

        if preserve_quotes:
            return to_parse, to_parse.arg_list
        else:
            return to_parse, to_parse.argv[1:]

    def _expand(self, line: str) -> str:
        """Expand aliases and shortcuts

        Aliases are expanded first, repeatedly, with each alias being applied
        at most once. Afterwards the first matching shortcut (they are stored
        longest name first) is expanded.

        :param line: the command line to expand
        :return: the line with aliases and shortcuts expanded
        """

        # Make a copy of aliases so we can keep track of what aliases have been resolved to avoid an infinite loop
        remaining_aliases = list(self.aliases)
        keep_expanding = bool(remaining_aliases)

        while keep_expanding:
            keep_expanding = False

            # apply our regex to line
            match = self._command_pattern.search(line)
            if match:
                # we got a match, extract the command
                command = match.group(1)

                # Check if this command matches an alias that wasn't already processed
                if command in remaining_aliases:
                    # rebuild line with the expanded alias
                    line = self.aliases[command] + match.group(2) + line[match.end(2) :]
                    remaining_aliases.remove(command)
                    keep_expanding = bool(remaining_aliases)

        # expand shortcuts
        for (shortcut, expansion) in self.shortcuts:
            if line.startswith(shortcut):
                # If the next character after the shortcut isn't a space, then insert one
                shortcut_len = len(shortcut)
                if len(line) == shortcut_len or line[shortcut_len] != ' ':
                    expansion += ' '

                # Expand the shortcut
                line = line.replace(shortcut, expansion, 1)
                break
        return line

    @staticmethod
    def _command_and_args(tokens: List[str]) -> Tuple[str, str]:
        """Given a list of tokens, return a tuple of the command
        and the args as a string.

        :param tokens: the tokens to split into a command and its arguments
        :return: the first token (or '' if there are no tokens) and the
                 remaining tokens joined by single spaces (or '' if there are none)
        """
        command = ''
        args = ''

        if tokens:
            command = tokens[0]

        if len(tokens) > 1:
            args = ' '.join(tokens[1:])

        return command, args

    def split_on_punctuation(self, tokens: List[str]) -> List[str]:
        """Further splits tokens from a command line using punctuation characters.

        Punctuation characters are treated as word breaks when they are in
        unquoted strings. Each run of the same punctuation character is
        treated as a single token.

        :param tokens: the tokens as parsed by shlex
        :return: a new list of tokens, further split using punctuation
        """
        punctuation: List[str] = []
        punctuation.extend(self.terminators)
        punctuation.extend(constants.REDIRECTION_CHARS)

        punctuated_tokens = []

        for cur_initial_token in tokens:

            # Save tokens up to 1 character in length or quoted tokens. No need to parse these.
            if len(cur_initial_token) <= 1 or cur_initial_token[0] in constants.QUOTES:
                punctuated_tokens.append(cur_initial_token)
                continue

            # Iterate over each character in this token
            cur_index = 0
            cur_char = cur_initial_token[cur_index]

            # Keep track of the token we are building
            new_token = ''

            while True:
                if cur_char not in punctuation:

                    # Keep appending to new_token until we hit a punctuation char
                    while cur_char not in punctuation:
                        new_token += cur_char
                        cur_index += 1
                        if cur_index < len(cur_initial_token):
                            cur_char = cur_initial_token[cur_index]
                        else:
                            break

                else:
                    cur_punc = cur_char

                    # Keep appending to new_token until we hit something other than cur_punc
                    while cur_char == cur_punc:
                        new_token += cur_char
                        cur_index += 1
                        if cur_index < len(cur_initial_token):
                            cur_char = cur_initial_token[cur_index]
                        else:
                            break

                # Save the new token
                punctuated_tokens.append(new_token)
                new_token = ''

                # Check if we've viewed all characters
                if cur_index >= len(cur_initial_token):
                    break

        return punctuated_tokens