1# -*- coding: iso-8859-1 -*- 2### -*- coding: utf-8 -*- 3 4# Authors: Eric S. Raymond, 21 Dec 1998 5# Andrew Jewett (jewett.aij at g mail) 6# LICENSE: The PSF license: 7# https://docs.python.org/3/license.html 8# The PSF license is compatible with the GPL license. It is not a copyleft 9# license. It is apparently similar to the BSD and MIT licenses. 10# 11# Contributions: 12# Module and documentation by Eric S. Raymond, 21 Dec 1998 13# Input stacking and error message cleanup added by ESR, March 2000 14# push_source() and pop_source() made explicit by ESR, January 2001. 15# Posix compliance, split(), string arguments, and 16# iterator interface by Gustavo Niemeyer, April 2003. 17# Unicode support hack ("wordterminators") and numerous other hideous 18# ttree-specific hacks added by Andrew Jewett September 2011. 19 20 21"""A lexical analyzer class for simple shell-like syntaxes. 22 This version has been modified slightly to work better with unicode. 23 It was forked from the version of shlex that ships with python 3.2.2. 24 A few minor features and functions have been added. 
-Andrew Jewett 2011 """ 25 26 27import os.path 28import sys 29from collections import deque 30import re 31import fnmatch 32import string 33#import gc 34 35 36try: 37 from cStringIO import StringIO 38except ImportError: 39 try: 40 from StringIO import StringIO 41 except ImportError: 42 from io import StringIO 43 44__all__ = ["TtreeShlex", 45 "split", 46 "LineLex", 47 "SplitQuotedString", 48 "ExtractVarName", 49 "GetVarName", 50 "EscCharStrToChar", 51 "SafelyEncodeString", 52 "RemoveOuterQuotes", 53 "MaxLenStr", 54 "VarNameToRegex", 55 "HasRE", 56 "HasWildcard", 57 "MatchesPattern", 58 "InputError", 59 "ErrorLeader", 60 "SrcLoc", 61 "OSrcLoc", 62 "TextBlock", 63 "VarRef", 64 "VarNPtr", 65 "VarBinding", 66 "SplitTemplate", 67 "SplitTemplateMulti", 68 "TableFromTemplate", 69 "ExtractCatName", 70 #"_TableFromTemplate", 71 #"_DeleteLineFromTemplate", 72 "DeleteLinesWithBadVars", 73 "TemplateLexer"] 74 75 76class TtreeShlex(object): 77 """ A lexical analyzer class for simple shell-like syntaxes. 78 TtreeShlex is a backwards-compatible version of python's standard shlex 79 module. It has the additional member: "self.wordterminators", which 80 overrides the "self.wordchars" member. This enables better handling of 81 unicode characters by allowing a much larger variety of characters to 82 appear in words or tokens parsed by TtreeShlex. 
83 84 """ 85 86 def __init__(self, 87 instream=None, 88 infile=None, 89 posix=False): 90 if isinstance(instream, str): 91 instream = StringIO(instream) 92 if instream is not None: 93 self.instream = instream 94 self.infile = infile 95 else: 96 self.instream = sys.stdin 97 self.infile = None 98 self.posix = posix 99 if posix: 100 self.eof = None 101 else: 102 self.eof = '' 103 self.commenters = '#' 104 self.wordchars = ('abcdfeghijklmnopqrstuvwxyz' 105 'ABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789_') 106 #if self.posix: 107 # self.wordchars += ('��������������������������������' 108 # '������������������������������') 109 110 if self.posix: 111 self.wordchars += ('��������������������������������' 112 '������������������������������') 113 114 self.wordterminators = set([]) 115 self.prev_space_terminator = '' 116 self.whitespace = ' \t\r\f\n' 117 self.whitespace_split = False 118 self.quotes = '\'"' 119 self.escape = '\\' 120 self.escapedquotes = '"' 121 self.operators = '=' #binary numeric operators like +-*/ might be added 122 self.state = ' ' 123 self.pushback = deque() 124 self.lineno = 1 125 self.debug = 0 126 self.token = '' 127 self.filestack = deque() 128 # self.source_triggers 129 # are tokens which allow the seamless insertion of other 130 # files into the file being read. 131 self.source_triggers = set(['source']) 132 self.source_triggers_x = set([]) 133 # self.source_triggers_x is a subset of self.source_triggers. 134 # In this case file inclusion is exclusive. 135 # In other words, the file is only included 136 # if it has not been included already. It does this 137 # by checking if one of these tokens has been encountered. 
138 self.source_files_restricted = set([]) 139 self.include_path = [] 140 if 'TTREE_PATH' in os.environ: 141 include_path_list = os.environ['TTREE_PATH'].split(':') 142 self.include_path += [d for d in include_path_list if len(d) > 0] 143 if self.debug: 144 sys.stderr.write('TtreeShlex: reading from %s, line %d' 145 % (self.instream, self.lineno)) 146 self.end_encountered = False 147 148 @staticmethod 149 def _belongs_to(char, include_chars, exclude_chars): 150 if ((not exclude_chars) or (len(exclude_chars)==0)): 151 return char in include_chars 152 else: 153 return char not in exclude_chars 154 155 def push_raw_text(self, text): 156 """Push a block of text onto the stack popped by the ReadLine() method. 157 (If multiple lines are present in the text, (which is determined by 158 self.line_terminators), then the text is split into multiple lines 159 and each one of them is pushed onto this stack individually. 160 The "self.lineno" counter is also adjusted, depending on the number 161 of newline characters in "line". 162 Do not strip off the newline, or other line terminators 163 at the end of the text block before using push_raw_text()! 164 165 """ 166 if self.debug >= 1: 167 sys.stderr.write("TtreeShlex: pushing token " + repr(text)) 168 for c in reversed(text): 169 self.pushback.appendleft(c) 170 if c == '\n': 171 self.lineno -= 1 172 if len(text) > 0: 173 self.end_encountered = False 174 175 def push_token(self, text): 176 "Push a token onto the stack popped by the get_token method" 177 self.push_raw_text(text + self.prev_space_terminator) 178 179 def push_source(self, newstream, newfile=None): 180 "Push an input source onto the lexer's input source stack." 
181 if isinstance(newstream, str): 182 newstream = StringIO(newstream) 183 self.filestack.appendleft((self.infile, self.instream, self.lineno)) 184 self.infile = newfile 185 self.instream = newstream 186 self.lineno = 1 187 if self.debug: 188 if newfile is not None: 189 sys.stderr.write('TtreeShlex: pushing to file %s' % (self.infile,)) 190 else: 191 sys.stderr.write('TtreeShlex: pushing to stream %s' % (self.instream,)) 192 193 def pop_source(self): 194 "Pop the input source stack." 195 self.instream.close() 196 (self.infile, self.instream, self.lineno) = self.filestack.popleft() 197 if self.debug: 198 sys.stderr.write('TtreeShlex: popping to %s, line %d' 199 % (self.instream, self.lineno)) 200 self.state = ' ' 201 202 def get_token(self): 203 "Get a token from the input stream (or from stack if it's nonempty)" 204 #### #CHANGING: self.pushback is now a stack of characters, not tokens 205 #### if self.pushback: 206 #### tok = self.pushback.popleft() 207 #### if self.debug >= 1: 208 #### sys.stderr.write("TtreeShlex: popping token " + repr(tok)) 209 #### return tok 210 #### No pushback. Get a token. 211 raw = self.read_token() 212 # Handle inclusions 213 if self.source_triggers is not None: 214 while raw in self.source_triggers: 215 fname = self.read_token() 216 spec = self.sourcehook(fname) 217 if spec: 218 (newfile, newstream) = spec 219 if ((raw not in self.source_triggers_x) or 220 (newfile not in self.source_files_restricted)): 221 self.push_source(newstream, newfile) 222 if raw in self.source_triggers_x: 223 self.source_files_restricted.add(newfile) 224 else: 225 if self.debug >= 1: 226 sys.stderr.write( 227 '\ndebug warning: duplicate attempt to import file:\n \"' + newfile + '\"\n') 228 raw = self.get_token() 229 230 # Maybe we got EOF instead? 
231 while raw == self.eof: 232 if not self.filestack: 233 return self.eof 234 else: 235 self.pop_source() 236 raw = self.get_token() 237 # Neither inclusion nor EOF 238 if self.debug >= 1: 239 if raw != self.eof: 240 sys.stderr.write("TtreeShlex: token=" + repr(raw)) 241 else: 242 sys.stderr.write("TtreeShlex: token=EOF") 243 244 if raw == self.eof: 245 self.end_encountered = True 246 247 return raw 248 249 def read_char(self): 250 if self.pushback: 251 nextchar = self.pushback.popleft() 252 assert((type(nextchar) is str) and (len(nextchar)==1)) 253 else: 254 nextchar = self.instream.read(1) 255 return nextchar 256 257 def read_token(self): 258 self.prev_space_terminator = '' 259 quoted = False 260 escapedstate = ' ' 261 while True: 262 #### self.pushback is now a stack of characters, not tokens 263 nextchar = self.read_char() 264 if nextchar == '\n': 265 self.lineno = self.lineno + 1 266 if self.debug >= 3: 267 sys.stderr.write("TtreeShlex: in state", repr(self.state), 268 "I see character:", repr(nextchar)) 269 if self.state is None: 270 self.token = '' # past end of file 271 break 272 elif self.state == ' ': 273 if not nextchar: 274 self.state = None # end of file 275 break 276 elif nextchar in self.whitespace: 277 if self.debug >= 2: 278 sys.stderr.write("TtreeShlex: I see whitespace in whitespace state") 279 if self.token or (self.posix and quoted): 280 # Keep track of which whitespace 281 # character terminated the token. 
282 self.prev_space_terminator = nextchar 283 break # emit current token 284 else: 285 continue 286 elif nextchar in self.commenters: 287 self.instream.readline() 288 self.lineno = self.lineno + 1 289 elif self.posix and nextchar in self.escape: 290 escapedstate = 'a' 291 self.state = nextchar 292 elif TtreeShlex._belongs_to(nextchar, 293 self.wordchars, 294 self.wordterminators): 295 self.token = nextchar 296 self.state = 'a' 297 elif nextchar in self.quotes: 298 if not self.posix: 299 self.token = nextchar 300 self.state = nextchar 301 elif self.whitespace_split: 302 self.token = nextchar 303 self.state = 'a' 304 else: 305 self.token = nextchar 306 if self.token or (self.posix and quoted): 307 break # emit current token 308 else: 309 continue 310 elif self.state in self.quotes: 311 quoted = True 312 if not nextchar: # end of file 313 if self.debug >= 2: 314 sys.stderr.write("TtreeShlex: I see EOF in quotes state") 315 # XXX what error should be raised here? 316 raise ValueError("Error at or before " + self.error_leader() + "\n" 317 " No closing quotation.") 318 if nextchar == self.state: 319 if not self.posix: 320 self.token = self.token + nextchar 321 self.state = ' ' 322 break 323 else: 324 self.state = 'a' 325 elif self.posix and nextchar in self.escape and \ 326 self.state in self.escapedquotes: 327 escapedstate = self.state 328 self.state = nextchar 329 else: 330 self.token = self.token + nextchar 331 elif self.state in self.escape: 332 if not nextchar: # end of file 333 if self.debug >= 2: 334 sys.stderr.write("TtreeShlex: I see EOF in escape state") 335 # What error should be raised here? 336 raise InputError('File terminated immediately following an escape character.') 337 # In posix shells, only the quote itself or the escape 338 # character may be escaped within quotes. 
339 if escapedstate in self.quotes and \ 340 nextchar != self.state and nextchar != escapedstate: 341 self.token = self.token + self.state 342 self.token = self.token + nextchar 343 self.state = escapedstate 344 elif self.state == 'a': 345 if not nextchar: 346 self.state = None # end of file 347 break 348 elif nextchar in self.whitespace: 349 if self.debug >= 2: 350 sys.stderr.write("TtreeShlex: I see whitespace in word state") 351 self.state = ' ' 352 if self.token or (self.posix and quoted): 353 # Keep track of which whitespace 354 # character terminated the token. 355 self.prev_space_terminator = nextchar 356 break # emit current token 357 else: 358 continue 359 elif nextchar in self.commenters: 360 comment_contents = self.instream.readline() 361 self.lineno = self.lineno + 1 362 if self.posix: 363 self.state = ' ' 364 if self.token or (self.posix and quoted): 365 # Keep track of which character(s) terminated 366 # the token (including whitespace and comments). 367 self.prev_space_terminator = nextchar + comment_contents 368 break # emit current token 369 else: 370 continue 371 elif self.posix and nextchar in self.quotes: 372 self.state = nextchar 373 elif self.posix and nextchar in self.escape: 374 escapedstate = 'a' 375 self.state = nextchar 376 elif (TtreeShlex._belongs_to(nextchar, 377 self.wordchars, 378 self.wordterminators) 379 or (nextchar in self.quotes) 380 or (self.whitespace_split)): 381 self.token = self.token + nextchar 382 else: 383 self.pushback.appendleft(nextchar) 384 if self.debug >= 2: 385 sys.stderr.write("TtreeShlex: I see punctuation in word state") 386 self.state = ' ' 387 if self.token: 388 break # emit current token 389 else: 390 continue 391 result = self.token 392 self.token = '' 393 if self.posix and not quoted and result == '': 394 result = None 395 if self.debug > 1: 396 if result: 397 sys.stderr.write("TtreeShlex: raw token=" + repr(result)) 398 else: 399 sys.stderr.write("TtreeShlex: raw token=EOF") 400 return result 401 402 def 
sourcehook(self, newfile): 403 "Hook called on a filename to be sourced." 404 newfile = RemoveOuterQuotes(newfile) 405 # This implements cpp-like semantics for relative-path inclusion. 406 newfile_full = newfile 407 if isinstance(self.infile, str) and not os.path.isabs(newfile): 408 newfile_full = os.path.join(os.path.dirname(self.infile), newfile) 409 try: 410 f = open(newfile_full, "r") 411 except IOError: 412 # If not found, 413 err = True 414 # ...then check to see if the file is in one of the 415 # directories in the self.include_path list. 416 for d in self.include_path: 417 newfile_full = os.path.join(d, newfile) 418 try: 419 f = open(newfile_full, "r") 420 err = False 421 break 422 except IOError: 423 err = True 424 if err: 425 raise InputError('Error at ' + self.error_leader() + '\n' 426 ' unable to open file \"' + newfile + '\"\n' 427 ' for reading.\n') 428 return (newfile, f) 429 430 def error_leader(self, infile=None, lineno=None): 431 "Emit a C-compiler-like, Emacs-friendly error-message leader." 432 if infile is None: 433 infile = self.infile 434 if lineno is None: 435 lineno = self.lineno 436 return "\"%s\", line %d: " % (infile, lineno) 437 438 def __iter__(self): 439 return self 440 441 def __next__(self): 442 token = self.get_token() 443 if token == self.eof: 444 raise StopIteration 445 return token 446 447 def __bool__(self): 448 return not self.end_encountered 449 450 # For compatibility with python 2.x, I must also define: 451 def __nonzero__(self): 452 return self.__bool__() 453 454 455# The split() function was originally from shlex 456# It is included for backwards compatibility. 
457def split(s, comments=False, posix=True): 458 lex = TtreeShlex(s, posix=posix) 459 lex.whitespace_split = True 460 if not comments: 461 lex.commenters = '' 462 return list(lex) 463 464 465##################### NEW ADDITIONS (may be removed later) ################# 466 467#""" 468# -- linelex.py -- 469# linelex.py defines the LineLex class, which inherits from, and further 470# augments the capabilities of TtreeShlex by making it easier to parse 471# individual lines one at a time. (The original shlex's "source" inclusion 472# ability still works when reading entire lines, and lines are still counted.) 473# 474#""" 475 476#import sys 477 478 479class InputError(Exception): 480 """ A generic exception object containing a string for error reporting. 481 (Raising this exception implies that the caller has provided 482 a faulty input file or argument.) 483 484 """ 485 486 def __init__(self, err_msg): 487 self.err_msg = err_msg 488 489 def __str__(self): 490 return self.err_msg 491 492 def __repr__(self): 493 return str(self) 494 495 496def ErrorLeader(infile, lineno): 497 return '\"' + infile + '\", line ' + str(lineno) 498 499 500class SrcLoc(object): 501 """ SrcLoc is essentially nothing more than a 2-tuple containing the name 502 of a file (str) and a particular line number inside that file (an integer). 
503 504 """ 505 __slots__ = ["infile", "lineno"] 506 507 def __init__(self, infile='', lineno=-1): 508 self.infile = infile 509 self.lineno = lineno 510 511 512def SplitQuotedString(string, 513 quotes='\'\"', 514 delimiters=' \t\r\f\n', 515 escape='\\', 516 comment_char='#', 517 endquote=None): 518 tokens = [] 519 token = '' 520 reading_token = True 521 escaped_state = False 522 quote_state = None 523 for c in string: 524 525 if (c in comment_char) and (not escaped_state) and (quote_state == None): 526 tokens.append(token) 527 return tokens 528 529 elif (c in delimiters) and (not escaped_state) and (quote_state == None): 530 if reading_token: 531 tokens.append(token) 532 token = '' 533 reading_token = False 534 535 elif c in escape: 536 if escaped_state: 537 token += c 538 reading_token = True 539 escaped_state = False 540 else: 541 escaped_state = True 542 # and leave c (the '\' character) out of token 543 elif (c == quote_state) and (not escaped_state) and (quote_state != None): 544 quote_state = None 545 if include_endquote: 546 token += c 547 elif (c in quotes) and (not escaped_state): 548 if quote_state == None: 549 if endquote != None: 550 quote_state = endquote 551 else: 552 quote_state = c 553 # Now deal with strings like 554 # a "b" "c d" efg"h i j" 555 # Assuming quotes='"', then we want this to be split into: 556 # ['a', 'b', 'c d', 'efg"h i j"'] 557 # ...in other words, include the end quote if the token did 558 # not begin with a quote 559 include_endquote = False 560 if token != '': 561 # if this is not the first character in the token 562 include_endquote = True 563 token += c 564 reading_token = True 565 else: 566 if (c == 'n') and (escaped_state == True): 567 c = '\n' 568 elif (c == 't') and (escaped_state == True): 569 c = '\t' 570 elif (c == 'r') and (escaped_state == True): 571 c = '\r' 572 elif (c == 'f') and (escaped_state == True): 573 c = '\f' 574 token += c 575 reading_token = True 576 escaped_state = False 577 578 if len(string) > 0: 579 
tokens.append(token) 580 return tokens 581 582 583 584 585def GetVarName(lex): 586 """ Read a string like 'atom:A ' or '{/atom:A B/C/../D }ABC ' 587 and return ('','atom:A',' ') or ('{','/atom:A B/C/../D ','}ABC') 588 These are 3-tuples containing the portion of the text containing 589 only the variable's name (assumed to be within the text), 590 ...in addition to the text on either side of the variable name. 591 """ 592 escape = '\'' 593 lparen = '{' 594 rparen = '}' 595 if hasattr(lex, 'escape'): 596 escape = lex.escape 597 if hasattr(lex, 'var_open_paren'): 598 lparen = lex.var_open_paren 599 if hasattr(lex, 'var_close_paren'): 600 rparen = lex.var_close_paren 601 602 nextchar = lex.read_char() 603 # Skip past the left-hand side paren '{' 604 paren_depth = 0 605 escaped = False 606 if nextchar == lparen: 607 paren_depth = 1 608 elif nextchar in lex.escape: 609 escaped = True 610 elif (hasattr(lex, 'wordterminators') and 611 (nextchar in lex.wordterminators)): 612 lex.push_raw_text(nextchar) 613 return '' 614 else: 615 lex.push_raw_text(nextchar) 616 # Now read the variable name: 617 var_name_l = [] 618 while lex: 619 nextchar=lex.read_char() 620 if nextchar == '': 621 break 622 elif nextchar == '\n': 623 lex.lineno += 1 624 if paren_depth > 0: 625 var_name_l.append(nextchar) 626 else: 627 lex.push_raw_text(nextchar) 628 break 629 elif escaped: 630 var_name_l.append(nextchar) 631 escaped = False 632 elif nextchar in lex.escape: 633 escaped = True 634 elif nextchar == lparen: 635 paren_depth += 1 636 if (hasattr(lex, 'wordterminators') and 637 (nextchar in lex.wordterminators)): 638 lex.push_raw_text(nextchar) 639 break 640 else: 641 var_name_l.append(nextchar) 642 elif nextchar == rparen: 643 paren_depth -= 1 644 if paren_depth == 0: 645 break 646 elif (hasattr(lex, 'wordterminators') and 647 (nextchar in lex.wordterminators)): 648 lex.push_raw_text(nextchar) 649 break 650 else: 651 var_name_l.append(nextchar) 652 elif paren_depth > 0: 653 
var_name_l.append(nextchar) 654 escaped = False 655 elif nextchar in lex.whitespace: 656 lex.push_raw_text(nextchar) 657 break 658 elif (hasattr(lex, 'wordterminators') and 659 (nextchar in lex.wordterminators) and 660 (paren_depth == 0)): 661 lex.push_raw_text(nextchar) 662 break 663 elif nextchar in lex.commenters: 664 lex.instream.readline() 665 lex.lineno += 1 666 break 667 else: 668 var_name_l.append(nextchar) 669 escaped = False 670 var_name = ''.join(var_name_l) 671 return var_name 672 673 674 675def ExtractVarName(text, 676 commenters = '#', 677 whitespace = ' \t\r\f\n'): 678 """ Read a string like 'atom:A ' or '{/atom:A B/C/../D }ABC ' 679 and return ('','atom:A',' ') or ('{','/atom:A B/C/../D ','}ABC') 680 These are 3-tuples containing the portion of the text containing 681 only the variable's name (assumed to be within the text), 682 ...in addition to the text on either side of the variable name. 683 """ 684 ibegin = 0 685 left_paren = '' 686 if text[0] == '{': 687 ibegin = 1 688 left_paren = text[0] #(GetVarName() strips the leading '{' character) 689 # The best way to insure consistency with other code is to use 690 # lex.GetVarName() to figure out where the variable name ends. 691 lex = TtreeShlex(StringIO(text)) 692 var_name = GetVarName(lex) 693 # Any text following the end of the variable name should be returned as well 694 text_after_list = [] 695 if left_paren: 696 text_after_list.append('}') #(GetVarName() strips the trailing '}' char) 697 while lex: 698 c = lex.read_char() 699 if c == '': 700 break 701 text_after_list.append(c) 702 text_after = ''.join(text_after_list) 703 return (left_paren, var_name, text_after) 704 705 706def EscCharStrToChar(s_in, escape='\\'): 707 """ 708 EscCharStrToChar() replaces any escape sequences 709 in a string with their 1-character equivalents. 
710 711 """ 712 assert(len(escape) > 0) 713 out_lstr = [] 714 escaped_state = False 715 for c in s_in: 716 if escaped_state: 717 if (c == 'n'): 718 out_lstr.append('\n') 719 elif (c == 't'): 720 out_lstr.append('\t') 721 elif (c == 'r'): 722 out_lstr.append('\r') 723 elif (c == 'f'): 724 out_lstr.append('\f') 725 elif (c == '\''): 726 out_lstr.append('\'') 727 elif (c == '\"'): 728 out_lstr.append('\"') 729 elif c in escape: 730 out_lstr.append(c) 731 else: 732 out_lstr.append(escape + c) # <- keep both characters 733 escaped_state = False 734 else: 735 if c in escape: 736 escaped_state = True 737 else: 738 out_lstr.append(c) 739 740 return ''.join(out_lstr) 741 742 743def SafelyEncodeString(in_str, 744 quotes='\'\"', 745 delimiters=' \t\r\f\n', 746 escape='\\', 747 comment_char='#'): 748 """ 749 SafelyEncodeString(in_str) scans through the input string (in_str), 750 and returns a new string in which probletic characters 751 (like newlines, tabs, quotes, etc), are replaced by their two-character 752 backslashed equivalents (like '\n', '\t', '\'', '\"', etc). 753 The escape character is the backslash by default, but it too can be 754 overridden to create custom escape sequences 755 (but this does not effect the encoding for characters like '\n', '\t'). 756 757 """ 758 assert(len(escape) > 0) 759 out_lstr = [] 760 use_outer_quotes = False 761 for c in in_str: 762 if (c == '\n'): 763 c = '\\n' 764 elif (c == '\t'): 765 c = '\\t' 766 elif (c == '\r'): 767 c = '\\r' 768 elif (c == '\f'): 769 c = '\\f' 770 elif c in quotes: 771 c = escape[0] + c 772 elif c in escape: 773 c = c + c 774 elif c in delimiters: 775 use_outer_quotes = True 776 # hmm... that's all that comes to mind. Did I leave anything out? 
777 out_lstr.append(c) 778 779 if use_outer_quotes: 780 out_lstr = ['\"'] + out_lstr + ['\"'] 781 782 return ''.join(out_lstr) 783 784 785def RemoveOuterQuotes(text, quotes='\"\''): 786 if ((len(text) >= 2) and (text[0] in quotes) and (text[-1] == text[0])): 787 return text[1:-1] 788 else: 789 return text 790 791 792def MaxLenStr(s1, s2): 793 if len(s2) > len(s1): 794 return s2 795 else: 796 return s1 797 798 799def VarNameToRegex(s): 800 """ 801 Returns the portion of a TTREE-style variable name (eg "@atom:re.C[1-5]") 802 that corresponds to a regular expression (eg "C[1-5]"). A variable name 803 is assumed to encode a regular expression if it begins with "re.", OR if 804 the a ':' character is followed by "re.". 805 If so, the text in s (excluding "re.") is assumed to be a regular expresion 806 and is returned to the caller. 807 If not, the empty string ('') is returned. 808 If the first or second character is a '{', and if the final character 809 is '}', they will be deleted. Consequently: 810 VarNameToRegex('@atom:C') returns '' 811 VarNameToRegex('@atom:re.C[1-5]') returns '@atom:C[1-5]' 812 VarNameToRegex('@{/atom:re.C[1-5]}') returns '@/atom:C[1-5]' 813 VarNameToRegex('@bond:AB') returns '' 814 VarNameToRegex('@bond:re.A*B') returns '@bond:a*b' 815 VarNameToRegex('bond:re.A*B') returns 'bond:a*b' 816 VarNameToRegex('{bond:re.A*B}') returns 'bond:a*b' 817 VarNameToRegex('@{bond:re.A*B}') returns '@bond:a*b' 818 """ 819 # First, deal with parenthesis {} 820 iparen_L = s.find('{') 821 iparen_R = s.rfind('}') 822 if (((iparen_L == 0) or (iparen_L == 1)) and (iparen_R == len(s)-1)): 823 optional_char = '' 824 if iparen_L == 1: 825 optional_char = s[0] 826 s = optional_char + s[iparen_L+1:iparen_R] 827 # Now check to see if the remaining string contains 're.' or ':re.' 828 icolon = s.find(':') 829 # If 're.' 
is not found immediately after the first ':' character 830 # or following a '/' character 831 # (or if it is not found at the beginning when no ':' is present) 832 # then there is no regular expression. In that case, return '' 833 ire = s.find('re.') 834 if ((ire == -1) or 835 (not ((ire > 0) and ((s[ire-1] == ':') or (s[ire-1] == '/'))))): 836 return '' 837 return s[0:ire] + s[ire+3:] 838 839 840def HasRE(pat): 841 """ 842 Returns true if a string (pat) begins with 're.' 843 """ 844 return len(VarNameToRegex(pat)) > 0 845 846 847def HasWildcard(pat): 848 """ 849 Returns true if a string (pat) contains a '*' or '?' character. 850 851 """ 852 return (pat.find('*') != -1) or (pat.find('?') != -1) 853 854 855# def HasWildcard(pat): 856# """ 857# Returns true if a string (pat) contains a non-backslash-protected 858# * or ? character. 859# 860# """ 861# N=len(pat) 862# i=0 863# while i < N: 864# i = pat.find('*', i, N) 865# if i == -1: 866# break 867# elif (i==0) or (pat[i-1] != '\\'): 868# return True 869# i += 1 870# i=0 871# while i < N: 872# i = pat.find('?', i, N) 873# if i == -1: 874# break 875# elif (i==0) or (pat[i-1] != '\\'): 876# return True 877# i += 1 878# return False 879 880 881def MatchesPattern(s, pattern): 882 if type(pattern) is str: 883 # old code: 884 # if ((len(s) > 1) and (s[0] == '/') and (s[-1] == '/'): 885 # re_string = p[1:-1] # strip off the slashes '/' and '/' 886 # if not re.search(re_string, s): 887 # return False 888 # new code: 889 # uses precompiled regular expressions (See "pattern.search" below) 890 if HasWildcard(pattern): 891 if not fnmatch.fnmatchcase(s, pattern): 892 return False 893 elif s != pattern: 894 return False 895 else: 896 #assert(type(p) is _sre.SRE_Match) 897 # I assume pattern = re.compile(some_reg_expr) 898 if not pattern.search(s): 899 return False 900 return True 901 902 903def MatchesAll(multi_string, pattern): 904 assert(len(multi_string) == len(pattern)) 905 for i in range(0, len(pattern)): 906 if not 
MatchesPattern(multi_string[i], pattern[i]): 907 return False 908 return True 909 910 911class LineLex(TtreeShlex): 912 """ This class extends the TtreeShlex module (a slightly modified 913 version of the python 3.2.2 version of shlex). LineLex has the 914 ability to read one line at a time (in addition to one token at a time). 915 (Many files and scripts must be parsed one line at a time instead of one 916 token at a time. In these cases, the whitespace position also matters.) 917 918 Arguably, this class might not be necessary. 919 I could get rid of this class completely. That would be nice. To do that 920 we would need to augment and generalize shlex's get_token() member function 921 to make it read lines, not just tokens. Of course, you can always 922 change the wordchars (or wordterminators). Even so, there are two other 923 difficulties using the current version of shlex.get_token() to read lines: 924 1) File inclusion happen whenever the beginning of a line/token matches one 925 of the "source_triggers" (not the whole line as required by get_token()). 926 2) Lines ending in a special character (by default the backslash character) 927 continue on to the next line. 928 This code seems to work on our test files, but I'm sure there are bugs. 929 Andrew 2012-3-25 930 931 """ 932 933 def __init__(self, 934 instream=None, 935 infile=None, 936 posix=False): 937 TtreeShlex.__init__(self, instream, infile, posix) 938 self.line_terminators = '\n' 939 self.line_extend_chars = '\\' 940 self.skip_comments_during_readline = True 941 942 def _StripComments(self, line): 943 if self.skip_comments_during_readline: 944 for i in range(0, len(line)): 945 if ((line[i] in self.commenters) and 946 ((i == 0) or (line[i - 1] not in self.escape))): 947 return line[:i] 948 return line 949 950 def _ReadLine(self, 951 recur_level=0): 952 """ 953 This function retrieves a block of text, halting at a 954 terminal character. Escape sequences are respected. 
955 The self.lineno (newline counter) is also maintained. 956 957 The main difference between Readline and get_token() 958 is the way they handle the "self.source_triggers" member. 959 Both Readline() and get_token() insert text from other files when they 960 encounter a string in "self.source_triggers" in the text they read. 961 However ReadLine() ONLY inserts text from other files if the token which 962 matches with self.source_triggers appears at the beginning of the line. 963 get_token() inserts text only if lex.source matches the entire token. 964 965 comment-to-self: 966 At some point, once I'm sure this code is working, I should replace 967 shlex.get_token() with the code from ReadLine() which is more general. 968 It would be nice to get rid of "class LineLex" entirely. ReadLine() 969 is the only new feature that LineLex which was lacking in shlex. 970 971 To do this I would need to add a couple optional arguments to 972 "get_token()", allowing it to mimic ReadLine(), such as: 973 "override_wordterms" argument (which we can pass a '\n'), and 974 "token_extender" argument (like '\' for extending lines) 975 976 """ 977 first_token = '' 978 line = '' 979 escaped_state = False 980 found_space = False 981 while True: 982 nextchar = self.read_char() 983 # sys.stderr.write('nextchar=\"'+nextchar+'\"\n') 984 while nextchar == '': 985 if not self.filestack: 986 return self._StripComments(line), '', first_token, found_space 987 else: 988 self.pop_source() 989 nextchar = self.read_char() 990 if nextchar == '\n': 991 self.lineno += 1 992 993 if escaped_state: 994 escaped_state = False 995 else: 996 if nextchar in self.escape: 997 line += nextchar 998 escaped_state = True 999 else: 1000 escaped_state = False 1001 1002 if not escaped_state: 1003 if (nextchar in self.whitespace): 1004 found_space = True 1005 while first_token in self.source_triggers: 1006 fname = RemoveOuterQuotes(self.get_token()) 1007 if (fname == '') or (fname in self.source_triggers): 1008 raise 
InputError('Error: near ' + self.error_leader() + '\n' 1009 ' Nonsensical file inclusion request.\n') 1010 if self.debug >= 0: 1011 sys.stderr.write((' ' * recur_level) + 1012 'reading file \"' + fname + '\"\n') 1013 spec = self.sourcehook(fname) 1014 if spec: 1015 (fname, subfile) = spec 1016 if ((first_token not in self.source_triggers_x) or 1017 (fname not in self.source_files_restricted)): 1018 self.push_source(subfile, fname) 1019 if first_token in self.source_triggers_x: 1020 self.source_files_restricted.add(fname) 1021 else: 1022 if self.debug >= 0: 1023 sys.stderr.write('\nWarning at ' + self.error_leader() + ':\n' 1024 ' duplicate attempt to import file:\n \"' + fname + '\"\n') 1025 1026 line, nextchar, first_token, found_space = \ 1027 self._ReadLine(recur_level + 1) 1028 1029 if nextchar in self.line_terminators: 1030 line_nrw = line.rstrip(self.whitespace) 1031 # sys.stderr.write('line_nrw=\"'+line_nrw+'\"\n') 1032 if ((len(line_nrw) > 0) and 1033 (line_nrw[-1] in self.line_extend_chars) and 1034 ((len(line_nrw) < 2) or (line_nrw[-2] not in self.escape))): 1035 # delete the line_extend character 1036 line = line_nrw[:-1] 1037 # from the end of that line and keep reading... 1038 else: 1039 return self._StripComments(line), nextchar, first_token, found_space 1040 else: 1041 line += nextchar 1042 if not found_space: 1043 first_token += nextchar 1044 1045 def ReadLine(self, recur_level=0): 1046 line, nextchar, first_token, found_space = \ 1047 self._ReadLine(recur_level) 1048 if nextchar == self.eof: 1049 self.end_encountered = True 1050 return line + nextchar 1051 1052 @staticmethod 1053 def TextBlock2Lines(text, delimiters, keep_delim=True): 1054 """ This splits a string into a list of sub-strings split by delimiter 1055 characters. This function is different from the standard str.split() 1056 function: The string is split at every character which belongs to the 1057 "delimiters" argument (which can be a string or some other container). 
        This character is included at the end of every substring.  Example:
        TextBlock2Lines('\nabc\nde^fg\nhi j\n', '^\n')
        returns:
        ['\n', 'abc\n', 'de^', 'fg\n', 'hi j\n']

        """
        ls = []
        i = 0
        i_prev = 0  # index of the character following the previous delimiter
        while i < len(text):
            if text[i] in delimiters:
                if keep_delim:
                    ls.append(text[i_prev:i + 1])  # include the delimiter
                else:
                    ls.append(text[i_prev:i])      # omit the delimiter
                i_prev = i + 1
            i += 1
        # Append any text remaining after the final delimiter:
        if (i_prev < len(text)):
            ls.append(text[i_prev:i + 1])
        return ls

    def __iter__(self):
        return self

    def __next__(self):
        # Iterating over the lexer yields one logical line at a time.
        line = self.ReadLine()
        if line == self.eof:
            raise StopIteration
        return line


class OSrcLoc(object):
    """ OSrcLoc is barely more than a 2-tuple containing the name of a file
    (a string) and a particular line number inside that file (an integer).
    These objects are passed around and stored in the nodes of
    every tree, so that if a syntax error or broken link in that node
    is discovered, an error message can be provided to the user.
    """

    __slots__ = ["infile", "lineno", "order"]
    count = 0  # class-wide counter of how many OSrcLocs have been created

    def __init__(self, infile='', lineno=-1):
        self.infile = infile
        self.lineno = lineno
        OSrcLoc.count += 1
        self.order = OSrcLoc.count  # keep track of how many times it was called

    def __lt__(self, x):
        # OSrcLocs sort by creation order, not by file/line.
        return self.order < x.order

    # def __repr__(self):
    #    return repr((self.infile, self.lineno, self.order))


class TextBlock(object):
    """TextBlock is just a 3-tuple consisting of a string, and an OSrcLoc
    to help locate it in the original file from which it was read."""

    __slots__ = ["text", "srcloc"]

    def __init__(self, text, srcloc):  # srcloc_end):
        self.text = text
        if srcloc == None:
            self.srcloc = OSrcLoc()
        else:
            self.srcloc = srcloc
        # if srcloc_end == None:
        #    self.srcloc_end = OSrcLoc()
        # else:
        #    self.srcloc_end = srcloc_end

    def __repr__(self):
        return '\"' + self.text + '\"'


class VarRef(object):
    """VarRef stores variable names, and paths, and other attribute information,
    as well as a "OSrcLoc" to keep track of the file it was defined in."""

    __slots__ = ["prefix", "descr_str", "suffix", "srcloc", "binding", "nptr"]

    def __init__(self,
                 prefix='',      # '$' or '${'
                 descr_str='',   # <- descriptor string: "cpath/category:lpath"
                 suffix='',      # '}'
                 srcloc=None,    # location in file where defined
                 binding=None,   # a pointer to a tuple storing the value
                 nptr=None):     # <- see class VarNPtr

        self.prefix = prefix  # Any text before the descriptor string goes here
        self.suffix = suffix  # Any text after the descriptor string goes here
        self.descr_str = descr_str
        if srcloc == None:  # <- Location in text file where variable appears
            self.srcloc = OSrcLoc()
        else:
            self.srcloc = srcloc

        self.binding = binding

        if nptr == None:
            self.nptr = VarNPtr()
        else:
            self.nptr = nptr

    def __lt__(self, x):
        # NOTE(review): "order" is not listed in __slots__ and is never
        # assigned anywhere in this class, so this comparison would raise
        # AttributeError if ever invoked.  It probably was meant to compare
        # self.srcloc.order -- verify against callers before changing.
        return self.order < x.order

    # def __repr__(self):
    #     return repr((self.prefix + self.descr_str + self.suffix, srcloc))


class VarNPtr(object):
    """
    Every time a variable appears in a template, it has a "descriptor".
    For example, consider the variable
        "$atom:CA"
    This is a string which encodes 3 pieces of information.
    1) the category name:  This essentially indicates the variable's type.
       (ie "atom", in the example above)
    2) the category node:  Some TYPES have limited scope.  Users can
       specify the root node of the portion of the tree
       in which this variable's type makes sense.
       If this node is the root node, then that category
       is relevant everywhere, and is not molecule or class
       specific.  All variables have a category node, which
       is often not explicitly defined to by the user.
       (Category node = the root "/", in the example above.)
    3) the leaf node:  This is a node whose ".name" member matches the name
       of a variable.  This node is created for this purpose
       and it's position in the tree is a reflection of
       that variable's intended scope.
       In a molecule this "name" might be the name
       of a type of atom, or an atom ID, or a bond type,
       which is found in a particular molecule.
       (Leaf node would be named "CA" in the example above.)

    The VarNPtr class is simply a 3-tuple which
    keeps these 3 pieces of data together.
1198 1199 """ 1200 1201 __slots__ = ["cat_name", "cat_node", "leaf_node"] 1202 1203 def __init__(self, cat_name='', cat_node=None, leaf_node=None): 1204 self.cat_name = cat_name 1205 self.cat_node = cat_node 1206 self.leaf_node = leaf_node 1207 1208 # def __repr__(self): 1209 # return repr((self.cat_name, self.cat_node.name, self.leaf_node.name)) 1210 1211 1212class VarBinding(object): 1213 """ VarBinding is essentially a tuple consistng of (full_name, binding, refs): 1214 1215 "self.full_name" is canonical name for this variable. This is a string 1216 which specifies full path leading to the category node (beginning with '/'), 1217 the category name (followed by a ':'), 1218 as well as the leaf node (including the path leading up to it from cat_node) 1219 This triplet identifies the variable uniquely. 1220 1221 "self.value" is the data that the variable refers to (usually a string). 1222 1223 "self.refs" stores a list of VarRefs which mention the same variable 1224 from the various places inside various templates in the tree. 1225 1226 """ 1227 1228 __slots__ = ["full_name", "nptr", "value", "refs", "order", "category"] 1229 1230 def __init__(self, 1231 full_name='', 1232 nptr=None, 1233 value=None, 1234 refs=None, 1235 order=-1, 1236 category=None): 1237 self.full_name = full_name 1238 self.nptr = nptr 1239 self.value = value 1240 self.refs = refs 1241 self.order = order 1242 self.category = category 1243 1244 def __lt__(self, x): 1245 return self.order < x.order 1246 1247 def __repr__(self): 1248 return repr((self.full_name, self.value, self.order)) 1249 1250 1251def ExtractCatName(descr_str): 1252 """ When applied to a VarRef's "descr_str" member, 1253 this function will extract the "catname" of it's corresponding 1254 "nptr" member. This can be useful for error reporting. 1255 (I use it to insure that the user is using the correct counter 1256 variable types at various locations in their input files.) 
1257 1258 """ 1259 1260 ib = descr_str.find(':') 1261 if ib == -1: 1262 ib = len(descr_str) 1263 ia = descr_str.rfind('/') 1264 if ia == -1: 1265 ia = 0 1266 return descr_str[ia:ib] 1267 else: 1268 str_before_colon = descr_str[0:ib] 1269 ia = str_before_colon.rfind('/') 1270 if ia == -1: 1271 return str_before_colon 1272 else: 1273 return str_before_colon[ia + 1:] 1274 1275 1276def _DeleteLineFromTemplate(tmpl_list, 1277 i_entry, # index into tmpl_list 1278 newline_delimiter='\n'): 1279 """ Delete a single line from tmpl_list. 1280 tmpl_list is an alternating list of VarRefs and TextBlocks. 1281 To identify the line, the index corresponding to one of the 1282 entries in the tmpl_list is used. (Usually it is a VarRef) 1283 The text after the preceeding newline, and the text up to the next newline 1284 (starting from the beginning of the current entry, if a TextBlock) 1285 is deleted, including any VarRef (variables) located in between. 1286 1287 It returns the index corresponding to the next 1288 entry in the list (after deletion). 1289 1290 """ 1291 1292 i_prev_newline = i_entry 1293 while i_prev_newline >= 0: 1294 entry = tmpl_list[i_prev_newline] 1295 if isinstance(entry, TextBlock): 1296 i_char_newline = entry.text.rfind(newline_delimiter) 1297 if i_char_newline != -1: # then newline found 1298 # Delete the text after this newline 1299 entry.text = entry.text[:i_char_newline + 1] 1300 break 1301 i_prev_newline -= 1 1302 1303 first_var = True 1304 #i_next_newline = i_entry 1305 i_next_newline = i_prev_newline + 1 1306 while i_next_newline < len(tmpl_list): 1307 entry = tmpl_list[i_next_newline] 1308 if isinstance(entry, TextBlock): 1309 i_char_newline = entry.text.find(newline_delimiter) 1310 if i_char_newline != -1: # then newline found 1311 # Delete the text before this newline (including the newline) 1312 entry.text = entry.text[i_char_newline + 1:] 1313 break 1314 # Invoke DeleteSelf() on the first variables on this line. 
This will 1315 # insure that it is deleted from the ttree_assignments.txt file. 1316 elif isinstance(entry, VarRef): 1317 if first_var: 1318 entry.nptr.leaf_node.DeleteSelf() 1319 first_var = False 1320 i_next_newline += 1 1321 1322 del tmpl_list[i_prev_newline + 1: i_next_newline] 1323 return i_prev_newline + 1 1324 1325 1326def DeleteLinesWithBadVars(tmpl_list, 1327 delete_entire_template=False, 1328 newline_delimiter='\n'): 1329 """ 1330 Loop through the entries in a template, 1331 an alternating list of TextBlocks and VarRefs (tmpl_list). 1332 If a VarRef points to a leaf_node which no longer exists 1333 (ie. no longer in the corresponding category's .bindings list). 1334 Then delete the line it came from from the template (tmpl_list). 1335 1336 """ 1337 1338 out_str_list = [] 1339 i = 0 1340 while i < len(tmpl_list): 1341 entry = tmpl_list[i] 1342 if isinstance(entry, VarRef): 1343 var_ref = entry 1344 var_bindings = var_ref.nptr.cat_node.categories[ 1345 var_ref.nptr.cat_name].bindings 1346 # if var_ref.nptr.leaf_node not in var_bindings: 1347 if var_ref.nptr.leaf_node.IsDeleted(): 1348 if delete_entire_template: 1349 del tmpl_list[:] 1350 return 0 1351 else: 1352 i = _DeleteLineFromTemplate(tmpl_list, 1353 i, 1354 newline_delimiter) 1355 else: 1356 i += 1 1357 else: 1358 i += 1 1359 1360 1361def SplitTemplate(ltmpl, delim, delete_blanks=False): 1362 """ 1363 Split a template "ltmpl" into a list of "tokens" (sub-templates) 1364 using a single delimiter string "delim". 1365 1366 INPUT arguments: 1367 "ltmpl" should be an list of TextBlocks and VarRefs. 1368 "delim" should be a simple string (type str) 1369 "delete_blanks" should be a boolean True/False value. 1370 When true, successive occurrences of the delimiter 1371 should not create blank entries in the output list. 1372 1373 OUTPUT: 1374 A list of tokens. 1375 Each "token" is either a TextBlock, a VarRef, 1376 or a (flat, 1-dimensional) list containing more than one of these objects. 
1377 The number of "tokens" returned equals the number of times the delimiter 1378 is encountered in any of the TextBlocks in the "ltmpl" argument, plus one. 1379 (... Unless "delete_blanks" is set to True. 1380 Again, in that case, empty entries in this list are deleted.) 1381 1382 """ 1383 assert(type(delim) is str) 1384 if not hasattr(ltmpl, '__len__'): 1385 ltmpl = [ltmpl] 1386 1387 tokens_lltmpl = [] 1388 token_ltmpl = [] 1389 i = 0 1390 while i < len(ltmpl): 1391 1392 entry = ltmpl[i] 1393 #sys.stderr.write('ltmpl['+str(i)+'] = '+str(entry)+'\n') 1394 1395 if isinstance(entry, TextBlock): 1396 # if hasattr(entry, 'text'): 1397 prev_src_loc = entry.srcloc 1398 1399 tokens_str = entry.text.split(delim) 1400 1401 lineno = entry.srcloc.lineno 1402 1403 j = 0 1404 while j < len(tokens_str): 1405 token_str = tokens_str[j] 1406 1407 delim_found = False 1408 if (j < len(tokens_str) - 1): 1409 delim_found = True 1410 1411 if token_str == '': 1412 if delete_blanks: 1413 if delim == '\n': 1414 lineno += 1 1415 if len(token_ltmpl) > 0: 1416 if len(token_ltmpl) == 1: 1417 tokens_lltmpl.append(token_ltmpl[0]) 1418 else: 1419 tokens_lltmpl.append(token_ltmpl) 1420 del token_ltmpl 1421 token_ltmpl = [] 1422 j += 1 1423 continue 1424 1425 new_src_loc = OSrcLoc(prev_src_loc.infile, lineno) 1426 new_src_loc.order = prev_src_loc.order 1427 1428 for c in token_str: 1429 # Reminder to self: c != delim (so c!='\n' if delim='\n') 1430 # (We keep track of '\n' characters in delimiters above.) 
1431 if c == '\n': 1432 lineno += 1 1433 1434 new_src_loc.lineno = lineno 1435 1436 text_block = TextBlock(token_str, 1437 new_src_loc) 1438 1439 prev_src_loc = new_src_loc 1440 1441 if len(token_ltmpl) == 0: 1442 if delim_found: 1443 tokens_lltmpl.append(text_block) 1444 del token_ltmpl 1445 token_ltmpl = [] 1446 else: 1447 token_ltmpl.append(text_block) 1448 else: 1449 if delim_found: 1450 if len(token_str) > 0: 1451 token_ltmpl.append(text_block) 1452 tokens_lltmpl.append(token_ltmpl) 1453 del token_ltmpl 1454 token_ltmpl = [] 1455 else: 1456 assert(not delete_blanks) 1457 if (isinstance(token_ltmpl[-1], VarRef) 1458 and 1459 ((j > 0) 1460 or 1461 ((j == len(tokens_str) - 1) and 1462 (i == len(ltmpl) - 1)) 1463 )): 1464 # In that case, this empty token_str corresponds 1465 # to a delimiter which was located immediately 1466 # after the variable name, 1467 # AND 1468 # -there is more text to follow, 1469 # OR 1470 # -we are at the end of the template. 1471 token_ltmpl.append(text_block) 1472 if len(token_ltmpl) == 1: 1473 tokens_lltmpl.append(token_ltmpl[0]) 1474 else: 1475 tokens_lltmpl.append(token_ltmpl) 1476 del token_ltmpl 1477 token_ltmpl = [] 1478 else: 1479 token_ltmpl.append(text_block) 1480 1481 if (delim_found and (delim == '\n')): 1482 lineno += 1 1483 1484 j += 1 1485 1486 elif isinstance(entry, VarRef): 1487 # elif hasattr(entry, 'descr_str'): 1488 lineno = entry.srcloc.lineno 1489 if ((len(token_ltmpl) == 1) and 1490 isinstance(token_ltmpl[0], TextBlock) and 1491 (len(token_ltmpl[0].text) == 0)): 1492 # special case: if the previous entry was "", then it means 1493 # the delimeter appeared at the end of the previous text block 1494 # leading up to this variable. It separates the variable from 1495 # the previous text block. It is not a text block of length 0. 
1496 token_ltmpl[0] = entry 1497 else: 1498 token_ltmpl.append(entry) 1499 elif entry == None: 1500 token_ltmpl.append(entry) 1501 else: 1502 assert(False) 1503 1504 i += 1 1505 1506 # Append left over remains of the last token 1507 if len(token_ltmpl) == 1: 1508 tokens_lltmpl.append(token_ltmpl[0]) 1509 elif len(token_ltmpl) > 1: 1510 tokens_lltmpl.append(token_ltmpl) 1511 del token_ltmpl 1512 1513 return tokens_lltmpl 1514 1515 1516def SplitTemplateMulti(ltmpl, delims, delete_blanks=False): 1517 """ 1518 Split a template "ltmpl" into a list of templates using a 1519 single one or more delimiter strings "delim_list". 1520 If multiple delimiter strings are provided, splitting 1521 begins using the first delimiter string in the list. 1522 Then each token in the resulting list of templates 1523 is split using the next delimiter string 1524 and so on until we run out of delimiter strings. 1525 1526 "ltmpl" should be an list of TextBlocks and VarRefs. 1527 "delims" should be a simple string (type str) or a list of strings 1528 "delete_blanks" is either True or False 1529 If True, then any blank entries in the resulting list of 1530 tokens (sub-templates) will be deleted. 1531 1532 """ 1533 1534 if hasattr(delims, '__len__'): # then it hopefully is a list of strings 1535 delim_list = delims 1536 else: 1537 delim_list = [delims] # then it hopefully is a string 1538 1539 tokens = [ltmpl] 1540 for delim in delim_list: 1541 assert(type(delim) is str) 1542 tokens_il = [] 1543 for t in tokens: 1544 sub_tokens = SplitTemplate(t, delim, delete_blanks) 1545 for st in sub_tokens: 1546 if hasattr(st, '__len__'): 1547 if (len(st) > 0) or (not delete_blanks): 1548 tokens_il.append(st) 1549 else: 1550 tokens_il.append(st) 1551 tokens = tokens_il 1552 del tokens_il 1553 1554 return tokens 1555 1556 1557def _TableFromTemplate(d, ltmpl, delimiters, delete_blanks): 1558 """ 1559 See the docstring for the TableFromTemplate() function for an explanation. 
1560 (This _TableFromTemplate() and SplitTemplate() are the workhorse functions 1561 for TableFromTemplate().) 1562 1563 """ 1564 1565 output = SplitTemplateMulti(ltmpl, delimiters[d], delete_blanks[d]) 1566 1567 if d > 0: 1568 i = 0 1569 while i < len(output): 1570 output[i] = _TableFromTemplate(d - 1, 1571 output[i], 1572 delimiters, 1573 delete_blanks) 1574 # Delete empty LISTS? 1575 if (delete_blanks[d] and 1576 hasattr(output[i], '__len__') and 1577 (len(output[i]) == 0)): 1578 del output[i] 1579 else: 1580 i += 1 1581 1582 return output 1583 1584 1585def TableFromTemplate(ltmpl, delimiters, delete_blanks=True): 1586 """ 1587 This function can be used to split a template 1588 (a list containing TextBlocks and VarRefs) into a table 1589 into a multidimensional table, with an arbitrary number of dimensions. 1590 1591 Arguments: 1592 1593 ltmpl 1594 1595 An alternating list of TextBlocks and VarRefs containing 1596 the contents of this text template. 1597 1598 delimiters 1599 1600 The user must supply a list or tuple of delimiters: one delimiter for 1601 each dimension in the table, with low-priority delimiters 1602 (such as spaces ' ') appearing first, and higher-priority delimiters 1603 (sich as newlines '\n') appearing later on in the list. 1604 This function will divide the entire "ltmpl" into an n-dimensional 1605 table. Initially the text is split into a list of text using the 1606 highest-priority delimiter. Then each entry in the resulting list is 1607 split into another list according to the next highest-priority delimiter. 1608 This continues until all of the delimiters are used up and an 1609 n-dimensional list-of-lists is remaining. 1610 1611 delete_blanks 1612 1613 The optional "delete_blanks" argument can be used to indicate whether 1614 or not to delete blank entries in the table (which occur as a result 1615 of placing two delimiters next to each other). 
It should be either 1616 None (default), or it should be an array of booleans matching the 1617 size of the "delimiters" argument. This allows the caller to customize 1618 the merge settings separately for each dimension (for example: to allow 1619 merging of whitespace within a line, without ignoring blank lines). 1620 1621 1622 ---- Details: ---- 1623 1624 1) Multi-character delimiters ARE allowed (like '\n\n'). 1625 1626 2) If a delimiter in the "delimiters" argument is not a string 1627 but is a tuple (or a list) of strings, then the text is split according 1628 to any of the delimiters in that tuple/list (starting from the last entry). 1629 This way, users can use this feature to split text according to multiple 1630 different kinds of whitespace characters (such as ' ' and '\t'), for 1631 example, buy setting delimiters[0] = (' ','\t'). If, additionally, 1632 delete_blanks[0] == True, then this will cause this function to 1633 divide text in without regard to whitespace on a given line (for example). 1634 1635 Detailed example: 1636 1637 table2D = TableFromTmplList(ltmpl, 1638 delimiters = ((' ','\t'), '\n'), 1639 delete_blanks = (True, False)) 1640 1641 This divides text in a similar way that the "awk" program does by default, 1642 ie, by ignoring various kinds of whitespace between text fields, but NOT 1643 ignoring blank lines. 1644 1645 3) Any text contained in variable-names is ignored. 1646 1647 """ 1648 1649 # Make a copy of ltmpl 1650 # (The workhorse function "_TableFromTemplate()" makes in-place changes to 1651 # its "ltmpl" argument. I don't want to modify "ltmpl", so I make a copy 1652 # of it before I invoke "_TableFromTemplate()" on it.) 
1653 1654 output = [ltmpl[i] for i in range(0, len(ltmpl))] 1655 1656 d = len(delimiters) - 1 1657 output = _TableFromTemplate(d, output, delimiters, delete_blanks) 1658 return output 1659 1660 1661class TemplateLexer(TtreeShlex): 1662 """ This class extends the standard python lexing module, shlex, adding a 1663 new member function (ReadTemplate()), which can read in a block of raw text, 1664 (halting at an (non-escaped) terminal character), and split the text into 1665 alternating blocks of text and variables. (As far as this lexer is 1666 concerned, "variables" are simply tokens preceeded by $ or @ characters, 1667 and surrounded by optional curly-brackets {}.) 1668 1669 """ 1670 1671 def __init__(self, 1672 instream=None, 1673 infile=None, 1674 posix=False): 1675 TtreeShlex.__init__(self, instream, infile, posix) 1676 self.var_delim = '$@' # characters which can begin a variable name 1677 self.var_open_paren = '{' # optional parenthesis surround a variable 1678 self.var_close_paren = '}' # optional parenthesis surround a variable 1679 self.newline = '\n' 1680 self.comment_skip_var = '#' 1681 1682 # Which characters belong in words? 1683 # 1684 # We want to allow these characters: 1685 # ./$@&%^!*~`-_:;?<>[]() 1686 # to appear inside the tokens that TtreeShlex.get_token() 1687 # retrieves (TtreeShlex.get_token() is used to read class 1688 # names, and instance names, and variable names) 1689 # 1690 # settings.lex.wordchars+='./$@&%^!*~`-_+:;?<>[]' #Allow these chars 1691 # 1692 # Ommisions: 1693 # Note: I left out quotes, whitespace, comment chars ('#'), and escape 1694 # characters ('\\') because they are also dealt with separately. 1695 # Those characters should not overlap with settings.lex.wordchars. 1696 # 1697 # Enabling unicode support requires that we override this choice 1698 # by specifying "lex.wordterminators" instead of "wordchars". 
1699 # 1700 # lex.wordterminators should be the (printable) set inverse of lex.wordchars 1701 # I'm not sure which ascii characters are NOT included in the string above 1702 # (We need to figure that out, and put them in settings.lex.wordterminators) 1703 # To figure that out, uncomment the 8 lines below: 1704 # 1705 # self.wordterminators='' 1706 # for i in range(0,256): 1707 # c = chr(i) 1708 # if c not in self.wordchars: 1709 # self.wordterminators += c 1710 #sys.stderr.write('-------- wordterminators = --------\n') 1711 # sys.stderr.write(self.wordterminators+'\n') 1712 # sys.stderr.write('-----------------------------------\n') 1713 # 1714 # Here is the result: 1715 self.wordterminators = '(){|}' + \ 1716 self.whitespace + \ 1717 self.quotes + \ 1718 self.operators + \ 1719 self.escape + \ 1720 self.commenters 1721 1722 # Note: 1723 # self.whitespace = ' \t\r\f\n' 1724 # self.quotes = '\'"' 1725 # self.escape = '\\' 1726 # self.commenters = '#' 1727 # Note: I do not terminate on these characters: +-=*'"` 1728 # because they appear in the names of atom types in many force-fields. 1729 # Also * characters are needed for variables containing wildcards 1730 # in the name (which will be dealt with later). 1731 1732 self.source_triggers = set(['include', 'import']) 1733 self.source_triggers_x = set(['import']) 1734 1735 def GetSrcLoc(self): 1736 return OSrcLoc(self.infile, self.lineno) 1737 1738 def ReadTemplate(self, 1739 simplify_output=False, 1740 terminators='}', 1741 remove_esc_preceeding='{\\', #explained below 1742 var_terminators='{}(),', #(var_delim, spaces also included) 1743 keep_terminal_char=True): 1744 """ 1745 ReadTemplate() reads a block of text (between terminators) 1746 and divides it into variables (tokens following a '$' or '@' character) 1747 and raw text. 
This is similar to pythons string.Template(), 1748 however it reads from streams (files), not strings, and it allows use 1749 of more complicated variable names with multiple variable delimiters 1750 (eg '$' and '@'). 1751 This readline()-like member function terminates when reaching a 1752 user-specified terminator character character (second argument), 1753 or when variable (eg: "$var"$ is encountered). The result is 1754 a list of variable-separated text-blocks (stored in the first 1755 argument). For example, the string: 1756 "string with $var1 and $var2 variables.}" contains: 1757 "string with ", 1758 $var1, 1759 " and ", 1760 $var2, 1761 " variables.}" 1762 This simplifies the final process of rendering 1763 (substituting text into) the text blocks later on. 1764 Output: 1765 This function returns a list of (alternating) blocks of 1766 text, and variable names. Each entry in the list is either: 1767 1) a text block: 1768 Raw text is copied from the source, verbatim, along with 1769 some additional data (filename and line numbers), to 1770 help retroactively identify where the text came from 1771 (in case a syntax error in the text is discovered later). 1772 In this case, the list entry is stored as a list 1773 The format (TextBlock) is similar to: 1774 [text_string, ((filenameA,lineBegin), (filenameB,lineEnd))], 1775 where the tuples, (filenameA,lineBegin) and (filenameB,lineEnd) 1776 denote the source file(s) from which the text was read, and 1777 line number at the beginning and ending of the text block. 1778 (This information is useful for generating helpful error 1779 messages. Note that the "TtreeShlex" class allows users to 1780 combine multiple files transparently into one stream using 1781 the "source" (or "sourcehook()") member. For this reason, it 1782 is possible, although unlikely, that the text-block 1783 we are reading could span multiple different files.) 
1784 2) a variable (for example "$var" or "${var}"): 1785 In this case, the list entry is stored in the "VarRef" format 1786 which is essentialy shown below: 1787 [[var_prefix, var_nptr, var_suffix], (filename,lineno)] 1788 where var_prefix and var_suffix are strings containing brackets 1789 and other text enclosing the variable name (and may be empty). 1790 1791 As an example, we consider a file named "datafile" which 1792 contains the text containing 2 text blocks and 1 variable: 1793 "some\n text\n before ${var}. Text after\n". 1794 ReadTemplate() will read this and return a list with 3 entries: 1795 [ ['some\n text\n before', (('datafile', 1), ('datafile', 3))], 1796 [['${', 'var', '}'], ('datafile', 3, 3)], 1797 ['Text after\n', (('datafile', 3), ('datafile', 4))] ] 1798 1799 Note that while parsing the text, self.lineno counter is 1800 incremented whenever a newline character is encountered. 1801 (Also: Unlike shlex.get_token(), this function does not 1802 delete commented text, or insert text from other files.) 1803 1804 Exceptional Cases: 1805 Terminator characters are ignored if they are part of a variable 1806 reference. (For example, the '}' in "${cat:var}", is used to denote a 1807 bracketed variable, and does not cause ReadTemplate() to stop reading) 1808 OR if they are part of a two-character escape sequence 1809 (for example, '}' in "\}" does not cause terminate parsing). 1810 In that case, the text is considered normal text. (However the 1811 \ character is also stripped out. It is also stripped out if it 1812 preceeds any characters in "remove_esc_preceeding", which is 1813 the second argument. Otherwise it is left in the text block.) 1814 What is the purpose of "remove_esc_preceeding"? To force ReadTemplate() 1815 to remove the preceeding \ when it otherwise would not. For example, 1816 we want to remove \ whenever it preceeds another \ character, so we 1817 include it in the remove_esc_preceeding string variable. 
We alse include 1818 '{' because we want to remove \ when it preceeds the '{' character. 1819 That way the \ gets deleted when it preceeds either '{' or '}'. 1820 (The \ character is already removed before the '}' character.) 1821 We want consistent behavior that people expect, so that 1822 "\{abc\}" -> ReadTemplate() -> "{abc}" (instead of "\{abc}"). 1823 In retrospect, perhaps this is a confusing way to implement this. 1824 1825 """ 1826 1827 #sys.stderr.write(' ReadTemplate('+terminators+') invoked at '+self.error_leader()) 1828 1829 # The main loop of the parser reads only one variable at time. 1830 # The following variables keep track of where we are in the template. 1831 reading_var = False # Are we currently reading in the name of a variable? 1832 1833 prev_char_delim = False # True iff we just read a var_delim character like '$' 1834 # True iff we just read a (non-escaped) esc character '\' 1835 escaped_state = False 1836 # True iff we are in a region of text where vars should be ignored 1837 commented_state = False 1838 var_paren_depth = 0 # This is non-zero iff we are inside a 1839 # bracketed variable's name for example: "${var}" 1840 var_terminators += self.whitespace + self.newline + self.var_delim 1841 1842 tmpl_list = [] # List of alternating tuples of text_blocks and 1843 # variable names (see format comment above) 1844 # This list will be returned to the caller. 
1845 1846 # sys.stderr.write('report_progress='+str(report_progress)) 1847 1848 prev_filename = self.infile 1849 prev_lineno = self.lineno 1850 var_prefix = '' 1851 var_descr_plist = [] 1852 var_suffix = '' 1853 text_block_plist = [] 1854 1855 done_reading = False 1856 1857 while not done_reading: 1858 1859 terminate_text = False 1860 terminate_var = False 1861 #delete_prior_escape = False 1862 1863 nextchar = self.read_char() 1864 1865 #sys.stderr.write(' ReadTemplate() nextchar=\''+nextchar+'\' at '+self.error_leader()+' esc='+str(escaped_state)+', pvar='+str(prev_char_delim)+', paren='+str(var_paren_depth)) 1866 1867 # Count newlines: 1868 if nextchar in self.newline: 1869 commented_state = False 1870 self.lineno += 1 1871 1872 elif ((nextchar in self.comment_skip_var) and 1873 (not escaped_state)): 1874 commented_state = True 1875 1876 # Check for end-of-file: 1877 if nextchar == '': 1878 1879 if escaped_state: 1880 raise InputError('Error: in ' + self.error_leader() + '\n\n' 1881 'File terminated immediately following an escape character.') 1882 terminate_var = True 1883 else: 1884 terminate_text = True 1885 1886 done_reading = True 1887 1888 # --- Now process the character: --- 1889 1890 # What we do next depends on which "mode" we are in. 1891 # If we are reading a regular text block (reading_var == False), 1892 # then we keep appending characters onto the end of "text_block", 1893 # checking for terminal characters, or variable delimiters. 1894 # If we are reading a variable name (reading_var == True), 1895 # then we append characters to the end of "var_descr_plist[]", 1896 # checking for variable terminator characters, as well as 1897 # parenthesis (some variables are surrounded by parenthesis). 
1898 1899 elif reading_var: 1900 1901 if nextchar in terminators: 1902 #sys.stdout.write(' ReadTemplate() readmode found terminator.\n') 1903 if escaped_state: 1904 # In this case, the '\' char was only to prevent terminating 1905 # string prematurely, so delete the '\' character. 1906 #delete_prior_escape = True 1907 del var_descr_plist[-1] 1908 var_descr_plist.append(nextchar) 1909 #escaped_state = False 1910 elif not ((var_paren_depth > 0) and 1911 (nextchar in self.var_close_paren)): 1912 terminate_var = True 1913 done_reading = True 1914 1915 if nextchar in self.var_open_paren: # eg: nextchar == '{' 1916 #sys.stdout.write(' ReadTemplate() readmode found {\n') 1917 if escaped_state: 1918 var_descr_plist.append(nextchar) 1919 #escaped_state = False 1920 else: 1921 # "${var}" is a valid way to refer to a variable 1922 if prev_char_delim: 1923 var_prefix += nextchar 1924 var_paren_depth = 1 1925 # "${{var}}" is also a valid way to refer to a variable, 1926 # (although strange), but "$va{r}" is not. 
1927 # Parenthesis (in bracketed variable names) must 1928 # immediately follow the '$' character (as in "${var}") 1929 elif var_paren_depth > 0: 1930 var_paren_depth += 1 1931 var_descr_plist.append(nextchar) 1932 1933 elif nextchar in self.var_close_paren: 1934 #sys.stdout.write(' ReadTemplate() readmode found }.\n') 1935 if escaped_state: 1936 # In this case, the '\' char was only to prevent 1937 # interpreting '}' as a variable suffix, 1938 # delete_prior_escape=True #so skip the '\' character 1939 del var_descr_plist[-1] 1940 var_descr_plist.append(nextchar) 1941 #escaped_state = False 1942 else: 1943 if var_paren_depth > 0: 1944 var_paren_depth -= 1 1945 if var_paren_depth == 0: 1946 var_suffix = nextchar 1947 terminate_var = True 1948 else: 1949 var_descr_plist.append(nextchar) 1950 1951 elif nextchar in var_terminators: 1952 #sys.stdout.write(' ReadTemplate() readmode found var_terminator \"'+nextchar+'\"\n') 1953 if (escaped_state or (var_paren_depth > 0)): 1954 # In that case ignore the terminator 1955 # and append it to the variable name 1956 if escaped_state: 1957 # In this case, the '\' char was only to prevent 1958 # interpreting nextchar as a variable terminator 1959 # delete_prior_escape = True # so skip the '\' 1960 # # character 1961 del var_descr_plist[-1] 1962 #escaped_state = False 1963 var_descr_plist.append(nextchar) 1964 else: 1965 terminate_var = True 1966 1967 elif nextchar in self.var_delim: # such as '$' 1968 #sys.stdout.write(' ReadTemplate() readmode found var_delim.\n') 1969 if escaped_state: 1970 # In this case, the '\' char was only to prevent 1971 # interpreting '$' as a new variable name 1972 # delete_prior_escape = True # so skip the '\' 1973 # character 1974 del var_descr_plist[-1] 1975 var_descr_plist.append(nextchar) 1976 #escaped_state = False 1977 else: 1978 prev_var_delim = True 1979 # Then we are processing a new variable name 1980 terminate_var = True 1981 else: 1982 var_descr_plist.append(nextchar) 1983 prev_char_delim = 
False 1984 1985 else: # begin else clause for "if reading_var:" 1986 1987 # Then we are reading a text_block 1988 1989 if nextchar in terminators: 1990 if escaped_state: 1991 # In this case, the '\' char was only to prevent terminating 1992 # string prematurely, so delete the '\' character. 1993 #delete_prior_escape = True 1994 del text_block_plist[-1] 1995 text_block_plist.append(nextchar) 1996 elif commented_state: 1997 text_block_plist.append(nextchar) 1998 else: 1999 terminate_text = True 2000 done_reading = True 2001 2002 elif nextchar in self.var_delim: # such as '$' 2003 if escaped_state: 2004 # In this case, the '\' char was only to prevent 2005 # interpreting '$' as a variable prefix. 2006 # delete_prior_escape=True #so delete the '\' 2007 # character 2008 del text_block_plist[-1] 2009 text_block_plist.append(nextchar) 2010 elif commented_state: 2011 text_block_plist.append(nextchar) 2012 else: 2013 prev_char_delim = True 2014 reading_var = True 2015 # NOTE TO SELF: IN THE FUTURE, USE GetVarName(self) 2016 # TO PARSE TEXT ASSOCIATED WITH A VARIABLE 2017 # THIS WILL SIMPLIFY THE CODE AND ENSURE CONSISTENCY. 2018 var_paren_depth = 0 2019 terminate_text = True 2020 else: 2021 text_block_plist.append(nextchar) 2022 # TO DO: use "list_of_chars.join()" instead of '+=' 2023 prev_char_delim = False # the previous character was not '$' 2024 2025 # Now deal with "remove_esc_preceeding". (See explanation above.) 
2026 if escaped_state and (nextchar in remove_esc_preceeding): 2027 if reading_var: 2028 #sys.stdout.write(' ReadTemplate: var_descr_str=\''+''.join(var_descr_plist)+'\'\n') 2029 assert(var_descr_plist[-2] in self.escape) 2030 del var_descr_plist[-2] 2031 else: 2032 #sys.stdout.write(' ReadTemplate: text_block=\''+''.join(text_block_plist)+'\'\n') 2033 assert(text_block_plist[-2] in self.escape) 2034 del text_block_plist[-2] 2035 2036 if terminate_text: 2037 #sys.stdout.write('ReadTemplate() appending: ') 2038 # sys.stdout.write(text_block) 2039 2040 # tmpl_list.append( [text_block, 2041 # ((prev_filename, prev_lineno), 2042 # (self.infile, self.lineno))] ) 2043 2044 if simplify_output: 2045 tmpl_list.append(''.join(text_block_plist)) 2046 else: 2047 tmpl_list.append(TextBlock(''.join(text_block_plist), 2048 OSrcLoc(prev_filename, prev_lineno))) 2049 #, OSrcLoc(self.infile, self.lineno))) 2050 if not done_reading: 2051 # The character that ended the text block 2052 # was a variable delimiter (like '$'), in which case 2053 # we should put it (nextchar) in the variable's prefix. 2054 var_prefix = nextchar 2055 else: 2056 var_prefix = '' 2057 var_descr_plist = [] 2058 var_suffix = '' 2059 prev_filename = self.infile 2060 prev_lineno = self.lineno 2061 del text_block_plist 2062 text_block_plist = [] 2063 # gc.collect() 2064 2065 elif terminate_var: 2066 # Print an error if we terminated in the middle of 2067 # an incomplete variable name: 2068 if prev_char_delim: 2069 raise InputError('Error: near ' + self.error_leader() + '\n\n' 2070 'Null variable name.') 2071 if var_paren_depth > 0: 2072 raise InputError('Error: near ' + self.error_leader() + '\n\n' 2073 'Incomplete bracketed variable name.') 2074 2075 var_descr_str = ''.join(var_descr_plist) 2076 2077 # Now check for variable format modifiers, 2078 # like python's ".rjust()" and ".ljust()". 2079 # If present, then put these in the variable suffix. 
2080 if ((len(var_descr_plist) > 0) and (var_descr_plist[-1] == ')')): 2081 #i = len(var_descr_plist)-1 2082 # while i >= 0: 2083 # if var_descr_plist[i] == '(': 2084 # break 2085 # i -= 1 2086 i = var_descr_str.rfind('(') 2087 if (((i - 6) >= 0) and 2088 ((var_descr_str[i - 6:i] == '.rjust') or 2089 (var_descr_str[i - 6:i] == '.ljust'))): 2090 var_suffix = ''.join( 2091 var_descr_plist[i - 6:]) + var_suffix 2092 #var_descr_plist = var_descr_plist[:i-6] 2093 var_descr_str = var_descr_str[:i - 6] 2094 2095 # Process any special characters in the variable name 2096 var_descr_str = EscCharStrToChar(var_descr_str) 2097 2098 # tmpl_list.append( [[var_prefix, var_descr_str, var_suffix], 2099 # (self.infile, self.lineno)] ) 2100 if simplify_output: 2101 tmpl_list.append(var_prefix + var_descr_str + var_suffix) 2102 else: 2103 tmpl_list.append(VarRef(var_prefix, var_descr_str, var_suffix, 2104 OSrcLoc(self.infile, self.lineno))) 2105 2106 # if report_progress: 2107 #sys.stderr.write(' parsed variable '+var_prefix+var_descr_str+var_suffix+'\n') 2108 2109 #sys.stdout.write('ReadTemplate() appending: ') 2110 #sys.stderr.write(var_prefix + var_descr_str + var_suffix) 2111 2112 del var_descr_plist 2113 del var_descr_str 2114 2115 prev_filename = self.infile 2116 prev_lineno = self.lineno 2117 var_prefix = '' 2118 var_descr_plist = [] 2119 var_suffix = '' 2120 # Special case: Variable delimiters like '$' 2121 # terminate the reading of variables, 2122 # but they also signify that a new 2123 # variable is being read. 2124 if nextchar in self.var_delim: 2125 # Then we are processing a new variable name 2126 prev_var_delim = True 2127 reading_var = True 2128 # NOTE TO SELF: IN THE FUTURE, USE GetVarName(self) 2129 # TO PARSE TEXT ASSOCIATED WITH A VARIABLE 2130 # THIS WILL SIMPLIFY THE CODE AND ENSURE CONSISTENCY. 
2131 var_paren_depth = 0 2132 var_prefix = nextchar 2133 2134 elif nextchar in self.var_close_paren: 2135 del text_block_plist 2136 text_block_plist = [] 2137 # gc.collect() 2138 prev_var_delim = False 2139 reading_var = False 2140 2141 else: 2142 # Generally, we don't want to initialize the next text block 2143 # with the empty string. Consider that whatever character 2144 # caused us to stop reading the previous variable and append 2145 # it to the block of text that comes after. 2146 del text_block_plist 2147 text_block_plist = [nextchar] 2148 # gc.collect() 2149 prev_var_delim = False 2150 reading_var = False 2151 2152 # If we reached the end of the template (and the user requests it), 2153 # then the terminal character can be included in the list 2154 # of text_blocks to be returned to the caller. 2155 if done_reading and keep_terminal_char: 2156 #sys.stdout.write('ReadTemplate() appending: \''+nextchar+'\'\n') 2157 # Here we create a new text block which contains only the 2158 # terminal character (nextchar). 2159 # tmpl_list.append( [nextchar, 2160 # ((self.infile, self.lineno), 2161 # (self.infile, self.lineno))] ) 2162 if simplify_output: 2163 tmpl_list.append(nextchar) 2164 else: 2165 tmpl_list.append(TextBlock(nextchar, 2166 OSrcLoc(self.infile, self.lineno))) 2167 #, OSrcLoc(self.infile, self.lineno))) 2168 2169 if escaped_state: 2170 escaped_state = False 2171 else: 2172 if nextchar in self.escape: 2173 escaped_state = True 2174 2175 #sys.stderr.write("*** TMPL_LIST0 = ***", tmpl_list) 2176 return tmpl_list # <- return value stored here 2177 2178 def GetParenExpr(self, prepend_str='', left_paren='(', right_paren=')'): 2179 """ GetParenExpr() is useful for reading in strings 2180 with nested parenthesis and spaces. 2181 This function can read in the entire string: 2182 2183 .trans(0, 10.0*sin(30), 10.0*cos(30)) 2184 2185 (Because I was too lazy to write this correctly...) 2186 Spaces are currently stripped out of the expression. 
2187 (...unless surrounded by quotes) The string above becomes: 2188 2189 ".trans(0,10.0*sin(30),10.0*cos(30))" 2190 2191 Sometimes the caller wants to prepend some text to the beginning 2192 of the expression (which may contain parenthesis). For this 2193 reason, an optional first argument ("prepend_str") can be 2194 provided. By default it is empty. 2195 2196 """ 2197 2198 src_loc_begin = SrcLoc(self.infile, self.lineno) 2199 orig_wordterm = self.wordterminators 2200 self.wordterminators = self.wordterminators.replace( 2201 left_paren, '').replace(right_paren, '') 2202 2203 token = self.get_token() 2204 if ((token == '') or 2205 (token == self.eof)): 2206 return prepend_str 2207 2208 expr_str = prepend_str + token 2209 2210 # if (expr_str.find(left_paren) == -1): 2211 # raise InputError('Error near or before '+self.error_leader()+'\n' 2212 # 'Expected an open-paren (\"'+prepend_str+left_paren+'\") before this point.\n') 2213 # return expr_str 2214 2215 paren_depth = expr_str.count(left_paren) - expr_str.count(right_paren) 2216 while ((len(expr_str) == 0) or (paren_depth > 0)): 2217 token = self.get_token() 2218 if ((type(token) is not str) or 2219 (token == '')): 2220 raise InputError('Error somewhere between ' + 2221 self.error_leader(src_loc_begin.infile, 2222 src_loc_begin.lineno) 2223 + 'and ' + self.error_leader() + '\n' 2224 'Invalid expression: \"' + expr_str[0:760] + '\"') 2225 expr_str += token 2226 paren_depth = expr_str.count( 2227 left_paren) - expr_str.count(right_paren) 2228 if (paren_depth != 0): 2229 raise InputError('Error somewhere between ' + 2230 self.error_leader(src_loc_begin.infile, 2231 src_loc_begin.lineno) 2232 + 'and ' + self.error_leader() + '\n' 2233 'Invalid expression: \"' + expr_str[0:760] + '\"') 2234 self.wordterminators = orig_wordterm 2235 return expr_str 2236 2237 2238if __name__ == '__main__': 2239 if len(sys.argv) == 1: 2240 lexer = TtreeShlex() 2241 else: 2242 file = sys.argv[1] 2243 lexer = TtreeShlex(open(file), file) 
2244 while 1: 2245 tt = lexer.get_token() 2246 if tt: 2247 sys.stderr.write("Token: " + repr(tt)) 2248 else: 2249 break 2250