1# -*- coding: iso-8859-1 -*-
2### -*- coding: utf-8 -*-
3
4# Authors: Eric S. Raymond, 21 Dec 1998
5#          Andrew Jewett (jewett.aij at g mail)
6# LICENSE: The PSF license:
7# https://docs.python.org/3/license.html
8# The PSF license is compatible with the GPL license.  It is not a copyleft
9# license.  It is apparently similar to the BSD and MIT licenses.
10#
11# Contributions:
12# Module and documentation by Eric S. Raymond, 21 Dec 1998
13# Input stacking and error message cleanup added by ESR, March 2000
14# push_source() and pop_source() made explicit by ESR, January 2001.
15# Posix compliance, split(), string arguments, and
16# iterator interface by Gustavo Niemeyer, April 2003.
17# Unicode support hack ("wordterminators") and numerous other hideous
18# ttree-specific hacks added by Andrew Jewett September 2011.
19
20
21"""A lexical analyzer class for simple shell-like syntaxes.
22   This version has been modified slightly to work better with unicode.
23   It was forked from the version of shlex that ships with python 3.2.2.
24   A few minor features and functions have been added.  -Andrew Jewett 2011 """
25
26
27import os.path
28import sys
29from collections import deque
30import re
31import fnmatch
32import string
33#import gc
34
35
36try:
37    from cStringIO import StringIO
38except ImportError:
39    try:
40        from StringIO import StringIO
41    except ImportError:
42        from io import StringIO
43
44__all__ = ["TtreeShlex",
45           "split",
46           "LineLex",
47           "SplitQuotedString",
48           "ExtractVarName",
49           "GetVarName",
50           "EscCharStrToChar",
51           "SafelyEncodeString",
52           "RemoveOuterQuotes",
53           "MaxLenStr",
54           "VarNameToRegex",
55           "HasRE",
56           "HasWildcard",
57           "MatchesPattern",
58           "InputError",
59           "ErrorLeader",
60           "SrcLoc",
61           "OSrcLoc",
62           "TextBlock",
63           "VarRef",
64           "VarNPtr",
65           "VarBinding",
66           "SplitTemplate",
67           "SplitTemplateMulti",
68           "TableFromTemplate",
69           "ExtractCatName",
70           #"_TableFromTemplate",
71           #"_DeleteLineFromTemplate",
72           "DeleteLinesWithBadVars",
73           "TemplateLexer"]
74
75
76class TtreeShlex(object):
77    """ A lexical analyzer class for simple shell-like syntaxes.
78    TtreeShlex is a backwards-compatible version of python's standard shlex
79    module. It has the additional member: "self.wordterminators", which
80    overrides the "self.wordchars" member.  This enables better handling of
81    unicode characters by allowing a much larger variety of characters to
82    appear in words or tokens parsed by TtreeShlex.
83
84    """
85
86    def __init__(self,
87                 instream=None,
88                 infile=None,
89                 posix=False):
90        if isinstance(instream, str):
91            instream = StringIO(instream)
92        if instream is not None:
93            self.instream = instream
94            self.infile = infile
95        else:
96            self.instream = sys.stdin
97            self.infile = None
98        self.posix = posix
99        if posix:
100            self.eof = None
101        else:
102            self.eof = ''
103        self.commenters = '#'
        self.wordchars = ('abcdefghijklmnopqrstuvwxyz'
                          'ABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789_')
109
110        if self.posix:
            self.wordchars += ('ßàáâãäåæçèéêëìíîïðñòóôõöøùúûüýþÿ'
                               'ÀÁÂÃÄÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖØÙÚÛÜÝÞ')
113
114        self.wordterminators = set([])
115        self.prev_space_terminator = ''
116        self.whitespace = ' \t\r\f\n'
117        self.whitespace_split = False
118        self.quotes = '\'"'
119        self.escape = '\\'
120        self.escapedquotes = '"'
121        self.operators = '='  #binary numeric operators like +-*/ might be added
122        self.state = ' '
123        self.pushback = deque()
124        self.lineno = 1
125        self.debug = 0
126        self.token = ''
127        self.filestack = deque()
128        # self.source_triggers
129        # are tokens which allow the seamless insertion of other
130        # files into the file being read.
131        self.source_triggers = set(['source'])
132        self.source_triggers_x = set([])
        # self.source_triggers_x is a subset of self.source_triggers.
        # For these triggers, file inclusion is exclusive:
        # in other words, the file is only included
        # if it has not been included already.  This is done by keeping
        # track of which files have been included so far
        # (see self.source_files_restricted).
138        self.source_files_restricted = set([])
139        self.include_path = []
140        if 'TTREE_PATH' in os.environ:
141            include_path_list = os.environ['TTREE_PATH'].split(':')
142            self.include_path += [d for d in include_path_list if len(d) > 0]
143        if self.debug:
144            sys.stderr.write('TtreeShlex: reading from %s, line %d'
145                  % (self.instream, self.lineno))
146        self.end_encountered = False
147
148    @staticmethod
149    def _belongs_to(char, include_chars, exclude_chars):
150        if ((not exclude_chars) or (len(exclude_chars)==0)):
151            return char in include_chars
152        else:
153            return char not in exclude_chars
154
155    def push_raw_text(self, text):
        """Push a block of text back onto the character stack which is consumed
        by the read_char() (and ReadLine()) methods.  The text is pushed back
        one character at a time, so that it will be re-read in its original
        order the next time characters are requested.
        The "self.lineno" counter is also adjusted, depending on the number
        of newline characters in "text".
162            Do not strip off the newline, or other line terminators
163            at the end of the text block before using push_raw_text()!
164
165        """
166        if self.debug >= 1:
167            sys.stderr.write("TtreeShlex: pushing token " + repr(text))
168        for c in reversed(text):
169            self.pushback.appendleft(c)
170            if c == '\n':
171                self.lineno -= 1
172        if len(text) > 0:
173            self.end_encountered = False
174
175    def push_token(self, text):
176        "Push a token onto the stack popped by the get_token method"
177        self.push_raw_text(text + self.prev_space_terminator)
178
179    def push_source(self, newstream, newfile=None):
180        "Push an input source onto the lexer's input source stack."
181        if isinstance(newstream, str):
182            newstream = StringIO(newstream)
183        self.filestack.appendleft((self.infile, self.instream, self.lineno))
184        self.infile = newfile
185        self.instream = newstream
186        self.lineno = 1
187        if self.debug:
188            if newfile is not None:
189                sys.stderr.write('TtreeShlex: pushing to file %s' % (self.infile,))
190            else:
191                sys.stderr.write('TtreeShlex: pushing to stream %s' % (self.instream,))
192
193    def pop_source(self):
194        "Pop the input source stack."
195        self.instream.close()
196        (self.infile, self.instream, self.lineno) = self.filestack.popleft()
197        if self.debug:
198            sys.stderr.write('TtreeShlex: popping to %s, line %d'
199                  % (self.instream, self.lineno))
200        self.state = ' '
201
202    def get_token(self):
203        "Get a token from the input stream (or from stack if it's nonempty)"
204        #### #CHANGING: self.pushback is now a stack of characters, not tokens
205        #### if self.pushback:
206        ####    tok = self.pushback.popleft()
207        ####    if self.debug >= 1:
208        ####        sys.stderr.write("TtreeShlex: popping token " + repr(tok))
209        ####    return tok
210        #### No pushback.  Get a token.
211        raw = self.read_token()
212        # Handle inclusions
213        if self.source_triggers is not None:
214            while raw in self.source_triggers:
215                fname = self.read_token()
216                spec = self.sourcehook(fname)
217                if spec:
218                    (newfile, newstream) = spec
219                    if ((raw not in self.source_triggers_x) or
220                            (newfile not in self.source_files_restricted)):
221                        self.push_source(newstream, newfile)
222                        if raw in self.source_triggers_x:
223                            self.source_files_restricted.add(newfile)
224                    else:
225                        if self.debug >= 1:
226                            sys.stderr.write(
227                                '\ndebug warning: duplicate attempt to import file:\n               \"' + newfile + '\"\n')
228                raw = self.get_token()
229
230        # Maybe we got EOF instead?
231        while raw == self.eof:
232            if not self.filestack:
233                return self.eof
234            else:
235                self.pop_source()
236                raw = self.get_token()
237        # Neither inclusion nor EOF
238        if self.debug >= 1:
239            if raw != self.eof:
240                sys.stderr.write("TtreeShlex: token=" + repr(raw))
241            else:
242                sys.stderr.write("TtreeShlex: token=EOF")
243
244        if raw == self.eof:
245            self.end_encountered = True
246
247        return raw
248
249    def read_char(self):
250        if self.pushback:
251            nextchar = self.pushback.popleft()
252            assert((type(nextchar) is str) and (len(nextchar)==1))
253        else:
254            nextchar = self.instream.read(1)
255        return nextchar
256
257    def read_token(self):
258        self.prev_space_terminator = ''
259        quoted = False
260        escapedstate = ' '
261        while True:
262            #### self.pushback is now a stack of characters, not tokens
263            nextchar = self.read_char()
264            if nextchar == '\n':
265                self.lineno = self.lineno + 1
266            if self.debug >= 3:
                sys.stderr.write("TtreeShlex: in state " + repr(self.state) +
                                 " I see character: " + repr(nextchar) + "\n")
269            if self.state is None:
270                self.token = ''        # past end of file
271                break
272            elif self.state == ' ':
273                if not nextchar:
274                    self.state = None  # end of file
275                    break
276                elif nextchar in self.whitespace:
277                    if self.debug >= 2:
278                        sys.stderr.write("TtreeShlex: I see whitespace in whitespace state")
279                    if self.token or (self.posix and quoted):
280                        # Keep track of which whitespace
281                        # character terminated the token.
282                        self.prev_space_terminator = nextchar
283                        break   # emit current token
284                    else:
285                        continue
286                elif nextchar in self.commenters:
287                    self.instream.readline()
288                    self.lineno = self.lineno + 1
289                elif self.posix and nextchar in self.escape:
290                    escapedstate = 'a'
291                    self.state = nextchar
292                elif TtreeShlex._belongs_to(nextchar,
293                                            self.wordchars,
294                                            self.wordterminators):
295                    self.token = nextchar
296                    self.state = 'a'
297                elif nextchar in self.quotes:
298                    if not self.posix:
299                        self.token = nextchar
300                    self.state = nextchar
301                elif self.whitespace_split:
302                    self.token = nextchar
303                    self.state = 'a'
304                else:
305                    self.token = nextchar
306                    if self.token or (self.posix and quoted):
307                        break   # emit current token
308                    else:
309                        continue
310            elif self.state in self.quotes:
311                quoted = True
312                if not nextchar:      # end of file
313                    if self.debug >= 2:
314                        sys.stderr.write("TtreeShlex: I see EOF in quotes state")
315                    # XXX what error should be raised here?
316                    raise ValueError("Error at or before " + self.error_leader() + "\n"
317                                     "      No closing quotation.")
318                if nextchar == self.state:
319                    if not self.posix:
320                        self.token = self.token + nextchar
321                        self.state = ' '
322                        break
323                    else:
324                        self.state = 'a'
325                elif self.posix and nextchar in self.escape and \
326                        self.state in self.escapedquotes:
327                    escapedstate = self.state
328                    self.state = nextchar
329                else:
330                    self.token = self.token + nextchar
331            elif self.state in self.escape:
332                if not nextchar:      # end of file
333                    if self.debug >= 2:
334                        sys.stderr.write("TtreeShlex: I see EOF in escape state")
335                    # What error should be raised here?
336                    raise InputError('File terminated immediately following an escape character.')
337                # In posix shells, only the quote itself or the escape
338                # character may be escaped within quotes.
339                if escapedstate in self.quotes and \
340                   nextchar != self.state and nextchar != escapedstate:
341                    self.token = self.token + self.state
342                self.token = self.token + nextchar
343                self.state = escapedstate
344            elif self.state == 'a':
345                if not nextchar:
346                    self.state = None   # end of file
347                    break
348                elif nextchar in self.whitespace:
349                    if self.debug >= 2:
350                        sys.stderr.write("TtreeShlex: I see whitespace in word state")
351                    self.state = ' '
352                    if self.token or (self.posix and quoted):
353                        # Keep track of which whitespace
354                        # character terminated the token.
355                        self.prev_space_terminator = nextchar
356                        break   # emit current token
357                    else:
358                        continue
359                elif nextchar in self.commenters:
360                    comment_contents = self.instream.readline()
361                    self.lineno = self.lineno + 1
362                    if self.posix:
363                        self.state = ' '
364                        if self.token or (self.posix and quoted):
365                            # Keep track of which character(s) terminated
366                            # the token (including whitespace and comments).
367                            self.prev_space_terminator = nextchar + comment_contents
368                            break   # emit current token
369                        else:
370                            continue
371                elif self.posix and nextchar in self.quotes:
372                    self.state = nextchar
373                elif self.posix and nextchar in self.escape:
374                    escapedstate = 'a'
375                    self.state = nextchar
376                elif (TtreeShlex._belongs_to(nextchar,
377                                             self.wordchars,
378                                             self.wordterminators)
379                      or (nextchar in self.quotes)
380                      or (self.whitespace_split)):
381                    self.token = self.token + nextchar
382                else:
383                    self.pushback.appendleft(nextchar)
384                    if self.debug >= 2:
385                        sys.stderr.write("TtreeShlex: I see punctuation in word state")
386                    self.state = ' '
387                    if self.token:
388                        break   # emit current token
389                    else:
390                        continue
391        result = self.token
392        self.token = ''
393        if self.posix and not quoted and result == '':
394            result = None
395        if self.debug > 1:
396            if result:
397                sys.stderr.write("TtreeShlex: raw token=" + repr(result))
398            else:
399                sys.stderr.write("TtreeShlex: raw token=EOF")
400        return result
401
402    def sourcehook(self, newfile):
403        "Hook called on a filename to be sourced."
404        newfile = RemoveOuterQuotes(newfile)
405        # This implements cpp-like semantics for relative-path inclusion.
406        newfile_full = newfile
407        if isinstance(self.infile, str) and not os.path.isabs(newfile):
408            newfile_full = os.path.join(os.path.dirname(self.infile), newfile)
409        try:
410            f = open(newfile_full, "r")
411        except IOError:
412            # If not found,
413            err = True
414            # ...then check to see if the file is in one of the
415            # directories in the self.include_path list.
416            for d in self.include_path:
417                newfile_full = os.path.join(d, newfile)
418                try:
419                    f = open(newfile_full, "r")
420                    err = False
421                    break
422                except IOError:
423                    err = True
424            if err:
425                raise InputError('Error at ' + self.error_leader() + '\n'
426                                 '       unable to open file \"' + newfile + '\"\n'
427                                 '       for reading.\n')
428        return (newfile, f)
429
430    def error_leader(self, infile=None, lineno=None):
431        "Emit a C-compiler-like, Emacs-friendly error-message leader."
432        if infile is None:
433            infile = self.infile
434        if lineno is None:
435            lineno = self.lineno
436        return "\"%s\", line %d: " % (infile, lineno)
437
438    def __iter__(self):
439        return self
440
441    def __next__(self):
442        token = self.get_token()
443        if token == self.eof:
444            raise StopIteration
445        return token
446
447    def __bool__(self):
448        return not self.end_encountered
449
450    # For compatibility with python 2.x, I must also define:
451    def __nonzero__(self):
452        return self.__bool__()
453
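# An illustrative (hypothetical) usage sketch:  "wordterminators" inverts the
# usual "wordchars" logic -- any character NOT listed is allowed inside a
# token, which is convenient for unicode input.  Assuming this module has
# been imported, a session like the following should work:
#
#     lex = TtreeShlex('atoms{H2O} pos=1.0')
#     lex.wordterminators = set(' \t\r\f\n{}=')
#     # repeated calls to lex.get_token() should now yield the tokens:
#     #   'atoms', '{', 'H2O', '}', 'pos', '=', '1.0'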
454
455# The split() function was originally from shlex
456# It is included for backwards compatibility.
457def split(s, comments=False, posix=True):
458    lex = TtreeShlex(s, posix=posix)
459    lex.whitespace_split = True
460    if not comments:
461        lex.commenters = ''
462    return list(lex)
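
# For example (an illustrative sketch), split() behaves like the standard
# shlex.split() function:
#
#     split('mol water.lt "TIP3P water"')
#     # --> ['mol', 'water.lt', 'TIP3P water']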
463
464
465##################### NEW ADDITIONS (may be removed later) #################
466
467#"""
468#  -- linelex.py --
469# linelex.py defines the LineLex class, which inherits from, and further
470# augments the capabilities of TtreeShlex by making it easier to parse
471# individual lines one at a time.  (The original shlex's "source" inclusion
472# ability still works when reading entire lines, and lines are still counted.)
473#
474#"""
475
476#import sys
477
478
479class InputError(Exception):
480    """ A generic exception object containing a string for error reporting.
481        (Raising this exception implies that the caller has provided
482         a faulty input file or argument.)
483
484    """
485
486    def __init__(self, err_msg):
487        self.err_msg = err_msg
488
489    def __str__(self):
490        return self.err_msg
491
492    def __repr__(self):
493        return str(self)
494
495
496def ErrorLeader(infile, lineno):
497    return '\"' + infile + '\", line ' + str(lineno)
498
499
500class SrcLoc(object):
501    """ SrcLoc is essentially nothing more than a 2-tuple containing the name
502    of a file (str) and a particular line number inside that file (an integer).
503
504    """
505    __slots__ = ["infile", "lineno"]
506
507    def __init__(self, infile='', lineno=-1):
508        self.infile = infile
509        self.lineno = lineno
510
511
512def SplitQuotedString(string,
513                      quotes='\'\"',
514                      delimiters=' \t\r\f\n',
515                      escape='\\',
516                      comment_char='#',
517                      endquote=None):
518    tokens = []
519    token = ''
520    reading_token = True
521    escaped_state = False
522    quote_state = None
523    for c in string:
524
525        if (c in comment_char) and (not escaped_state) and (quote_state == None):
526            tokens.append(token)
527            return tokens
528
529        elif (c in delimiters) and (not escaped_state) and (quote_state == None):
530            if reading_token:
531                tokens.append(token)
532                token = ''
533                reading_token = False
534
535        elif c in escape:
536            if escaped_state:
537                token += c
538                reading_token = True
539                escaped_state = False
540            else:
541                escaped_state = True
542                # and leave c (the '\' character) out of token
543        elif (c == quote_state) and (not escaped_state) and (quote_state != None):
544            quote_state = None
545            if include_endquote:
546                token += c
547        elif (c in quotes) and (not escaped_state):
548            if quote_state == None:
549                if endquote != None:
550                    quote_state = endquote
551                else:
552                    quote_state = c
553                # Now deal with strings like
554                #    a "b" "c d" efg"h i j"
555                # Assuming quotes='"', then we want this to be split into:
556                #    ['a', 'b', 'c d', 'efg"h i j"']
557                # ...in other words, include the end quote if the token did
558                #    not begin with a quote
559                include_endquote = False
560                if token != '':
561                    # if this is not the first character in the token
562                    include_endquote = True
563            token += c
564            reading_token = True
565        else:
566            if (c == 'n') and (escaped_state == True):
567                c = '\n'
568            elif (c == 't') and (escaped_state == True):
569                c = '\t'
570            elif (c == 'r') and (escaped_state == True):
571                c = '\r'
572            elif (c == 'f') and (escaped_state == True):
573                c = '\f'
574            token += c
575            reading_token = True
576            escaped_state = False
577
578    if len(string) > 0:
579        tokens.append(token)
580    return tokens
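
# A brief illustration of SplitQuotedString():  unquoted whitespace separates
# tokens, while whitespace inside a quoted region is kept together with the
# surrounding token (a sketch, not part of the original code):
#
#     SplitQuotedString('name="my file.txt" rest')
#     # --> ['name="my file.txt"', 'rest']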
581
582
583
584
585def GetVarName(lex):
    """ Read a variable name from the lexer (lex) and return it as a string.
        For example, reading 'atom:A  ' returns 'atom:A', and reading
        '{/atom:A B/C/../D }ABC ' returns '/atom:A B/C/../D '.
        (The surrounding '{' and '}' characters, if present, are consumed
        but are not included in the returned string.  Use ExtractVarName()
        if you also need the text on either side of the variable name.)
    """
592    escape = '\''
593    lparen = '{'
594    rparen = '}'
595    if hasattr(lex, 'escape'):
596        escape = lex.escape
597    if hasattr(lex, 'var_open_paren'):
598        lparen = lex.var_open_paren
599    if hasattr(lex, 'var_close_paren'):
600        rparen = lex.var_close_paren
601
602    nextchar = lex.read_char()
603    # Skip past the left-hand side paren '{'
604    paren_depth = 0
605    escaped = False
606    if nextchar == lparen:
607        paren_depth = 1
608    elif nextchar in lex.escape:
609        escaped = True
610    elif (hasattr(lex, 'wordterminators') and
611          (nextchar in lex.wordterminators)):
612        lex.push_raw_text(nextchar)
613        return ''
614    else:
615        lex.push_raw_text(nextchar)
616    # Now read the variable name:
617    var_name_l = []
618    while lex:
619        nextchar=lex.read_char()
620        if nextchar == '':
621            break
622        elif nextchar == '\n':
623            lex.lineno += 1
624            if paren_depth > 0:
625                var_name_l.append(nextchar)
626            else:
627                lex.push_raw_text(nextchar)
628                break
629        elif escaped:
630            var_name_l.append(nextchar)
631            escaped = False
632        elif nextchar in lex.escape:
633            escaped = True
634        elif nextchar == lparen:
635            paren_depth += 1
636            if (hasattr(lex, 'wordterminators') and
637                (nextchar in lex.wordterminators)):
638                lex.push_raw_text(nextchar)
639                break
640            else:
641                var_name_l.append(nextchar)
642        elif nextchar == rparen:
643            paren_depth -= 1
644            if paren_depth == 0:
645                break
646            elif (hasattr(lex, 'wordterminators') and
647                  (nextchar in lex.wordterminators)):
648                lex.push_raw_text(nextchar)
649                break
650            else:
651                var_name_l.append(nextchar)
652        elif paren_depth > 0:
653            var_name_l.append(nextchar)
654            escaped = False
655        elif nextchar in lex.whitespace:
656            lex.push_raw_text(nextchar)
657            break
658        elif (hasattr(lex, 'wordterminators') and
659              (nextchar in lex.wordterminators) and
660              (paren_depth == 0)):
661            lex.push_raw_text(nextchar)
662            break
663        elif nextchar in lex.commenters:
664            lex.instream.readline()
665            lex.lineno += 1
666            break
667        else:
668            var_name_l.append(nextchar)
669            escaped = False
670    var_name = ''.join(var_name_l)
671    return var_name
672
673
674
675def ExtractVarName(text,
676                   commenters = '#',
677                   whitespace = ' \t\r\f\n'):
678    """ Read a string like 'atom:A  '  or  '{/atom:A B/C/../D }ABC '
679        and return ('','atom:A','  ')  or  ('{','/atom:A B/C/../D ','}ABC')
680        These are 3-tuples containing the portion of the text containing
681        only the variable's name (assumed to be within the text),
682        ...in addition to the text on either side of the variable name.
683    """
684    ibegin = 0
685    left_paren = ''
686    if text[0] == '{':
687        ibegin = 1
688        left_paren = text[0] #(GetVarName() strips the leading '{' character)
    # The best way to ensure consistency with other code is to use
690    # lex.GetVarName() to figure out where the variable name ends.
691    lex = TtreeShlex(StringIO(text))
692    var_name = GetVarName(lex)
693    # Any text following the end of the variable name should be returned as well
694    text_after_list = []
695    if left_paren:
696        text_after_list.append('}') #(GetVarName() strips the trailing '}' char)
697    while lex:
698        c = lex.read_char()
699        if c == '':
700            break
701        text_after_list.append(c)
702    text_after = ''.join(text_after_list)
703    return (left_paren, var_name, text_after)
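
# For example (illustrative, using the case from the docstring above):
#
#     ExtractVarName('atom:A  ')
#     # --> ('', 'atom:A', '  ')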
704
705
706def EscCharStrToChar(s_in, escape='\\'):
707    """
708    EscCharStrToChar() replaces any escape sequences
709    in a string with their 1-character equivalents.
710
711    """
712    assert(len(escape) > 0)
713    out_lstr = []
714    escaped_state = False
715    for c in s_in:
716        if escaped_state:
717            if (c == 'n'):
718                out_lstr.append('\n')
719            elif (c == 't'):
720                out_lstr.append('\t')
721            elif (c == 'r'):
722                out_lstr.append('\r')
723            elif (c == 'f'):
724                out_lstr.append('\f')
725            elif (c == '\''):
726                out_lstr.append('\'')
727            elif (c == '\"'):
728                out_lstr.append('\"')
729            elif c in escape:
730                out_lstr.append(c)
731            else:
732                out_lstr.append(escape + c)  # <- keep both characters
733            escaped_state = False
734        else:
735            if c in escape:
736                escaped_state = True
737            else:
738                out_lstr.append(c)
739
740    return ''.join(out_lstr)
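
# For example (illustrative):
#
#     EscCharStrToChar('a\\tb\\nc')   # <- contains literal backslashes
#     # --> 'a\tb\nc'                 # <- now contains a real tab and newline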
741
742
743def SafelyEncodeString(in_str,
744                       quotes='\'\"',
745                       delimiters=' \t\r\f\n',
746                       escape='\\',
747                       comment_char='#'):
748    """
    SafelyEncodeString(in_str) scans through the input string (in_str)
    and returns a new string in which problematic characters
    (like newlines, tabs, and quotes) are replaced by their two-character
    backslashed equivalents (like '\n', '\t', '\'', '\"', etc.).
    The escape character is the backslash by default, but it can be
    overridden to create custom escape sequences
    (although this does not affect the encoding of characters like '\n', '\t').
756
757    """
758    assert(len(escape) > 0)
759    out_lstr = []
760    use_outer_quotes = False
761    for c in in_str:
762        if (c == '\n'):
763            c = '\\n'
764        elif (c == '\t'):
765            c = '\\t'
766        elif (c == '\r'):
767            c = '\\r'
768        elif (c == '\f'):
769            c = '\\f'
770        elif c in quotes:
771            c = escape[0] + c
772        elif c in escape:
773            c = c + c
774        elif c in delimiters:
775            use_outer_quotes = True
776        # hmm... that's all that comes to mind.  Did I leave anything out?
777        out_lstr.append(c)
778
779    if use_outer_quotes:
780        out_lstr = ['\"'] + out_lstr + ['\"']
781
782    return ''.join(out_lstr)
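
# A small round-trip sketch (illustrative):  decoding the result with
# RemoveOuterQuotes() and EscCharStrToChar() recovers the original string:
#
#     s = SafelyEncodeString('a b\tc')        # --> '"a b\\tc"'
#     EscCharStrToChar(RemoveOuterQuotes(s))  # --> 'a b\tc'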
783
784
785def RemoveOuterQuotes(text, quotes='\"\''):
786    if ((len(text) >= 2) and (text[0] in quotes) and (text[-1] == text[0])):
787        return text[1:-1]
788    else:
789        return text
790
791
792def MaxLenStr(s1, s2):
793    if len(s2) > len(s1):
794        return s2
795    else:
796        return s1
797
798
799def VarNameToRegex(s):
800    """
    Returns the portion of a TTREE-style variable name (eg "@atom:re.C[1-5]")
    that corresponds to a regular expression (eg "C[1-5]").  A variable name
    is assumed to encode a regular expression if "re." appears immediately
    after a ':' or '/' character.
    If so, the text in s (excluding "re.") is assumed to be a regular expression
    and is returned to the caller.
    If not, the empty string ('') is returned.
    If the first or second character is a '{', and if the final character
    is '}', they will be deleted.  Consequently:
      VarNameToRegex('@atom:C') returns ''
      VarNameToRegex('@atom:re.C[1-5]') returns '@atom:C[1-5]'
      VarNameToRegex('@{/atom:re.C[1-5]}') returns '@/atom:C[1-5]'
      VarNameToRegex('@bond:AB') returns ''
      VarNameToRegex('@bond:re.A*B') returns '@bond:A*B'
      VarNameToRegex('bond:re.A*B') returns 'bond:A*B'
      VarNameToRegex('{bond:re.A*B}') returns 'bond:A*B'
      VarNameToRegex('@{bond:re.A*B}') returns '@bond:A*B'
818    """
819    # First, deal with parenthesis {}
820    iparen_L = s.find('{')
821    iparen_R = s.rfind('}')
822    if (((iparen_L == 0) or (iparen_L == 1)) and (iparen_R == len(s)-1)):
823        optional_char = ''
824        if iparen_L == 1:
825            optional_char = s[0]
826        s = optional_char + s[iparen_L+1:iparen_R]
827    # Now check to see if the remaining string contains 're.' or ':re.'
828    icolon = s.find(':')
    # If 're.' is not found immediately following a ':' character
    # or a '/' character,
    # then there is no regular expression.  In that case, return ''
833    ire = s.find('re.')
834    if ((ire == -1) or
835        (not ((ire > 0) and ((s[ire-1] == ':') or (s[ire-1] == '/'))))):
836        return ''
837    return s[0:ire] + s[ire+3:]
838
839
840def HasRE(pat):
841    """
    Returns true if the variable name (pat) contains a regular expression
    (ie. if VarNameToRegex(pat) returns a non-empty string).
843    """
844    return len(VarNameToRegex(pat)) > 0
845
846
847def HasWildcard(pat):
848    """
849    Returns true if a string (pat) contains a '*' or '?' character.
850
851    """
852    return (pat.find('*') != -1) or (pat.find('?') != -1)
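
# For example (illustrative):
#
#     HasRE('@atom:re.C[1-5]')   # --> True   (contains an "re." marker)
#     HasRE('@atom:CA')          # --> False
#     HasWildcard('C*')          # --> True
#     HasWildcard('CA')          # --> False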
853
854
855# def HasWildcard(pat):
856#    """
857#    Returns true if a string (pat) contains a non-backslash-protected
858#    * or ? character.
859#
860#    """
861#    N=len(pat)
862#    i=0
863#    while i < N:
864#        i = pat.find('*', i, N)
865#        if i == -1:
866#            break
867#        elif (i==0) or (pat[i-1] != '\\'):
868#            return True
869#        i += 1
870#    i=0
871#    while i < N:
872#        i = pat.find('?', i, N)
873#        if i == -1:
874#            break
875#        elif (i==0) or (pat[i-1] != '\\'):
876#            return True
877#        i += 1
878#    return False
879
880
881def MatchesPattern(s, pattern):
882    if type(pattern) is str:
883        # old code:
884        # if ((len(s) > 1) and (s[0] == '/') and (s[-1] == '/'):
885        #    re_string = p[1:-1]  # strip off the slashes '/' and '/'
886        #    if not re.search(re_string, s):
887        #        return False
888        # new code:
889        #    uses precompiled regular expressions (See "pattern.search" below)
890        if HasWildcard(pattern):
891            if not fnmatch.fnmatchcase(s, pattern):
892                return False
893        elif s != pattern:
894            return False
895    else:
896        #assert(type(p) is _sre.SRE_Match)
897        # I assume pattern = re.compile(some_reg_expr)
898        if not pattern.search(s):
899            return False
900    return True
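
# An illustrative sketch of the three kinds of patterns MatchesPattern()
# accepts: an exact string, a wildcard string, and a precompiled regex:
#
#     MatchesPattern('CA', 'CA')                  # --> True  (exact match)
#     MatchesPattern('C2', 'C?')                  # --> True  (fnmatch wildcard)
#     MatchesPattern('CA', re.compile('C[A-Z]'))  # --> True  (regular expression)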
901
902
903def MatchesAll(multi_string, pattern):
904    assert(len(multi_string) == len(pattern))
905    for i in range(0, len(pattern)):
906        if not MatchesPattern(multi_string[i], pattern[i]):
907            return False
908    return True
909
910
911class LineLex(TtreeShlex):
912    """ This class extends the TtreeShlex module (a slightly modified
913    version of the python 3.2.2 version of shlex).  LineLex has the
914    ability to read one line at a time (in addition to one token at a time).
915    (Many files and scripts must be parsed one line at a time instead of one
916     token at a time.  In these cases, the whitespace position also matters.)
917
918    Arguably, this class might not be necessary.
919    I could get rid of this class completely.  That would be nice.  To do that
920    we would need to augment and generalize shlex's get_token() member function
921    to make it read lines, not just tokens.  Of course, you can always
922    change the wordchars (or wordterminators).  Even so, there are two other
923    difficulties using the current version of shlex.get_token() to read lines:
    1) File inclusion happens whenever the beginning of a line/token matches one
       of the "source_triggers" (get_token() requires the entire token to match).
926    2) Lines ending in a special character (by default the backslash character)
927       continue on to the next line.
928    This code seems to work on our test files, but I'm sure there are bugs.
929    Andrew 2012-3-25
930
931    """
932
933    def __init__(self,
934                 instream=None,
935                 infile=None,
936                 posix=False):
937        TtreeShlex.__init__(self, instream, infile, posix)
938        self.line_terminators = '\n'
939        self.line_extend_chars = '\\'
940        self.skip_comments_during_readline = True
941
942    def _StripComments(self, line):
943        if self.skip_comments_during_readline:
944            for i in range(0, len(line)):
945                if ((line[i] in self.commenters) and
946                        ((i == 0) or (line[i - 1] not in self.escape))):
947                    return line[:i]
948        return line
949
950    def _ReadLine(self,
951                  recur_level=0):
952        """
953        This function retrieves a block of text, halting at a
954        terminal character.  Escape sequences are respected.
955        The self.lineno (newline counter) is also maintained.
956
        The main difference between ReadLine() and get_token()
958        is the way they handle the "self.source_triggers" member.
959        Both Readline() and get_token() insert text from other files when they
960        encounter a string in "self.source_triggers" in the text they read.
961        However ReadLine() ONLY inserts text from other files if the token which
962        matches with self.source_triggers appears at the beginning of the line.
        get_token() inserts text only when the entire token matches a source trigger.
964
965        comment-to-self:
966         At some point, once I'm sure this code is working, I should replace
967         shlex.get_token() with the code from ReadLine() which is more general.
         It would be nice to get rid of "class LineLex" entirely.  ReadLine()
         is the only new feature of LineLex which was lacking in shlex.
970
971         To do this I would need to add a couple optional arguments to
972         "get_token()", allowing it to mimic ReadLine(), such as:
973           "override_wordterms" argument (which we can pass a '\n'), and
974           "token_extender" argument (like '\' for extending lines)
975
976        """
977        first_token = ''
978        line = ''
979        escaped_state = False
980        found_space = False
981        while True:
982            nextchar = self.read_char()
983            # sys.stderr.write('nextchar=\"'+nextchar+'\"\n')
984            while nextchar == '':
985                if not self.filestack:
986                    return self._StripComments(line), '', first_token, found_space
987                else:
988                    self.pop_source()
989                    nextchar = self.read_char()
990            if nextchar == '\n':
991                self.lineno += 1
992
993            if escaped_state:
994                escaped_state = False
995            else:
996                if nextchar in self.escape:
997                    line += nextchar
998                    escaped_state = True
999                else:
1000                    escaped_state = False
1001
1002            if not escaped_state:
1003                if (nextchar in self.whitespace):
1004                    found_space = True
1005                    while first_token in self.source_triggers:
1006                        fname = RemoveOuterQuotes(self.get_token())
1007                        if (fname == '') or (fname in self.source_triggers):
1008                            raise InputError('Error: near ' + self.error_leader() + '\n'
1009                                             '       Nonsensical file inclusion request.\n')
1010                        if self.debug >= 0:
1011                            sys.stderr.write(('  ' * recur_level) +
1012                                             'reading file \"' + fname + '\"\n')
1013                        spec = self.sourcehook(fname)
1014                        if spec:
1015                            (fname, subfile) = spec
1016                            if ((first_token not in self.source_triggers_x) or
1017                                (fname not in self.source_files_restricted)):
1018                                self.push_source(subfile, fname)
                                if first_token in self.source_triggers_x:
                                    self.source_files_restricted.add(fname)
1021                            else:
1022                                if self.debug >= 0:
1023                                    sys.stderr.write('\nWarning at ' + self.error_leader() + ':\n'
1024                                                     '          duplicate attempt to import file:\n         \"' + fname + '\"\n')
1025
1026                        line, nextchar, first_token, found_space = \
1027                            self._ReadLine(recur_level + 1)
1028
1029                if nextchar in self.line_terminators:
1030                    line_nrw = line.rstrip(self.whitespace)
1031                    # sys.stderr.write('line_nrw=\"'+line_nrw+'\"\n')
1032                    if ((len(line_nrw) > 0) and
1033                            (line_nrw[-1] in self.line_extend_chars) and
1034                            ((len(line_nrw) < 2) or (line_nrw[-2] not in self.escape))):
1035                        # delete the line_extend character
1036                        line = line_nrw[:-1]
1037                        # from the end of that line and keep reading...
1038                    else:
1039                        return self._StripComments(line), nextchar, first_token, found_space
1040                else:
1041                    line += nextchar
1042                    if not found_space:
1043                        first_token += nextchar
1044
1045    def ReadLine(self, recur_level=0):
1046        line, nextchar, first_token, found_space = \
1047            self._ReadLine(recur_level)
1048        if nextchar == self.eof:
1049            self.end_encountered = True
1050        return line + nextchar
1051
1052    @staticmethod
1053    def TextBlock2Lines(text, delimiters, keep_delim=True):
1054        """ This splits a string into a list of sub-strings split by delimiter
1055        characters.  This function is different from the standard str.split()
1056        function: The string is split at every character which belongs to the
1057        "delimiters" argument (which can be a string or some other container).
1058        This character is included at the end of every substring.  Example:
1059        TextBlock2Lines('\nabc\nde^fg\nhi j\n', '^\n')
1060        returns:
1061        ['\n', 'abc\n', 'de^', 'fg\n', 'hi j\n']
1062
1063        """
1064        ls = []
1065        i = 0
1066        i_prev = 0
1067        while i < len(text):
1068            if text[i] in delimiters:
1069                if keep_delim:
1070                    ls.append(text[i_prev:i + 1])
1071                else:
1072                    ls.append(text[i_prev:i])
1073                i_prev = i + 1
1074            i += 1
1075        if (i_prev < len(text)):
1076            ls.append(text[i_prev:i + 1])
1077        return ls
1078
1079    def __iter__(self):
1080        return self
1081
1082    def __next__(self):
1083        line = self.ReadLine()
1084        if line == self.eof:
1085            raise StopIteration
1086        return line
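
# A short illustrative sketch of LineLex:  it reads one line at a time,
# strips comments, and joins lines ending in a (line_extend_chars) backslash:
#
#     lex = LineLex('mass 12.011  # carbon\n'
#                   'pair_coeff 1 1 \\\n'
#                   '    0.1 3.5\n')
#     # repeated calls to lex.ReadLine() should yield:
#     #   'mass 12.011  \n'
#     #   'pair_coeff 1 1     0.1 3.5\n'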
1087
1088
1089class OSrcLoc(object):
1090    """ OSrcLoc is barely more than a 2-tuple containing the name of a file
1091        (a string) and a particular line number inside that file (an integer).
1092        These objects are passed around and stored in the nodes of
1093        every tree, so that if a syntax error or broken link in that node
1094        is discovered, an error message can be provided to the user.
1095
1096    """
1097
1098    __slots__ = ["infile", "lineno", "order"]
1099    count = 0
1100
1101    def __init__(self, infile='', lineno=-1):
1102        self.infile = infile
1103        self.lineno = lineno
1104        OSrcLoc.count += 1
1105        self.order = OSrcLoc.count  # keep track of how many times it was called
1106
1107    def __lt__(self, x):
1108        return self.order < x.order
1109
1110    # def __repr__(self):
1111    #    return repr((self.infile, self.lineno, self.order))
1112
1113
1114class TextBlock(object):
    """TextBlock is just a 2-tuple consisting of a string, and an OSrcLoc
1116       to help locate it in the original file from which it was read."""
1117
1118    __slots__ = ["text", "srcloc"]
1119
1120    def __init__(self, text, srcloc):  # srcloc_end):
1121        self.text = text
1122        if srcloc == None:
1123            self.srcloc = OSrcLoc()
1124        else:
1125            self.srcloc = srcloc
1126        # if srcloc_end == None:
1127        #    self.srcloc_end = OSrcLoc()
1128        # else:
1129        #    self.srcloc_end = srcloc_end
1130
1131    def __repr__(self):
1132        return '\"' + self.text + '\"'
1133
1134
1135class VarRef(object):
    """VarRef stores variable names, paths, and other attribute information,
    as well as an "OSrcLoc" to keep track of the file it was defined in."""
1138
1139    __slots__ = ["prefix", "descr_str", "suffix", "srcloc", "binding", "nptr"]
1140
1141    def __init__(self,
1142                 prefix='',  # '$' or '${'
1143                 descr_str='',  # <- descriptor string: "cpath/category:lpath"
1144                 suffix='',  # '}'
1145                 srcloc=None,  # location in file where defined
1146                 binding=None,  # a pointer to a tuple storing the value
1147                 nptr=None):  # <- see class VarNPtr
1148
1149        self.prefix = prefix  # Any text before the descriptor string goes here
1150        self.suffix = suffix  # Any text after the descriptor string goes here
1151        self.descr_str = descr_str
1152        if srcloc == None:  # <- Location in text file where variable appears
1153            self.srcloc = OSrcLoc()
1154        else:
1155            self.srcloc = srcloc
1156
1157        self.binding = binding
1158
1159        if nptr == None:
1160            self.nptr = VarNPtr()
1161        else:
1162            self.nptr = nptr
1163
1164    def __lt__(self, x):
        return self.srcloc.order < x.srcloc.order  # (VarRef has no "order" slot; use the source location's)
1166
1167    # def __repr__(self):
1168    #    return repr((self.prefix + self.descr_str + self.suffix, srcloc))
1169
1170
1171class VarNPtr(object):
1172    """
    Every time a variable appears in a template, it has a "descriptor".
1174    For example, consider the variable
1175       "$atom:CA"
1176    This is a string which encodes 3 pieces of information.
    1) the category name:  This essentially indicates the variable's type.
1178                           (ie "atom", in the example above)
1179    2) the category node:  Some TYPES have limited scope. Users can
1180                           specify the root node of the portion of the tree
1181                           in which this variable's type makes sense.
1182                           If this node is the root node, then that category
1183                           is relevant everywhere, and is not molecule or class
1184                           specific.  All variables have a category node, which
                           is often not explicitly defined by the user.
1186                           (Category node = the root "/", in the example above.)
1187    3) the leaf node:      This is a node whose ".name" member matches the name
1188                           of a variable.  This node is created for this purpose
                           and its position in the tree is a reflection of
1190                           that variable's intended scope.
1191                              In a molecule this "name" might be the name
1192                           of a type of atom, or an atom ID, or a bond type,
1193                           which is found in a particular molecule.
1194                           (Leaf node would be named "CA" in the example above.)
1195
1196    The VarNPtr class is simply a 3-tuple which
1197    keeps these 3 pieces of data together.
1198
1199    """
1200
1201    __slots__ = ["cat_name", "cat_node", "leaf_node"]
1202
1203    def __init__(self, cat_name='', cat_node=None, leaf_node=None):
1204        self.cat_name = cat_name
1205        self.cat_node = cat_node
1206        self.leaf_node = leaf_node
1207
1208    # def __repr__(self):
1209    #    return repr((self.cat_name, self.cat_node.name, self.leaf_node.name))
1210
1211
1212class VarBinding(object):
    """ VarBinding is essentially a tuple consisting of (full_name, value, refs):
1214
    "self.full_name" is the canonical name for this variable.  This is a string
    which specifies the full path leading to the category node (beginning with '/'),
1217    the category name (followed by a ':'),
1218    as well as the leaf node (including the path leading up to it from cat_node)
1219    This triplet identifies the variable uniquely.
1220
1221    "self.value" is the data that the variable refers to (usually a string).
1222
1223    "self.refs" stores a list of VarRefs which mention the same variable
1224    from the various places inside various templates in the tree.
1225
1226    """
1227
1228    __slots__ = ["full_name", "nptr", "value", "refs", "order", "category"]
1229
1230    def __init__(self,
1231                 full_name='',
1232                 nptr=None,
1233                 value=None,
1234                 refs=None,
1235                 order=-1,
1236                 category=None):
1237        self.full_name = full_name
1238        self.nptr = nptr
1239        self.value = value
1240        self.refs = refs
1241        self.order = order
1242        self.category = category
1243
1244    def __lt__(self, x):
1245        return self.order < x.order
1246
1247    def __repr__(self):
1248        return repr((self.full_name, self.value, self.order))
1249
1250
1251def ExtractCatName(descr_str):
1252    """ When applied to a VarRef's "descr_str" member,
    this function will extract the "catname" of its corresponding
    "nptr" member.  This can be useful for error reporting.
    (I use it to ensure that the user is using the correct counter
1256     variable types at various locations in their input files.)
1257
1258    """
1259
1260    ib = descr_str.find(':')
1261    if ib == -1:
1262        ib = len(descr_str)
1263        ia = descr_str.rfind('/')
1264        if ia == -1:
1265            ia = 0
1266        return descr_str[ia:ib]
1267    else:
1268        str_before_colon = descr_str[0:ib]
1269        ia = str_before_colon.rfind('/')
1270        if ia == -1:
1271            return str_before_colon
1272        else:
1273            return str_before_colon[ia + 1:]
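
# For example (illustrative):
#
#     ExtractCatName('/atom:CA')   # --> 'atom'
#     ExtractCatName('bond:AB')    # --> 'bond'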
1274
1275
1276def _DeleteLineFromTemplate(tmpl_list,
1277                            i_entry,  # index into tmpl_list
1278                            newline_delimiter='\n'):
1279    """ Delete a single line from tmpl_list.
1280    tmpl_list is an alternating list of VarRefs and TextBlocks.
1281    To identify the line, the index corresponding to one of the
1282    entries in the tmpl_list is used. (Usually it is a VarRef)
    The text after the preceding newline, and the text up to the next newline
1284       (starting from the beginning of the current entry, if a TextBlock)
1285    is deleted, including any VarRef (variables) located in between.
1286
1287    It returns the index corresponding to the next
1288    entry in the list (after deletion).
1289
1290    """
1291
1292    i_prev_newline = i_entry
1293    while i_prev_newline >= 0:
1294        entry = tmpl_list[i_prev_newline]
1295        if isinstance(entry, TextBlock):
1296            i_char_newline = entry.text.rfind(newline_delimiter)
1297            if i_char_newline != -1:  # then newline found
1298                # Delete the text after this newline
1299                entry.text = entry.text[:i_char_newline + 1]
1300                break
1301        i_prev_newline -= 1
1302
1303    first_var = True
1304    #i_next_newline = i_entry
1305    i_next_newline = i_prev_newline + 1
1306    while i_next_newline < len(tmpl_list):
1307        entry = tmpl_list[i_next_newline]
1308        if isinstance(entry, TextBlock):
1309            i_char_newline = entry.text.find(newline_delimiter)
1310            if i_char_newline != -1:  # then newline found
1311                # Delete the text before this newline (including the newline)
1312                entry.text = entry.text[i_char_newline + 1:]
1313                break
        # Invoke DeleteSelf() on the first variable on this line.  This will
        # ensure that it is deleted from the ttree_assignments.txt file.
1316        elif isinstance(entry, VarRef):
1317            if first_var:
1318                entry.nptr.leaf_node.DeleteSelf()
1319            first_var = False
1320        i_next_newline += 1
1321
1322    del tmpl_list[i_prev_newline + 1: i_next_newline]
1323    return i_prev_newline + 1
1324
1325
1326def DeleteLinesWithBadVars(tmpl_list,
1327                           delete_entire_template=False,
1328                           newline_delimiter='\n'):
1329    """
1330    Loop through the entries in a template,
1331    an alternating list of TextBlocks and VarRefs (tmpl_list).
1332    If a VarRef points to a leaf_node which no longer exists
    (ie. is no longer in the corresponding category's .bindings list),
    then delete the line it came from out of the template (tmpl_list).
1335
1336    """
1337
1338    out_str_list = []
1339    i = 0
1340    while i < len(tmpl_list):
1341        entry = tmpl_list[i]
1342        if isinstance(entry, VarRef):
1343            var_ref = entry
1344            var_bindings = var_ref.nptr.cat_node.categories[
1345                var_ref.nptr.cat_name].bindings
1346            # if var_ref.nptr.leaf_node not in var_bindings:
1347            if var_ref.nptr.leaf_node.IsDeleted():
1348                if delete_entire_template:
1349                    del tmpl_list[:]
1350                    return 0
1351                else:
1352                    i = _DeleteLineFromTemplate(tmpl_list,
1353                                                i,
1354                                                newline_delimiter)
1355            else:
1356                i += 1
1357        else:
1358            i += 1
1359
1360
1361def SplitTemplate(ltmpl, delim, delete_blanks=False):
1362    """
1363    Split a template "ltmpl" into a list of "tokens" (sub-templates)
1364    using a single delimiter string "delim".
1365
1366    INPUT arguments:
    "ltmpl" should be a list of TextBlocks and VarRefs.
1368    "delim" should be a simple string (type str)
1369    "delete_blanks" should be a boolean True/False value.
1370                    When true, successive occurrences of the delimiter
1371                    should not create blank entries in the output list.
1372
1373    OUTPUT:
1374    A list of tokens.
1375    Each "token" is either a TextBlock, a VarRef,
1376    or a (flat, 1-dimensional) list containing more than one of these objects.
1377    The number of "tokens" returned equals the number of times the delimiter
1378    is encountered in any of the TextBlocks in the "ltmpl" argument, plus one.
1379    (... Unless "delete_blanks" is set to True.
1380     Again, in that case, empty entries in this list are deleted.)
1381
1382    """
1383    assert(type(delim) is str)
1384    if not hasattr(ltmpl, '__len__'):
1385        ltmpl = [ltmpl]
1386
1387    tokens_lltmpl = []
1388    token_ltmpl = []
1389    i = 0
1390    while i < len(ltmpl):
1391
1392        entry = ltmpl[i]
1393        #sys.stderr.write('ltmpl['+str(i)+'] = '+str(entry)+'\n')
1394
1395        if isinstance(entry, TextBlock):
1396            # if hasattr(entry, 'text'):
1397            prev_src_loc = entry.srcloc
1398
1399            tokens_str = entry.text.split(delim)
1400
1401            lineno = entry.srcloc.lineno
1402
1403            j = 0
1404            while j < len(tokens_str):
1405                token_str = tokens_str[j]
1406
1407                delim_found = False
1408                if (j < len(tokens_str) - 1):
1409                    delim_found = True
1410
1411                if token_str == '':
1412                    if delete_blanks:
1413                        if delim == '\n':
1414                            lineno += 1
1415                        if len(token_ltmpl) > 0:
1416                            if len(token_ltmpl) == 1:
1417                                tokens_lltmpl.append(token_ltmpl[0])
1418                            else:
1419                                tokens_lltmpl.append(token_ltmpl)
1420                        del token_ltmpl
1421                        token_ltmpl = []
1422                        j += 1
1423                        continue
1424
1425                new_src_loc = OSrcLoc(prev_src_loc.infile, lineno)
1426                new_src_loc.order = prev_src_loc.order
1427
1428                for c in token_str:
1429                    # Reminder to self:  c != delim  (so c!='\n' if delim='\n')
1430                    # (We keep track of '\n' characters in delimiters above.)
1431                    if c == '\n':
1432                        lineno += 1
1433
1434                new_src_loc.lineno = lineno
1435
1436                text_block = TextBlock(token_str,
1437                                       new_src_loc)
1438
1439                prev_src_loc = new_src_loc
1440
1441                if len(token_ltmpl) == 0:
1442                    if delim_found:
1443                        tokens_lltmpl.append(text_block)
1444                        del token_ltmpl
1445                        token_ltmpl = []
1446                    else:
1447                        token_ltmpl.append(text_block)
1448                else:
1449                    if delim_found:
1450                        if len(token_str) > 0:
1451                            token_ltmpl.append(text_block)
1452                            tokens_lltmpl.append(token_ltmpl)
1453                            del token_ltmpl
1454                            token_ltmpl = []
1455                        else:
1456                            assert(not delete_blanks)
1457                            if (isinstance(token_ltmpl[-1], VarRef)
1458                                and
1459                                ((j > 0)
1460                                 or
1461                                 ((j == len(tokens_str) - 1) and
1462                                  (i == len(ltmpl) - 1))
1463                                 )):
1464                                # In that case, this empty token_str corresponds
1465                                # to a delimiter which was located immediately
1466                                # after the variable name,
1467                                # AND
1468                                #   -there is more text to follow,
1469                                #   OR
1470                                #   -we are at the end of the template.
1471                                token_ltmpl.append(text_block)
1472                            if len(token_ltmpl) == 1:
1473                                tokens_lltmpl.append(token_ltmpl[0])
1474                            else:
1475                                tokens_lltmpl.append(token_ltmpl)
1476                            del token_ltmpl
1477                            token_ltmpl = []
1478                    else:
1479                        token_ltmpl.append(text_block)
1480
1481                if (delim_found and (delim == '\n')):
1482                    lineno += 1
1483
1484                j += 1
1485
1486        elif isinstance(entry, VarRef):
1487            # elif hasattr(entry, 'descr_str'):
1488            lineno = entry.srcloc.lineno
1489            if ((len(token_ltmpl) == 1) and
1490                    isinstance(token_ltmpl[0], TextBlock) and
1491                    (len(token_ltmpl[0].text) == 0)):
1492                # special case: if the previous entry was "", then it means
1493                # the delimeter appeared at the end of the previous text block
1494                # leading up to this variable.  It separates the variable from
1495                # the previous text block.  It is not a text block of length 0.
1496                token_ltmpl[0] = entry
1497            else:
1498                token_ltmpl.append(entry)
        elif entry is None:
1500            token_ltmpl.append(entry)
1501        else:
1502            assert(False)
1503
1504        i += 1
1505
1506    # Append left over remains of the last token
1507    if len(token_ltmpl) == 1:
1508        tokens_lltmpl.append(token_ltmpl[0])
1509    elif len(token_ltmpl) > 1:
1510        tokens_lltmpl.append(token_ltmpl)
1511    del token_ltmpl
1512
1513    return tokens_lltmpl
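
# A minimal usage sketch for SplitTemplate().  (Illustration only; it assumes
# "ltmpl" is an alternating list of TextBlocks and VarRefs, for example the
# output of TemplateLexer.ReadTemplate() for a multi-line template.)
#
#     lines = SplitTemplate(ltmpl, '\n', delete_blanks=True)
#     # Each entry of "lines" is a TextBlock, a VarRef, or a flat list of
#     # them -- one entry per (non-blank) newline-delimited line of "ltmpl".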
1514
1515
1516def SplitTemplateMulti(ltmpl, delims, delete_blanks=False):
1517    """
    Split a template "ltmpl" into a list of templates using
    one or more delimiter strings ("delims").
1520    If multiple delimiter strings are provided, splitting
1521    begins using the first delimiter string in the list.
1522    Then each token in the resulting list of templates
1523    is split using the next delimiter string
1524    and so on until we run out of delimiter strings.
1525
    "ltmpl" should be a list of TextBlocks and VarRefs.
1527    "delims" should be a simple string (type str) or a list of strings
1528    "delete_blanks" is either True or False
1529                    If True, then any blank entries in the resulting list of
1530                    tokens (sub-templates) will be deleted.
1531
1532    """
1533
    # Note: strings also have __len__(), so check for a string explicitly.
    # Otherwise a multi-character delimiter (such as '\n\n') would be
    # misinterpreted as a list of single-character delimiters.
    if isinstance(delims, str):
        delim_list = [delims]     # a single delimiter string
    else:
        delim_list = delims       # hopefully a list (or tuple) of strings
1538
1539    tokens = [ltmpl]
1540    for delim in delim_list:
1541        assert(type(delim) is str)
1542        tokens_il = []
1543        for t in tokens:
1544            sub_tokens = SplitTemplate(t, delim, delete_blanks)
1545            for st in sub_tokens:
1546                if hasattr(st, '__len__'):
1547                    if (len(st) > 0) or (not delete_blanks):
1548                        tokens_il.append(st)
1549                else:
1550                    tokens_il.append(st)
1551        tokens = tokens_il
1552        del tokens_il
1553
1554    return tokens
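
# A minimal usage sketch for SplitTemplateMulti().  (Illustration only; the
# same assumptions as the SplitTemplate() sketch above apply to "ltmpl".)
#
#     tokens = SplitTemplateMulti(ltmpl, ['\n', ' '], delete_blanks=True)
#     # The template is first split on '\n', and each resulting token is then
#     # split again on ' ', yielding one flat list of whitespace-free tokens.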
1555
1556
1557def _TableFromTemplate(d, ltmpl, delimiters, delete_blanks):
1558    """
1559    See the docstring for the TableFromTemplate() function for an explanation.
1560    (This _TableFromTemplate() and SplitTemplate() are the workhorse functions
1561     for TableFromTemplate().)
1562
1563    """
1564
1565    output = SplitTemplateMulti(ltmpl, delimiters[d], delete_blanks[d])
1566
1567    if d > 0:
1568        i = 0
1569        while i < len(output):
1570            output[i] = _TableFromTemplate(d - 1,
1571                                           output[i],
1572                                           delimiters,
1573                                           delete_blanks)
1574            # Delete empty LISTS?
1575            if (delete_blanks[d] and
1576                    hasattr(output[i], '__len__') and
1577                    (len(output[i]) == 0)):
1578                del output[i]
1579            else:
1580                i += 1
1581
1582    return output
1583
1584
1585def TableFromTemplate(ltmpl, delimiters, delete_blanks=True):
1586    """
    This function can be used to split a template
    (a list containing TextBlocks and VarRefs) into a
    multidimensional table, with an arbitrary number of dimensions.
1590
1591    Arguments:
1592
1593    ltmpl
1594
1595    An alternating list of TextBlocks and VarRefs containing
1596    the contents of this text template.
1597
1598    delimiters
1599
1600    The user must supply a list or tuple of delimiters: one delimiter for
1601    each dimension in the table, with low-priority delimiters
1602    (such as spaces ' ') appearing first, and higher-priority delimiters
    (such as newlines '\n') appearing later on in the list.
    This function will divide the entire "ltmpl" into an n-dimensional
    table.  Initially the text is split into a list of sub-templates using the
    highest-priority delimiter.  Then each entry in the resulting list is
    split into another list according to the next highest-priority delimiter.
    This continues until all of the delimiters are used up and an
    n-dimensional list-of-lists remains.
1610
1611    delete_blanks
1612
    The "delete_blanks" argument can be used to indicate whether
    or not to delete blank entries in the table (which occur as a result
    of placing two delimiters next to each other).  It should be a list
    (or tuple) of booleans whose length matches that of the "delimiters"
    argument (one entry per dimension).  This allows the caller to customize
    the merge settings separately for each dimension (for example: to allow
    merging of whitespace within a line, without ignoring blank lines).
1620
1621
1622     ---- Details: ----
1623
1624    1) Multi-character delimiters ARE allowed (like '\n\n').
1625
1626    2) If a delimiter in the "delimiters" argument is not a string
1627    but is a tuple (or a list) of strings, then the text is split according
1628    to any of the delimiters in that tuple/list (starting from the last entry).
1629    This way, users can use this feature to split text according to multiple
1630    different kinds of whitespace characters (such as ' ' and '\t'), for
    example, by setting delimiters[0] = (' ','\t').   If, additionally,
    delete_blanks[0] == True, then this will cause this function to
    divide text without regard to the amount of whitespace separating
    the fields on a given line (for example).
1634
1635    Detailed example:
1636
    table2D = TableFromTemplate(ltmpl,
1638                                delimiters = ((' ','\t'), '\n'),
1639                                delete_blanks = (True, False))
1640
    This divides text in much the same way that the "awk" program does by
    default, i.e., by ignoring various kinds of whitespace between text fields,
    but NOT
1643    ignoring blank lines.
1644
1645    3) Any text contained in variable-names is ignored.
1646
1647    """
1648
1649    # Make a copy of ltmpl
1650    # (The workhorse function "_TableFromTemplate()" makes in-place changes to
1651    #  its "ltmpl" argument.  I don't want to modify "ltmpl", so I make a copy
1652    #  of it before I invoke "_TableFromTemplate()" on it.)
1653
    output = list(ltmpl)
1655
1656    d = len(delimiters) - 1
1657    output = _TableFromTemplate(d, output, delimiters, delete_blanks)
1658    return output
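
# The "awk"-like example from the docstring above, restated as a sketch.
# (Illustration only; "ltmpl" is assumed to be the alternating TextBlock/VarRef
# list produced by TemplateLexer.ReadTemplate().)
#
#     table2d = TableFromTemplate(ltmpl,
#                                 delimiters=((' ', '\t'), '\n'),
#                                 delete_blanks=(True, False))
#     # table2d[i][j] is the j-th whitespace-delimited field on the i-th line;
#     # blank fields are merged away, but blank lines are preserved.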
1659
1660
1661class TemplateLexer(TtreeShlex):
1662    """ This class extends the standard python lexing module, shlex, adding a
1663    new member function (ReadTemplate()), which can read in a block of raw text,
    (halting at a (non-escaped) terminal character), and split the text into
    alternating blocks of text and variables.  (As far as this lexer is
    concerned, "variables" are simply tokens preceded by $ or @ characters,
1667    and surrounded by optional curly-brackets {}.)
1668
1669    """
1670
1671    def __init__(self,
1672                 instream=None,
1673                 infile=None,
1674                 posix=False):
1675        TtreeShlex.__init__(self, instream, infile, posix)
1676        self.var_delim = '$@'  # characters which can begin a variable name
        self.var_open_paren = '{'  # optional parenthesis surrounding a variable
        self.var_close_paren = '}'  # optional parenthesis surrounding a variable
1679        self.newline = '\n'
1680        self.comment_skip_var = '#'
1681
1682        #   Which characters belong in words?
1683        #
1684        # We want to allow these characters:
1685        #     ./$@&%^!*~`-_:;?<>[]()
1686        # to appear inside the tokens that TtreeShlex.get_token()
1687        # retrieves (TtreeShlex.get_token() is used to read class
1688        # names, and instance names, and variable names)
1689        #
1690        # settings.lex.wordchars+='./$@&%^!*~`-_+:;?<>[]' #Allow these chars
1691        #
        # Omissions:
1693        # Note: I left out quotes, whitespace, comment chars ('#'), and escape
1694        #       characters ('\\') because they are also dealt with separately.
1695        #       Those characters should not overlap with settings.lex.wordchars.
1696        #
1697        # Enabling unicode support requires that we override this choice
1698        # by specifying "lex.wordterminators" instead of "wordchars".
1699        #
1700        # lex.wordterminators should be the (printable) set inverse of lex.wordchars
1701        # I'm not sure which ascii characters are NOT included in the string above
1702        # (We need to figure that out, and put them in settings.lex.wordterminators)
1703        # To figure that out, uncomment the 8 lines below:
1704        #
1705        # self.wordterminators=''
1706        # for i in range(0,256):
1707        #    c = chr(i)
1708        #    if c not in self.wordchars:
1709        #        self.wordterminators += c
1710        #sys.stderr.write('-------- wordterminators = --------\n')
1711        # sys.stderr.write(self.wordterminators+'\n')
1712        # sys.stderr.write('-----------------------------------\n')
1713        #
1714        # Here is the result:
1715        self.wordterminators = '(){|}' + \
1716            self.whitespace + \
1717            self.quotes + \
1718            self.operators + \
1719            self.escape + \
1720            self.commenters
1721
1722        #  Note:
1723        # self.whitespace = ' \t\r\f\n'
1724        # self.quotes     = '\'"'
1725        # self.escape     = '\\'
1726        # self.commenters = '#'
1727        #  Note: I do not terminate on these characters: +-=*'"`
1728        # because they appear in the names of atom types in many force-fields.
1729        # Also * characters are needed for variables containing wildcards
1730        # in the name (which will be dealt with later).
1731
1732        self.source_triggers = set(['include', 'import'])
1733        self.source_triggers_x = set(['import'])
1734
1735    def GetSrcLoc(self):
1736        return OSrcLoc(self.infile, self.lineno)
1737
1738    def ReadTemplate(self,
1739                     simplify_output=False,
1740                     terminators='}',
1741                     remove_esc_preceeding='{\\',  #explained below
1742                     var_terminators='{}(),', #(var_delim, spaces also included)
1743                     keep_terminal_char=True):
1744        """
1745           ReadTemplate() reads a block of text (between terminators)
1746        and divides it into variables (tokens following a '$' or '@' character)
        and raw text.  This is similar to python's string.Template(),
1748        however it reads from streams (files), not strings, and it allows use
1749        of more complicated variable names with multiple variable delimiters
1750        (eg '$' and '@').
1751        This readline()-like member function terminates when reaching a
        user-specified terminator character (the "terminators" argument),
        or when a variable (eg: "$var") is encountered.  The result is
        a list of variable-separated text-blocks, which is returned
        to the caller.   For example, the string:
1756        "string with $var1 and $var2 variables.}"  contains:
1757                "string with ",
1758                $var1,
1759                " and ",
1760                $var2,
1761                " variables.}"
1762        This simplifies the final process of rendering
1763        (substituting text into) the text blocks later on.
1764            Output:
1765        This function returns a list of (alternating) blocks of
1766        text, and variable names.  Each entry in the list is either:
1767        1) a text block:
1768               Raw text is copied from the source, verbatim, along with
1769               some additional data (filename and line numbers), to
1770               help retroactively identify where the text came from
1771               (in case a syntax error in the text is discovered later).
               In this case, the list entry is stored as a list.
               The format (TextBlock) is similar to:
1774                  [text_string, ((filenameA,lineBegin), (filenameB,lineEnd))],
1775               where the tuples, (filenameA,lineBegin) and (filenameB,lineEnd)
1776               denote the source file(s) from which the text was read, and
1777               line number at the beginning and ending of the text block.
1778               (This information is useful for generating helpful error
1779               messages.  Note that the "TtreeShlex" class allows users to
1780               combine multiple files transparently into one stream using
1781               the "source" (or "sourcehook()") member.  For this reason, it
1782               is possible, although unlikely, that the text-block
1783               we are reading could span multiple different files.)
1784        2) a variable (for example "$var" or "${var}"):
1785               In this case, the list entry is stored in the "VarRef" format
               which is essentially shown below:
1787                  [[var_prefix, var_nptr, var_suffix], (filename,lineno)]
1788               where var_prefix and var_suffix are strings containing brackets
1789               and other text enclosing the variable name (and may be empty).
1790
       As an example, we consider a file named "datafile" which
       contains the following text (2 text blocks and 1 variable):
1793               "some\n text\n before ${var}. Text after\n".
1794       ReadTemplate() will read this and return a list with 3 entries:
1795             [ ['some\n text\n before', (('datafile', 1), ('datafile', 3))],
               [['${', 'var', '}'], ('datafile', 3)],
1797               ['Text after\n', (('datafile', 3), ('datafile', 4))] ]
1798
1799        Note that while parsing the text, self.lineno counter is
1800        incremented whenever a newline character is encountered.
1801        (Also: Unlike shlex.get_token(), this function does not
1802        delete commented text, or insert text from other files.)
1803
1804            Exceptional Cases:
1805        Terminator characters are ignored if they are part of a variable
1806        reference. (For example, the '}' in "${cat:var}", is used to denote a
1807        bracketed variable, and does not cause ReadTemplate() to stop reading)
           OR if they are part of a two-character escape sequence
        (for example, the '}' in "\}" does not terminate parsing).
        In that case, the text is considered normal text.  (However the
        \ character is also stripped out.  It is also stripped out if it
        precedes any of the characters in the "remove_esc_preceeding"
        argument.  Otherwise it is left in the text block.)
         What is the purpose of "remove_esc_preceeding"? To force ReadTemplate()
        to remove the preceding \ when it otherwise would not.  For example,
        we want to remove \ whenever it precedes another \ character, so we
        include it in the remove_esc_preceeding string variable. We also include
        '{' because we want to remove \ when it precedes the '{' character.
        That way the \ gets deleted when it precedes either '{' or '}'.
        (The \ character is already removed before the '}' character.)
        We want consistent behavior that people expect, so that
        "\{abc\}" -> ReadTemplate() -> "{abc}"    (instead of "\{abc}").
        In retrospect, perhaps this is a confusing way to implement this.
1824
1825        """
1826
1827        #sys.stderr.write('    ReadTemplate('+terminators+') invoked at '+self.error_leader())
1828
1829        # The main loop of the parser reads only one variable at time.
1830        # The following variables keep track of where we are in the template.
1831        reading_var = False  # Are we currently reading in the name of a variable?
1832
1833        prev_char_delim = False  # True iff we just read a var_delim character like '$'
1834        # True iff we just read a (non-escaped) esc character '\'
1835        escaped_state = False
1836        # True iff we are in a region of text where vars should be ignored
1837        commented_state = False
1838        var_paren_depth = 0  # This is non-zero iff we are inside a
1839        # bracketed variable's name for example: "${var}"
1840        var_terminators += self.whitespace + self.newline + self.var_delim
1841
1842        tmpl_list = []  # List of alternating tuples of text_blocks and
1843        # variable names (see format comment above)
1844        # This list will be returned to the caller.
1845
1846        # sys.stderr.write('report_progress='+str(report_progress))
1847
1848        prev_filename = self.infile
1849        prev_lineno = self.lineno
1850        var_prefix = ''
1851        var_descr_plist = []
1852        var_suffix = ''
1853        text_block_plist = []
1854
1855        done_reading = False
1856
1857        while not done_reading:
1858
1859            terminate_text = False
1860            terminate_var = False
1861            #delete_prior_escape = False
1862
1863            nextchar = self.read_char()
1864
1865            #sys.stderr.write('    ReadTemplate() nextchar=\''+nextchar+'\' at '+self.error_leader()+'  esc='+str(escaped_state)+', pvar='+str(prev_char_delim)+', paren='+str(var_paren_depth))
1866
1867            # Count newlines:
1868            if nextchar in self.newline:
1869                commented_state = False
1870                self.lineno += 1
1871
1872            elif ((nextchar in self.comment_skip_var) and
1873                  (not escaped_state)):
1874                commented_state = True
1875
1876            # Check for end-of-file:
1877            if nextchar == '':
1878
1879                if escaped_state:
1880                    raise InputError('Error: in ' + self.error_leader() + '\n\n'
1881                                     'File terminated immediately following an escape character.')
1883                else:
1884                    terminate_text = True
1885
1886                done_reading = True
1887
1888            # --- Now process the character: ---
1889
1890            # What we do next depends on which "mode" we are in.
1891            #  If we are reading a regular text block (reading_var == False),
1892            #   then we keep appending characters onto the end of "text_block",
1893            #   checking for terminal characters, or variable delimiters.
1894            #  If we are reading a variable name (reading_var == True),
1895            #   then we append characters to the end of "var_descr_plist[]",
1896            #   checking for variable terminator characters, as well as
1897            #   parenthesis (some variables are surrounded by parenthesis).
1898
1899            elif reading_var:
1900
1901                if nextchar in terminators:
1902                    #sys.stdout.write('   ReadTemplate() readmode found terminator.\n')
1903                    if escaped_state:
1904                        # In this case, the '\' char was only to prevent terminating
1905                        # string prematurely, so delete the '\' character.
1906                        #delete_prior_escape = True
1907                        del var_descr_plist[-1]
1908                        var_descr_plist.append(nextchar)
1909                        #escaped_state = False
1910                    elif not ((var_paren_depth > 0) and
1911                              (nextchar in self.var_close_paren)):
1912                        terminate_var = True
1913                        done_reading = True
1914
1915                if nextchar in self.var_open_paren:  # eg: nextchar == '{'
1916                    #sys.stdout.write('   ReadTemplate() readmode found {\n')
1917                    if escaped_state:
1918                        var_descr_plist.append(nextchar)
1919                        #escaped_state = False
1920                    else:
1921                        # "${var}" is a valid way to refer to a variable
1922                        if prev_char_delim:
1923                            var_prefix += nextchar
1924                            var_paren_depth = 1
1925                        # "${{var}}" is also a valid way to refer to a variable,
1926                        # (although strange), but "$va{r}" is not.
1927                        # Parenthesis (in bracketed variable names) must
1928                        # immediately follow the '$' character (as in "${var}")
1929                        elif var_paren_depth > 0:
1930                            var_paren_depth += 1
1931                            var_descr_plist.append(nextchar)
1932
1933                elif nextchar in self.var_close_paren:
1934                    #sys.stdout.write('   ReadTemplate() readmode found }.\n')
1935                    if escaped_state:
1936                        # In this case, the '\' char was only to prevent
1937                        # interpreting '}' as a variable suffix,
1938                        # delete_prior_escape=True  #so skip the '\' character
1939                        del var_descr_plist[-1]
1940                        var_descr_plist.append(nextchar)
1941                        #escaped_state = False
1942                    else:
1943                        if var_paren_depth > 0:
1944                            var_paren_depth -= 1
1945                            if var_paren_depth == 0:
1946                                var_suffix = nextchar
1947                                terminate_var = True
1948                            else:
1949                                var_descr_plist.append(nextchar)
1950
1951                elif nextchar in var_terminators:
1952                    #sys.stdout.write('   ReadTemplate() readmode found var_terminator \"'+nextchar+'\"\n')
1953                    if (escaped_state or (var_paren_depth > 0)):
1954                        # In that case ignore the terminator
1955                        # and append it to the variable name
1956                        if escaped_state:
1957                            # In this case, the '\' char was only to prevent
1958                            # interpreting nextchar as a variable terminator
1959                            # delete_prior_escape = True # so skip the '\'
1960                            #                            # character
1961                            del var_descr_plist[-1]
1962                            #escaped_state = False
1963                        var_descr_plist.append(nextchar)
1964                    else:
1965                        terminate_var = True
1966
1967                elif nextchar in self.var_delim:   # such as '$'
1968                    #sys.stdout.write('   ReadTemplate() readmode found var_delim.\n')
1969                    if escaped_state:
1970                        # In this case, the '\' char was only to prevent
1971                        # interpreting '$' as a new variable name
1972                        # delete_prior_escape = True # so skip the '\'
1973                        # character
1974                        del var_descr_plist[-1]
1975                        var_descr_plist.append(nextchar)
1976                        #escaped_state = False
1977                    else:
1978                        prev_var_delim = True
1979                        # Then we are processing a new variable name
1980                        terminate_var = True
1981                else:
1982                    var_descr_plist.append(nextchar)
1983                    prev_char_delim = False
1984
1985            else:  # begin else clause for "if reading_var:"
1986
1987                # Then we are reading a text_block
1988
1989                if nextchar in terminators:
1990                    if escaped_state:
1991                        # In this case, the '\' char was only to prevent terminating
1992                        # string prematurely, so delete the '\' character.
1993                        #delete_prior_escape = True
1994                        del text_block_plist[-1]
1995                        text_block_plist.append(nextchar)
1996                    elif commented_state:
1997                        text_block_plist.append(nextchar)
1998                    else:
1999                        terminate_text = True
2000                        done_reading = True
2001
2002                elif nextchar in self.var_delim:   # such as '$'
2003                    if escaped_state:
2004                        # In this case, the '\' char was only to prevent
2005                        # interpreting '$' as a variable prefix.
2006                        # delete_prior_escape=True  #so delete the '\'
2007                        # character
2008                        del text_block_plist[-1]
2009                        text_block_plist.append(nextchar)
2010                    elif commented_state:
2011                        text_block_plist.append(nextchar)
2012                    else:
2013                        prev_char_delim = True
2014                        reading_var = True
2015                        # NOTE TO SELF: IN THE FUTURE, USE GetVarName(self)
2016                        # TO PARSE TEXT ASSOCIATED WITH A VARIABLE
2017                        # THIS WILL SIMPLIFY THE CODE AND ENSURE CONSISTENCY.
2018                        var_paren_depth = 0
2019                        terminate_text = True
2020                else:
2021                    text_block_plist.append(nextchar)
2022                    # TO DO: use "list_of_chars.join()" instead of '+='
2023                    prev_char_delim = False  # the previous character was not '$'
2024
2025            # Now deal with "remove_esc_preceeding".  (See explanation above.)
2026            if escaped_state and (nextchar in remove_esc_preceeding):
2027                if reading_var:
2028                    #sys.stdout.write('   ReadTemplate: var_descr_str=\''+''.join(var_descr_plist)+'\'\n')
2029                    assert(var_descr_plist[-2] in self.escape)
2030                    del var_descr_plist[-2]
2031                else:
2032                    #sys.stdout.write('   ReadTemplate: text_block=\''+''.join(text_block_plist)+'\'\n')
2033                    assert(text_block_plist[-2] in self.escape)
2034                    del text_block_plist[-2]
2035
2036            if terminate_text:
2037                #sys.stdout.write('ReadTemplate() appending: ')
2038                # sys.stdout.write(text_block)
2039
2040                # tmpl_list.append( [text_block,
2041                #                   ((prev_filename, prev_lineno),
2042                #                    (self.infile, self.lineno))] )
2043
2044                if simplify_output:
2045                    tmpl_list.append(''.join(text_block_plist))
2046                else:
2047                    tmpl_list.append(TextBlock(''.join(text_block_plist),
2048                                               OSrcLoc(prev_filename, prev_lineno)))
2049                    #, OSrcLoc(self.infile, self.lineno)))
2050                if not done_reading:
2051                    # The character that ended the text block
2052                    # was a variable delimiter (like '$'), in which case
2053                    # we should put it (nextchar) in the variable's prefix.
2054                    var_prefix = nextchar
2055                else:
2056                    var_prefix = ''
2057                var_descr_plist = []
2058                var_suffix = ''
2059                prev_filename = self.infile
2060                prev_lineno = self.lineno
2061                del text_block_plist
2062                text_block_plist = []
2063                # gc.collect()
2064
2065            elif terminate_var:
2066                # Print an error if we terminated in the middle of
2067                # an incomplete variable name:
2068                if prev_char_delim:
2069                    raise InputError('Error: near ' + self.error_leader() + '\n\n'
2070                                     'Null variable name.')
2071                if var_paren_depth > 0:
2072                    raise InputError('Error: near ' + self.error_leader() + '\n\n'
2073                                     'Incomplete bracketed variable name.')
2074
2075                var_descr_str = ''.join(var_descr_plist)
2076
2077                # Now check for variable format modifiers,
2078                # like python's ".rjust()" and ".ljust()".
2079                # If present, then put these in the variable suffix.
2080                if ((len(var_descr_plist) > 0) and (var_descr_plist[-1] == ')')):
2081                    #i = len(var_descr_plist)-1
2082                    # while i >= 0:
2083                    #    if var_descr_plist[i] == '(':
2084                    #        break
2085                    #    i -= 1
2086                    i = var_descr_str.rfind('(')
2087                    if (((i - 6) >= 0) and
2088                        ((var_descr_str[i - 6:i] == '.rjust') or
2089                         (var_descr_str[i - 6:i] == '.ljust'))):
2090                        var_suffix = ''.join(
2091                            var_descr_plist[i - 6:]) + var_suffix
2092                        #var_descr_plist = var_descr_plist[:i-6]
2093                        var_descr_str = var_descr_str[:i - 6]
2094
2095                # Process any special characters in the variable name
2096                var_descr_str = EscCharStrToChar(var_descr_str)
2097
2098                # tmpl_list.append( [[var_prefix, var_descr_str, var_suffix],
2099                #                   (self.infile, self.lineno)] )
2100                if simplify_output:
2101                    tmpl_list.append(var_prefix + var_descr_str + var_suffix)
2102                else:
2103                    tmpl_list.append(VarRef(var_prefix, var_descr_str, var_suffix,
2104                                            OSrcLoc(self.infile, self.lineno)))
2105
2106                # if report_progress:
2107                #sys.stderr.write('  parsed variable '+var_prefix+var_descr_str+var_suffix+'\n')
2108
2109                #sys.stdout.write('ReadTemplate() appending: ')
2110                #sys.stderr.write(var_prefix + var_descr_str + var_suffix)
2111
2112                del var_descr_plist
2113                del var_descr_str
2114
2115                prev_filename = self.infile
2116                prev_lineno = self.lineno
2117                var_prefix = ''
2118                var_descr_plist = []
2119                var_suffix = ''
2120                # Special case: Variable delimiters like '$'
2121                #               terminate the reading of variables,
2122                #               but they also signify that a new
2123                #               variable is being read.
2124                if nextchar in self.var_delim:
2125                    # Then we are processing a new variable name
                    prev_char_delim = True  # (the character just read was a var_delim)
2127                    reading_var = True
2128                    # NOTE TO SELF: IN THE FUTURE, USE GetVarName(self)
2129                    # TO PARSE TEXT ASSOCIATED WITH A VARIABLE
2130                    # THIS WILL SIMPLIFY THE CODE AND ENSURE CONSISTENCY.
2131                    var_paren_depth = 0
2132                    var_prefix = nextchar
2133
2134                elif nextchar in self.var_close_paren:
2135                    del text_block_plist
2136                    text_block_plist = []
2137                    # gc.collect()
2138                    prev_var_delim = False
2139                    reading_var = False
2140
2141                else:
2142                    # Generally, we don't want to initialize the next text block
2143                    # with the empty string.  Consider that whatever character
2144                    # caused us to stop reading the previous variable and append
2145                    # it to the block of text that comes after.
2146                    del text_block_plist
2147                    text_block_plist = [nextchar]
2148                    # gc.collect()
2149                    prev_var_delim = False
2150                    reading_var = False
2151
2152            # If we reached the end of the template (and the user requests it),
2153            # then the terminal character can be included in the list
2154            # of text_blocks to be returned to the caller.
2155            if done_reading and keep_terminal_char:
2156                #sys.stdout.write('ReadTemplate() appending: \''+nextchar+'\'\n')
2157                # Here we create a new text block which contains only the
2158                # terminal character (nextchar).
2159                # tmpl_list.append( [nextchar,
2160                #                   ((self.infile, self.lineno),
2161                #                    (self.infile, self.lineno))] )
2162                if simplify_output:
2163                    tmpl_list.append(nextchar)
2164                else:
2165                    tmpl_list.append(TextBlock(nextchar,
2166                                               OSrcLoc(self.infile, self.lineno)))
2167                    #, OSrcLoc(self.infile, self.lineno)))
2168
2169            if escaped_state:
2170                escaped_state = False
2171            else:
2172                if nextchar in self.escape:
2173                    escaped_state = True
2174
2175        #sys.stderr.write("*** TMPL_LIST0  = ***", tmpl_list)
2176        return tmpl_list  # <- return value stored here
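
    # A minimal usage sketch for ReadTemplate().  (Illustration only; it relies
    # on TtreeShlex accepting a plain string, which it wraps in a StringIO.
    # The file name 'in_memory' below is made up.)
    #
    #     lexer = TemplateLexer('text before ${var} text after}', 'in_memory')
    #     tmpl_list = lexer.ReadTemplate()
    #     # "tmpl_list" alternates between TextBlock and VarRef entries.
    #     # (See the docstring above for the exact format of each entry.)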
2177
2178    def GetParenExpr(self, prepend_str='', left_paren='(', right_paren=')'):
2179        """ GetParenExpr() is useful for reading in strings
2180            with nested parenthesis and spaces.
2181            This function can read in the entire string:
2182
2183              .trans(0, 10.0*sin(30), 10.0*cos(30))
2184
2185            (Because I was too lazy to write this correctly...)
2186            Spaces are currently stripped out of the expression.
2187            (...unless surrounded by quotes) The string above becomes:
2188
2189              ".trans(0,10.0*sin(30),10.0*cos(30))"
2190
2191            Sometimes the caller wants to prepend some text to the beginning
2192            of the expression (which may contain parenthesis).  For this
2193            reason, an optional first argument ("prepend_str") can be
2194            provided.  By default it is empty.
2195
2196        """
2197
2198        src_loc_begin = SrcLoc(self.infile, self.lineno)
2199        orig_wordterm = self.wordterminators
2200        self.wordterminators = self.wordterminators.replace(
2201            left_paren, '').replace(right_paren, '')
2202
2203        token = self.get_token()
        if ((token == '') or
                (token == self.eof)):
            self.wordterminators = orig_wordterm  # restore before returning
            return prepend_str
2207
2208        expr_str = prepend_str + token
2209
2210        # if (expr_str.find(left_paren) == -1):
2211        #    raise InputError('Error near or before '+self.error_leader()+'\n'
2212        #                     'Expected an open-paren (\"'+prepend_str+left_paren+'\") before this point.\n')
2213        #    return expr_str
2214
2215        paren_depth = expr_str.count(left_paren) - expr_str.count(right_paren)
2216        while ((len(expr_str) == 0) or (paren_depth > 0)):
2217            token = self.get_token()
2218            if ((type(token) is not str) or
2219                    (token == '')):
2220                raise InputError('Error somewhere between ' +
2221                                 self.error_leader(src_loc_begin.infile,
2222                                                   src_loc_begin.lineno)
2223                                 + 'and ' + self.error_leader() + '\n'
2224                                 'Invalid expression: \"' + expr_str[0:760] + '\"')
2225            expr_str += token
2226            paren_depth = expr_str.count(
2227                left_paren) - expr_str.count(right_paren)
2228        if (paren_depth != 0):
2229            raise InputError('Error somewhere between ' +
2230                             self.error_leader(src_loc_begin.infile,
2231                                               src_loc_begin.lineno)
2232                             + 'and ' + self.error_leader() + '\n'
2233                             'Invalid expression: \"' + expr_str[0:760] + '\"')
2234        self.wordterminators = orig_wordterm
2235        return expr_str
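
    # A usage sketch for GetParenExpr(), restating the example from its
    # docstring.  (Illustration only; the file name 'in_memory' is made up.)
    #
    #     lexer = TemplateLexer('.trans(0, 10.0*sin(30), 10.0*cos(30))',
    #                           'in_memory')
    #     expr = lexer.GetParenExpr()
    #     # expr should now be '.trans(0,10.0*sin(30),10.0*cos(30))'
    #     # (spaces outside quotes are stripped, as described above).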
2236
2237
2238if __name__ == '__main__':
2239    if len(sys.argv) == 1:
2240        lexer = TtreeShlex()
2241    else:
2242        file = sys.argv[1]
2243        lexer = TtreeShlex(open(file), file)
2244    while 1:
2245        tt = lexer.get_token()
2246        if tt:
            sys.stderr.write("Token: " + repr(tt) + "\n")
2248        else:
2249            break
2250