1#!/usr/bin/env python
2
3__version__ = '2.1'
4
5from plasTeX import Logging, encoding
6from plasTeX.DOM import Element, Text, Node, DocumentFragment, Document
7from plasTeX.Tokenizer import Token, BeginGroup, EndGroup, Other
8import string
9import re
10
# Module-level loggers: the default logger plus the loggers registered
# under the names 'status' and 'parse.definitions'
log = Logging.getLogger()
status = Logging.getLogger('status')
deflog = Logging.getLogger('parse.definitions')
14
15#
16# Utility functions
17#
18
def idgen():
    """
    Yield an endless series of unique IDs

    Each ID is the letter 'a' followed by a zero-padded, ten digit
    serial number starting at 1.

    """
    serial = 0
    while True:
        serial += 1
        yield 'a%.10d' % serial
# The module only ever needs one shared generator, so the name is rebound
# to a running instance of it
idgen = idgen()
26
def subclasses(o):
    """
    Return the given class followed by all of its subclasses

    The class hierarchy is walked depth first, so each class is directly
    followed by its own descendants.

    """
    found = [o]
    for child in o.__subclasses__():
        found += subclasses(child)
    return found
33
def sourceChildren(o, par=True):
    """
    Return the LaTeX source of the child nodes

    Keyword Arguments:
    par -- when true, the source of each direct child is used.  When
        false, each direct child is iterated over instead and the source
        of the nodes found inside it is used.

    Returns: the concatenated source, or '' if there are no child nodes

    """
    if not o.hasChildNodes():
        return ''
    if par:
        return ''.join(child.source for child in o.childNodes)
    return ''.join(node.source for child in o.childNodes for node in child)
45
def sourceArguments(o):
    """ Return the LaTeX source of the arguments (the `argSource` attribute) """
    source = o.argSource
    return source
49
def ismacro(o):
    """ Is the given object a macro?  (i.e. does it have a `macroName`) """
    try:
        o.macroName
    except AttributeError:
        return False
    return True
53
def issection(o):
    """
    Is the given object a section?

    An object counts as a section when its level lies between
    Node.DOCUMENT_LEVEL (inclusive) and Node.ENDSECTIONS_LEVEL (exclusive).

    """
    if o.level >= Node.DOCUMENT_LEVEL:
        return o.level < Node.ENDSECTIONS_LEVEL
    return False
57
def macroName(o):
    """
    Return the macro name of the given object

    The explicit `macroName` attribute wins when it is set.  Otherwise the
    name of the class is used: the object's own name if it is a class, or
    the name of its type if it is an instance.

    """
    if o.macroName is not None:
        return o.macroName
    # isinstance() rather than `type(o) is type` so that classes created
    # by a custom metaclass report their own name, not the metaclass's
    if isinstance(o, type):
        return o.__name__
    return type(o).__name__
65
66
class Argument(object):
    """
    Macro argument

    Argument strings in macros are compiled into Arguments
    once.  Then the compiled arguments can be used to get the
    arguments thereafter.

    Attributes:
    name -- name of the argument
    index -- position of the argument in the argument list
    source -- source of the argument (initialized to '')
    options -- private copy of the options given to the constructor

    """
    def __init__(self, name, index, options=None):
        self.name = name
        self.index = index
        self.source = ''
        # Always keep a private copy so that neither the caller's mapping
        # nor a shared default can be modified through this instance
        self.options = {} if options is None else options.copy()

    def __repr__(self):
        return '%s: %s' % (self.name, self.options)

    def __eq__(self, other):
        # Two arguments are equal when name and options match; the index
        # and source are not part of the comparison
        try:
            return self.name == other.name and self.options == other.options
        except AttributeError:
            return NotImplemented
90
91
92
class CSSStyles(dict):
    """ Dictionary of CSS property names and values """
    @property
    def inline(self):
        """
        Render the styles in the form used by an inline style attribute

        Returns:
        string of 'name:value' pairs separated by '; ', or None if no
        styles have been set

        """
        if len(self) == 0:
            return None
        pairs = ('%s:%s' % (name, value) for name, value in self.items())
        return '; '.join(pairs)
107
108
class Macro(Element):
    """
    Base class for all macros

    Several values (generated ids, titles, styles, ...) are cached on the
    instance under attribute names starting with '@'.  Such names can only
    be reached through getattr/setattr, so they cannot collide with
    regular attribute access.

    """
    # Possible values of `macroMode`
    MODE_NONE = 0
    MODE_BEGIN = 1
    MODE_END = 2

    macroName = None  # TeX macro name (instead of class name)
    macroMode = MODE_NONE  # begin, end, or none
    mathMode = None

    # Node variables
    level = Node.COMMAND_LEVEL
    nodeType = Node.ELEMENT_NODE
    nodeValue = None

    # Counter associated with this macro
    counter = None

    # Value to return when macro is referred to by \ref
    ref = None

    # Attributes that should be persisted between runs for nodes
    # that can be referenced.  This allows for cross-document links.
    refAttributes = ['macroName', 'ref', 'title', 'captionName', 'id', 'url']

    # Source of the TeX macro arguments
    argSource = ''

    # LaTeX argument template
    args = ''

    # Force there to be at least one paragraph in the content
    forcePars = False

    def persist(self, attrs=None):
        """
        Store attributes needed for cross-document links

        This method really needs to be called by the renderer because
        the rendered versions of the attributes are needed.  If nested
        classes could be pickled, we could just pickle the attributes.

        Keyword Arguments:
        attrs -- dictionary to populate with values.  If set to None,
            a new dictionary is created.

        Returns: dictionary containing attributes to be persisted

        """
        if attrs is None:
            attrs = {}
        for name in self.refAttributes:
            value = getattr(self, name, None)
            if value is None:
                continue
            if isinstance(value, Node):
                # Nodes are persisted in their string form
                value = str(value)
            attrs[name] = value
        return attrs

    def restore(self, attrs):
        """
        Restore attributes needed for cross-document links

        Required Arguments:
        attrs -- dictionary of attributes to be set on self.  The 'url'
            key is stored under the attribute name 'urloverride'.

        """
        remap = {'url': 'urloverride'}
        for key, value in attrs.items():
            setattr(self, str(remap.get(key, key)), value)

    @property
    def config(self):
        """ Shortcut to the document config """
        return self.ownerDocument.config

    @property
    def idref(self):
        """ Storage area for idref argument types """
        if hasattr(self, '@idref'):
            return getattr(self, '@idref')
        d = {}
        setattr(self, '@idref', d)
        return d

    @property
    def captionName(self):
        """ Name associated with the counter """
        if hasattr(self, '@captionName'):
            return getattr(self, '@captionName')
        # Nothing has been stored yet: default to an empty text node and
        # remember it through the setter
        self.captionName = name = self.ownerDocument.createTextNode('')
        return name

    @captionName.setter
    def captionName(self, value):
        setattr(self, '@captionName', value)

    @property
    def title(self):
        """
        Retrieve title from variable or attributes dictionary

        Raises:
        AttributeError -- if no title was assigned and the attributes
            dictionary has no 'title' entry

        """
        try:
            return getattr(self, '@title')
        except AttributeError:
            try:
                return self.attributes['title']
            except KeyError:
                pass
        raise AttributeError('could not find attribute "title"')

    @title.setter
    def title(self, value):
        setattr(self, '@title', value)

    @property
    def fullTitle(self):
        """ Retrieve title including the section number """
        try:
            return getattr(self, '@fullTitle')
        except AttributeError:
            if self.ref is not None:
                # Build "<ref> <title>" without re-parenting the pieces
                fullTitle = self.ownerDocument.createDocumentFragment()
                fullTitle.extend([self.ref, ' ', self.title], setParent=False)
            else:
                fullTitle = self.title
            setattr(self, '@fullTitle', fullTitle)
            return fullTitle

    @fullTitle.setter
    def fullTitle(self, value):
        setattr(self, '@fullTitle', value)

    @property
    def tocEntry(self):
        """
        Retrieve table of contents entry

        The 'toc' attribute is used when the macro has one (falling back
        to the title when its value is None); otherwise the title is
        returned.

        """
        try:
            return getattr(self, '@tocEntry')
        except AttributeError:
            try:
                # .keys() is kept on purpose: when `attributes` is not a
                # dictionary the resulting AttributeError is handled below
                if 'toc' in self.attributes.keys():
                    toc = self.attributes['toc']
                    if toc is None:
                        toc = self.title
                    setattr(self, '@tocEntry', toc)
                    return toc
            except (KeyError, AttributeError):
                pass
        return self.title

    @tocEntry.setter
    def tocEntry(self, value):
        setattr(self, '@tocEntry', value)

    @property
    def fullTocEntry(self):
        """
        Retrieve table of contents entry including the section number

        If the entry cannot be built for any reason, the plain title is
        returned instead.

        """
        try:
            try:
                return getattr(self, '@fullTocEntry')
            except AttributeError:
                if self.ref is not None:
                    fullTocEntry = self.ownerDocument.createDocumentFragment()
                    fullTocEntry.extend([self.ref, ' ', self.tocEntry], setParent=False)
                else:
                    fullTocEntry = self.tocEntry
                setattr(self, '@fullTocEntry', fullTocEntry)
                return fullTocEntry
        except Exception:
            return self.title

    @fullTocEntry.setter
    def fullTocEntry(self, value):
        setattr(self, '@fullTocEntry', value)

    @property
    def style(self):
        """ CSS styles (a CSSStyles instance created on first access) """
        try:
            return getattr(self, '@style')
        except AttributeError:
            style = CSSStyles()
            setattr(self, '@style', style)
        return style

    def digest(self, tokens):
        """ Absorb tokens from the stream; the base class absorbs nothing """
        pass

    def locals(self):
        """
        Retrieve all macros local to this namespace

        Returns: dictionary mapping macro names to the macros that are
            defined as attributes of this class or one of its bases.
            The dictionary is computed once and cached on the class.

        """
        tself = type(self)
        localsname = '@locals'
        # Check for cached versions first.  vars() is used so that a cache
        # belonging to a base class is never picked up by a subclass.
        try:
            return vars(tself)[localsname]
        except KeyError:
            pass
        loc = {}
        # Walk the MRO from the most basic class to the most derived one
        # so that definitions in subclasses override inherited ones
        for cls in reversed(tself.__mro__):
            for value in list(vars(cls).values()):
                if ismacro(value):
                    loc[macroName(value)] = value
        # Cache the locals in a unique name
        setattr(tself, localsname, loc)
        return loc

    @property
    def id(self):
        """
        Unique identifier of the node

        If no id has been assigned, one is taken from the module-level id
        generator, the '@hasgenid' marker is set and the id is stored.

        """
        id = getattr(self, '@id', None)
        if id is None:
            id = next(idgen)
            setattr(self, '@hasgenid', True)
            self.id = id
        return id

    @id.setter
    def id(self, value):
        # Assigning an empty value removes the stored id
        if value:
            setattr(self, '@id', value)
        else:
            delattr(self, '@id')

    def expand(self, tex):
        """
        Fully expand the macro

        Returns: the macro itself if invoking it produced nothing,
            otherwise the expansion of the tokens it produced

        """
        result = self.invoke(tex)
        if result is None:
            return self
        return tex.expandTokens(result)

    def invoke(self, tex):
        """
        Parse the macro's arguments within its own context

        Required Arguments:
        tex -- the TeX stream to parse from

        """
        # Just pop the context if this is a \end token
        if self.macroMode == Macro.MODE_END:
            self.ownerDocument.context.pop(self)
            return

        # If this is a \begin token or the element needs to be
        # closed automatically (i.e. \section, \item, etc.), just
        # push the new context and return the instance.
        if self.macroMode == Macro.MODE_BEGIN:
            self.ownerDocument.context.push(self)
            self.parse(tex)
            self.setLinkType()
            return

        # Push, parse, and pop.  The command doesn't need to stay on
        # the context stack.  We push an empty context so that the
        # `self' token doesn't get put into the output stream twice
        # (once here and once with the pop).
        self.ownerDocument.context.push(self)
        self.parse(tex)
        self.ownerDocument.context.pop(self)
        self.setLinkType()

    def setLinkType(self, key=None):
        """
        Set up navigation links

        The macro is registered in the document's userdata['links']
        dictionary under each of the given keys.

        Keyword Arguments:
        key -- the name or names of the navigation keys to set
            instead of using self.linkType

        """
        if key is None:
            key = self.linkType
        if not key:
            return
        userdata = self.ownerDocument.userdata
        if 'links' not in userdata:
            userdata['links'] = {}
        if isinstance(key, str):
            userdata['links'][key] = self
        else:
            for k in key:
                userdata['links'][k] = self

    @property
    def tagName(self):
        """ The macro name of the class, or the class name if it has none """
        t = type(self)
        if t.macroName is None:
            return t.__name__
        return t.macroName
    nodeName = tagName

    @property
    def source(self):
        """ LaTeX source of the macro, its arguments and its content """
        name = self.nodeName

        # Automatically revert internal names like "active::~"
        escape = '\\'
        if '::' in name:
            name = name.split('::').pop()
            escape = ''

        # \begin environment
        # If self.childNodes is not empty, print out the entire environment
        if self.macroMode == Macro.MODE_BEGIN:
            argSource = sourceArguments(self)
            if not argSource:
                argSource = ' '
            s = '%sbegin{%s}%s' % (escape, name, argSource)
            if self.hasChildNodes():
                s += '%s%send{%s}' % (sourceChildren(self), escape, name)
            return s

        # \end environment
        if self.macroMode == Macro.MODE_END:
            return '%send{%s}' % (escape, name)

        argSource = sourceArguments(self)
        if not argSource:
            argSource = ' '
        elif argSource[0] in encoding.stringletters():
            # A space is needed so that arguments starting with a letter
            # do not run into the macro name
            argSource = ' %s' % argSource
        s = '%s%s%s' % (escape, name, argSource)

        # If self.childNodes is not empty, print out the contents, unless
        # the attributes contain a 'self' entry
        if self.attributes and 'self' in self.attributes.keys():
            pass
        else:
            if self.hasChildNodes():
                s += sourceChildren(self)
        return s

    @property
    def childrenSource(self):
        """ LaTeX source of the child nodes """
        return sourceChildren(self)

    def parse(self, tex):
        """
        Parse the arguments defined in the `args` variable

        Required Arguments:
        tex -- the TeX stream to parse from

        Returns:
        self.attributes -- after the arguments have been parsed.  None is
            returned for \\end tokens and for macros without arguments.

        """
        if self.macroMode == Macro.MODE_END:
            return

        self.preParse(tex)

        # args is empty, don't parse
        if not self.args:
            self.postParse(tex)
            return

        self.argSource = ''
        arg = None
        try:
            for arg in self.arguments:
                self.preArgument(arg, tex)
                output, source = tex.readArgumentAndSource(parentNode=self,
                                                           name=arg.name,
                                                           **arg.options)
                self.argSource += source
                self.attributes[arg.name] = output
                self.postArgument(arg, output, tex)
        except Exception:
            # `arg` is still None when compiling the argument template
            # failed before the first argument was reached; reporting
            # `arg.name` in that case would hide the real error behind an
            # AttributeError.
            if arg is None:
                log.error('Error while compiling arguments "%s" of "%s"' %
                           (self.args, self.nodeName))
            else:
                log.error('Error while parsing argument "%s" of "%s"' %
                           (arg.name, self.nodeName))
            raise

        self.postParse(tex)

        return self.attributes

    def preParse(self, tex):
        """
        Do operations that must be done immediately before parsing arguments

        Required Arguments:
        tex -- the TeX instance containing the current context

        """
        # With arguments, the counter is stepped from preArgument or
        # postArgument instead
        if not self.args:
            self.refstepcounter(tex)

    def preArgument(self, arg, tex):
        """
        Event called before parsing each argument

        Arguments:
        arg -- the Argument instance that holds all argument meta-data
            including the argument's name, source, and options.
        tex -- the TeX instance containing the current context

        """
        # Check for a '*' type argument at the beginning of the
        # argument list.  If there is one, don't increment counters
        # or set labels.  This must be done immediately since
        # the following arguments may contain labels.
        if arg.index == 0 and arg.name != '*modifier*':
            self.refstepcounter(tex)

    def postArgument(self, arg, value, tex):
        """
        Event called after parsing each argument

        Arguments:
        arg -- the Argument instance that holds all argument meta-data
            including the argument's name, source, and options.
        value -- the parsed value of the argument
        tex -- the TeX instance containing the current context

        """
        # If there was a '*', unset the counter for this instance
        if arg.index == 0 and arg.name == '*modifier*':
            if value:
                self.counter = ''
            self.refstepcounter(tex)

    def stepcounter(self, tex):
        """
        Increment the counter for the object (if one exists)

        If the counter is unknown to the context, a warning is logged and
        the counter is created with an initial value of 1.

        Required Arguments:
        tex -- the TeX instance containing the current context

        """
        if self.counter:
            try:
                self.ownerDocument.context.counters[self.counter].stepcounter()
            except KeyError:
                log.warning('Could not find counter "%s"', self.counter)
                self.ownerDocument.context.newcounter(self.counter, initial=1)

    def refstepcounter(self, tex):
        """
        Increment the counter for the object (if one exists)

        In addition to stepping the counter, the current object is
        set as the currently labeled object.

        Required Arguments:
        tex -- the TeX instance containing the current context

        """
        if self.counter:
            self.ownerDocument.context.currentlabel = self
            self.stepcounter(tex)

    def postParse(self, tex):
        """
        Do operations that must be done immediately after parsing arguments

        For macros with a counter this sets `ref` and `captionName` from
        the expansions of the 'the<counter>' and '<counter>name' macros,
        provided the macro's level is within the configured section
        numbering depth or beyond ENDSECTIONS_LEVEL.

        Required Arguments:
        tex -- the TeX instance containing the current context

        """
        if self.counter:
            try:
                secnumdepth = self.config['document']['sec-num-depth']
            except Exception:
                # The setting could not be read; fall back to a depth of 10
                secnumdepth = 10
            if secnumdepth >= self.level or self.level > self.ENDSECTIONS_LEVEL:
                self.ref = self.ownerDocument.createElement('the' + self.counter).expand(tex)
                self.captionName = self.ownerDocument.createElement(self.counter + 'name').expand(tex)

    @property
    def arguments(self):
        """
        Compile the argument string into function call arguments

        The compiled list is cached on the class under '@arguments', so
        the `args` template is only compiled once per class.

        Returns:
        arguments as compiled entities (a list of Argument instances)

        Raises:
        ValueError -- if the argument string cannot be parsed

        """
        tself = type(self)

        # Check for cached version first
        if '@arguments' in vars(tself):
            return vars(tself)['@arguments']

        # If the argument string is empty, short circuit
        if not tself.args:
            setattr(tself, '@arguments', [])
            return getattr(tself, '@arguments')

        # Split the arguments into their primary components: names with an
        # optional ':type', '(delimiter)' and ':subtype' suffix, single
        # non-word characters, and whitespace (which is dropped)
        args = iter([x.strip() for x in
                     re.split(r'(\w+(?::\w+(?:\(\S\))?(?::\w+)?)?|\W|\s+)',
                              tself.args) if x is not None and x.strip()])

        groupings = {'[':'[]', '(':'()', '<':'<>', '{':'{}'}

        macroargs = []
        # Options collected for the argument currently being compiled
        argdict = {}
        index = 0
        for item in args:

            # Modifier argument
            if item in '*+-':
                if argdict:
                    raise ValueError('Improperly placed "%s" in argument string "%s"' % \
                        (item, tself.args))
                argdict.clear()
                macroargs.append(Argument('*modifier*', index, {'spec':item}))
                index += 1

            # Optional equals
            elif item in '=':
                argdict.clear()
                macroargs.append(Argument('*equals*', index, {'spec':item}))
                index += 1

            # Beginning of group
            elif item in '[(<{':
                argdict.clear()
                argdict['spec'] = groupings[item]

            # End of group
            elif item in '])>}':
                pass

            # Argument name (and possibly type)
            elif item[0] in encoding.stringletters():
                parts = item.split(':')
                item = parts.pop(0)
                # Parse for types and subtypes
                if parts:
                    # We already have a type, so check for subtypes
                    # for list items
                    if 'type' in argdict:
                        argdict['subtype'] = parts.pop(0)
                    else:
                        # Split type and possible delimiter
                        argdict['type'], argdict['delim'] = re.search(r'(\w+)(?:\((\W)\))?', parts.pop(0)).groups()
                        if parts:
                            argdict['subtype'] = parts.pop(0)
                # Arguments of type 'cs' or 'nox' are left unexpanded;
                # all other arguments are expanded
                if argdict.get('type') in ['cs', 'nox']:
                    argdict['expanded'] = False
                else:
                    argdict['expanded'] = True
                # Argument copies the options, so argdict can be reused
                macroargs.append(Argument(item, index, argdict))
                index += 1
                argdict.clear()

            else:
                raise ValueError('Could not parse argument string "%s", reached unexpected "%s"' % (tself.args, item))

        # Cache the result
        setattr(tself, '@arguments', macroargs)

        return macroargs

    def digestUntil(self, tokens, endclass):
        """
        Absorb tokens until a token of the given class is given

        This method is useful for things like lists and tables
        when one element is actually ended by the occurrence of
        another (i.e. \\item ended by \\item, array cell ended by
        array cell, array cell ended by array row, etc.).

        Required Arguments:
        tokens -- iterator of tokens in the stream
        endclass -- class reference or tuple of class references
            that, when a token of that type is reached, stops
            the digestion process

        Returns:
        None -- if the context ended without reaching a token of
            the requested type
        token -- the token of the requested type if it was found.  The
            token is also pushed back onto the stream.

        """
        for tok in tokens:
            if tok.nodeType == Node.ELEMENT_NODE:
                if isinstance(tok, endclass):
                    tokens.push(tok)
                    return tok
                tok.parentNode = self
                tok.digest(tokens)
            # Stay within our context
            if tok.contextDepth < self.contextDepth:
                tokens.push(tok)
                break
            self.appendChild(tok)

    @property
    def currentSection(self):
        """
        Return the section that this node belongs to

        This property will contain the parent section if the current
        node is a section node.  None is returned if no ancestor is a
        section.

        """
        node = self.parentNode
        while node is not None:
            if node.level < Node.ENDSECTIONS_LEVEL:
                return node
            node = node.parentNode
        return None

    def paragraphs(self, force=True):
        """
        Group content into paragraphs

        This algorithm is based on TeX's paragraph grouping algorithm.
        This has the downside that it isn't the same paragraph algorithm
        as HTML which doesn't allow block-level elements (e.g. table,
        ol, ul, etc.) inside paragraphs.  This will result in invalid
        HTML, but it isn't likely to be noticed in a browser.

        Keyword Arguments:
        force -- force all content to be grouped into paragraphs even
            if there are no paragraphs already present

        """
        # Reuse the node name of the first paragraph found in the content
        parname = None
        for item in self:
            if item.level == Node.PAR_LEVEL:
                parname = item.nodeName
                break

        # No paragraphs, and we aren't forcing paragraphs...
        if parname is None and not force:
            self.normalize(self.ownerDocument.charsubs)
            return

        if parname is None:
            parname = 'par'

        # Group content into paragraphs.  The children are removed from
        # self one at a time and collected in `newnodes`; content always
        # goes into the last node of that list.
        par = self.ownerDocument.createElement(parname)
        par.parentNode = self
        newnodes = [par]
        while self:
            item = self.pop(0)
            # An existing paragraph collects the content that follows it
            if item.level == Node.PAR_LEVEL:
                newnodes.append(item)
                continue
            # Nodes with a level below PAR_LEVEL end the grouping; the
            # children that are still in self are left untouched
            if item.level < Node.PAR_LEVEL:
                newnodes.append(item)
                break
            # Block level elements get their own paragraph
            if item.blockType:
                par = self.ownerDocument.createElement(parname)
                par.appendChild(item)
                par.blockType = True
                newnodes.append(par)
                par = self.ownerDocument.createElement(parname)
                newnodes.append(par)
                continue
            newnodes[-1].append(item)

        # Insert nodes into self
        for i, item in enumerate(newnodes):
            if item.level == Node.PAR_LEVEL:
                item.normalize(self.ownerDocument.charsubs)
            self.insert(i, item)

        # Filter out any empty paragraphs (iterating backwards so that
        # removals do not shift the indexes still to be visited)
        for i in range(len(self) - 1, -1, -1):
            item = self[i]
            if item.level == Node.PAR_LEVEL:
                if not item:
                    self.pop(i)
                elif len(item) == 1 and item[0].isElementContentWhitespace:
                    self.pop(i)
774
class TeXFragment(DocumentFragment):
    """ Document fragment node that can report its LaTeX source """
    @property
    def source(self):
        """ LaTeX source of the child nodes """
        return sourceChildren(self, True)
780
781
class TeXDocument(Document):
    """ TeX Document node """
    documentFragmentClass = TeXFragment

    # (source text, replacement character) pairs.  Longer sequences are
    # listed before the shorter sequences they contain.
    charsubs = [
        ('``', chr(8220)),
        ("''", chr(8221)),
        ('"`', chr(8222)),
        ('"\'', chr(8220)),
        ('`', chr(8216)),
        ("'", chr(8217)),
        ('---', chr(8212)),
        ('--', chr(8211)),
#       ('fj', unichr(58290)),
#       ('ff', unichr(64256)),
#       ('fi', unichr(64257)),
#       ('fl', unichr(64258)),
#       ('ffi',unichr(64259)),
#       ('ffl',unichr(64260)),
#       ('ij', unichr(307)),
#       ('IJ', unichr(308)),
    ]

    def __init__(self, *args, **kwargs):
        """
        Set up the context, configuration and bookkeeping of the document

        Keyword Arguments:
        context -- context to use; a new Context.Context(load=True) is
            created if it is not given
        config -- configuration to use; Config.config is used if it is
            not given

        """
        # super(TeXDocument, self).__init__(*args, **kwargs)

        if 'context' in kwargs:
            self.context = kwargs['context']
        else:
            from plasTeX import Context
            self.context = Context.Context(load=True)

        if 'config' in kwargs:
            self.config = kwargs['config']
        else:
            from plasTeX import Config
            self.config = Config.config

        # post parsing callbacks list
        self.postParseCallbacks = []

        self.packageResources = []
        self.rendererdata = {}

    def addPackageResource(self, resource):
        """
        Add a package resource or a list of package resources to
        self.packageResources.
        """
        additions = resource if isinstance(resource, list) else [resource]
        self.packageResources.extend(additions)

    def createElement(self, name):
        """
        Instantiate the macro that the context holds under the given name

        The new element has no parent, is owned by this document and gets
        a context depth of 1000.

        """
        macroclass = self.context[name]
        element = macroclass()
        element.parentNode = None
        element.ownerDocument = self
        element.contextDepth = 1000
        return element

    @property
    def preamble(self):
        """
        Return the nodes in the document that correspond to the preamble

        These are the nodes that come before the first node whose level
        is Macro.DOCUMENT_LEVEL.

        """
        fragment = self.createDocumentFragment()
        for node in self:
            if node.level == Macro.DOCUMENT_LEVEL:
                return fragment
            fragment.append(node)
        return fragment

    @property
    def source(self):
        """ Return the LaTeX source of the document """
        return sourceChildren(self, True)
859
class Command(Macro):
    """
    Base class for all Python-based LaTeX commands

    Nothing is added to or overridden from Macro here.

    """
862
class Environment(Macro):
    """ Base class for all Python-based LaTeX environments """
    level = Node.ENVIRONMENT_LEVEL

    def invoke(self, tex):
        """
        Open or close the context of the environment

        The end of the environment pops the context; anything else pushes
        a new context and parses the arguments.

        Returns: the text tokens for `self.str` if it is set, otherwise
            None

        """
        closing = self.macroMode == Macro.MODE_END
        if closing:
            self.ownerDocument.context.pop(self)
        else:
            self.ownerDocument.context.push(self)
            self.parse(tex)

        if self.str is not None:
            return tex.textTokens(self.str)

        # Navigation links are only set up when the environment is opened
        if not closing:
            self.setLinkType()

    def digest(self, tokens):
        """ Absorb all of the tokens that belong to the environment """
        if self.macroMode == Macro.MODE_END:
            return

        groupPars = self.forcePars
        for token in tokens:
            # A paragraph in the content means that the content has to
            # be grouped into paragraphs afterwards
            if token.level == Node.PAR_LEVEL:
                self.appendChild(token)
                groupPars = True
                continue

            # Objects with a higher precedence don't belong to us
            if token.level < self.level:
                tokens.push(token)
                break

            if token.nodeType == Node.ELEMENT_NODE:
                # The end of this very environment finishes the digestion
                if (token.macroMode == Macro.MODE_END
                        and type(token) is type(self)):
                    break
                token.parentNode = self
                token.digest(tokens)

            # Never leave our own context depth
            if (self.level > Node.DOCUMENT_LEVEL
                    and token.contextDepth < self.contextDepth):
                tokens.push(token)
                break

            self.appendChild(token)

        if groupPars:
            self.paragraphs()
916
917
class NoCharSubEnvironment(Environment):
    """
    A subclass of Environment which prevents character substitution inside
    itself.
    """
    def __init__(self, *args, **kwargs):
        # Will hold the owner document charsubs to restore it at the end
        self.charsubs = []
        super(NoCharSubEnvironment, self).__init__(*args, **kwargs)

    def invoke(self, tex):
        """
        Switch character substitution off at the beginning of the
        environment and restore it at the end

        Returns: whatever Environment.invoke returns

        """
        # The goal is to prevent any character substitution while handling
        # this environment.
        doc = self.ownerDocument
        if self.macroMode == Macro.MODE_BEGIN:
            self.charsubs = doc.charsubs
            doc.charsubs = []
        elif self.macroMode == Macro.MODE_END:
            # NOTE(review): this restores the value saved on *this*
            # instance, so it presumably relies on the end of the
            # environment being handled by the instance that handled its
            # beginning -- confirm against the parser.
            doc.charsubs = self.charsubs
        # Environment.invoke may return tokens (when `self.str` is set);
        # they must be handed on to the caller rather than dropped
        return super(NoCharSubEnvironment, self).invoke(tex)
938
class IgnoreCommand(Command):
    """
    Command that is parsed but contributes nothing to the output stream

    Use this sparingly: because the command never reaches the output,
    it is also absent from the source of any node in the document.

    """
    def invoke(self, tex):
        """ Run the regular Command invocation, then emit no tokens """
        Command.invoke(self, tex)
        # An empty list replaces the command in the output
        return []
951
class UnrecognizedMacro(Macro):
    """
    Base class for unrecognized macros

    Whenever a macro is requested that is not known, an instance of
    this class stands in for the missing macro.

    """
    def __eq__(self, other):
        """
        Compare equal to anything without a ``nodeName``, to nodes named
        ``undefined``/``@undefined`` and to other unrecognized macros;
        defer to the parent class comparison otherwise.
        """
        treated_as_equal = (
            not hasattr(other, 'nodeName')
            or other.nodeName in ['undefined', '@undefined']
            or isinstance(other, UnrecognizedMacro)
        )
        if treated_as_equal:
            return True
        return super(UnrecognizedMacro, self).__eq__(other)
968
class NewIf(Macro):
    """ Base class for all generated \\newifs """

    # Truth value of the conditional; stored on the class, not the instance
    state = False

    def invoke(self, tex):
        """ Let the TeX processor handle the if-content using our state """
        current = type(self).state
        tex.processIfContent(current)
        return []

    @classmethod
    def setState(cls, state):
        """ Store an arbitrary state value on the class """
        cls.state = state

    @classmethod
    def setTrue(cls):
        """ Set the class-level state to True """
        cls.state = True

    @classmethod
    def setFalse(cls):
        """ Set the class-level state to False """
        cls.state = False
989
class IfTrue(Macro):
    """ Base class for all generated \\iftrues """
    def invoke(self, tex):
        """ Call setTrue() on the class stored in ``ifclass``; emit nothing """
        conditional = type(self).ifclass
        conditional.setTrue()
        return []
995
class IfFalse(Macro):
    """ Base class for all generated \\iffalses """
    def invoke(self, tex):
        """ Call setFalse() on the class stored in ``ifclass``; emit nothing """
        conditional = type(self).ifclass
        conditional.setFalse()
        return []
1001
def expandDef(definition, params):
    """
    Expand the parameter references in a macro definition

    Arguments:
    definition -- iterable of tokens making up the replacement text
    params -- sequence indexed by parameter number holding the tokens
        to substitute (entries that are None are skipped)

    Returns: list of tokens with the parameters substituted; an empty
        list if the definition is empty

    """
    if not definition:
        return []

    expanded = []
    stream = iter(definition)
    exhausted = object()
    previous = ''
    for tok in stream:
        # Ordinary tokens are copied straight through
        if tok.catcode != Token.CC_PARAMETER:
            expanded.append(tok)
            previous = tok
            continue

        # A parameter character: what it means depends on the next token
        following = next(stream, exhausted)
        if following is exhausted:
            previous = tok
            continue
        tok = following

        if tok.catcode == Token.CC_PARAMETER:
            # Double '#'
            expanded.append(tok)
        else:
            replacement = params[int(tok)]
            if replacement is not None:
                if previous == 'ifx':
                    # This is a pretty bad hack, but `ifx' commands
                    # need an argument to also be a token.  So the
                    # argument is wrapped in a group here and the
                    # TeX parser converts the group to a token.
                    expanded.append(BeginGroup(' '))
                    expanded.extend(replacement)
                    expanded.append(EndGroup(' '))
                else:
                    expanded.extend(replacement)
        previous = tok
    return expanded
1034
class NewCommand(Macro):
    """ Superclass for all \\newcommand/\\newenvironment type commands """
    # Total number of arguments, including the optional one when `opt` is set
    nargs = 0
    # Default value of the optional first argument; None means no optional arg
    opt = None
    # Replacement tokens handed to expandDef()
    definition = None

    def invoke(self, tex):
        """
        Read the arguments from the stream and expand the definition

        Arguments:
        tex -- the TeX processor the arguments are read from

        Returns: list of tokens.  In begin mode the expansion is preceded
            by a BeginGroup token; in end mode the result of invoking the
            matching 'end' + tagName element is followed by an EndGroup
            token.

        """
        if self.macroMode == Macro.MODE_END:
            res = self.ownerDocument.createElement('end' + self.tagName).invoke(tex)
            if res is None:
                # NOTE(review): this puts a literal None into the returned
                # token list -- confirm that the consumer tolerates it.
                return [res, EndGroup(' ')]
            return res + [EndGroup(' ')]

        # Index 0 is a placeholder so that parameter #1 lives at params[1]
        params = [None]

        # Get optional argument, if needed
        nargs = self.nargs
        if self.opt is not None:
            nargs -= 1
            params.append(tex.readArgument('[]', default=self.opt,
                                           parentNode=self,
                                           name='#%s' % len(params)))

        # Get mandatory arguments
        for i in range(nargs):
            params.append(tex.readArgument(parentNode=self,
                                           name='#%s' % len(params)))

        deflog.debug2('expanding %s %s', self.definition, params)

        output = []
        if self.macroMode == Macro.MODE_BEGIN:
            output.append(BeginGroup(' '))

        return output + expandDef(self.definition, params)
1070
class Definition(Macro):
    """ Superclass for all \\def-type commands """
    # Parameter text of the definition (sequence of tokens), if any
    args = None
    # Replacement text handed to expandDef()
    definition = None

    def invoke(self, tex):
        """
        Match the parameter text against the stream and expand the definition

        Arguments:
        tex -- the TeX processor the arguments are read from

        Returns: `self.definition` unchanged when there is no parameter
            text, otherwise the token list produced by expandDef()

        Raises: ValueError if a parameter character in the parameter text
            is followed by something other than a digit, another parameter
            character or a begin-group token

        """
        if not self.args: return self.definition

        name = macroName(self)
        argIter = iter(self.args)
        inparam = False
        # Index 0 is a placeholder so that parameter #1 lives at params[1]
        params = [None]
        for a in argIter:

            # Beginning a new parameter
            if a.catcode == Token.CC_PARAMETER:

                # Adjacent parameters, just get the next token
                if inparam:
                    params.append(tex.readArgument(parentNode=self,
                                                   name='#%s' % len(params)))

                # Get the parameter number
                for a in argIter:
                    # Numbered parameter
                    if a in string.digits:
                        inparam = True

                    # Another parameter character: look at the token after it
                    elif a.catcode == Token.CC_PARAMETER:
                        continue

                    # Handle #{ case here
                    elif a.catcode == Token.CC_BGROUP:
                        param = []
                        for t in tex.itertokens():
                            if t.catcode == Token.CC_BGROUP:
                                # The brace delimits the parameter but is not
                                # part of it: put it back and stop reading.
                                # Without the break the pushed-back token
                                # would be read again on the next iteration.
                                tex.pushToken(t)
                                break
                            else:
                                param.append(t)
                        inparam = False
                        params.append(param)

                    else:
                        raise ValueError('Invalid arg string: %s' % ''.join(self.args))
                    break

            # In a parameter, so get everything up to a token that matches `a`
            elif inparam:
                param = []
                for t in tex.itertokens():
                    if t == a:
                        break
                    else:
                        param.append(t)
                inparam = False
                params.append(param)

            # Not in a parameter, just make sure the token matches
            else:
                # Only a single token is examined: both branches break
                for t in tex.itertokens():
                    if t == a:
                        break
                    else:
                        log.info('Arguments of "%s" don\'t match definition. Got "%s" but was expecting "%s" (%s).' % (name, t, a, ''.join(self.args)))
                        break

        # The parameter text ended on an undelimited parameter
        if inparam:
            params.append(tex.readArgument(parentNode=self,
                                           name='#%s' % len(params)))

        deflog.debug2('expanding %s %s', self.definition, params)

        return expandDef(self.definition, params)
1144
1145
class number(int):
    """ Integer type used for parameter and count values """
    def __new__(cls, v):
        # Anything that is not a macro goes through the plain int constructor
        if not isinstance(v, Macro):
            return int.__new__(cls, v)
        # Macros supply their own count value
        return v.__count__()

    @property
    def source(self):
        """ String form of the value """
        return str(self)
1156
class count(number):
    """ Class used for count values; adds nothing to number """
1158
class dimen(float):
    """
    Float type used for dimen values

    Strings carrying a unit are converted on construction: the length
    units in the scale table are multiplied out (1pt = 65536),
    'sp' and 'mu' values are stored as given, and fil/fill/filll values
    are encoded by offsetting the number by 2, 4 or 6 billion.
    """

    units = ['pt', 'pc', 'in', 'bp', 'cm', 'mm', 'dd', 'cc', 'sp', 'ex', 'em']

    def __new__(cls, v):
        if isinstance(v, Macro):
            return v.__dimen__()
        if isinstance(v, str) and v[-1] in encoding.stringletters():
            # Get rid of glue components
            text = v.split('plus').pop(0).split('minus').pop(0).strip()

            # The trailing run of letters is the unit, the rest the number
            cut = len(text)
            while cut and text[cut - 1] in encoding.stringletters():
                cut -= 1
            v = float(text[:cut])
            units = text[cut:]

            # Multipliers for the plain length units
            scale = {
                'pt': 65536,
                'pc': 12 * 65536,
                'in': 72.27 * 65536,
                'bp': (72.27 * 65536) / 72,
                'cm': (72.27 * 65536) / 2.54,
                'mm': (72.27 * 65536) / 25.4,
                'dd': (1238.0 * 65536) / 1157,
                'cc': (1238.0 * 12 * 65536) / 1157,
                # Just estimates, since the actual font size isn't known
                'ex': 5 * 65536,
                'em': 11 * 65536,
            }
            # Encode fil(ll)s by adding 2, 4, and 6 billion
            infinite = {'fil': 2e9, 'fill': 4e9, 'filll': 6e9}

            if units in scale:
                v *= scale[units]
            elif units in infinite:
                if v < 0:
                    v -= infinite[units]
                else:
                    v += infinite[units]
            elif units not in ('sp', 'mu'):
                # 'sp' and 'mu' values are kept exactly as written
                raise ValueError('Unrecognized units: %s' % units)
        return float.__new__(cls, v)

    @property
    def source(self):
        """ String form: the value in fil/fill/filll if encoded so, else in pt """
        magnitude = abs(self)
        sign = -1 if self < 0 else 1
        for offset, suffix in ((6e9, 'filll'), (4e9, 'fill'), (2e9, 'fil')):
            if magnitude >= offset:
                return str(sign * (magnitude - offset)) + suffix
        return '%spt' % self.pt

    @property
    def pt(self):
        """ Value divided by 65536 """
        return self / 65536
    point = pt

    @property
    def pc(self):
        """ Value divided by 12 * 65536 """
        return self / (12 * 65536)
    pica = pc

    @property
    def _in(self):
        """ Value divided by 72.27 * 65536 """
        return self / (72.27 * 65536)
    inch = _in

    @property
    def bp(self):
        """ Value divided by the 'bp' factor used on construction """
        return self / ((72.27 * 65536) / 72)
    bigpoint = bp

    @property
    def cm(self):
        """ Value divided by the 'cm' factor used on construction """
        return self / ((72.27 * 65536) / 2.54)
    centimeter = cm

    @property
    def mm(self):
        """ Value divided by the 'mm' factor used on construction """
        return self / ((72.27 * 65536) / 25.4)
    millimeter = mm

    @property
    def dd(self):
        """ Value divided by (1238 * 65536) / 1157 """
        return self / ((1238 * 65536) / 1157)
    didotpoint = dd

    @property
    def cc(self):
        """ Value divided by (1238 * 12 * 65536) / 1157 """
        return self / ((1238 * 12 * 65536) / 1157)
    cicero = cc

    @property
    def sp(self):
        """ The value itself """
        return self
    scaledpoint = sp

    @property
    def ex(self):
        """ Value divided by the estimated 'ex' factor, 5 * 65536 """
        return self / (5 * 65536)
    xheight = ex

    @property
    def em(self):
        """ Value divided by the estimated 'em' factor, 11 * 65536 """
        return self / (11 * 65536)
    mwidth = em

    @property
    def fill(self):
        """
        Decoded fil/fill/filll amount

        Raises: ValueError if the magnitude is below 2e9, i.e. the value
            does not carry a fil(ll) encoding

        """
        magnitude = abs(self)
        sign = -1 if self < 0 else 1
        for offset in (6e9, 4e9, 2e9):
            if magnitude >= offset:
                return sign * (magnitude - offset)
        raise ValueError('This is not a fil(ll) dimension')
    fil = filll = fill

    def __repr__(self):
        return self.source

    def __str__(self):
        return self.source
1301
class mudimen(dimen):
    """ dimen subclass used for mudimen values """
    # The only unit listed for this type
    units = ['mu']
1305
class glue(dimen):
    """
    dimen subclass used for glue values

    The float value is built from `g`; the optional `plus` and `minus`
    components are kept as dimens in `stretch` and `shrink`.
    """
    def __new__(cls, g, plus=None, minus=None):
        # Only `g` takes part in the numeric value
        return dimen.__new__(cls, g)

    def __init__(self, g, plus=None, minus=None):
        # super(glue, self).__init__(g)
        self.stretch = dimen(plus) if plus is not None else None
        self.shrink = dimen(minus) if minus is not None else None

    @property
    def source(self):
        """ The dimen source followed by any plus/minus components """
        pieces = [dimen(self).source]
        for keyword, component in (('plus', self.stretch),
                                   ('minus', self.shrink)):
            if component is not None:
                pieces += [keyword, component.source]
        return ' '.join(pieces)
1329
class muglue(glue):
    """ glue subclass used for muglue values """
    # The only unit listed for this type
    units = ['mu']
1333
1334
class ParameterCommand(Command):
    """
    Command that stores a value on its class

    Invoking the command parses an optional '=' and a Number from the
    stream and assigns the result to the class-level `value`.
    """
    args = '= value:Number'
    value = count(0)

    # Global switch shared by all parameter commands (always read and
    # written through ParameterCommand itself, never through a subclass)
    enabled = True
    _enablelevel = 0

    def invoke(self, tex):
        """ Parse the arguments and store the new value on the class """
        if ParameterCommand.enabled:
            # Disable invoke() in parameters nested in our arguments.
            # We don't want them to invoke, we want them to set our value.
            ParameterCommand.enabled = False
            try:
                type(self).value = self.parse(tex)['value']
            finally:
                # Re-enable even if parsing fails; otherwise one bad
                # assignment would leave every parameter command disabled.
                ParameterCommand.enabled = True

    @classmethod
    def enable(cls):
        """ Raise the enable level by one and recompute `enabled` """
        ParameterCommand._enablelevel += 1
        ParameterCommand.enabled = ParameterCommand._enablelevel >= 0

    @classmethod
    def disable(cls):
        """ Lower the enable level by one and recompute `enabled` """
        ParameterCommand._enablelevel -= 1
        ParameterCommand.enabled = ParameterCommand._enablelevel >= 0

    def __dimen__(self):
        return dimen(type(self).value)

    def __mudimen__(self):
        return mudimen(type(self).value)

    def __count__(self):
        return count(type(self).value)

    def __glue__(self):
        return glue(type(self).value)

    def __muglue__(self):
        return muglue(type(self).value)

    def the(self):
        """ Return the source representation of the stored value """
        return type(self).value.source

    @classmethod
    def new(cls, *args, **kwargs):
        """ Create a value of the type this command stores (a count) """
        return count(*args, **kwargs)
1381
class RegisterCommand(ParameterCommand):
    """ Common base of the count, dimen, mudimen, glue and muglue commands """
1383
class CountCommand(RegisterCommand):
    """ Register command that keeps the inherited Number argument and count value """
1385
class DimenCommand(RegisterCommand):
    """ Register command whose class-level value is a dimen """
    args = '= value:Dimen'
    value = dimen(0)

    def setlength(self, len):
        """ Replace the stored value with dimen(len) """
        register = type(self)
        register.value = dimen(len)

    def addtolength(self, len):
        """ Add `len` to the stored value and store the sum as a dimen """
        register = type(self)
        register.value = dimen(register.value + len)

    @classmethod
    def new(cls, *args, **kwargs):
        """ Create a dimen from the given arguments """
        return dimen(*args, **kwargs)
1399
class MuDimenCommand(RegisterCommand):
    """ Register command whose class-level value is a mudimen """
    args = '= value:MuDimen'
    value = mudimen(0)

    def setlength(self, len):
        """ Replace the stored value with mudimen(len) """
        register = type(self)
        register.value = mudimen(len)

    def addtolength(self, len):
        """ Add `len` to the stored value and store the sum as a mudimen """
        register = type(self)
        register.value = mudimen(register.value + len)

    @classmethod
    def new(cls, *args, **kwargs):
        """ Create a mudimen from the given arguments """
        return mudimen(*args, **kwargs)
1413
class GlueCommand(RegisterCommand):
    """ Register command whose class-level value is a glue """
    args = '= value:Glue'
    value = glue(0)

    def setlength(self, len):
        """ Replace the stored value with glue(len) """
        register = type(self)
        register.value = glue(len)

    def addtolength(self, len):
        """ Add `len` to the stored value and store the sum as a glue """
        register = type(self)
        register.value = glue(register.value + len)

    @classmethod
    def new(cls, *args, **kwargs):
        """ Create a glue from the given arguments """
        return glue(*args, **kwargs)
1427
class MuGlueCommand(RegisterCommand):
    """ Register command whose class-level value is a muglue """
    args = '= value:MuGlue'
    value = muglue(0)

    def setlength(self, len):
        """ Replace the stored value with muglue(len) """
        register = type(self)
        register.value = muglue(len)

    def addtolength(self, len):
        """ Add `len` to the stored value and store the sum as a muglue """
        register = type(self)
        register.value = muglue(register.value + len)

    @classmethod
    def new(cls, *args, **kwargs):
        """ Create a muglue from the given arguments """
        return muglue(*args, **kwargs)
1441
1442
class Counter(object):
    """
    LaTeX counter class

    A counter has a name, an integer value and, optionally, the name of
    another counter that resets it.  All counters of a context share the
    context's `counters` mapping, which is used to find the counters that
    have to be reset whenever this one changes.

    """
    def __init__(self, context, name, resetby=None, value=0):
        """
        Arguments:
        context -- object whose `counters` mapping holds all counters
        name -- name of this counter
        resetby -- name of the counter that resets this one, if any
        value -- initial value

        """
        self.name = name
        self.resetby = resetby
        self.value = value
        self.counters = context.counters

    def addtocounter(self, other):
        """ Add int(other) to the value and reset dependent counters """
        self.value += int(other)
        self.resetcounters()

    def setcounter(self, other):
        """ Set the value to int(other) and reset dependent counters """
        self.value = int(other)
        self.resetcounters()

    def stepcounter(self):
        """ Increment the value by one and reset dependent counters """
        self.value += 1
        self.resetcounters()

    def resetcounters(self):
        """ Zero every counter that is reset by this one, recursively """
        # Iterate over a snapshot of the mapping's values
        for counter in list(self.counters.values()):
            if counter.resetby and self.name and counter.resetby == self.name:
                counter.value = 0
                counter.resetcounters()

    def __int__(self):
        return self.value

    def __float__(self):
        # __float__ has to return a real float; handing back the integer
        # value makes float(counter) raise TypeError.
        return float(self.value)

    @property
    def arabic(self):
        """ Value as a decimal string """
        return str(self.value)

    @property
    def Roman(self):
        """ Value as an uppercase roman numeral """
        thousands, remainder = divmod(self.value, 1000)
        pieces = ["M" * thousands]
        # remainder is now below 1000; peel off numerals from largest down
        for amount, numeral in ((900, "CM"), (500, "D"), (400, "CD"),
                                (100, "C"), (90, "XC"), (50, "L"),
                                (40, "XL"), (10, "X"), (9, "IX"),
                                (5, "V"), (4, "IV"), (1, "I")):
            while remainder >= amount:
                pieces.append(numeral)
                remainder -= amount
        return "".join(pieces)

    @property
    def roman(self):
        """ Value as a lowercase roman numeral """
        return self.Roman.lower()

    @property
    def Alph(self):
        """ Uppercased letter at position value - 1 of encoding.stringletters() """
        return encoding.stringletters()[self.value - 1].upper()

    @property
    def alph(self):
        """ Lowercase form of Alph """
        return self.Alph.lower()

    @property
    def fnsymbol(self):
        """ One asterisk per unit of the value """
        return '*' * self.value
1540
1541
class TheCounter(Command):
    """ Base class for \\thecounter commands """
    # Template for the printed value.  Placeholders of the form $name,
    # ${name} or ${name.format} are replaced by counter values.  When None,
    # the counter named after this node (minus its first three characters)
    # is printed in arabic.
    format = None

    def invoke(self, tex):
        """
        Render the format template into text tokens

        Arguments:
        tex -- the TeX processor used to expand nested \\thecounter
            references and to build the resulting tokens

        Returns: the result of tex.textTokens() on the rendered string

        """

        def counterValue(m):
            """ Replace the counter values """
            name = m.group(1)

            # If there is a reference to another \\thecounter, invoke it
            if name.startswith('the') and name != re.sub(r'^the', '', self.__class__.__name__):
                return ''.join(tex.expandTokens(self.ownerDocument.createElement(name).invoke(tex)))

            # Get formatted value of the requested counter
            format = m.group(2)
            if not format:
                format = 'arabic'

            return getattr(self.ownerDocument.context.counters[name], format)

        # The None check has to come first: re.sub() raises TypeError when
        # it is handed None instead of a string.
        if self.format is None:
            format = '${%s.arabic}' % self.nodeName[3:]
        else:
            # Normalize the bare $name form to ${name}
            format = re.sub(r'\$(\w+)', r'${\1}', self.format)

        t = re.sub(r'\$\{\s*(\w+)(?:\.(\w+))?\s*\}', counterValue, format)

        # This is kind of a hack.  Since number formats aren't quite as
        # flexible as in LaTeX, we have to do some things heuristically.
        # In this case, whenever a counter value comes out as a zero,
        # just yank it out.  This is especially useful in document classes
        # such as book and report which do this in the \thefigure format macro.
        t = re.sub(r'\b0[^\dA-Za-z]+', r'', t)

        return tex.textTokens(t)
1577