1#!/usr/bin/env python 2 3__version__ = '2.1' 4 5from plasTeX import Logging, encoding 6from plasTeX.DOM import Element, Text, Node, DocumentFragment, Document 7from plasTeX.Tokenizer import Token, BeginGroup, EndGroup, Other 8import string 9import re 10 11log = Logging.getLogger() 12status = Logging.getLogger('status') 13deflog = Logging.getLogger('parse.definitions') 14 15# 16# Utility functions 17# 18 19def idgen(): 20 """ Generate a unique ID """ 21 i = 1 22 while 1: 23 yield 'a%.10d' % i 24 i += 1 25idgen = idgen() 26 27def subclasses(o): 28 """ Return all subclasses of the given class """ 29 output = [o] 30 for item in o.__subclasses__(): 31 output.extend(subclasses(item)) 32 return output 33 34def sourceChildren(o, par=True): 35 """ Return the LaTeX source of the child nodes """ 36 if o.hasChildNodes(): 37 if par: 38 return ''.join([x.source for x in o.childNodes]) 39 else: 40 source = [] 41 for par in o.childNodes: 42 source += [x.source for x in par] 43 return ''.join(source) 44 return '' 45 46def sourceArguments(o): 47 """ Return the LaTeX source of the arguments """ 48 return o.argSource 49 50def ismacro(o): 51 """ Is the given object a macro? """ 52 return hasattr(o, 'macroName') 53 54def issection(o): 55 """ Is the given object a section? """ 56 return o.level >= Node.DOCUMENT_LEVEL and o.level < Node.ENDSECTIONS_LEVEL 57 58def macroName(o): 59 """ Return the macro name of the given object """ 60 if o.macroName is None: 61 if type(o) is type: 62 return o.__name__ 63 return type(o).__name__ 64 return o.macroName 65 66 67class Argument(object): 68 """ 69 Macro argument 70 71 Argument strings in macros are compiled into Arguments 72 once. Then the compiled arguments can be used to get the 73 arguments thereafter. 74 75 """ 76 def __init__(self, name, index, options={}): 77 self.name = name 78 self.index = index 79 self.source = '' 80 self.options = options.copy() 81 82 def __repr__(self): 83 return '%s: %s' % (self.name, self.options) 84 85 def __eq__(self, other): 86 try: 87 return self.name == other.name and self.options == other.options 88 except AttributeError: 89 return NotImplemented 90 91 92 93class CSSStyles(dict): 94 """ CSS Style object """ 95 @property 96 def inline(self): 97 """ 98 Create an inline style representation 99 100 Returns: 101 string containing inline CSS 102 103 """ 104 if not self: 105 return None 106 return '; '.join(['%s:%s' % (x[0], x[1]) for x in list(self.items())]) 107 108 109class Macro(Element): 110 """ 111 Base class for all macros 112 113 """ 114 MODE_NONE = 0 115 MODE_BEGIN = 1 116 MODE_END = 2 117 118 macroName = None # TeX macro name (instead of class name) 119 macroMode = MODE_NONE # begin, end, or none 120 mathMode = None 121 122 # Node variables 123 level = Node.COMMAND_LEVEL 124 nodeType = Node.ELEMENT_NODE 125 nodeValue = None 126 127 # Counter associated with this macro 128 counter = None 129 130 # Value to return when macro is referred to by \ref 131 ref = None 132 133 # Attributes that should be persisted between runs for nodes 134 # that can be referenced. This allows for cross-document links. 135 refAttributes = ['macroName', 'ref', 'title', 'captionName', 'id', 'url'] 136 137 # Source of the TeX macro arguments 138 argSource = '' 139 140 # LaTeX argument template 141 args = '' 142 143 # Force there to be at least on paragraph in the content 144 forcePars = False 145 146 def persist(self, attrs=None): 147 """ 148 Store attributes needed for cross-document links 149 150 This method really needs to be called by the renderer because 151 the rendered versions of the attributes are needed. If nested 152 classes could be pickeled, we could just pickle the attributes. 153 154 Keyword Arguments: 155 attrs -- dictionary to populate with values. If set to None, 156 a new dictionary should be created. 157 158 Returns: dictionary containing attributes to be persisted 159 160 """ 161 if attrs is None: 162 attrs = {} 163 for name in self.refAttributes: 164 value = getattr(self, name, None) 165 if value is None: 166 continue 167 if isinstance(value, Node): 168 value = '%s' % str(value) 169 attrs[name] = value 170 return attrs 171 172 def restore(self, attrs): 173 """ 174 Restore attributes needed for cross-document links 175 176 Required Attributes: 177 attrs -- dictionary of attributes to be set on self 178 179 """ 180 remap = {'url':'urloverride'} 181 for key, value in list(attrs.items()): 182 setattr(self, str(remap.get(key, key)), value) 183 184 @property 185 def config(self): 186 """ Shortcut to the document config """ 187 return self.ownerDocument.config 188 189 @property 190 def idref(self): 191 """ Storage area for idref argument types """ 192 if hasattr(self, '@idref'): 193 return getattr(self, '@idref') 194 d = {} 195 setattr(self, '@idref', d) 196 return d 197 198 def captionName(): 199 """ Name associated with the counter """ 200 def fget(self): 201 if hasattr(self, '@captionName'): 202 return getattr(self, '@captionName') 203 self.captionName = name = self.ownerDocument.createTextNode('') 204 return name 205 def fset(self, value): 206 setattr(self, '@captionName', value) 207 return locals() 208 captionName = property(**captionName()) 209 210 def title(): 211 """ Retrieve title from variable or attributes dictionary """ 212 def fget(self): 213 try: 214 return getattr(self, '@title') 215 except AttributeError: 216 try: 217 return self.attributes['title'] 218 except KeyError: 219 pass 220 raise AttributeError('could not find attribute "title"') 221 def fset(self, value): 222 setattr(self, '@title', value) 223 return locals() 224 title = property(**title()) 225 226 def fullTitle(): 227 """ Retrieve title including the section number """ 228 def fget(self): 229 try: 230 return getattr(self, '@fullTitle') 231 except AttributeError: 232 if self.ref is not None: 233 fullTitle = self.ownerDocument.createDocumentFragment() 234 fullTitle.extend([self.ref, ' ', self.title], setParent=False) 235 else: 236 fullTitle = self.title 237 setattr(self, '@fullTitle', fullTitle) 238 return fullTitle 239 def fset(self, value): 240 setattr(self, '@fullTitle', value) 241 return locals() 242 fullTitle = property(**fullTitle()) 243 244 def tocEntry(): 245 """ Retrieve table of contents entry """ 246 def fget(self): 247 try: 248 return getattr(self, '@tocEntry') 249 except AttributeError: 250 try: 251 if 'toc' in list(self.attributes.keys()): 252 toc = self.attributes['toc'] 253 if toc is None: 254 toc = self.title 255 setattr(self, '@tocEntry', toc) 256 return toc 257 except (KeyError, AttributeError): 258 pass 259 return self.title 260 def fset(self, value): 261 setattr(self, '@tocEntry', value) 262 return locals() 263 tocEntry = property(**tocEntry()) 264 265 def fullTocEntry(): 266 """ Retrieve title including the section number """ 267 def fget(self): 268 try: 269 try: 270 return getattr(self, '@fullTocEntry') 271 except AttributeError: 272 if self.ref is not None: 273 fullTocEntry = self.ownerDocument.createDocumentFragment() 274 fullTocEntry.extend([self.ref, ' ', self.tocEntry], setParent=False) 275 else: 276 fullTocEntry = self.tocEntry 277 setattr(self, '@fullTocEntry', fullTocEntry) 278 return fullTocEntry 279 except Exception as msg: 280 return self.title 281 def fset(self, value): 282 setattr(self, '@fullTocEntry', value) 283 return locals() 284 fullTocEntry = property(**fullTocEntry()) 285 286 @property 287 def style(self): 288 """ CSS styles """ 289 try: 290 return getattr(self, '@style') 291 except AttributeError: 292 style = CSSStyles() 293 setattr(self, '@style', style) 294 return style 295 296 def digest(self, tokens): 297 pass 298 299 def locals(self): 300 """ Retrieve all macros local to this namespace """ 301 tself = type(self) 302 localsname = '@locals' 303 # Check for cached versions first 304 try: 305 return vars(tself)[localsname] 306 except KeyError: 307 pass 308 mro = list(tself.__mro__) 309 mro.reverse() 310 loc = {} 311 for cls in mro: 312 for value in list(vars(cls).values()): 313 if ismacro(value): 314 loc[macroName(value)] = value 315 # Cache the locals in a unique name 316 setattr(tself, localsname, loc) 317 return loc 318 319 def id(): 320 def fset(self, value): 321 if value: 322 setattr(self, '@id', value) 323 else: 324 delattr(self, '@id') 325 def fget(self): 326 id = getattr(self, '@id', None) 327 if id is None: 328 for id in idgen: 329 setattr(self, '@hasgenid', True) 330 self.id = id 331 break 332 return id 333 return locals() 334 id = property(**id()) 335 336 def expand(self, tex): 337 """ Fully expand the macro """ 338 result = self.invoke(tex) 339 if result is None: 340 return self 341 return tex.expandTokens(result) 342 343 def invoke(self, tex): 344 # Just pop the context if this is a \end token 345 if self.macroMode == Macro.MODE_END: 346 self.ownerDocument.context.pop(self) 347 return 348 349 # If this is a \begin token or the element needs to be 350 # closed automatically (i.e. \section, \item, etc.), just 351 # push the new context and return the instance. 352 if self.macroMode == Macro.MODE_BEGIN: 353 self.ownerDocument.context.push(self) 354 self.parse(tex) 355 self.setLinkType() 356 return 357 358 # Push, parse, and pop. The command doesn't need to stay on 359 # the context stack. We push an empty context so that the 360 # `self' token doesn't get put into the output stream twice 361 # (once here and once with the pop). 362 self.ownerDocument.context.push(self) 363 self.parse(tex) 364 self.ownerDocument.context.pop(self) 365 self.setLinkType() 366 367 def setLinkType(self, key=None): 368 """ 369 Set up navigation links 370 371 Keyword Arguments: 372 key -- the name or names of the navigation keys to set 373 instead of using self.linkType 374 375 """ 376 if key is None: 377 key = self.linkType 378 if key: 379 userdata = self.ownerDocument.userdata 380 if 'links' not in userdata: 381 userdata['links'] = {} 382 if isinstance(key, str): 383 userdata['links'][key] = self 384 else: 385 for k in key: 386 userdata['links'][k] = self 387 388 @property 389 def tagName(self): 390 t = type(self) 391 if t.macroName is None: 392 return t.__name__ 393 return t.macroName 394 nodeName = tagName 395 396 @property 397 def source(self): 398 name = self.nodeName 399 400 # Automatically revert internal names like "active::~" 401 escape = '\\' 402 if '::' in name: 403 name = name.split('::').pop() 404 escape = '' 405 406 # \begin environment 407 # If self.childNodes is not empty, print out the entire environment 408 if self.macroMode == Macro.MODE_BEGIN: 409 argSource = sourceArguments(self) 410 if not argSource: 411 argSource = ' ' 412 s = '%sbegin{%s}%s' % (escape, name, argSource) 413 if self.hasChildNodes(): 414 s += '%s%send{%s}' % (sourceChildren(self), escape, name) 415 return s 416 417 # \end environment 418 if self.macroMode == Macro.MODE_END: 419 return '%send{%s}' % (escape, name) 420 421 argSource = sourceArguments(self) 422 if not argSource: 423 argSource = ' ' 424 elif argSource[0] in encoding.stringletters(): 425 argSource = ' %s' % argSource 426 s = '%s%s%s' % (escape, name, argSource) 427 428 # If self.childNodes is not empty, print out the contents 429 if self.attributes and 'self' in list(self.attributes.keys()): 430 pass 431 else: 432 if self.hasChildNodes(): 433 s += sourceChildren(self) 434 return s 435 436 @property 437 def childrenSource(self): 438 return sourceChildren(self) 439 440 def parse(self, tex): 441 """ 442 Parse the arguments defined in the `args` variable 443 444 Required Arguments: 445 tex -- the TeX stream to parse from 446 447 Returns: 448 self.attributes 449 450 """ 451 if self.macroMode == Macro.MODE_END: 452 return 453 454 self.preParse(tex) 455 456 # args is empty, don't parse 457 if not self.args: 458 self.postParse(tex) 459 return 460 461 self.argSource = '' 462 arg = None 463 try: 464 for arg in self.arguments: 465 self.preArgument(arg, tex) 466 output, source = tex.readArgumentAndSource(parentNode=self, 467 name=arg.name, 468 **arg.options) 469 self.argSource += source 470 self.attributes[arg.name] = output 471 self.postArgument(arg, output, tex) 472 except: 473 log.error('Error while parsing argument "%s" of "%s"' % 474 (arg.name, self.nodeName)) 475 raise 476 477 self.postParse(tex) 478 479 return self.attributes 480 481 def preParse(self, tex): 482 """ 483 Do operations that must be done immediately before parsing arguments 484 485 Required Arguments: 486 tex -- the TeX instance containing the current context 487 488 """ 489 if not self.args: 490 self.refstepcounter(tex) 491 492 def preArgument(self, arg, tex): 493 """ 494 Event called before parsing each argument 495 496 Arguments: 497 arg -- the Argument instance that holds all argument meta-data 498 including the argument's name, source, and options. 499 tex -- the TeX instance containing the current context 500 501 """ 502 # Check for a '*' type argument at the beginning of the 503 # argument list. If there is one, don't increment counters 504 # or set labels. This must be done immediately since 505 # the following arguments may contain labels. 506 if arg.index == 0 and arg.name != '*modifier*': 507 self.refstepcounter(tex) 508 509 def postArgument(self, arg, value, tex): 510 """ 511 Event called after parsing each argument 512 513 Arguments: 514 arg -- the Argument instance that holds all argument meta-data 515 including the argument's name, source, and options. 516 tex -- the TeX instance containing the current context 517 518 """ 519 # If there was a '*', unset the counter for this instance 520 if arg.index == 0 and arg.name == '*modifier*': 521 if value: 522 self.counter = '' 523 self.refstepcounter(tex) 524 525 def stepcounter(self, tex): 526 """ 527 Increment the counter for the object (if one exists) 528 529 Required Arguments: 530 tex -- the TeX instance containing the current context 531 532 """ 533 if self.counter: 534 try: 535 self.ownerDocument.context.counters[self.counter].stepcounter() 536 except KeyError: 537 log.warning('Could not find counter "%s"', self.counter) 538 self.ownerDocument.context.newcounter(self.counter, initial=1) 539 540 def refstepcounter(self, tex): 541 """ 542 Increment the counter for the object (if one exists) 543 544 In addition to stepping the counter, the current object is 545 set as the currently labeled object. 546 547 Required Arguments: 548 tex -- the TeX instance containing the current context 549 550 """ 551 if self.counter: 552 self.ownerDocument.context.currentlabel = self 553 self.stepcounter(tex) 554 555 def postParse(self, tex): 556 """ 557 Do operations that must be done immediately after parsing arguments 558 559 Required Arguments: 560 tex -- the TeX instance containing the current context 561 562 """ 563 if self.counter: 564 try: secnumdepth = self.config['document']['sec-num-depth'] 565 except: secnumdepth = 10 566 if secnumdepth >= self.level or self.level > self.ENDSECTIONS_LEVEL: 567 self.ref = self.ownerDocument.createElement('the' + self.counter).expand(tex) 568 self.captionName = self.ownerDocument.createElement(self.counter + 'name').expand(tex) 569 570 @property 571 def arguments(self): 572 """ 573 Compile the argument string into function call arguments 574 575 Returns: 576 arguments as compiled entities 577 578 """ 579 tself = type(self) 580 581 # Check for cached version first 582 if '@arguments' in vars(tself): 583 return vars(tself)['@arguments'] 584 585 # If the argument string is empty, short circuit 586 if not tself.args: 587 setattr(tself, '@arguments', []) 588 return getattr(tself, '@arguments') 589 590 # Split the arguments into their primary components 591 args = iter([x.strip() for x in 592 re.split(r'(\w+(?::\w+(?:\(\S\))?(?::\w+)?)?|\W|\s+)', 593 tself.args) if x is not None and x.strip()]) 594 595 groupings = {'[':'[]', '(':'()', '<':'<>', '{':'{}'} 596 597 macroargs = [] 598 argdict = {} 599 index = 0 600 for item in args: 601 602 # Modifier argument 603 if item in '*+-': 604 if argdict: 605 raise ValueError('Improperly placed "%s" in argument string "%s"' % \ 606 (item, tself.args)) 607 argdict.clear() 608 macroargs.append(Argument('*modifier*', index, {'spec':item})) 609 index += 1 610 611 # Optional equals 612 elif item in '=': 613 argdict.clear() 614 macroargs.append(Argument('*equals*', index, {'spec':item})) 615 index += 1 616 617 # Beginning of group 618 elif item in '[(<{': 619 argdict.clear() 620 argdict['spec'] = groupings[item] 621 622 # End of group 623 elif item in '])>}': 624 pass 625 626 # Argument name (and possibly type) 627 elif item[0] in encoding.stringletters(): 628 parts = item.split(':') 629 item = parts.pop(0) 630 # Parse for types and subtypes 631 if parts: 632 # We already have a type, so check for subtypes 633 # for list items 634 if 'type' in list(argdict.keys()): 635 argdict['subtype'] = parts.pop(0) 636 else: 637 # Split type and possible delimiter 638 argdict['type'], argdict['delim'] = re.search(r'(\w+)(?:\((\W)\))?', parts.pop(0)).groups() 639 if parts: 640 argdict['subtype'] = parts.pop(0) 641 # Arguments that are instance variables are always expanded 642 if argdict.get('type') in ['cs', 'nox']: 643 argdict['expanded'] = False 644 else: 645 argdict['expanded'] = True 646 macroargs.append(Argument(item, index, argdict)) 647 index += 1 648 argdict.clear() 649 650 else: 651 raise ValueError('Could not parse argument string "%s", reached unexpected "%s"' % (tself.args, item)) 652 653 # Cache the result 654 setattr(tself, '@arguments', macroargs) 655 656 return macroargs 657 658 def digestUntil(self, tokens, endclass): 659 """ 660 Absorb tokens until a token of the given class is given 661 662 This method is useful for things like lists and tables 663 when one element is actually ended by the occurrence of 664 another (i.e. \\item ended by \\item, array cell ended by 665 array cell, array cell ended by array row, etc.). 666 667 Required Arguments: 668 tokens -- iterator of tokens in the stream 669 endclass -- class reference or tuple of class references 670 that, when a token of that type is reached, stops 671 the digestion process 672 673 Returns: 674 None -- if the context ended without reaching a token of 675 the requested type 676 token -- the token of the requested type if it was found 677 678 """ 679 for tok in tokens: 680 if tok.nodeType == Node.ELEMENT_NODE: 681 if isinstance(tok, endclass): 682 tokens.push(tok) 683 return tok 684 tok.parentNode = self 685 tok.digest(tokens) 686 # Stay within our context 687 if tok.contextDepth < self.contextDepth: 688 tokens.push(tok) 689 break 690 self.appendChild(tok) 691 692 @property 693 def currentSection(self): 694 """ 695 Return the section that this node belongs to 696 697 This property will contain the parent section if the current 698 node is a section node. 699 700 """ 701 node = self.parentNode 702 while node is not None: 703 if node.level < Node.ENDSECTIONS_LEVEL: 704 return node 705 node = node.parentNode 706 return 707 708 def paragraphs(self, force=True): 709 """ 710 Group content into paragraphs 711 712 This algorithm is based on TeX's paragraph grouping algorithm. 713 This has the downside that it isn't the same paragraph algorithm 714 as HTML which doesn't allow block-level elements (e.g. table, 715 ol, ul, etc.) inside paragraphs. This will result in invalid 716 HTML, but it isn't likely to be noticed in a browser. 717 718 Keyword Arguments: 719 force -- force all content to be grouped into paragraphs even 720 if there are no paragraps already present 721 722 """ 723 parname = None 724 for item in self: 725 if item.level == Node.PAR_LEVEL: 726 parname = item.nodeName 727 break 728 729 # No paragraphs, and we aren't forcing paragraphs... 730 if parname is None and not force: 731 self.normalize(self.ownerDocument.charsubs) 732 return 733 734 if parname is None: 735 parname = 'par' 736 737 # Group content into paragraphs 738 par = self.ownerDocument.createElement(parname) 739 par.parentNode = self 740 newnodes = [par] 741 while self: 742 item = self.pop(0) 743 if item.level == Node.PAR_LEVEL: 744 newnodes.append(item) 745 continue 746 if item.level < Node.PAR_LEVEL: 747 newnodes.append(item) 748 break 749 # Block level elements get their own paragraph 750 if item.blockType: 751 par = self.ownerDocument.createElement(parname) 752 par.appendChild(item) 753 par.blockType = True 754 newnodes.append(par) 755 par = self.ownerDocument.createElement(parname) 756 newnodes.append(par) 757 continue 758 newnodes[-1].append(item) 759 760 # Insert nodes into self 761 for i, item in enumerate(newnodes): 762 if item.level == Node.PAR_LEVEL: 763 item.normalize(self.ownerDocument.charsubs) 764 self.insert(i, item) 765 766 # Filter out any empty paragraphs 767 for i in range(len(self) - 1, -1, -1): 768 item = self[i] 769 if item.level == Node.PAR_LEVEL: 770 if not item: 771 self.pop(i) 772 elif len(item) == 1 and item[0].isElementContentWhitespace: 773 self.pop(i) 774 775class TeXFragment(DocumentFragment): 776 """ Document fragment node """ 777 @property 778 def source(self): 779 return sourceChildren(self) 780 781 782class TeXDocument(Document): 783 """ TeX Document node """ 784 documentFragmentClass = TeXFragment 785 charsubs = [ 786 ('``', chr(8220)), 787 ("''", chr(8221)), 788 ('"`', chr(8222)), 789 ('"\'', chr(8220)), 790 ('`', chr(8216)), 791 ("'", chr(8217)), 792 ('---', chr(8212)), 793 ('--', chr(8211)), 794# ('fj', unichr(58290)), 795# ('ff', unichr(64256)), 796# ('fi', unichr(64257)), 797# ('fl', unichr(64258)), 798# ('ffi',unichr(64259)), 799# ('ffl',unichr(64260)), 800# ('ij', unichr(307)), 801# ('IJ', unichr(308)), 802 ] 803 804 def __init__(self, *args, **kwargs): 805 # super(TeXDocument, self).__init__(*args, **kwargs) 806 807 if 'context' not in list(kwargs.keys()): 808 from plasTeX import Context 809 self.context = Context.Context(load=True) 810 else: 811 self.context = kwargs['context'] 812 813 if 'config' not in list(kwargs.keys()): 814 from plasTeX import Config 815 self.config = Config.config 816 else: 817 self.config = kwargs['config'] 818 819 # post parsing callbacks list 820 self.postParseCallbacks = [] 821 822 self.packageResources = [] 823 self.rendererdata = dict() 824 825 def addPackageResource(self, resource): 826 """ 827 Adds a pacakge resource or a list of package resources to 828 self.packageResources. 829 """ 830 if isinstance(resource, list): 831 self.packageResources.extend(resource) 832 else: 833 self.packageResources.append(resource) 834 835 def createElement(self, name): 836 elem = self.context[name]() 837 elem.parentNode = None 838 elem.ownerDocument = self 839 elem.contextDepth = 1000 840 return elem 841 842 @property 843 def preamble(self): 844 """ 845 Return the nodes in the document that correspond to the preamble 846 847 """ 848 output = self.createDocumentFragment() 849 for item in self: 850 if item.level == Macro.DOCUMENT_LEVEL: 851 break 852 output.append(item) 853 return output 854 855 @property 856 def source(self): 857 """ Return the LaTeX source of the document """ 858 return sourceChildren(self) 859 860class Command(Macro): 861 """ Base class for all Python-based LaTeX commands """ 862 863class Environment(Macro): 864 """ Base class for all Python-based LaTeX environments """ 865 level = Node.ENVIRONMENT_LEVEL 866 867 def invoke(self, tex): 868 if self.macroMode == Macro.MODE_END: 869 self.ownerDocument.context.pop(self) 870 if self.str is not None: 871 return tex.textTokens(self.str) 872 return 873 874 self.ownerDocument.context.push(self) 875 self.parse(tex) 876 877 if self.str is not None: 878 return tex.textTokens(self.str) 879 880 self.setLinkType() 881 882 def digest(self, tokens): 883 """ Absorb all of the tokens that belong to the environment """ 884 if self.macroMode == Macro.MODE_END: 885 return 886 # Absorb the tokens that belong to us 887 dopars = self.forcePars 888# print 'DIGEST', type(self), self.contextDepth 889 for item in tokens: 890# print type(item), (item.level, self.level), (item.contextDepth, self.contextDepth) 891 # Make sure that we know to group paragraphs if one is found 892 if item.level == Node.PAR_LEVEL: 893 self.appendChild(item) 894 dopars = True 895 continue 896 # Don't absorb objects with a higher precedence 897 if item.level < self.level: 898 tokens.push(item) 899 break 900 # Absorb macros until the end of this environment is found 901 if item.nodeType == Node.ELEMENT_NODE: 902 if item.macroMode == Macro.MODE_END and type(item) is type(self): 903 break 904 item.parentNode = self 905 item.digest(tokens) 906 # Stay within our context depth 907 if self.level > Node.DOCUMENT_LEVEL and \ 908 item.contextDepth < self.contextDepth: 909 tokens.push(item) 910 break 911# print 'APPEND', type(item) 912 self.appendChild(item) 913# print 'DONE', type(self) 914 if dopars: 915 self.paragraphs() 916 917 918class NoCharSubEnvironment(Environment): 919 """ 920 A subclass of Environment which prevents character substitution inside 921 itself. 922 """ 923 def __init__(self, *args, **kwargs): 924 # Will hold the owner document charsubs to restore it at the end 925 self.charsubs = [] 926 super(NoCharSubEnvironment, self).__init__(*args, **kwargs) 927 928 def invoke(self, tex): 929 # The goal is to prevent any character substitution while handling a 930 # this environment. 931 doc = self.ownerDocument 932 if self.macroMode == Macro.MODE_BEGIN: 933 self.charsubs = doc.charsubs 934 doc.charsubs = [] 935 elif self.macroMode == Macro.MODE_END: 936 doc.charsubs = self.charsubs 937 super(NoCharSubEnvironment, self).invoke(tex) 938 939class IgnoreCommand(Command): 940 """ 941 This command will be parsed, but will not go to the output stream 942 943 This should be used sparingly because it also means that if you 944 try to access the source of a node in a document, this will also 945 be missing from that. 946 947 """ 948 def invoke(self, tex): 949 Command.invoke(self, tex) 950 return [] 951 952class UnrecognizedMacro(Macro): 953 """ 954 Base class for unrecognized macros 955 956 When an unrecognized macro is requested, an instance of this 957 class is generated as a placeholder for the missing macro. 958 959 """ 960 def __eq__(self, other): 961 if not hasattr(other, 'nodeName'): 962 return True 963 if other.nodeName in ['undefined', '@undefined']: 964 return True 965 if isinstance(other, UnrecognizedMacro): 966 return True 967 return super(UnrecognizedMacro, self).__eq__(other) 968 969class NewIf(Macro): 970 """ Base class for all generated \\newifs """ 971 972 state = False 973 974 def invoke(self, tex): 975 tex.processIfContent(type(self).state) 976 return [] 977 978 @classmethod 979 def setState(cls, state): 980 cls.state = state 981 982 @classmethod 983 def setTrue(cls): 984 cls.state = True 985 986 @classmethod 987 def setFalse(cls): 988 cls.state = False 989 990class IfTrue(Macro): 991 """ Base class for all generated \\iftrues """ 992 def invoke(self, tex): 993 type(self).ifclass.setTrue() 994 return [] 995 996class IfFalse(Macro): 997 """ Base class for all generated \\iffalses """ 998 def invoke(self, tex): 999 type(self).ifclass.setFalse() 1000 return [] 1001 1002def expandDef(definition, params): 1003 # Walk through the definition and expand parameters 1004 if not definition: 1005 return [] 1006 output = [] 1007 definition = iter(definition) 1008 previous = '' 1009 for t in definition: 1010 # Expand parameters 1011 if t.catcode == Token.CC_PARAMETER: 1012 for t in definition: 1013 # Double '#' 1014 if t.catcode == Token.CC_PARAMETER: 1015 output.append(t) 1016 else: 1017 if params[int(t)] is not None: 1018 # This is a pretty bad hack, but `ifx' commands 1019 # need an argument to also be a token. So we 1020 # wrap them in a group here and let the 1021 # TeX parser convert the group to a token. 1022 if previous == 'ifx': 1023 output.append(BeginGroup(' ')) 1024 output.extend(params[int(t)]) 1025 output.append(EndGroup(' ')) 1026 else: 1027 output.extend(params[int(t)]) 1028 break 1029 # Just append other tokens to the output 1030 else: 1031 output.append(t) 1032 previous = t 1033 return output 1034 1035class NewCommand(Macro): 1036 """ Superclass for all \newcommand/\newenvironment type commands """ 1037 nargs = 0 1038 opt = None 1039 definition = None 1040 1041 def invoke(self, tex): 1042 if self.macroMode == Macro.MODE_END: 1043 res = self.ownerDocument.createElement('end' + self.tagName).invoke(tex) 1044 if res is None: 1045 return [res, EndGroup(' ')] 1046 return res + [EndGroup(' ')] 1047 1048 params = [None] 1049 1050 # Get optional argument, if needed 1051 nargs = self.nargs 1052 if self.opt is not None: 1053 nargs -= 1 1054 params.append(tex.readArgument('[]', default=self.opt, 1055 parentNode=self, 1056 name='#%s' % len(params))) 1057 1058 # Get mandatory arguments 1059 for i in range(nargs): 1060 params.append(tex.readArgument(parentNode=self, 1061 name='#%s' % len(params))) 1062 1063 deflog.debug2('expanding %s %s', self.definition, params) 1064 1065 output = [] 1066 if self.macroMode == Macro.MODE_BEGIN: 1067 output.append(BeginGroup(' ')) 1068 1069 return output + expandDef(self.definition, params) 1070 1071class Definition(Macro): 1072 """ Superclass for all \\def-type commands """ 1073 args = None 1074 definition = None 1075 1076 def invoke(self, tex): 1077 if not self.args: return self.definition 1078 1079 name = macroName(self) 1080 argIter = iter(self.args) 1081 inparam = False 1082 params = [None] 1083 for a in argIter: 1084 1085 # Beginning a new parameter 1086 if a.catcode == Token.CC_PARAMETER: 1087 1088 # Adjacent parameters, just get the next token 1089 if inparam: 1090 params.append(tex.readArgument(parentNode=self, 1091 name='#%s' % len(params))) 1092 1093 # Get the parameter number 1094 for a in argIter: 1095 # Numbered parameter 1096 if a in string.digits: 1097 inparam = True 1098 1099 elif a.catcode == Token.CC_PARAMETER: 1100 continue 1101 1102 # Handle #{ case here 1103 elif a.catcode == Token.CC_BGROUP: 1104 param = [] 1105 for t in tex.itertokens(): 1106 if t.catcode == Token.CC_BGROUP: 1107 tex.pushToken(t) 1108 else: 1109 param.append(t) 1110 inparam = False 1111 params.append(param) 1112 1113 else: 1114 raise ValueError('Invalid arg string: %s' % ''.join(self.args)) 1115 break 1116 1117 # In a parameter, so get everything up to a token that matches `a` 1118 elif inparam: 1119 param = [] 1120 for t in tex.itertokens(): 1121 if t == a: 1122 break 1123 else: 1124 param.append(t) 1125 inparam = False 1126 params.append(param) 1127 1128 # Not in a parameter, just make sure the token matches 1129 else: 1130 for t in tex.itertokens(): 1131 if t == a: 1132 break 1133 else: 1134 log.info('Arguments of "%s" don\'t match definition. Got "%s" but was expecting "%s" (%s).' % (name, t, a, ''.join(self.args))) 1135 break 1136 1137 if inparam: 1138 params.append(tex.readArgument(parentNode=self, 1139 name='#%s' % len(params))) 1140 1141 deflog.debug2('expanding %s %s', self.definition, params) 1142 1143 return expandDef(self.definition, params) 1144 1145 1146class number(int): 1147 """ Class used for parameter and count values """ 1148 def __new__(cls, v): 1149 if isinstance(v, Macro): 1150 return v.__count__() 1151 return int.__new__(cls, v) 1152 1153 @property 1154 def source(self): 1155 return str(self) 1156 1157class count(number): pass 1158 1159class dimen(float): 1160 """ Class used for dimen values """ 1161 1162 units = ['pt', 'pc', 'in', 'bp', 'cm', 'mm', 'dd', 'cc', 'sp', 'ex', 'em'] 1163 1164 def __new__(cls, v): 1165 if isinstance(v, Macro): 1166 return v.__dimen__() 1167 elif isinstance(v, str) and v[-1] in encoding.stringletters(): 1168 # Get rid of glue components 1169 v = list(v.split('plus').pop(0).split('minus').pop(0).strip()) 1170 units = [] 1171 while v and v[-1] in encoding.stringletters(): 1172 units.insert(0, v.pop()) 1173 v = float(''.join(v)) 1174 units = ''.join(units) 1175 if units == 'pt': 1176 v *= 65536 1177 elif units == 'pc': 1178 v *= 12 * 65536 1179 elif units == 'in': 1180 v *= 72.27 * 65536 1181 elif units == 'bp': 1182 v *= (72.27 * 65536) / 72 1183 elif units == 'cm': 1184 v *= (72.27 * 65536) / 2.54 1185 elif units == 'mm': 1186 v *= (72.27 * 65536) / 25.4 1187 elif units == 'dd': 1188 v *= (1238.0 * 65536) / 1157 1189 elif units == 'cc': 1190 v *= (1238.0 * 12 * 65536) / 1157 1191 elif units == 'sp': 1192 pass 1193 # Encode fil(ll)s by adding 2, 4, and 6 billion 1194 elif units == 'fil': 1195 if v < 0: v -= 2e9 1196 else: v += 2e9 1197 elif units == 'fill': 1198 if v < 0: v -= 4e9 1199 else: v += 4e9 1200 elif units == 'filll': 1201 if v < 0: v -= 6e9 1202 else: v += 6e9 1203 elif units == 'mu': 1204 pass 1205 # Just estimates, since I don't know the actual font size 1206 elif units == 'ex': 1207 v *= 5 * 65536 1208 elif units == 'em': 1209 v *= 11 * 65536 1210 else: 1211 raise ValueError('Unrecognized units: %s' % units) 1212 return float.__new__(cls, v) 1213 1214 @property 1215 def source(self): 1216 sign = 1 1217 if self < 0: 1218 sign = -1 1219 if abs(self) >= 6e9: 1220 return str(sign * (abs(self) - 6e9)) + 'filll' 1221 if abs(self) >= 4e9: 1222 return str(sign * (abs(self) - 4e9)) + 'fill' 1223 if abs(self) >= 2e9: 1224 return str(sign * (abs(self) - 2e9)) + 'fil' 1225 return '%spt' % self.pt 1226 1227 @property 1228 def pt(self): 1229 return self / 65536 1230 point = pt 1231 1232 @property 1233 def pc(self): 1234 return self / (12 * 65536) 1235 pica = pc 1236 1237 @property 1238 def _in(self): 1239 return self / (72.27 * 65536) 1240 inch = _in 1241 1242 @property 1243 def bp(self): 1244 return self / ((72.27 * 65536) / 72) 1245 bigpoint = bp 1246 1247 @property 1248 def cm(self): 1249 return self / ((72.27 * 65536) / 2.54) 1250 centimeter = cm 1251 1252 @property 1253 def mm(self): 1254 return self / ((72.27 * 65536) / 25.4) 1255 millimeter = mm 1256 1257 @property 1258 def dd(self): 1259 return self / ((1238 * 65536) / 1157) 1260 didotpoint = dd 1261 1262 @property 1263 def cc(self): 1264 return self / ((1238 * 12 * 65536) / 1157) 1265 cicero = cc 1266 1267 @property 1268 def sp(self): 1269 return self 1270 scaledpoint = sp 1271 1272 @property 1273 def ex(self): 1274 return self / (5 * 65536) 1275 xheight = ex 1276 1277 @property 1278 def em(self): 1279 return self / (11 * 65536) 1280 mwidth = em 1281 1282 @property 1283 def fill(self): 1284 sign = 1 1285 if self < 0: 1286 sign = -1 1287 if abs(self) >= 6e9: 1288 return sign * (abs(self) - 6e9) 1289 if abs(self) >= 4e9: 1290 return sign * (abs(self) - 4e9) 1291 if abs(self) >= 2e9: 1292 return sign * (abs(self) - 2e9) 1293 raise ValueError('This is not a fil(ll) dimension') 1294 fil = filll = fill 1295 1296 def __repr__(self): 1297 return self.source 1298 1299 def __str__(self): 1300 return self.source 1301 1302class mudimen(dimen): 1303 """ Class used for mudimen values """ 1304 units = ['mu'] 1305 1306class glue(dimen): 1307 """ Class used for glue values """ 1308 def __new__(cls, g, plus=None, minus=None): 1309 return dimen.__new__(cls, g) 1310 1311 def __init__(self, g, plus=None, minus=None): 1312 # super(glue, self).__init__(g) 1313 self.stretch = self.shrink = None 1314 if plus is not None: 1315 self.stretch = dimen(plus) 1316 if minus is not None: 1317 self.shrink = dimen(minus) 1318 1319 @property 1320 def source(self): 1321 s = [dimen(self).source] 1322 if self.stretch is not None: 1323 s.append('plus') 1324 s.append(self.stretch.source) 1325 if self.shrink is not None: 1326 s.append('minus') 1327 s.append(self.shrink.source) 1328 return ' '.join(s) 1329 1330class muglue(glue): 1331 """ Class used for muglue values """ 1332 units = ['mu'] 1333 1334 1335class ParameterCommand(Command): 1336 args = '= value:Number' 1337 value = count(0) 1338 1339 enabled = True 1340 _enablelevel = 0 1341 1342 def invoke(self, tex): 1343 if ParameterCommand.enabled: 1344 # Disable invoke() in parameters nested in our arguments. 1345 # We don't want them to invoke, we want them to set our value. 1346 ParameterCommand.enabled = False 1347 type(self).value = self.parse(tex)['value'] 1348 ParameterCommand.enabled = True 1349 1350 @classmethod 1351 def enable(cls): 1352 ParameterCommand._enablelevel += 1 1353 ParameterCommand.enabled = ParameterCommand._enablelevel >= 0 1354 1355 @classmethod 1356 def disable(cls): 1357 ParameterCommand._enablelevel -= 1 1358 ParameterCommand.enabled = ParameterCommand._enablelevel >= 0 1359 1360 def __dimen__(self): 1361 return dimen(type(self).value) 1362 1363 def __mudimen__(self): 1364 return mudimen(type(self).value) 1365 1366 def __count__(self): 1367 return count(type(self).value) 1368 1369 def __glue__(self): 1370 return glue(type(self).value) 1371 1372 def __muglue__(self): 1373 return muglue(type(self).value) 1374 1375 def the(self): 1376 return type(self).value.source 1377 1378 @classmethod 1379 def new(cls, *args, **kwargs): 1380 return count(*args, **kwargs) 1381 1382class RegisterCommand(ParameterCommand): pass 1383 1384class CountCommand(RegisterCommand): pass 1385 1386class DimenCommand(RegisterCommand): 1387 args = '= value:Dimen' 1388 value = dimen(0) 1389 1390 def setlength(self, len): 1391 type(self).value = dimen(len) 1392 1393 def addtolength(self, len): 1394 type(self).value = dimen(type(self).value + len) 1395 1396 @classmethod 1397 def new(cls, *args, **kwargs): 1398 return dimen(*args, **kwargs) 1399 1400class MuDimenCommand(RegisterCommand): 1401 args = '= value:MuDimen' 1402 value = mudimen(0) 1403 1404 def setlength(self, len): 1405 type(self).value = mudimen(len) 1406 1407 def addtolength(self, len): 1408 type(self).value = mudimen(type(self).value + len) 1409 1410 @classmethod 1411 def new(cls, *args, **kwargs): 1412 return mudimen(*args, **kwargs) 1413 1414class GlueCommand(RegisterCommand): 1415 args = '= value:Glue' 1416 value = glue(0) 1417 1418 def setlength(self, len): 1419 type(self).value = glue(len) 1420 1421 def addtolength(self, len): 1422 type(self).value = glue(type(self).value + len) 1423 1424 @classmethod 1425 def new(cls, *args, **kwargs): 1426 return glue(*args, **kwargs) 1427 1428class MuGlueCommand(RegisterCommand): 1429 args = '= value:MuGlue' 1430 value = muglue(0) 1431 1432 def setlength(self, len): 1433 type(self).value = muglue(len) 1434 1435 def addtolength(self, len): 1436 type(self).value = muglue(type(self).value + len) 1437 1438 @classmethod 1439 def new(cls, *args, **kwargs): 1440 return muglue(*args, **kwargs) 1441 1442 1443class Counter(object): 1444 """ 1445 LaTeX counter class 1446 1447 """ 1448 def __init__(self, context, name, resetby=None, value=0): 1449 self.name = name 1450 self.resetby = resetby 1451 self.value = value 1452 self.counters = context.counters 1453 1454 def addtocounter(self, other): 1455 self.value += int(other) 1456 self.resetcounters() 1457 1458 def setcounter(self, other): 1459 self.value = int(other) 1460 self.resetcounters() 1461 1462 def stepcounter(self): 1463 self.value += 1 1464 self.resetcounters() 1465 1466 def resetcounters(self): 1467 for counter in list(self.counters.values()): 1468 if counter.resetby and self.name and counter.resetby == self.name: 1469 counter.value = 0 1470 counter.resetcounters() 1471 1472 def __int__(self): 1473 return self.value 1474 1475 def __float__(self): 1476 return self.value 1477 1478 @property 1479 def arabic(self): 1480 return str(self.value) 1481 1482 @property 1483 def Roman(self): 1484 roman = "" 1485 n, number = divmod(self.value, 1000) 1486 roman = "M"*n 1487 if number >= 900: 1488 roman = roman + "CM" 1489 number = number - 900 1490 while number >= 500: 1491 roman = roman + "D" 1492 number = number - 500 1493 if number >= 400: 1494 roman = roman + "CD" 1495 number = number - 400 1496 while number >= 100: 1497 roman = roman + "C" 1498 number = number - 100 1499 if number >= 90: 1500 roman = roman + "XC" 1501 number = number - 90 1502 while number >= 50: 1503 roman = roman + "L" 1504 number = number - 50 1505 if number >= 40: 1506 roman = roman + "XL" 1507 number = number - 40 1508 while number >= 10: 1509 roman = roman + "X" 1510 number = number - 10 1511 if number >= 9: 1512 roman = roman + "IX" 1513 number = number - 9 1514 while number >= 5: 1515 roman = roman + "V" 1516 number = number - 5 1517 if number >= 4: 1518 roman = roman + "IV" 1519 number = number - 4 1520 while number > 0: 1521 roman = roman + "I" 1522 number = number - 1 1523 return roman 1524 1525 @property 1526 def roman(self): 1527 return self.Roman.lower() 1528 1529 @property 1530 def Alph(self): 1531 return encoding.stringletters()[self.value - 1].upper() 1532 1533 @property 1534 def alph(self): 1535 return self.Alph.lower() 1536 1537 @property 1538 def fnsymbol(self): 1539 return '*' * self.value 1540 1541 1542class TheCounter(Command): 1543 """ Base class for \\thecounter commands """ 1544 format = None 1545 1546 def invoke(self, tex): 1547 1548 def counterValue(m): 1549 """ Replace the counter values """ 1550 name = m.group(1) 1551 1552 # If there is a reference to another \\thecounter, invoke it 1553 if name.startswith('the') and name != re.sub(r'^the', '', self.__class__.__name__): 1554 return ''.join(tex.expandTokens(self.ownerDocument.createElement(name).invoke(tex))) 1555 1556 # Get formatted value of the requested counter 1557 format = m.group(2) 1558 if not format: 1559 format = 'arabic' 1560 1561 return getattr(self.ownerDocument.context.counters[name], format) 1562 1563 format = re.sub(r'\$(\w+)', r'${\1}', self.format) 1564 if self.format is None: 1565 format = '${%s.arabic}' % self.nodeName[3:] 1566 1567 t = re.sub(r'\$\{\s*(\w+)(?:\.(\w+))?\s*\}', counterValue, format) 1568 1569 # This is kind of a hack. Since number formats aren't quite as 1570 # flexible as in LaTeX, we have to do somethings heuristically. 1571 # In this case, whenever a counter value comes out as a zero, 1572 # just hank it out. This is especially useful in document classes 1573 # such as book and report which do this in the \thefigure format macro. 1574 t = re.sub(r'\b0[^\dA-Za-z]+', r'', t) 1575 1576 return tex.textTokens(t) 1577