1 /* 2 blahtex: a TeX to MathML converter designed with MediaWiki in mind 3 blahtexml: an extension of blahtex with XML processing in mind 4 http://gva.noekeon.org/blahtexml 5 6 Copyright (c) 2006, David Harvey 7 All rights reserved. 8 9 Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: 10 11 * Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. 12 * Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. 13 * Neither the names of the authors nor the names of their affiliation may be used to endorse or promote products derived from this software without specific prior written permission. 14 15 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 16 */ 17 18 #ifndef BLAHTEX_LAYOUTTREE_H 19 #define BLAHTEX_LAYOUTTREE_H 20 21 #include <memory> 22 #include "MathmlNode.h" 23 24 namespace blahtex 25 { 26 27 // The maximum number of nodes allowed in the output MathML tree. 28 // (This limit is imposed to prevent users eliciting quadratic time by 29 // inputting arrays with lots of empty entries.) 30 const unsigned cMaxMathmlNodeCount = 2500; 31 32 33 struct MathmlEnvironment; 34 35 // The LayoutTree namespace contains all classes that represents nodes in 36 // the layout tree. The layout tree is an intermediate stage between the 37 // parse tree and the final output XML tree. 38 namespace LayoutTree 39 { 40 // Base class for layout tree nodes. 41 struct Node 42 { ~NodeNode43 virtual ~Node() 44 { } 45 46 // This field is only used during the layout tree building phase, to 47 // determine inter-atomic spacing. The values correspond roughly 48 // to TeX's differently flavoured atoms. (We omit several flavours 49 // that TeX uses, like "acc" and "rad"; these are generally handled 50 // as "ord".) 51 // 52 // This field is ignored for LayoutTree::Space nodes. 53 enum Flavour 54 { 55 cFlavourOrd, 56 cFlavourOp, 57 cFlavourBin, 58 cFlavourRel, 59 cFlavourOpen, 60 cFlavourClose, 61 cFlavourPunct, 62 cFlavourInner 63 } 64 mFlavour; 65 66 // This field is only used during the layout tree building phase, to 67 // determine script placement. It corresponds to TeX's "limits", 68 // "nolimits", "displaylimits" trichotomy. 69 // 70 // It is only valid if mFlavour == cFlavourOp. 71 enum Limits 72 { 73 cLimitsDisplayLimits, 74 cLimitsLimits, 75 cLimitsNoLimits 76 } 77 mLimits; 78 79 // This field corresponds to TeX's displaystyle/textstyle/ 80 // scriptstyle/scriptscriptstyle setting. (We ignore the cramped/ 81 // uncramped variations.) 82 // 83 // This field is ignored for LayoutTree::Space nodes. 84 enum Style 85 { 86 cStyleDisplay, // like \displaystyle 87 cStyleText, // like \textstyle 88 cStyleScript, // like \scriptstyle 89 cStyleScriptScript // like \scriptscriptstyle 90 } 91 mStyle; 92 93 // Colour of the node. For symbols this is the colour of the symbol; 94 // for fractions it's the colour of the horizontal bar; for radicals 95 // it's the colour of the radical symbol. 96 // 97 // This field is ignored for LayoutTree::Space nodes. 98 RGBColour mColour; 99 100 NodeNode101 Node( 102 Style style, 103 Flavour flavour, 104 Limits limits, 105 RGBColour colour 106 ) : 107 mStyle(style), 108 mFlavour(flavour), 109 mLimits(limits), 110 mColour(colour) 111 { } 112 113 114 // This function "optimises" the tree beneath the current node: 115 // (1) It merges adjacent Space nodes into single spaces, and 116 // (2) It merges adjacent Symbol nodes in certain situations. 117 // For exammple, we want <mn>12</mn> instead of 118 // <mn>1</mn><mn>2</mn>, and <mi>sin</mi> instead of 119 // <mi mathvariant="normal">s</mi> 120 // <mi mathvariant="normal">i</mi> 121 // <mi mathvariant="normal">n</mi> !!!! OptimiseNode122 virtual void Optimise() 123 { } 124 125 126 // This function converts the layout tree rooted at this node into 127 // a MathML tree. 128 // 129 // The inheritedEnvironment parameter tells it what assumptions to 130 // make about its rendering environment. It uses these to decide 131 // whether to insert extra <mstyle> tags. 132 // 133 // The nodeCount parameter is used to keep track of the total number 134 // of nodes in the MathML tree. For security reasons we put a hard 135 // limit on this. (See cMaxMathmlNodeCount.) 136 virtual std::auto_ptr<MathmlNode> BuildMathmlTree( 137 const MathmlOptions& options, 138 const MathmlEnvironment& inheritedEnvironment, 139 unsigned& nodeCount 140 ) const = 0; 141 142 143 // This function recursively prints the layout tree under this node. 144 // Debugging use only. 145 virtual void Print( 146 std::wostream& os, 147 int depth = 0 148 ) const = 0; 149 150 std::wstring PrintFields() const; // used internally by Print 151 }; 152 153 154 // A Row stores a list of children nodes. It gets translated into an 155 // <mrow> node in the MathML tree. 156 // 157 // No Row ever has another Row node as its child. 158 struct Row : Node 159 { 160 std::list<Node*> mChildren; 161 RowRow162 Row(Style style, RGBColour colour) : 163 Node(style, cFlavourOrd, cLimitsDisplayLimits, colour) 164 { } 165 166 ~Row(); 167 168 virtual void Optimise(); 169 170 virtual std::auto_ptr<MathmlNode> BuildMathmlTree( 171 const MathmlOptions& options, 172 const MathmlEnvironment& inheritedEnvironment, 173 unsigned& nodeCount 174 ) const; 175 176 virtual void Print( 177 std::wostream& os, 178 int depth = 0 179 ) const; 180 }; 181 182 183 // Symbol is an abstract class; its concrete subclasses are 184 // SymbolIdentifier, SymbolNumber, SymbolOperator, SymbolText. It 185 // represents anything that will get translated as <mn>, <mi>, <mo> 186 // or <mtext>. It describes the text that goes inside the tags (mText) 187 // and what font it should be in (mFont). 188 struct Symbol : Node 189 { 190 std::wstring mText; 191 MathmlFont mFont; 192 SymbolSymbol193 Symbol( 194 const std::wstring& text, 195 MathmlFont font, 196 Style style, 197 Flavour flavour, 198 Limits limits, 199 RGBColour colour 200 ) : 201 Node(style, flavour, limits, colour), 202 mText(text), 203 mFont(font) 204 { } 205 206 virtual std::auto_ptr<MathmlNode> BuildMathmlTree( 207 const MathmlOptions& options, 208 const MathmlEnvironment& inheritedEnvironment, 209 unsigned& nodeCount 210 ) const = 0; 211 212 virtual void Print( 213 std::wostream& os, 214 int depth = 0 215 ) const = 0; 216 }; 217 218 219 // SymbolIdentifier represents things translated as <mi>. 220 struct SymbolIdentifier : Symbol 221 { SymbolIdentifierSymbolIdentifier222 SymbolIdentifier( 223 const std::wstring& text, 224 MathmlFont font, 225 Style style, 226 Flavour flavour, 227 Limits limits, 228 RGBColour colour 229 ) : 230 Symbol(text, font, style, flavour, limits, colour) 231 { } 232 233 virtual std::auto_ptr<MathmlNode> BuildMathmlTree( 234 const MathmlOptions& options, 235 const MathmlEnvironment& inheritedEnvironment, 236 unsigned& nodeCount 237 ) const; 238 239 virtual void Print( 240 std::wostream& os, 241 int depth = 0 242 ) const; 243 }; 244 245 246 // SymbolNumber represents things translated as <mn>. 247 struct SymbolNumber : Symbol 248 { SymbolNumberSymbolNumber249 SymbolNumber( 250 const std::wstring& text, 251 MathmlFont font, 252 Style style, 253 Flavour flavour, 254 Limits limits, 255 RGBColour colour 256 ) : 257 Symbol(text, font, style, flavour, limits, colour) 258 { } 259 260 virtual std::auto_ptr<MathmlNode> BuildMathmlTree( 261 const MathmlOptions& options, 262 const MathmlEnvironment& inheritedEnvironment, 263 unsigned& nodeCount 264 ) const; 265 266 virtual void Print( 267 std::wostream& os, 268 int depth = 0 269 ) const; 270 }; 271 272 273 // SymbolText represents things translated as <mtext>. 274 // 275 // Actually, each SymbolText represents just a single character; 276 // they get merged by their parent's Row::BuildMathmlTree() function. 277 struct SymbolText : Symbol 278 { SymbolTextSymbolText279 SymbolText( 280 const std::wstring& text, 281 MathmlFont font, 282 Style style, 283 RGBColour colour 284 ) : 285 Symbol( 286 text, font, style, cFlavourOrd, cLimitsDisplayLimits, colour 287 ) 288 { } 289 290 virtual std::auto_ptr<MathmlNode> BuildMathmlTree( 291 const MathmlOptions& options, 292 const MathmlEnvironment& inheritedEnvironment, 293 unsigned& nodeCount 294 ) const; 295 296 virtual void Print( 297 std::wostream& os, 298 int depth = 0 299 ) const; 300 }; 301 302 303 // SymbolOperator represents things translated as <mo>. 304 struct SymbolOperator : Symbol 305 { 306 // Whether or not this operator is stretchy. 307 // 308 // Note: because of the existence of the MathML operator dictionary, 309 // BuildMathmlTree() needs to do a bit of work to decide whether 310 // to actually use a "stretchy" attribute to implement this flag. 311 bool mIsStretchy; 312 313 // mSize, if non-empty, indicates the "minsize" and "maxsize" 314 // attributes. It is only valid if mIsStretchy is true. 315 std::wstring mSize; 316 317 // Whether to use the accent="true" attribute. 318 // 319 // Again, BuildMathmlTree needs to do some work to decide if the 320 // "accent" attribute is actually needed. 321 bool mIsAccent; 322 SymbolOperatorSymbolOperator323 SymbolOperator( 324 bool isStretchy, 325 const std::wstring& size, 326 bool isAccent, 327 const std::wstring& text, 328 MathmlFont font, 329 Style style, 330 Flavour flavour, 331 Limits limits, 332 RGBColour colour 333 ) : 334 Symbol(text, font, style, flavour, limits, colour), 335 mIsStretchy(isStretchy), 336 mSize(size), 337 mIsAccent(isAccent) 338 { } 339 340 virtual std::auto_ptr<MathmlNode> BuildMathmlTree( 341 const MathmlOptions& options, 342 const MathmlEnvironment& inheritedEnvironment, 343 unsigned& nodeCount 344 ) const; 345 346 virtual void Print( 347 std::wostream& os, 348 int depth = 0 349 ) const; 350 }; 351 352 353 // Represents a space. This may or not actually end up as MathML markup, 354 // depending on a variety of things. 355 struct Space : Node 356 { 357 // mWidth is the width of the space, measured in mu. 358 // (18mu = 1em in normal font size.) 359 // It may be negative. 360 int mWidth; 361 362 // This flag indicates whether the space was requested by the user 363 // via a TeX spacing command like "\quad". False means that blahtex 364 // computed the space (according to TeX's rules). 365 bool mIsUserRequested; 366 SpaceSpace367 Space( 368 int width, 369 bool isUserRequested 370 ) : 371 Node(cStyleDisplay, cFlavourOrd, cLimitsDisplayLimits, 0), 372 mWidth(width), 373 mIsUserRequested(isUserRequested) 374 { } 375 376 virtual std::auto_ptr<MathmlNode> BuildMathmlTree( 377 const MathmlOptions& options, 378 const MathmlEnvironment& inheritedEnvironment, 379 unsigned& nodeCount 380 ) const; 381 382 virtual void Print( 383 std::wostream& os, 384 int depth = 0 385 ) const; 386 }; 387 388 389 // Represents a base with a subscript and/or a superscript, 390 // OR a base with an underscript and/or an overscript. 391 struct Scripts : Node 392 { 393 // Any of the following three fields may be NULL (i.e. empty). 394 std::auto_ptr<Node> mBase, mUpper, mLower; 395 396 // True means sub/superscript; false means under/overscript. 397 // 398 // (This flag is computed from e.g. the "limits" setting of mBase, 399 // and from the current TeX style.) 400 bool mIsSideset; 401 ScriptsScripts402 Scripts( 403 Style style, 404 Flavour flavour, 405 Limits limits, 406 RGBColour colour, 407 bool isSideset, 408 std::auto_ptr<Node> base, 409 std::auto_ptr<Node> upper, 410 std::auto_ptr<Node> lower 411 ) : 412 Node(style, flavour, limits, colour), 413 mIsSideset(isSideset), 414 mBase(base), 415 mUpper(upper), 416 mLower(lower) 417 { } 418 419 virtual void Optimise(); 420 421 virtual std::auto_ptr<MathmlNode> BuildMathmlTree( 422 const MathmlOptions& options, 423 const MathmlEnvironment& inheritedEnvironment, 424 unsigned& nodeCount 425 ) const; 426 427 virtual void Print( 428 std::wostream& os, 429 int depth = 0 430 ) const; 431 }; 432 433 434 // Represents something that will get translated as <mfrac>. 435 struct Fraction : Node 436 { 437 std::auto_ptr<Node> mNumerator, mDenominator; 438 439 // Does the fraction need a visible line? 440 // True for ordinary vanilla fractions; false for things like 441 // binomial coefficients. 442 bool mIsLineVisible; 443 FractionFraction444 Fraction( 445 Style style, 446 RGBColour colour, 447 std::auto_ptr<Node> numerator, 448 std::auto_ptr<Node> denominator, 449 bool isLineVisible 450 ) : 451 Node(style, cFlavourOrd, cLimitsDisplayLimits, colour), 452 mNumerator(numerator), 453 mDenominator(denominator), 454 mIsLineVisible(isLineVisible) 455 { } 456 457 virtual void Optimise(); 458 459 virtual std::auto_ptr<MathmlNode> BuildMathmlTree( 460 const MathmlOptions& options, 461 const MathmlEnvironment& inheritedEnvironment, 462 unsigned& nodeCount 463 ) const; 464 465 virtual void Print( 466 std::wostream& os, 467 int depth = 0 468 ) const; 469 }; 470 471 472 // Represents an expression between a pair of delimiters. 473 // 474 // (Blahtex doesn't translate this using <mfenced>, because then we 475 // couldn't use more exotic (non-ASCII) fences in the "open" and 476 // "close" attributes.) 477 struct Fenced : Node 478 { 479 // The opening and closing delimiters, i.e. the text that goes 480 // inside <mo>...</mo>. 481 std::wstring mLeftDelimiter, mRightDelimiter; 482 483 // The expression being surrounded by fences. 484 std::auto_ptr<Node> mChild; 485 FencedFenced486 Fenced( 487 Style style, 488 RGBColour colour, 489 const std::wstring& leftDelimiter, 490 const std::wstring& rightDelimiter, 491 std::auto_ptr<Node> child 492 ) : 493 Node(style, cFlavourInner, cLimitsDisplayLimits, colour), 494 mLeftDelimiter(leftDelimiter), 495 mRightDelimiter(rightDelimiter), 496 mChild(child) 497 { } 498 499 virtual void Optimise(); 500 501 virtual std::auto_ptr<MathmlNode> BuildMathmlTree( 502 const MathmlOptions& options, 503 const MathmlEnvironment& inheritedEnvironment, 504 unsigned& nodeCount 505 ) const; 506 507 virtual void Print( 508 std::wostream& os, 509 int depth = 0 510 ) const; 511 }; 512 513 514 // Represents an expression under a square root sign; i.e. something 515 // translated as <msqrt>. 516 struct Sqrt : Node 517 { 518 // The expression under the radical. 519 std::auto_ptr<Node> mChild; 520 SqrtSqrt521 Sqrt( 522 std::auto_ptr<Node> child, 523 RGBColour colour 524 ) : 525 Node(child->mStyle, cFlavourOrd, cLimitsDisplayLimits, colour), 526 mChild(child) 527 { } 528 529 virtual void Optimise(); 530 531 virtual std::auto_ptr<MathmlNode> BuildMathmlTree( 532 const MathmlOptions& options, 533 const MathmlEnvironment& inheritedEnvironment, 534 unsigned& nodeCount 535 ) const; 536 537 virtual void Print( 538 std::wostream& os, 539 int depth = 0 540 ) const; 541 }; 542 543 544 // Represents an expression under a general radical sign; i.e. something 545 // translated as <mroot>. 546 struct Root : Node 547 { 548 // The expressions under and outside the radical. 549 std::auto_ptr<Node> mInside, mOutside; 550 RootRoot551 Root( 552 std::auto_ptr<Node> inside, 553 std::auto_ptr<Node> outside, 554 RGBColour colour 555 ) : 556 Node(inside->mStyle, cFlavourOrd, cLimitsDisplayLimits, colour), 557 mInside(inside), 558 mOutside(outside) 559 { } 560 561 virtual void Optimise(); 562 563 virtual std::auto_ptr<MathmlNode> BuildMathmlTree( 564 const MathmlOptions& options, 565 const MathmlEnvironment& inheritedEnvironment, 566 unsigned& nodeCount 567 ) const; 568 569 virtual void Print( 570 std::wostream& os, 571 int depth = 0 572 ) const; 573 }; 574 575 576 // Represents something translated as <mtable>. 577 struct Table : Node 578 { 579 // Array of rows of table entries. 580 std::vector<std::vector<Node*> > mRows; 581 582 // These values describe the possible alignment values for the 583 // table. Most environments (e.g. "matrix", "pmatrix") use 584 // cAlignCentre. The environments "cases" uses cAlignLeft (all table 585 // entries aligned to the left). cAlignRightLeft alternates columns 586 // aligned right and left; it's used for the "aligned" environment. 587 enum Align 588 { 589 cAlignLeft, 590 cAlignCentre, 591 cAlignRightLeft 592 } 593 mAlign; 594 595 // How much space to put between rows of the table. Currently 596 // "tight" is used for "\substack" blocks, everything else 597 // gets "normal". 598 enum RowSpacing 599 { 600 cRowSpacingNormal, 601 cRowSpacingTight 602 } 603 mRowSpacing; 604 605 Table( 606 Style style, 607 RGBColour colour, 608 RowSpacing rowSpacing = cRowSpacingNormal 609 ) : NodeTable610 Node(style, cFlavourOrd, cLimitsDisplayLimits, colour), 611 mAlign(cAlignCentre), 612 mRowSpacing(rowSpacing) 613 { } 614 615 ~Table(); 616 617 virtual void Optimise(); 618 619 virtual std::auto_ptr<MathmlNode> BuildMathmlTree( 620 const MathmlOptions& options, 621 const MathmlEnvironment& inheritedEnvironment, 622 unsigned& nodeCount 623 ) const; 624 625 virtual void Print( 626 std::wostream& os, 627 int depth = 0 628 ) const; 629 }; 630 631 } // end LayoutTree namespace 632 633 634 // This struct records some information about the rendering environment for 635 // a portion of the MathML tree. It is used when building the MathML tree 636 // to decide when it is necessary to insert additional <mstyle> tags. 637 struct MathmlEnvironment 638 { 639 // The "displaystyle" and "scriptlevel" attributes. 640 bool mDisplayStyle; 641 int mScriptLevel; 642 643 // The "mathcolor" attribute. 644 RGBColour mColour; 645 646 MathmlEnvironment( 647 bool displayStyle = false, 648 int scriptLevel = 0, 649 RGBColour colour = 0 650 ) : mDisplayStyleMathmlEnvironment651 mDisplayStyle(displayStyle), 652 mScriptLevel(scriptLevel), 653 mColour(colour) 654 { } 655 656 // This constructor determines the displayStyle and scriptLevel settings 657 // corresponding to the given TeX style. 658 MathmlEnvironment( 659 LayoutTree::Node::Style style, 660 RGBColour colour 661 ); 662 }; 663 664 } 665 666 #endif 667 668 // end of file @@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@ 669