1 /*
2 blahtex: a TeX to MathML converter designed with MediaWiki in mind
3 blahtexml: an extension of blahtex with XML processing in mind
4 http://gva.noekeon.org/blahtexml
5 
6 Copyright (c) 2006, David Harvey
7 All rights reserved.
8 
9 Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met:
10 
11     * Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer.
12     * Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution.
13     * Neither the names of the authors nor the names of their affiliation may be used to endorse or promote products derived from this software without specific prior written permission.
14 
15 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
16 */
17 
18 #ifndef BLAHTEX_LAYOUTTREE_H
19 #define BLAHTEX_LAYOUTTREE_H
20 
21 #include <memory>
22 #include "MathmlNode.h"
23 
24 namespace blahtex
25 {
26 
// Upper bound on how many nodes the output MathML tree may contain.
// (The cap exists so that users cannot provoke quadratic running time by
// feeding in arrays made up mostly of empty entries.)
const unsigned int cMaxMathmlNodeCount = 2500u;


// Forward declaration; the full definition appears later in this header.
struct MathmlEnvironment;
34 
// The LayoutTree namespace contains all classes that represent nodes in
// the layout tree. The layout tree is an intermediate stage between the
// parse tree and the final output XML tree.
38 namespace LayoutTree
39 {
40     // Base class for layout tree nodes.
41     struct Node
42     {
~NodeNode43         virtual ~Node()
44         { }
45 
46         // This field is only used during the layout tree building phase, to
47         // determine inter-atomic spacing. The values correspond roughly
48         // to TeX's differently flavoured atoms. (We omit several flavours
49         // that TeX uses, like "acc" and "rad"; these are generally handled
50         // as "ord".)
51         //
52         // This field is ignored for LayoutTree::Space nodes.
53         enum Flavour
54         {
55             cFlavourOrd,
56             cFlavourOp,
57             cFlavourBin,
58             cFlavourRel,
59             cFlavourOpen,
60             cFlavourClose,
61             cFlavourPunct,
62             cFlavourInner
63         }
64         mFlavour;
65 
66         // This field is only used during the layout tree building phase, to
67         // determine script placement. It corresponds to TeX's "limits",
68         // "nolimits", "displaylimits" trichotomy.
69         //
70         // It is only valid if mFlavour == cFlavourOp.
71         enum Limits
72         {
73             cLimitsDisplayLimits,
74             cLimitsLimits,
75             cLimitsNoLimits
76         }
77         mLimits;
78 
79         // This field corresponds to TeX's displaystyle/textstyle/
80         // scriptstyle/scriptscriptstyle setting. (We ignore the cramped/
81         // uncramped variations.)
82         //
83         // This field is ignored for LayoutTree::Space nodes.
84         enum Style
85         {
86             cStyleDisplay,              // like \displaystyle
87             cStyleText,                 // like \textstyle
88             cStyleScript,               // like \scriptstyle
89             cStyleScriptScript          // like \scriptscriptstyle
90         }
91         mStyle;
92 
93         // Colour of the node. For symbols this is the colour of the symbol;
94         // for fractions it's the colour of the horizontal bar; for radicals
95         // it's the colour of the radical symbol.
96         //
97         // This field is ignored for LayoutTree::Space nodes.
98         RGBColour mColour;
99 
100 
NodeNode101         Node(
102             Style style,
103             Flavour flavour,
104             Limits limits,
105             RGBColour colour
106         ) :
107             mStyle(style),
108             mFlavour(flavour),
109             mLimits(limits),
110             mColour(colour)
111         { }
112 
113 
114         // This function "optimises" the tree beneath the current node:
115         // (1) It merges adjacent Space nodes into single spaces, and
116         // (2) It merges adjacent Symbol nodes in certain situations.
117         //     For exammple, we want <mn>12</mn> instead of
118         //     <mn>1</mn><mn>2</mn>, and <mi>sin</mi> instead of
119         //     <mi mathvariant="normal">s</mi>
120         //     <mi mathvariant="normal">i</mi>
121         //     <mi mathvariant="normal">n</mi>   !!!!
OptimiseNode122         virtual void Optimise()
123         { }
124 
125 
126         // This function converts the layout tree rooted at this node into
127         // a MathML tree.
128         //
129         // The inheritedEnvironment parameter tells it what assumptions to
130         // make about its rendering environment. It uses these to decide
131         // whether to insert extra <mstyle> tags.
132         //
133         // The nodeCount parameter is used to keep track of the total number
134         // of nodes in the MathML tree. For security reasons we put a hard
135         // limit on this. (See cMaxMathmlNodeCount.)
136         virtual std::auto_ptr<MathmlNode> BuildMathmlTree(
137             const MathmlOptions& options,
138             const MathmlEnvironment& inheritedEnvironment,
139             unsigned& nodeCount
140         ) const = 0;
141 
142 
143         // This function recursively prints the layout tree under this node.
144         // Debugging use only.
145         virtual void Print(
146             std::wostream& os,
147             int depth = 0
148         ) const = 0;
149 
150         std::wstring PrintFields() const;   // used internally by Print
151     };
152 
153 
154     // A Row stores a list of children nodes. It gets translated into an
155     // <mrow> node in the MathML tree.
156     //
157     // No Row ever has another Row node as its child.
158     struct Row : Node
159     {
160         std::list<Node*> mChildren;
161 
RowRow162         Row(Style style, RGBColour colour) :
163             Node(style, cFlavourOrd, cLimitsDisplayLimits, colour)
164         { }
165 
166         ~Row();
167 
168         virtual void Optimise();
169 
170         virtual std::auto_ptr<MathmlNode> BuildMathmlTree(
171             const MathmlOptions& options,
172             const MathmlEnvironment& inheritedEnvironment,
173             unsigned& nodeCount
174         ) const;
175 
176         virtual void Print(
177             std::wostream& os,
178             int depth = 0
179         ) const;
180     };
181 
182 
183     // Symbol is an abstract class; its concrete subclasses are
184     // SymbolIdentifier, SymbolNumber, SymbolOperator, SymbolText. It
185     // represents anything that will get translated as <mn>, <mi>, <mo>
186     // or <mtext>. It describes the text that goes inside the tags (mText)
187     // and what font it should be in (mFont).
188     struct Symbol : Node
189     {
190         std::wstring mText;
191         MathmlFont mFont;
192 
SymbolSymbol193         Symbol(
194             const std::wstring& text,
195             MathmlFont font,
196             Style style,
197             Flavour flavour,
198             Limits limits,
199             RGBColour colour
200         ) :
201             Node(style, flavour, limits, colour),
202             mText(text),
203             mFont(font)
204         { }
205 
206         virtual std::auto_ptr<MathmlNode> BuildMathmlTree(
207             const MathmlOptions& options,
208             const MathmlEnvironment& inheritedEnvironment,
209             unsigned& nodeCount
210         ) const = 0;
211 
212         virtual void Print(
213             std::wostream& os,
214             int depth = 0
215         ) const = 0;
216     };
217 
218 
219     // SymbolIdentifier represents things translated as <mi>.
220     struct SymbolIdentifier : Symbol
221     {
SymbolIdentifierSymbolIdentifier222         SymbolIdentifier(
223             const std::wstring& text,
224             MathmlFont font,
225             Style style,
226             Flavour flavour,
227             Limits limits,
228             RGBColour colour
229         ) :
230             Symbol(text, font, style, flavour, limits, colour)
231         { }
232 
233         virtual std::auto_ptr<MathmlNode> BuildMathmlTree(
234             const MathmlOptions& options,
235             const MathmlEnvironment& inheritedEnvironment,
236             unsigned& nodeCount
237         ) const;
238 
239         virtual void Print(
240             std::wostream& os,
241             int depth = 0
242         ) const;
243     };
244 
245 
246     // SymbolNumber represents things translated as <mn>.
247     struct SymbolNumber : Symbol
248     {
SymbolNumberSymbolNumber249         SymbolNumber(
250             const std::wstring& text,
251             MathmlFont font,
252             Style style,
253             Flavour flavour,
254             Limits limits,
255             RGBColour colour
256         ) :
257             Symbol(text, font, style, flavour, limits, colour)
258         { }
259 
260         virtual std::auto_ptr<MathmlNode> BuildMathmlTree(
261             const MathmlOptions& options,
262             const MathmlEnvironment& inheritedEnvironment,
263             unsigned& nodeCount
264         ) const;
265 
266         virtual void Print(
267             std::wostream& os,
268             int depth = 0
269         ) const;
270     };
271 
272 
273     // SymbolText represents things translated as <mtext>.
274     //
275     // Actually, each SymbolText represents just a single character;
276     // they get merged by their parent's Row::BuildMathmlTree() function.
277     struct SymbolText : Symbol
278     {
SymbolTextSymbolText279         SymbolText(
280             const std::wstring& text,
281             MathmlFont font,
282             Style style,
283             RGBColour colour
284         ) :
285             Symbol(
286                 text, font, style, cFlavourOrd, cLimitsDisplayLimits, colour
287             )
288         { }
289 
290         virtual std::auto_ptr<MathmlNode> BuildMathmlTree(
291             const MathmlOptions& options,
292             const MathmlEnvironment& inheritedEnvironment,
293             unsigned& nodeCount
294         ) const;
295 
296         virtual void Print(
297             std::wostream& os,
298             int depth = 0
299         ) const;
300     };
301 
302 
303     // SymbolOperator represents things translated as <mo>.
304     struct SymbolOperator : Symbol
305     {
306         // Whether or not this operator is stretchy.
307         //
308         // Note: because of the existence of the MathML operator dictionary,
309         // BuildMathmlTree() needs to do a bit of work to decide whether
310         // to actually use a "stretchy" attribute to implement this flag.
311         bool mIsStretchy;
312 
313         // mSize, if non-empty, indicates the "minsize" and "maxsize"
314         // attributes. It is only valid if mIsStretchy is true.
315         std::wstring mSize;
316 
317         // Whether to use the accent="true" attribute.
318         //
319         // Again, BuildMathmlTree needs to do some work to decide if the
320         // "accent" attribute is actually needed.
321         bool mIsAccent;
322 
SymbolOperatorSymbolOperator323         SymbolOperator(
324             bool isStretchy,
325             const std::wstring& size,
326             bool isAccent,
327             const std::wstring& text,
328             MathmlFont font,
329             Style style,
330             Flavour flavour,
331             Limits limits,
332             RGBColour colour
333         ) :
334             Symbol(text, font, style, flavour, limits, colour),
335             mIsStretchy(isStretchy),
336             mSize(size),
337             mIsAccent(isAccent)
338         { }
339 
340         virtual std::auto_ptr<MathmlNode> BuildMathmlTree(
341             const MathmlOptions& options,
342             const MathmlEnvironment& inheritedEnvironment,
343             unsigned& nodeCount
344         ) const;
345 
346         virtual void Print(
347             std::wostream& os,
348             int depth = 0
349         ) const;
350     };
351 
352 
353     // Represents a space. This may or not actually end up as MathML markup,
354     // depending on a variety of things.
355     struct Space : Node
356     {
357         // mWidth is the width of the space, measured in mu.
358         // (18mu = 1em in normal font size.)
359         // It may be negative.
360         int mWidth;
361 
362         // This flag indicates whether the space was requested by the user
363         // via a TeX spacing command like "\quad". False means that blahtex
364         // computed the space (according to TeX's rules).
365         bool mIsUserRequested;
366 
SpaceSpace367         Space(
368             int width,
369             bool isUserRequested
370         ) :
371             Node(cStyleDisplay, cFlavourOrd, cLimitsDisplayLimits, 0),
372             mWidth(width),
373             mIsUserRequested(isUserRequested)
374         { }
375 
376         virtual std::auto_ptr<MathmlNode> BuildMathmlTree(
377             const MathmlOptions& options,
378             const MathmlEnvironment& inheritedEnvironment,
379             unsigned& nodeCount
380         ) const;
381 
382         virtual void Print(
383             std::wostream& os,
384             int depth = 0
385         ) const;
386     };
387 
388 
389     // Represents a base with a subscript and/or a superscript,
390     // OR a base with an underscript and/or an overscript.
391     struct Scripts : Node
392     {
393         // Any of the following three fields may be NULL (i.e. empty).
394         std::auto_ptr<Node> mBase, mUpper, mLower;
395 
396         // True means sub/superscript; false means under/overscript.
397         //
398         // (This flag is computed from e.g. the "limits" setting of mBase,
399         // and from the current TeX style.)
400         bool mIsSideset;
401 
ScriptsScripts402         Scripts(
403             Style style,
404             Flavour flavour,
405             Limits limits,
406             RGBColour colour,
407             bool isSideset,
408             std::auto_ptr<Node> base,
409             std::auto_ptr<Node> upper,
410             std::auto_ptr<Node> lower
411         ) :
412             Node(style, flavour, limits, colour),
413             mIsSideset(isSideset),
414             mBase(base),
415             mUpper(upper),
416             mLower(lower)
417         { }
418 
419         virtual void Optimise();
420 
421         virtual std::auto_ptr<MathmlNode> BuildMathmlTree(
422             const MathmlOptions& options,
423             const MathmlEnvironment& inheritedEnvironment,
424             unsigned& nodeCount
425         ) const;
426 
427         virtual void Print(
428             std::wostream& os,
429             int depth = 0
430         ) const;
431     };
432 
433 
434     // Represents something that will get translated as <mfrac>.
435     struct Fraction : Node
436     {
437         std::auto_ptr<Node> mNumerator, mDenominator;
438 
439         // Does the fraction need a visible line?
440         // True for ordinary vanilla fractions; false for things like
441         // binomial coefficients.
442         bool mIsLineVisible;
443 
FractionFraction444         Fraction(
445             Style style,
446             RGBColour colour,
447             std::auto_ptr<Node> numerator,
448             std::auto_ptr<Node> denominator,
449             bool isLineVisible
450         ) :
451             Node(style, cFlavourOrd, cLimitsDisplayLimits, colour),
452             mNumerator(numerator),
453             mDenominator(denominator),
454             mIsLineVisible(isLineVisible)
455         { }
456 
457         virtual void Optimise();
458 
459         virtual std::auto_ptr<MathmlNode> BuildMathmlTree(
460             const MathmlOptions& options,
461             const MathmlEnvironment& inheritedEnvironment,
462             unsigned& nodeCount
463         ) const;
464 
465         virtual void Print(
466             std::wostream& os,
467             int depth = 0
468         ) const;
469     };
470 
471 
472     // Represents an expression between a pair of delimiters.
473     //
474     // (Blahtex doesn't translate this using <mfenced>, because then we
475     // couldn't use more exotic (non-ASCII) fences in the "open" and
476     // "close" attributes.)
477     struct Fenced : Node
478     {
479         // The opening and closing delimiters, i.e. the text that goes
480         // inside <mo>...</mo>.
481         std::wstring mLeftDelimiter, mRightDelimiter;
482 
483         // The expression being surrounded by fences.
484         std::auto_ptr<Node> mChild;
485 
FencedFenced486         Fenced(
487             Style style,
488             RGBColour colour,
489             const std::wstring& leftDelimiter,
490             const std::wstring& rightDelimiter,
491             std::auto_ptr<Node> child
492         ) :
493             Node(style, cFlavourInner, cLimitsDisplayLimits, colour),
494             mLeftDelimiter(leftDelimiter),
495             mRightDelimiter(rightDelimiter),
496             mChild(child)
497         { }
498 
499         virtual void Optimise();
500 
501         virtual std::auto_ptr<MathmlNode> BuildMathmlTree(
502             const MathmlOptions& options,
503             const MathmlEnvironment& inheritedEnvironment,
504             unsigned& nodeCount
505         ) const;
506 
507         virtual void Print(
508             std::wostream& os,
509             int depth = 0
510         ) const;
511     };
512 
513 
514     // Represents an expression under a square root sign; i.e. something
515     // translated as <msqrt>.
516     struct Sqrt : Node
517     {
518         // The expression under the radical.
519         std::auto_ptr<Node> mChild;
520 
SqrtSqrt521         Sqrt(
522             std::auto_ptr<Node> child,
523             RGBColour colour
524         ) :
525             Node(child->mStyle, cFlavourOrd, cLimitsDisplayLimits, colour),
526             mChild(child)
527         { }
528 
529         virtual void Optimise();
530 
531         virtual std::auto_ptr<MathmlNode> BuildMathmlTree(
532             const MathmlOptions& options,
533             const MathmlEnvironment& inheritedEnvironment,
534             unsigned& nodeCount
535         ) const;
536 
537         virtual void Print(
538             std::wostream& os,
539             int depth = 0
540         ) const;
541     };
542 
543 
544     // Represents an expression under a general radical sign; i.e. something
545     // translated as <mroot>.
546     struct Root : Node
547     {
548         // The expressions under and outside the radical.
549         std::auto_ptr<Node> mInside, mOutside;
550 
RootRoot551         Root(
552             std::auto_ptr<Node> inside,
553             std::auto_ptr<Node> outside,
554             RGBColour colour
555         ) :
556             Node(inside->mStyle, cFlavourOrd, cLimitsDisplayLimits, colour),
557             mInside(inside),
558             mOutside(outside)
559         { }
560 
561         virtual void Optimise();
562 
563         virtual std::auto_ptr<MathmlNode> BuildMathmlTree(
564             const MathmlOptions& options,
565             const MathmlEnvironment& inheritedEnvironment,
566             unsigned& nodeCount
567         ) const;
568 
569         virtual void Print(
570             std::wostream& os,
571             int depth = 0
572         ) const;
573     };
574 
575 
576     // Represents something translated as <mtable>.
577     struct Table : Node
578     {
579         // Array of rows of table entries.
580         std::vector<std::vector<Node*> > mRows;
581 
582         // These values describe the possible alignment values for the
583         // table. Most environments (e.g. "matrix", "pmatrix") use
584         // cAlignCentre. The environments "cases" uses cAlignLeft (all table
585         // entries aligned to the left). cAlignRightLeft alternates columns
586         // aligned right and left; it's used for the "aligned" environment.
587         enum Align
588         {
589             cAlignLeft,
590             cAlignCentre,
591             cAlignRightLeft
592         }
593         mAlign;
594 
595         // How much space to put between rows of the table. Currently
596         // "tight" is used for "\substack" blocks, everything else
597         // gets "normal".
598         enum RowSpacing
599         {
600             cRowSpacingNormal,
601             cRowSpacingTight
602         }
603         mRowSpacing;
604 
605         Table(
606             Style style,
607             RGBColour colour,
608             RowSpacing rowSpacing = cRowSpacingNormal
609         ) :
NodeTable610             Node(style, cFlavourOrd, cLimitsDisplayLimits, colour),
611             mAlign(cAlignCentre),
612             mRowSpacing(rowSpacing)
613         { }
614 
615         ~Table();
616 
617         virtual void Optimise();
618 
619         virtual std::auto_ptr<MathmlNode> BuildMathmlTree(
620             const MathmlOptions& options,
621             const MathmlEnvironment& inheritedEnvironment,
622             unsigned& nodeCount
623         ) const;
624 
625         virtual void Print(
626             std::wostream& os,
627             int depth = 0
628         ) const;
629     };
630 
631 } // end LayoutTree namespace
632 
633 
634 // This struct records some information about the rendering environment for
635 // a portion of the MathML tree. It is used when building the MathML tree
636 // to decide when it is necessary to insert additional <mstyle> tags.
637 struct MathmlEnvironment
638 {
639     // The "displaystyle" and "scriptlevel" attributes.
640     bool mDisplayStyle;
641     int mScriptLevel;
642 
643     // The "mathcolor" attribute.
644     RGBColour mColour;
645 
646     MathmlEnvironment(
647         bool displayStyle = false,
648         int scriptLevel = 0,
649         RGBColour colour = 0
650     ) :
mDisplayStyleMathmlEnvironment651         mDisplayStyle(displayStyle),
652         mScriptLevel(scriptLevel),
653         mColour(colour)
654     { }
655 
656     // This constructor determines the displayStyle and scriptLevel settings
657     // corresponding to the given TeX style.
658     MathmlEnvironment(
659         LayoutTree::Node::Style style,
660         RGBColour colour
661     );
662 };
663 
664 }
665 
666 #endif
667 
668 // end of file @@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
669