1 /*
2 blahtex: a TeX to MathML converter designed with MediaWiki in mind
3 blahtexml: an extension of blahtex with XML processing in mind
4 http://gva.noekeon.org/blahtexml
5 
6 Copyright (c) 2006, David Harvey
7 Copyright (c) 2009, Gilles Van Assche
8 All rights reserved.
9 
10 Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met:
11 
12     * Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer.
13     * Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution.
14     * Neither the names of the authors nor the names of their affiliation may be used to endorse or promote products derived from this software without specific prior written permission.
15 
16 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
17 */
18 
19 #ifndef BLAHTEX_MISC_H
20 #define BLAHTEX_MISC_H
21 
22 
#include <map>
#include <set>
#include <string>
#include <vector>

#include "Token.h"
28 
29 
// wishful_hash_set/map marks every place where a hash-based container
// (hash_set/hash_map) would really be the preferred choice. Hash
// containers were not quite standard enough when this was written, so for
// now both names simply expand to the ordered containers std::set and
// std::map. Because of that, this header has to include both <set> and
// <map> for the macros to be usable on their own.
#define wishful_hash_map std::map
#define wishful_hash_set std::set
35 
36 
// END_ARRAY(a) evaluates to a pointer one past the last element of the
// built-in array a, which makes it easy to construct an STL container from
// the range [a, END_ARRAY(a)). The element count is derived from sizeof,
// so the argument must be an actual array rather than a pointer.
// (Macros are regrettable; this one is kept for brevity.)
#define END_ARRAY(zzz_array) \
    ((zzz_array) + (sizeof(zzz_array) / sizeof(*(zzz_array))))
42 
43 
44 namespace blahtex
45 {
46 
47 
// A colour packed into a single integer as 0x00rrggbb.
// The type is expected to be 32 bits wide; nothing here enforces that.
typedef unsigned int RGBColour;
51 
52 
// Exception is what the blahtex core throws when it decides that the input
// is invalid in some way. More serious problems, such as failed debug
// assertions, are not reported through this class; those are thrown as
// std::logic_error.
//
// An exception carries an identifying string (mCode) together with a list
// of arguments (mArgs), which may be empty. Keeping the code separate from
// its arguments is intended to make localisation of error messages easier.
// The full list of English messages for these codes lives in Messages.cpp,
// which is not part of the blahtex core.
class Exception
{
public:
    // Creates an exception with an empty code and no arguments.
    Exception()
    {
    }

    // Creates an exception identified by "code". Each of arg1, arg2 and
    // arg3 that is non-empty gets appended to the argument list, in that
    // order. Empty arguments are skipped, so a later argument moves up
    // when an earlier one is empty.
    Exception(
        const std::wstring& code,
        const std::wstring& arg1 = L"",
        const std::wstring& arg2 = L"",
        const std::wstring& arg3 = L""
    ) :
        mCode(code)
    {
        const std::wstring* const candidates[] = { &arg1, &arg2, &arg3 };
        for (int index = 0; index < 3; ++index)
        {
            const std::wstring& candidate = *candidates[index];
            if (candidate.empty())
                continue;
            mArgs.push_back(candidate);
        }
    }

    // Returns the identifying string of this exception.
    const std::wstring& GetCode() const
    {
        return mCode;
    }

    // Returns the (possibly empty) list of arguments.
    const std::vector<std::wstring>& GetArgs() const
    {
        return mArgs;
    }

private:
    std::wstring mCode;
    std::vector<std::wstring> mArgs;
};
99 
100 
101 class TokenException : public Exception
102 {
103 private:
104 	Token mToken;
105 
106 public:
TokenException(const std::wstring & code,const Token & token)107 	TokenException(const std::wstring & code, const Token & token)
108 					: Exception(code, L"", L"", L""), mToken(token) {}
109 
TokenException(const std::wstring & code,const std::wstring & arg1,const Token & token)110 	TokenException(const std::wstring & code, const std::wstring & arg1, const Token & token)
111 					: Exception(code, arg1, L"", L""), mToken(token) {}
112 
getToken()113 	const Token & getToken() const {
114 		return mToken;
115 	}
116 };
117 
118 
// EncodingOptions collects the settings that govern the character encoding
// of the output.
struct EncodingOptions
{
    // The available treatments for non-ASCII MathML characters.
    enum MathmlEncoding
    {
        cMathmlEncodingRaw,         // directly in unicode
        cMathmlEncodingNumeric,     // use e.g. "&#x2329;"
        cMathmlEncodingShort,       // use e.g. "&lang;"
        cMathmlEncodingLong         // use e.g. "&LeftAngleBracket;"
    };

    // Selects what happens to non-ASCII MathML characters. This is the
    // setting behind the "--mathml-encoding" command line option.
    MathmlEncoding mMathmlEncoding;

    // Selects what happens to characters that are neither ASCII nor
    // MathML characters:
    // * true: emit them directly in unicode
    // * false: emit a numeric reference, e.g. "&#x1234;"
    bool mOtherEncodingRaw;

    // Whether unicode plane-1 characters may appear in the output.
    // (The switch exists because some browsers don't have decent support
    // for plane 1 characters.)
    //
    // When this flag is NOT set, blahtex never outputs things like
    // "&#x1d504;", not even with mMathmlEncoding set to
    // cMathmlEncodingRaw or cMathmlEncodingNumeric; it falls back on
    // something like "&Afr;" instead.
    //
    // (struct MathmlOptions has a flag of the same name.)
    bool mAllowPlane1;

    // Defaults: numeric MathML encoding, numeric references for other
    // non-ASCII characters, plane-1 characters allowed.
    EncodingOptions()
        : mMathmlEncoding(cMathmlEncodingNumeric)
        , mOtherEncodingRaw(false)
        , mAllowPlane1(true)
    {
    }
};
157 
158 
// MathmlOptions holds the settings that influence the generated MathML.
struct MathmlOptions
{
    // How much spacing markup blahtex emits.
    //
    // Spacing is always determined using TeX's rules (or an approximation
    // thereof); these values only describe how much of the time blahtex
    // actually writes markup (<mspace>, lspace, rspace) to implement its
    // spacing decisions.
    //
    // cSpacingControlStrict:
    //     Spacing commands are output everywhere possible, leaving no
    //     choice to the MathML renderer.
    //
    // cSpacingControlModerate:
    //     Spacing commands are output where blahtex thinks a typical
    //     MathML renderer is likely to do something visually
    //     unsatisfactory without additional help. The aim is good
    //     agreement with TeX without overly bloated MathML markup. (This
    //     is very difficult to get right and is expected to stay under
    //     continual review.)
    //
    // cSpacingControlRelaxed:
    //     Spacing commands are output only when the user specifically
    //     asks for them, using TeX commands like "\," or "\quad".
    enum SpacingControl
    {
        cSpacingControlStrict,
        cSpacingControlModerate,
        cSpacingControlRelaxed
    };

    // The spacing policy in effect. It corresponds to the command line
    // "--spacing" option.
    SpacingControl mSpacingControl;

    // When set, blahtex uses the MathML version 1 font attributes
    // (fontstyle, fontweight, fontfamily) instead of mathvariant, and
    // handles the fancier fonts (script, bold-script, fraktur,
    // bold-fraktur, double-struck) by explicitly using appropriate MathML
    // entities (e.g. "&Afr;").
    bool mUseVersion1FontAttributes;

    // Whether unicode plane-1 characters are allowed; discussed at
    // struct EncodingOptions.
    bool mAllowPlane1;

    // Defaults: strict spacing control, mathvariant-style font
    // attributes, plane-1 characters allowed.
    MathmlOptions()
        : mSpacingControl(cSpacingControlStrict)
        , mUseVersion1FontAttributes(false)
        , mAllowPlane1(true)
    {
    }
};
208 
209 
// PurifiedTexOptions controls how blahtex generates "purified TeX", i.e.
// the .tex file that is handed to LaTeX in order to produce PNG output.
struct PurifiedTexOptions
{
    // Produce display math rather than inline math.
    bool mDisplayMath;

    // Permission to use "\usepackage[utf8x]{inputenc}" (which also
    // requires the "ucs" package).
    bool mAllowUcs;

    // Permission to use "\usepackage{CJK}".
    bool mAllowCJK;

    // Permission to use the "preview" package.
    bool mAllowPreview;

    // Font name (e.g. "ipam") passed to "\begin{CJK}..." for handling
    // japanese; left blank if no font is available.
    std::wstring mJapaneseFont;

    // LaTeX preamble.
    std::wstring mLaTeXPreamble;

    // LaTeX content inserted before the math.
    std::wstring mLaTeXBeforeMath;

    // Defaults: every flag off; the three strings start out empty.
    PurifiedTexOptions()
        : mDisplayMath(false)
        , mAllowUcs(false)
        , mAllowCJK(false)
        , mAllowPreview(false)
    {
    }
};
245 
246 }
247 
248 #endif
249 
250 // end of file @@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
251