1 /* 2 blahtex: a TeX to MathML converter designed with MediaWiki in mind 3 blahtexml: an extension of blahtex with XML processing in mind 4 http://gva.noekeon.org/blahtexml 5 6 Copyright (c) 2006, David Harvey 7 Copyright (c) 2009, Gilles Van Assche 8 All rights reserved. 9 10 Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: 11 12 * Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. 13 * Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. 14 * Neither the names of the authors nor the names of their affiliation may be used to endorse or promote products derived from this software without specific prior written permission. 15 16 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 17 */ 18 19 #ifndef BLAHTEX_MISC_H 20 #define BLAHTEX_MISC_H 21 22 23 #include <set> 24 #include <vector> 25 #include <string> 26 27 #include "Token.h" 28 29 30 // I use wishful_hash_set/map wherever I really want to use hash_set/map. 31 // Unfortunately hash_set/map is not quite standard enough yet, so for now 32 // it just gets mapped to set/map. 33 #define wishful_hash_map std::map 34 #define wishful_hash_set std::set 35 36 37 // The macro END_ARRAY is used in several places to simplify code that 38 // constructs an STL container from an array of data. 39 // (Yes, I hate macros too. Sorry.) 40 #define END_ARRAY(zzz_array) \ 41 ((zzz_array) + sizeof(zzz_array)/sizeof((zzz_array)[0])) 42 43 44 namespace blahtex 45 { 46 47 48 // Stores colours in 0x00rrggbb format. 49 // Better be 32 bits wide! 50 typedef unsigned RGBColour; 51 52 53 // The blahtex core throws an Exception object when it detects the input is 54 // invalid in some way. This doesn't include more serious things like debug 55 // assertions (these are thrown as std::logic_error). 56 // 57 // Each exception consists of an identifying string (mCode) plus zero or 58 // more arguments (mArgs). This scheme is designed to facilitate 59 // localisation of error messages. A complete list of corresponding 60 // messages in English is provided in Messages.cpp (not part of the 61 // blahtex core). 62 class Exception 63 { 64 private: 65 std::wstring mCode; 66 std::vector<std::wstring> mArgs; 67 68 public: Exception()69 Exception() 70 { 71 } 72 73 Exception( 74 const std::wstring& code, 75 const std::wstring& arg1 = L"", 76 const std::wstring& arg2 = L"", 77 const std::wstring& arg3 = L"" 78 ) : mCode(code)79 mCode(code) 80 { 81 if (!arg1.empty()) 82 mArgs.push_back(arg1); 83 if (!arg2.empty()) 84 mArgs.push_back(arg2); 85 if (!arg3.empty()) 86 mArgs.push_back(arg3); 87 } 88 GetCode()89 const std::wstring& GetCode() const 90 { 91 return mCode; 92 } 93 GetArgs()94 const std::vector<std::wstring>& GetArgs() const 95 { 96 return mArgs; 97 } 98 }; 99 100 101 class TokenException : public Exception 102 { 103 private: 104 Token mToken; 105 106 public: TokenException(const std::wstring & code,const Token & token)107 TokenException(const std::wstring & code, const Token & token) 108 : Exception(code, L"", L"", L""), mToken(token) {} 109 TokenException(const std::wstring & code,const std::wstring & arg1,const Token & token)110 TokenException(const std::wstring & code, const std::wstring & arg1, const Token & token) 111 : Exception(code, arg1, L"", L""), mToken(token) {} 112 getToken()113 const Token & getToken() const { 114 return mToken; 115 } 116 }; 117 118 119 // EncodingOptions describes output character encoding options. 120 struct EncodingOptions 121 { 122 // mMathmlEncoding tells what to do with non-ASCII MathML characters. 123 // It corresponds to the "--mathml-encoding" option on the command line. 124 enum MathmlEncoding 125 { 126 cMathmlEncodingRaw, // directly in unicode 127 cMathmlEncodingNumeric, // use e.g. "〈" 128 cMathmlEncodingShort, // use e.g. "⟨" 129 cMathmlEncodingLong // use e.g. "⟨" 130 } 131 mMathmlEncoding; 132 133 // mOtherEncodingRaw tells what to do with non-ASCII, non-MathML 134 // characters: 135 // * true means use unicode directly 136 // * false means use e.g. "ሴ" 137 bool mOtherEncodingRaw; 138 139 // mAllowPlane1 tells whether to allow unicode plane-1 characters. 140 // (This facility is included because some browsers don't have decent 141 // support for plane 1 characters.) 142 // 143 // If this flag is NOT set, then blahtex will never output things like 144 // "𝔄", even when mMathmlEncoding is set to cMathmlEncodingRaw 145 // or cMathmlEncodingNumeric. Instead it will fall back on something 146 // like "𝔄". 147 // 148 // (This flag is also present in struct MathmlOptions.) 149 bool mAllowPlane1; 150 EncodingOptionsEncodingOptions151 EncodingOptions() : 152 mMathmlEncoding(cMathmlEncodingNumeric), 153 mOtherEncodingRaw(false), 154 mAllowPlane1(true) 155 { } 156 }; 157 158 159 // MathmlOptions stores options that affect the MathML output. 160 struct MathmlOptions 161 { 162 // mSpacingControl controls blahtex's MathML spacing markup output. It 163 // corresponds to the command line "--spacing" option. 164 // 165 // Blahtex always uses TeX's rules (or an approximation thereof) to 166 // determine spacing, but the SpacingControl values describe how much of 167 // the time it actually outputs markup (<mspace>, lspace, rspace) to 168 // implement its spacing decisions. 169 // 170 // cSpacingControlStrict: 171 // Blahtex outputs spacing commands everywhere possible, doesn't 172 // leave any choice to the MathML renderer. 173 // 174 // cSpacingControlModerate: 175 // Blahtex outputs spacing commands where it thinks a typical MathML 176 // renderer is likely to do something visually unsatisfactory 177 // without additional help. The aim is to get good agreement with 178 // TeX without overly bloated MathML markup. (It's very difficult 179 // to get this right, so I expect it to be under continual review.) 180 // 181 // cSpacingControlRelaxed: 182 // Blahtex only outputs spacing commands when the user specifically 183 // asks for them, using TeX commands like "\," or "\quad". 184 enum SpacingControl 185 { 186 cSpacingControlStrict, 187 cSpacingControlModerate, 188 cSpacingControlRelaxed 189 } 190 mSpacingControl; 191 192 // If mUseVersion1FontAttributes is set, blahtex will use MathML version 193 // 1 font attributes (fontstyle, fontweight, fontfamily) instead of 194 // mathvariant, and it will handle the fancier fonts (script, 195 // bold-script, fraktur, bold-fraktur, double-struck) by explicitly 196 // using appropriate MathML entities (e.g. "𝔄"). 197 bool mUseVersion1FontAttributes; 198 199 // Discussed at struct EncodingOptions. 200 bool mAllowPlane1; 201 MathmlOptionsMathmlOptions202 MathmlOptions() : 203 mSpacingControl(cSpacingControlStrict), 204 mUseVersion1FontAttributes(false), 205 mAllowPlane1(true) 206 { } 207 }; 208 209 210 // This class contains options to control how blahtex generates 211 // "purified Tex", that is, the .tex file which is sent to LaTeX to 212 // generate PNG output. 213 struct PurifiedTexOptions 214 { 215 // Generate display math instead of inline math 216 bool mDisplayMath; 217 218 // Blahtex may use "\usepackage[utf8x]{inputenc}" (which also requires 219 // the "ucs" package) 220 bool mAllowUcs; 221 222 // Blahtex may use "\usepackage{CJK}" 223 bool mAllowCJK; 224 225 // Blahtex may use the "preview" package. 226 bool mAllowPreview; 227 228 // The font name (e.g. "ipam") which gets passed to "\begin{CJK}..." 229 // for handling japanese, or blank if no font is available. 230 std::wstring mJapaneseFont; 231 232 // LaTeX preamble 233 std::wstring mLaTeXPreamble; 234 235 // LaTeX content inserted before math 236 std::wstring mLaTeXBeforeMath; 237 PurifiedTexOptionsPurifiedTexOptions238 PurifiedTexOptions() : 239 mDisplayMath(false), 240 mAllowUcs(false), 241 mAllowCJK(false), 242 mAllowPreview(false) 243 { } 244 }; 245 246 } 247 248 #endif 249 250 // end of file @@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@ 251