1 /*
2 blahtex: a TeX to MathML converter designed with MediaWiki in mind
3 blahtexml: an extension of blahtex with XML processing in mind
4 http://gva.noekeon.org/blahtexml
5
6 Copyright (c) 2006, David Harvey
7 All rights reserved.
8
9 Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met:
10
11 * Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer.
12 * Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution.
13 * Neither the names of the authors nor the names of their affiliation may be used to endorse or promote products derived from this software without specific prior written permission.
14
15 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
16 */
17
18 #include <iomanip>
19 #include <sstream>
20 #include <map>
21 #include <stdint.h>
22 #include "XmlEncode.h"
23
24 using namespace std;
25
26 namespace blahtex
27 {
28
// Holds the MathML entity names associated with a single Unicode character.
// Either name may be empty: an empty string means that no name of that kind
// is recorded for the character.
struct UnicodeNameInfo
{
    // Short MathML name (e.g. L"pm"); empty if none is recorded.
    std::wstring mShortName;

    // Long MathML name (e.g. L"PlusMinus"); empty if none is recorded.
    std::wstring mLongName;

    // Creates an entry with neither a short nor a long name.
    UnicodeNameInfo()
    { }

    // Creates an entry that has only a short name; the long name stays empty.
    // NOTE(review): intentionally not marked 'explicit' so that any existing
    // implicit conversions from wstring elsewhere keep compiling.
    UnicodeNameInfo(
        const std::wstring& shortName
    ) :
        mShortName(shortName)
    { }

    // Creates an entry that has both a short and a long name.
    UnicodeNameInfo(
        const std::wstring& shortName,
        const std::wstring& longName
    ) :
        mShortName(shortName),
        mLongName(longName)
    { }
};
51
// This table lists the characters (almost all of them non-ASCII) that
// blahtex knows about. Each entry maps a Unicode code point to an optional
// short MathML name and an optional long MathML name; either may be absent.
54 pair<uint32_t, UnicodeNameInfo> gUnicodeNameArray[] =
55 {
56 make_pair(0x00000060, UnicodeNameInfo(L"grave", L"DiacriticalGrave")),
57 make_pair(0x000000A0, UnicodeNameInfo(L"nbsp", L"NonBreakingSpace")),
58 make_pair(0x000000A5, UnicodeNameInfo(L"yen")),
59 make_pair(0x000000A7, UnicodeNameInfo(L"sect")),
60 make_pair(0x000000AC, UnicodeNameInfo(L"not")),
61 make_pair(0x000000AE, UnicodeNameInfo(L"reg", L"circledR")),
62 make_pair(0x000000AF, UnicodeNameInfo(L"macr", L"OverBar")),
63 make_pair(0x000000B1, UnicodeNameInfo(L"pm", L"PlusMinus")),
64 make_pair(0x000000B4, UnicodeNameInfo(L"acute", L"DiacriticalAcute")),
65 make_pair(0x000000B6, UnicodeNameInfo(L"para")),
66 make_pair(0x000000B7, UnicodeNameInfo(L"middot", L"CenterDot")),
67 make_pair(0x000000C5, UnicodeNameInfo(L"Aring")),
68 make_pair(0x000000D7, UnicodeNameInfo(L"times")),
69 make_pair(0x000000D8, UnicodeNameInfo(L"Oslash")),
70 make_pair(0x000000E5, UnicodeNameInfo(L"aring")),
71 make_pair(0x000000F0, UnicodeNameInfo(L"eth")),
72 make_pair(0x000000F7, UnicodeNameInfo(L"div", L"divide")),
73 make_pair(0x00000127, UnicodeNameInfo(L"hstrok")),
74 make_pair(0x00000131, UnicodeNameInfo(L"imath")),
75 make_pair(0x000002C7, UnicodeNameInfo(L"caron", L"Hacek")),
76 make_pair(0x000002D8, UnicodeNameInfo(L"breve", L"Breve")),
77 make_pair(0x000002DC, UnicodeNameInfo(L"tilde", L"DiacriticalTilde")),
78 make_pair(0x00000338, UnicodeNameInfo()), // FIX: combining character that needs some thought
79 make_pair(0x00000393, UnicodeNameInfo(L"Gamma")),
80 make_pair(0x00000394, UnicodeNameInfo(L"Delta")),
81 make_pair(0x00000398, UnicodeNameInfo(L"Theta")),
82 make_pair(0x0000039B, UnicodeNameInfo(L"Lambda")),
83 make_pair(0x0000039E, UnicodeNameInfo(L"Xi")),
84 make_pair(0x000003A0, UnicodeNameInfo(L"Pi")),
85 make_pair(0x000003A3, UnicodeNameInfo(L"Sigma")),
86 make_pair(0x000003A5, UnicodeNameInfo(L"Upsilon")),
87 make_pair(0x000003A6, UnicodeNameInfo(L"Phi")),
88 make_pair(0x000003A8, UnicodeNameInfo(L"Psi")),
89 make_pair(0x000003A9, UnicodeNameInfo(L"Omega")),
90 make_pair(0x000003B1, UnicodeNameInfo(L"alpha")),
91 make_pair(0x000003B2, UnicodeNameInfo(L"beta")),
92 make_pair(0x000003B3, UnicodeNameInfo(L"gamma")),
93 make_pair(0x000003B4, UnicodeNameInfo(L"delta")),
94 make_pair(0x000003B5, UnicodeNameInfo(L"epsiv", L"varepsilon")),
95 make_pair(0x000003B6, UnicodeNameInfo(L"zeta")),
96 make_pair(0x000003B7, UnicodeNameInfo(L"eta")),
97 make_pair(0x000003B8, UnicodeNameInfo(L"theta")),
98 make_pair(0x000003B9, UnicodeNameInfo(L"iota")),
99 make_pair(0x000003BA, UnicodeNameInfo(L"kappa")),
100 make_pair(0x000003BB, UnicodeNameInfo(L"lambda")),
101 make_pair(0x000003BC, UnicodeNameInfo(L"mu")),
102 make_pair(0x000003BD, UnicodeNameInfo(L"nu")),
103 make_pair(0x000003BE, UnicodeNameInfo(L"xi")),
104 make_pair(0x000003C0, UnicodeNameInfo(L"pi")),
105 make_pair(0x000003C1, UnicodeNameInfo(L"rho")),
106 make_pair(0x000003C2, UnicodeNameInfo(L"sigmav", L"varsigma")),
107 make_pair(0x000003C3, UnicodeNameInfo(L"sigma")),
108 make_pair(0x000003C4, UnicodeNameInfo(L"tau")),
109 make_pair(0x000003C5, UnicodeNameInfo(L"upsi", L"upsilon")),
110 #if 0
111 // FIX: note Firefox 1.5 has φ and ϕ around the wrong
112 // way, so better to stick with numeric codes for 0x3C6 and 0x3D5.
113 // See mozilla bug 321438.
114 make_pair(0x000003C6, UnicodeNameInfo(L"phiv", L"varphi")),
115 make_pair(0x000003D5, UnicodeNameInfo(L"phi", L"straightphi")),
116 #endif
117 make_pair(0x000003C7, UnicodeNameInfo(L"chi")),
118 make_pair(0x000003C8, UnicodeNameInfo(L"psi")),
119 make_pair(0x000003C9, UnicodeNameInfo(L"omega")),
120 make_pair(0x000003D1, UnicodeNameInfo(L"thetav", L"vartheta")),
121 make_pair(0x000003D6, UnicodeNameInfo(L"piv", L"varpi")),
122 make_pair(0x000003DD, UnicodeNameInfo(L"gammad", L"digamma")),
123 make_pair(0x000003F0, UnicodeNameInfo(L"kappav", L"varkappa")),
124 make_pair(0x000003F1, UnicodeNameInfo(L"rhov", L"varrho")),
125 make_pair(0x000003F5, UnicodeNameInfo(L"epsi", L"straightepsilon")),
126 make_pair(0x000003F6, UnicodeNameInfo(L"bepsi", L"backepsilon")),
127 make_pair(0x00002020, UnicodeNameInfo(L"dagger")),
128 make_pair(0x00002021, UnicodeNameInfo(L"Dagger", L"ddagger")),
129 make_pair(0x00002022, UnicodeNameInfo(L"bull", L"bullet")),
130 make_pair(0x00002026, UnicodeNameInfo(L"hellip")),
131 make_pair(0x00002032, UnicodeNameInfo(L"prime")),
132 make_pair(0x00002035, UnicodeNameInfo(L"bprime", L"backprime")),
133 make_pair(0x00002102, UnicodeNameInfo(L"Copf", L"complexes")),
134 make_pair(0x0000210B, UnicodeNameInfo(L"Hscr", L"HilbertSpace")),
135 make_pair(0x0000210C, UnicodeNameInfo(L"Hfr", L"Poincareplane")),
136 make_pair(0x0000210D, UnicodeNameInfo(L"Hopf", L"quaternions")),
137 make_pair(0x0000210F, UnicodeNameInfo(L"hbar", L"planck")),
138 make_pair(0x00002110, UnicodeNameInfo(L"Iscr", L"imagline")),
139 make_pair(0x00002111, UnicodeNameInfo(L"Im", L"imagpart")),
140 make_pair(0x00002112, UnicodeNameInfo(L"Lscr", L"Laplacetrf")),
141 make_pair(0x00002113, UnicodeNameInfo(L"ell")),
142 make_pair(0x00002118, UnicodeNameInfo(L"wp", L"weierp")),
143 make_pair(0x00002119, UnicodeNameInfo(L"Popf", L"primes")),
144 make_pair(0x0000211A, UnicodeNameInfo(L"Qopf", L"rationals")),
145 make_pair(0x0000211B, UnicodeNameInfo(L"Rscr", L"realine")),
146 make_pair(0x0000211C, UnicodeNameInfo(L"Re", L"realpart")),
147 make_pair(0x0000211D, UnicodeNameInfo(L"Ropf", L"reals")),
148 make_pair(0x00002124, UnicodeNameInfo(L"Zopf", L"integers")),
149 make_pair(0x00002127, UnicodeNameInfo(L"mho")),
150 make_pair(0x00002128, UnicodeNameInfo(L"Zfr", L"zeetrf")),
151 make_pair(0x0000212C, UnicodeNameInfo(L"Bscr", L"Bernoullis")),
152 make_pair(0x0000212D, UnicodeNameInfo(L"Cfr", L"Cayleys")),
153 make_pair(0x00002130, UnicodeNameInfo(L"Escr", L"expectation")),
154 make_pair(0x00002131, UnicodeNameInfo(L"Fscr", L"Fouriertrf")),
155 make_pair(0x00002133, UnicodeNameInfo(L"Mscr", L"Mellintrf")),
156 make_pair(0x00002135, UnicodeNameInfo(L"aleph")),
157 make_pair(0x00002136, UnicodeNameInfo(L"beth")),
158 make_pair(0x00002137, UnicodeNameInfo(L"gimel")),
159 make_pair(0x00002138, UnicodeNameInfo(L"daleth")),
160 make_pair(0x00002190, UnicodeNameInfo(L"larr", L"LeftArrow")),
161 make_pair(0x00002191, UnicodeNameInfo(L"uarr", L"UpArrow")),
162 make_pair(0x00002192, UnicodeNameInfo(L"rarr", L"RightArrow")),
163 make_pair(0x00002193, UnicodeNameInfo(L"darr", L"DownArrow")),
164 make_pair(0x00002194, UnicodeNameInfo(L"harr", L"LeftRightArrow")),
165 make_pair(0x00002195, UnicodeNameInfo(L"varr", L"UpDownArrow")),
166 make_pair(0x00002196, UnicodeNameInfo(L"nwarr", L"UpperLeftArrow")),
167 make_pair(0x00002197, UnicodeNameInfo(L"nearr", L"UpperRightArrow")),
168 make_pair(0x00002198, UnicodeNameInfo(L"searr", L"LowerRightArrow")),
169 make_pair(0x00002199, UnicodeNameInfo(L"swarr", L"LowerLeftArrow")),
170 make_pair(0x0000219A, UnicodeNameInfo(L"nlarr", L"nleftarrow")),
171 make_pair(0x0000219B, UnicodeNameInfo(L"nrarr", L"nrightarrow")),
172 make_pair(0x0000219D, UnicodeNameInfo(L"rarrw", L"rightsquigarrow")),
173 make_pair(0x0000219E, UnicodeNameInfo(L"Larr", L"twoheadleftarrow")),
174 make_pair(0x000021A0, UnicodeNameInfo(L"Rarr", L"twoheadrightarrow")),
175 make_pair(0x000021A2, UnicodeNameInfo(L"larrtl", L"leftarrowtail")),
176 make_pair(0x000021A3, UnicodeNameInfo(L"rarrtl", L"rightarrowtail")),
177 make_pair(0x000021A6, UnicodeNameInfo(L"map", L"RightTeeArrow")),
178 make_pair(0x000021A9, UnicodeNameInfo(L"larrhk", L"hookleftarrow")),
179 make_pair(0x000021AA, UnicodeNameInfo(L"rarrhk", L"hookrightarrow")),
180 make_pair(0x000021AB, UnicodeNameInfo(L"larrlp", L"looparrowleft")),
181 make_pair(0x000021AC, UnicodeNameInfo(L"rarrlp", L"looparrowright")),
182 make_pair(0x000021AD, UnicodeNameInfo(L"harrw", L"leftrightsquigarrow")),
183 make_pair(0x000021AE, UnicodeNameInfo(L"nharr", L"nleftrightarrow")),
184 make_pair(0x000021B0, UnicodeNameInfo(L"lsh", L"Lsh")),
185 make_pair(0x000021B1, UnicodeNameInfo(L"rsh", L"Rsh")),
186 make_pair(0x000021B6, UnicodeNameInfo(L"cularr", L"curvearrowleft")),
187 make_pair(0x000021B7, UnicodeNameInfo(L"curarr", L"curvearrowright")),
188 make_pair(0x000021BA, UnicodeNameInfo(L"olarr", L"circlearrowleft")),
189 make_pair(0x000021BB, UnicodeNameInfo(L"orarr", L"circlearrowright")),
190 make_pair(0x000021BC, UnicodeNameInfo(L"lharu", L"leftharpoonup")),
191 make_pair(0x000021BD, UnicodeNameInfo(L"lhard", L"leftharpoondown")),
192 make_pair(0x000021BE, UnicodeNameInfo(L"uharr", L"upharpoonright")),
193 make_pair(0x000021BF, UnicodeNameInfo(L"uharl", L"upharpoonleft")),
194 make_pair(0x000021C0, UnicodeNameInfo(L"rharu", L"rightharpoonup")),
195 make_pair(0x000021C1, UnicodeNameInfo(L"rhard", L"rightharpoondown")),
196 make_pair(0x000021C2, UnicodeNameInfo(L"dharr", L"downharpoonright")),
197 make_pair(0x000021C3, UnicodeNameInfo(L"dharl", L"downharpoonleft")),
198 make_pair(0x000021C4, UnicodeNameInfo(L"rlarr", L"RightArrowLeftArrow")),
199 make_pair(0x000021C6, UnicodeNameInfo(L"lrarr", L"LeftArrowRightArrow")),
200 make_pair(0x000021C7, UnicodeNameInfo(L"llarr", L"leftleftarrows")),
201 make_pair(0x000021C8, UnicodeNameInfo(L"uuarr", L"upuparrows")),
202 make_pair(0x000021C9, UnicodeNameInfo(L"rrarr", L"rightrightarrows")),
203 make_pair(0x000021CA, UnicodeNameInfo(L"ddarr", L"downdownarrows")),
204 make_pair(0x000021CB, UnicodeNameInfo(L"lrhar", L"ReverseEquilibrium")),
205 make_pair(0x000021CC, UnicodeNameInfo(L"rlhar", L"Equilibrium")),
206 make_pair(0x000021CD, UnicodeNameInfo(L"nlArr", L"nLeftarrow")),
207 make_pair(0x000021CE, UnicodeNameInfo(L"nhArr", L"nLeftrightarrow")),
208 make_pair(0x000021CF, UnicodeNameInfo(L"nrArr", L"nRightarrow")),
209 make_pair(0x000021D0, UnicodeNameInfo(L"lArr", L"DoubleLeftArrow")),
210 make_pair(0x000021D1, UnicodeNameInfo(L"uArr", L"DoubleUpArrow")),
211 make_pair(0x000021D2, UnicodeNameInfo(L"rArr", L"DoubleRightArrow")),
212 make_pair(0x000021D3, UnicodeNameInfo(L"dArr", L"DoubleDownArrow")),
213 make_pair(0x000021D4, UnicodeNameInfo(L"hArr", L"DoubleLeftRightArrow")),
214 make_pair(0x000021D5, UnicodeNameInfo(L"vArr", L"DoubleUpDownArrow")),
215 make_pair(0x000021DA, UnicodeNameInfo(L"lAarr", L"Lleftarrow")),
216 make_pair(0x000021DB, UnicodeNameInfo(L"rAarr", L"Rrightarrow")),
217 make_pair(0x000021DD, UnicodeNameInfo(L"zigrarr")),
218 make_pair(0x00002200, UnicodeNameInfo(L"forall", L"ForAll")),
219 make_pair(0x00002201, UnicodeNameInfo(L"comp", L"complement")),
220 make_pair(0x00002202, UnicodeNameInfo(L"part", L"PartialD")),
221 make_pair(0x00002203, UnicodeNameInfo(L"exist", L"Exists")),
222 make_pair(0x00002204, UnicodeNameInfo(L"nexist", L"NotExists")),
223 make_pair(0x00002205, UnicodeNameInfo(L"empty", L"emptyset")),
224 make_pair(0x00002207, UnicodeNameInfo(L"nabla", L"Del")),
225 make_pair(0x00002208, UnicodeNameInfo(L"in", L"Element")),
226 make_pair(0x00002209, UnicodeNameInfo(L"notin", L"NotElement")),
227 make_pair(0x0000220B, UnicodeNameInfo(L"ni", L"ReverseElement")),
228 make_pair(0x0000220C, UnicodeNameInfo(L"notni", L"NotReverseElement")),
229 make_pair(0x0000220F, UnicodeNameInfo(L"prod", L"Product")),
230 make_pair(0x00002210, UnicodeNameInfo(L"coprod", L"Coproduct")),
231 make_pair(0x00002211, UnicodeNameInfo(L"sum", L"Sum")),
232 make_pair(0x00002213, UnicodeNameInfo(L"mp", L"MinusPlus")),
233 make_pair(0x00002214, UnicodeNameInfo(L"dotplus")),
234 make_pair(0x00002216, UnicodeNameInfo(L"setmn", L"Backslash")),
235 make_pair(0x00002218, UnicodeNameInfo(L"compfn", L"SmallCircle")),
236 make_pair(0x0000221A, UnicodeNameInfo(L"radic", L"Sqrt")),
237 make_pair(0x0000221D, UnicodeNameInfo(L"prop", L"Proportional")),
238 make_pair(0x0000221E, UnicodeNameInfo(L"infin")),
239 make_pair(0x00002220, UnicodeNameInfo(L"ang", L"angle")),
240 make_pair(0x00002221, UnicodeNameInfo(L"angmsd", L"measuredangle")),
241 make_pair(0x00002222, UnicodeNameInfo(L"angsph")),
242 make_pair(0x00002223, UnicodeNameInfo(L"mid", L"VerticalBar")),
243 make_pair(0x00002224, UnicodeNameInfo(L"nmid", L"NotVerticalBar")),
244 make_pair(0x00002225, UnicodeNameInfo(L"par", L"DoubleVerticalBar")),
245 make_pair(0x00002226, UnicodeNameInfo(L"npar", L"NotDoubleVerticalBar")),
246 make_pair(0x00002227, UnicodeNameInfo(L"and", L"wedge")),
247 make_pair(0x00002228, UnicodeNameInfo(L"or", L"vee")),
248 make_pair(0x00002229, UnicodeNameInfo(L"cap")),
249 make_pair(0x0000222A, UnicodeNameInfo(L"cup")),
250 make_pair(0x0000222B, UnicodeNameInfo(L"int", L"Integral")),
251 make_pair(0x0000222C, UnicodeNameInfo(L"Int")),
252 make_pair(0x0000222D, UnicodeNameInfo(L"tint", L"iiint")),
253 make_pair(0x0000222E, UnicodeNameInfo(L"conint", L"ContourIntegral")),
254 make_pair(0x00002234, UnicodeNameInfo(L"there4", L"Therefore")),
255 make_pair(0x00002235, UnicodeNameInfo(L"becaus", L"Because")),
256 make_pair(0x0000223C, UnicodeNameInfo(L"sim", L"Tilde")),
257 make_pair(0x0000223D, UnicodeNameInfo(L"bsim", L"backsim")),
258 make_pair(0x00002240, UnicodeNameInfo(L"wr", L"VerticalTilde")),
259 make_pair(0x00002241, UnicodeNameInfo(L"nsim", L"NotTilde")),
260 make_pair(0x00002242, UnicodeNameInfo(L"esim", L"EqualTilde")),
261 make_pair(0x00002243, UnicodeNameInfo(L"sime", L"TildeEqual")),
262 make_pair(0x00002244, UnicodeNameInfo(L"nsime", L"NotTildeEqual")),
263 make_pair(0x00002245, UnicodeNameInfo(L"cong", L"TildeFullEqual")),
264 make_pair(0x00002247, UnicodeNameInfo(L"ncong", L"NotTildeFullEqual")),
265 make_pair(0x00002248, UnicodeNameInfo(L"ap", L"TildeTilde")),
266 make_pair(0x00002249, UnicodeNameInfo(L"nap", L"NotTildeTilde")),
267 make_pair(0x0000224A, UnicodeNameInfo(L"ape", L"approxeq")),
268 make_pair(0x0000224E, UnicodeNameInfo(L"bump", L"HumpDownHump")),
269 make_pair(0x0000224F, UnicodeNameInfo(L"nbump", L"NotHumpDownHump")),
270 make_pair(0x00002250, UnicodeNameInfo(L"esdot", L"DotEqual")),
271 make_pair(0x00002251, UnicodeNameInfo(L"eDot", L"doteqdot")),
272 make_pair(0x00002252, UnicodeNameInfo(L"efDot", L"fallingdotseq")),
273 make_pair(0x00002253, UnicodeNameInfo(L"erDot", L"risingdotseq")),
274 make_pair(0x00002256, UnicodeNameInfo(L"ecir", L"eqcirc")),
275 make_pair(0x00002257, UnicodeNameInfo(L"cire", L"circeq")),
276 make_pair(0x0000225C, UnicodeNameInfo(L"trie", L"triangleq")),
277 make_pair(0x00002260, UnicodeNameInfo(L"ne", L"NotEqual")),
278 make_pair(0x00002261, UnicodeNameInfo(L"equiv", L"Congruent")),
279 make_pair(0x00002262, UnicodeNameInfo(L"nequiv", L"NotCongruent")),
280 make_pair(0x00002264, UnicodeNameInfo(L"le", L"leq")),
281 make_pair(0x00002265, UnicodeNameInfo(L"ge", L"GreaterEqual")),
282 make_pair(0x00002266, UnicodeNameInfo(L"lE", L"LessFullEqual")),
283 make_pair(0x00002267, UnicodeNameInfo(L"gE", L"GreaterFullEqual")),
284 make_pair(0x00002268, UnicodeNameInfo(L"lnE", L"lneqq")),
285 make_pair(0x00002269, UnicodeNameInfo(L"gnE", L"gneqq")),
286 make_pair(0x0000226A, UnicodeNameInfo(L"Lt", L"NestedLessLess")),
287 make_pair(0x0000226B, UnicodeNameInfo(L"Gt", L"NestedGreaterGreater")),
288 make_pair(0x0000226C, UnicodeNameInfo(L"twixt", L"between")),
289 make_pair(0x0000226E, UnicodeNameInfo(L"nlt", L"NotLess")),
290 make_pair(0x0000226F, UnicodeNameInfo(L"ngt", L"NotGreater")),
291 make_pair(0x00002270, UnicodeNameInfo(L"nle", L"NotLessEqual")),
292 make_pair(0x00002271, UnicodeNameInfo(L"nge", L"NotGreaterEqual")),
293 make_pair(0x00002272, UnicodeNameInfo(L"lsim", L"LessTilde")),
294 make_pair(0x00002273, UnicodeNameInfo(L"gsim", L"GreaterTilde")),
295 make_pair(0x00002276, UnicodeNameInfo(L"lg", L"LessGreater")),
296 make_pair(0x00002277, UnicodeNameInfo(L"gl", L"GreaterLess")),
297 make_pair(0x0000227A, UnicodeNameInfo(L"pr", L"Precedes")),
298 make_pair(0x0000227B, UnicodeNameInfo(L"sc", L"Succeeds")),
299 make_pair(0x0000227C, UnicodeNameInfo(L"prcue", L"PrecedesSlantEqual")),
300 make_pair(0x0000227D, UnicodeNameInfo(L"sccue", L"SucceedsSlantEqual")),
301 make_pair(0x0000227E, UnicodeNameInfo(L"prsim", L"PrecedesTilde")),
302 make_pair(0x0000227F, UnicodeNameInfo(L"scsim", L"SucceedsTilde")),
303 make_pair(0x00002280, UnicodeNameInfo(L"npr", L"NotPrecedes")),
304 make_pair(0x00002281, UnicodeNameInfo(L"nsc", L"NotSucceeds")),
305 make_pair(0x00002282, UnicodeNameInfo(L"sub", L"subset")),
306 make_pair(0x00002283, UnicodeNameInfo(L"sup", L"supset")),
307 make_pair(0x00002284, UnicodeNameInfo(L"nsub")),
308 make_pair(0x00002285, UnicodeNameInfo(L"nsup")),
309 make_pair(0x00002286, UnicodeNameInfo(L"sube", L"SubsetEqual")),
310 make_pair(0x00002287, UnicodeNameInfo(L"supe", L"SupersetEqual")),
311 make_pair(0x00002288, UnicodeNameInfo(L"nsube", L"NotSubsetEqual")),
312 make_pair(0x00002289, UnicodeNameInfo(L"nsupe", L"NotSupersetEqual")),
313 make_pair(0x0000228A, UnicodeNameInfo(L"subne", L"subsetneq")),
314 make_pair(0x0000228B, UnicodeNameInfo(L"supne", L"supsetneq")),
315 make_pair(0x0000228E, UnicodeNameInfo(L"uplus", L"UnionPlus")),
316 make_pair(0x0000228F, UnicodeNameInfo(L"sqsub", L"SquareSubset")),
317 make_pair(0x00002290, UnicodeNameInfo(L"sqsup", L"SquareSuperset")),
318 make_pair(0x00002291, UnicodeNameInfo(L"sqsube", L"SquareSubsetEqual")),
319 make_pair(0x00002292, UnicodeNameInfo(L"sqsupe", L"SquareSupersetEqual")),
320 make_pair(0x00002293, UnicodeNameInfo(L"sqcap", L"SquareIntersection")),
321 make_pair(0x00002294, UnicodeNameInfo(L"sqcup", L"SquareUnion")),
322 make_pair(0x00002295, UnicodeNameInfo(L"oplus", L"CirclePlus")),
323 make_pair(0x00002296, UnicodeNameInfo(L"ominus", L"CircleMinus")),
324 make_pair(0x00002297, UnicodeNameInfo(L"otimes", L"CircleTimes")),
325 make_pair(0x00002298, UnicodeNameInfo(L"osol")),
326 make_pair(0x00002299, UnicodeNameInfo(L"odot", L"CircleDot")),
327 make_pair(0x0000229A, UnicodeNameInfo(L"ocir", L"circledcirc")),
328 make_pair(0x0000229B, UnicodeNameInfo(L"oast", L"circledast")),
329 make_pair(0x0000229D, UnicodeNameInfo(L"odash", L"circleddash")),
330 make_pair(0x0000229E, UnicodeNameInfo(L"plusb", L"boxplus")),
331 make_pair(0x0000229F, UnicodeNameInfo(L"minusb", L"boxminus")),
332 make_pair(0x000022A0, UnicodeNameInfo(L"timesb", L"boxtimes")),
333 make_pair(0x000022A1, UnicodeNameInfo(L"sdotb", L"dotsquare")),
334 make_pair(0x000022A2, UnicodeNameInfo(L"vdash", L"RightTee")),
335 make_pair(0x000022A3, UnicodeNameInfo(L"dashv", L"LeftTee")),
336 make_pair(0x000022A4, UnicodeNameInfo(L"top", L"DownTee")),
337 make_pair(0x000022A5, UnicodeNameInfo(L"bot", L"UpTee")),
338 make_pair(0x000022A7, UnicodeNameInfo(L"models")),
339 make_pair(0x000022A8, UnicodeNameInfo(L"vDash", L"DoubleRightTee")),
340 make_pair(0x000022A9, UnicodeNameInfo(L"Vdash")),
341 make_pair(0x000022AA, UnicodeNameInfo(L"Vvdash")),
342 make_pair(0x000022AC, UnicodeNameInfo(L"nvdash")),
343 make_pair(0x000022AD, UnicodeNameInfo(L"nvDash")),
344 make_pair(0x000022AE, UnicodeNameInfo(L"nVdash")),
345 make_pair(0x000022AF, UnicodeNameInfo(L"nVDash")),
346 make_pair(0x000022B2, UnicodeNameInfo(L"vltri", L"LeftTriangle")),
347 make_pair(0x000022B3, UnicodeNameInfo(L"vrtri", L"RightTriangle")),
348 make_pair(0x000022B4, UnicodeNameInfo(L"ltrie", L"LeftTriangleEqual")),
349 make_pair(0x000022B5, UnicodeNameInfo(L"rtrie", L"RightTriangleEqual")),
350 make_pair(0x000022B8, UnicodeNameInfo(L"mumap", L"multimap")),
351 make_pair(0x000022BA, UnicodeNameInfo(L"intcal", L"intercal")),
352 make_pair(0x000022BB, UnicodeNameInfo(L"veebar")),
353 make_pair(0x000022C0, UnicodeNameInfo(L"xwedge", L"Wedge")),
354 make_pair(0x000022C1, UnicodeNameInfo(L"xvee", L"Vee")),
355 make_pair(0x000022C2, UnicodeNameInfo(L"xcap", L"Intersection")),
356 make_pair(0x000022C3, UnicodeNameInfo(L"xcup", L"Union")),
357 make_pair(0x000022C4, UnicodeNameInfo(L"diam", L"Diamond")),
358 make_pair(0x000022C5, UnicodeNameInfo(L"sdot")),
359 make_pair(0x000022C6, UnicodeNameInfo(L"Star")),
360 make_pair(0x000022C7, UnicodeNameInfo(L"divonx", L"divideontimes")),
361 make_pair(0x000022C8, UnicodeNameInfo(L"bowtie")),
362 make_pair(0x000022C9, UnicodeNameInfo(L"ltimes")),
363 make_pair(0x000022CA, UnicodeNameInfo(L"rtimes")),
364 make_pair(0x000022CB, UnicodeNameInfo(L"lthree", L"leftthreetimes")),
365 make_pair(0x000022CC, UnicodeNameInfo(L"rthree", L"rightthreetimes")),
366 make_pair(0x000022CD, UnicodeNameInfo(L"bsime", L"backsimeq")),
367 make_pair(0x000022CE, UnicodeNameInfo(L"cuvee", L"curlyvee")),
368 make_pair(0x000022CF, UnicodeNameInfo(L"cuwed", L"curlywedge")),
369 make_pair(0x000022D0, UnicodeNameInfo(L"Sub", L"Subset")),
370 make_pair(0x000022D1, UnicodeNameInfo(L"Sup", L"Supset")),
371 make_pair(0x000022D2, UnicodeNameInfo(L"Cap")),
372 make_pair(0x000022D3, UnicodeNameInfo(L"Cup")),
373 make_pair(0x000022D4, UnicodeNameInfo(L"fork", L"pitchfork")),
374 make_pair(0x000022D6, UnicodeNameInfo(L"ltdot", L"lessdot")),
375 make_pair(0x000022D7, UnicodeNameInfo(L"gtdot", L"gtrdot")),
376 make_pair(0x000022D8, UnicodeNameInfo(L"Ll")),
377 make_pair(0x000022D9, UnicodeNameInfo(L"Gg")),
378 make_pair(0x000022DA, UnicodeNameInfo(L"leg", L"LessEqualGreater")),
379 make_pair(0x000022DB, UnicodeNameInfo(L"gel", L"GreaterEqualLess")),
380 make_pair(0x000022DE, UnicodeNameInfo(L"cuepr", L"curlyeqprec")),
381 make_pair(0x000022DF, UnicodeNameInfo(L"cuesc", L"curlyeqsucc")),
382 make_pair(0x000022E2, UnicodeNameInfo(L"nsqsube", L"NotSquareSubsetEqual")),
383 make_pair(0x000022E3, UnicodeNameInfo(L"nsqsupe", L"NotSquareSupersetEqual")),
384 make_pair(0x000022E6, UnicodeNameInfo(L"lnsim")),
385 make_pair(0x000022E7, UnicodeNameInfo(L"gnsim")),
386 make_pair(0x000022E8, UnicodeNameInfo(L"prnsim", L"precnsim")),
387 make_pair(0x000022E9, UnicodeNameInfo(L"scnsim", L"succnsim")),
388 make_pair(0x000022EA, UnicodeNameInfo(L"nltri", L"NotLeftTriangle")),
389 make_pair(0x000022EB, UnicodeNameInfo(L"nrtri", L"NotRightTriangle")),
390 make_pair(0x000022EC, UnicodeNameInfo(L"nltrie", L"NotLeftTriangleEqual")),
391 make_pair(0x000022ED, UnicodeNameInfo(L"nrtrie", L"NotRightTriangleEqual")),
392 make_pair(0x000022EE, UnicodeNameInfo(L"vellip")),
393 make_pair(0x000022EF, UnicodeNameInfo(L"ctdot")),
394 make_pair(0x000022F1, UnicodeNameInfo(L"dtdot")),
395 make_pair(0x00002305, UnicodeNameInfo(L"barwed", L"barwedge")),
396 make_pair(0x00002306, UnicodeNameInfo(L"Barwed", L"doublebarwedge")),
397 make_pair(0x00002308, UnicodeNameInfo(L"lceil", L"LeftCeiling")),
398 make_pair(0x00002309, UnicodeNameInfo(L"rceil", L"RightCeiling")),
399 make_pair(0x0000230A, UnicodeNameInfo(L"lfloor", L"LeftFloor")),
400 make_pair(0x0000230B, UnicodeNameInfo(L"rfloor", L"RightFloor")),
401 make_pair(0x0000231C, UnicodeNameInfo(L"ulcorn", L"ulcorner")),
402 make_pair(0x0000231D, UnicodeNameInfo(L"urcorn", L"urcorner")),
403 make_pair(0x0000231E, UnicodeNameInfo(L"dlcorn", L"llcorner")),
404 make_pair(0x0000231F, UnicodeNameInfo(L"drcorn", L"lrcorner")),
405 make_pair(0x00002322, UnicodeNameInfo(L"frown", L"sfrown")),
406 make_pair(0x00002323, UnicodeNameInfo(L"smile", L"ssmile")),
407 make_pair(0x00002329, UnicodeNameInfo(L"lang", L"LeftAngleBracket")),
408 make_pair(0x0000232A, UnicodeNameInfo(L"rang", L"RightAngleBracket")),
409 make_pair(0x000023B5, UnicodeNameInfo(L"bbrk", L"UnderBracket")),
410 make_pair(0x000024C8, UnicodeNameInfo(L"oS", L"circledS")),
411 make_pair(0x000025A1, UnicodeNameInfo(L"squ", L"Square")),
412 make_pair(0x000025B3, UnicodeNameInfo(L"xutri", L"bigtriangleup")),
413 make_pair(0x000025B4, UnicodeNameInfo(L"utrif", L"blacktriangle")),
414 make_pair(0x000025B5, UnicodeNameInfo(L"utri", L"triangle")),
415 make_pair(0x000025B6, UnicodeNameInfo()),
416 make_pair(0x000025B9, UnicodeNameInfo(L"rtri", L"triangleright")),
417 make_pair(0x000025BD, UnicodeNameInfo(L"xdtri", L"bigtriangledown")),
418 make_pair(0x000025BE, UnicodeNameInfo(L"dtrif", L"blacktriangledown")),
419 make_pair(0x000025BF, UnicodeNameInfo(L"dtri", L"triangledown")),
420 make_pair(0x000025C0, UnicodeNameInfo()),
421 make_pair(0x000025C3, UnicodeNameInfo(L"ltri", L"triangleleft")),
422 make_pair(0x000025CA, UnicodeNameInfo(L"loz", L"lozenge")),
423 make_pair(0x000025EF, UnicodeNameInfo(L"xcirc", L"bigcirc")),
424 make_pair(0x000025FC, UnicodeNameInfo(L"FilledSmallSquare")),
425 make_pair(0x00002605, UnicodeNameInfo(L"starf", L"bigstar")),
426 make_pair(0x00002660, UnicodeNameInfo(L"spades", L"spadesuit")),
427 make_pair(0x00002663, UnicodeNameInfo(L"clubs", L"clubsuit")),
428 make_pair(0x00002665, UnicodeNameInfo(L"hearts", L"heartsuit")),
429 make_pair(0x00002666, UnicodeNameInfo(L"diams", L"diamondsuit")),
430 make_pair(0x0000266D, UnicodeNameInfo(L"flat")),
431 make_pair(0x0000266E, UnicodeNameInfo(L"natur", L"natural")),
432 make_pair(0x0000266F, UnicodeNameInfo(L"sharp")),
433 make_pair(0x00002713, UnicodeNameInfo(L"check", L"checkmark")),
434 make_pair(0x00002720, UnicodeNameInfo(L"malt", L"maltese")),
435 make_pair(0x000027F5, UnicodeNameInfo(L"xlarr", L"LongLeftArrow")),
436 make_pair(0x000027F6, UnicodeNameInfo(L"xrarr", L"LongRightArrow")),
437 make_pair(0x000027F7, UnicodeNameInfo(L"xharr", L"LongLeftRightArrow")),
438 make_pair(0x000027F8, UnicodeNameInfo(L"xlArr", L"DoubleLongLeftArrow")),
439 make_pair(0x000027F9, UnicodeNameInfo(L"xrArr", L"DoubleLongRightArrow")),
440 make_pair(0x000027FA, UnicodeNameInfo(L"xhArr", L"DoubleLongLeftRightArrow")),
441 make_pair(0x000027FC, UnicodeNameInfo(L"xmap", L"longMapsto")),
442 make_pair(0x0000290E, UnicodeNameInfo(L"lBarr")),
443 make_pair(0x0000290F, UnicodeNameInfo(L"rBarr", L"dbkarow")),
444 make_pair(0x000029EB, UnicodeNameInfo(L"lozf", L"blacklozenge")),
445 make_pair(0x00002A00, UnicodeNameInfo(L"xodot", L"bigodot")),
446 make_pair(0x00002A01, UnicodeNameInfo(L"xoplus", L"bigoplus")),
447 make_pair(0x00002A02, UnicodeNameInfo(L"xotime", L"bigotimes")),
448 make_pair(0x00002A04, UnicodeNameInfo(L"xuplus", L"biguplus")),
449 make_pair(0x00002A06, UnicodeNameInfo(L"xsqcup", L"bigsqcup")),
450 make_pair(0x00002A0C, UnicodeNameInfo(L"qint", L"iiiint")),
451 make_pair(0x00002A2F, UnicodeNameInfo(L"Cross")),
452 make_pair(0x00002A3F, UnicodeNameInfo(L"amalg")),
453 make_pair(0x00002A7D, UnicodeNameInfo(L"les", L"LessSlantEqual")),
454 make_pair(0x00002A7E, UnicodeNameInfo(L"ges", L"GreaterSlantEqual")),
455 make_pair(0x00002A85, UnicodeNameInfo(L"lap", L"lessapprox")),
456 make_pair(0x00002A86, UnicodeNameInfo(L"gap", L"gtrapprox")),
457 make_pair(0x00002A89, UnicodeNameInfo(L"lnap", L"lnapprox")),
458 make_pair(0x00002A8A, UnicodeNameInfo(L"gnap", L"gnapprox")),
459 make_pair(0x00002A8B, UnicodeNameInfo(L"lEg", L"lesseqqgtr")),
460 make_pair(0x00002A8C, UnicodeNameInfo(L"gEl", L"gtreqqless")),
461 make_pair(0x00002A95, UnicodeNameInfo(L"els", L"eqslantless")),
462 make_pair(0x00002A96, UnicodeNameInfo(L"egs", L"eqslantgtr")),
463 make_pair(0x00002AAF, UnicodeNameInfo(L"pre", L"PrecedesEqual")),
464 make_pair(0x00002AB0, UnicodeNameInfo(L"sce", L"SucceedsEqual")),
465 make_pair(0x00002AB5, UnicodeNameInfo(L"prnE", L"precneqq")),
466 make_pair(0x00002AB6, UnicodeNameInfo(L"scnE", L"succneqq")),
467 make_pair(0x00002AB7, UnicodeNameInfo(L"prap", L"precapprox")),
468 make_pair(0x00002AB8, UnicodeNameInfo(L"scap", L"succapprox")),
469 make_pair(0x00002AB9, UnicodeNameInfo(L"prnap", L"precnapprox")),
470 make_pair(0x00002ABA, UnicodeNameInfo(L"scnap", L"succnapprox")),
471 make_pair(0x00002AC5, UnicodeNameInfo(L"subE", L"subseteqq")),
472 make_pair(0x00002AC6, UnicodeNameInfo(L"supE", L"supseteqq")),
473 make_pair(0x00002ACB, UnicodeNameInfo(L"subnE", L"subsetneqq")),
474 make_pair(0x00002ACC, UnicodeNameInfo(L"supnE", L"supsetneqq")),
475 make_pair(0x0000FE00, UnicodeNameInfo()), // FIX: think about this combining character...
476 make_pair(0x0000FE37, UnicodeNameInfo(L"OverBrace")),
477 make_pair(0x0000FE38, UnicodeNameInfo(L"UnderBrace")),
478 make_pair(0x0001D49C, UnicodeNameInfo(L"Ascr")),
479 make_pair(0x0001D49E, UnicodeNameInfo(L"Cscr")),
480 make_pair(0x0001D49F, UnicodeNameInfo(L"Dscr")),
481 make_pair(0x0001D4A2, UnicodeNameInfo(L"Gscr")),
482 make_pair(0x0001D4A5, UnicodeNameInfo(L"Jscr")),
483 make_pair(0x0001D4A6, UnicodeNameInfo(L"Kscr")),
484 make_pair(0x0001D4A9, UnicodeNameInfo(L"Nscr")),
485 make_pair(0x0001D4AA, UnicodeNameInfo(L"Oscr")),
486 make_pair(0x0001D4AB, UnicodeNameInfo(L"Pscr")),
487 make_pair(0x0001D4AC, UnicodeNameInfo(L"Qscr")),
488 make_pair(0x0001D4AE, UnicodeNameInfo(L"Sscr")),
489 make_pair(0x0001D4AF, UnicodeNameInfo(L"Tscr")),
490 make_pair(0x0001D4B0, UnicodeNameInfo(L"Uscr")),
491 make_pair(0x0001D4B1, UnicodeNameInfo(L"Vscr")),
492 make_pair(0x0001D4B2, UnicodeNameInfo(L"Wscr")),
493 make_pair(0x0001D4B3, UnicodeNameInfo(L"Xscr")),
494 make_pair(0x0001D4B4, UnicodeNameInfo(L"Yscr")),
495 make_pair(0x0001D4B5, UnicodeNameInfo(L"Zscr")),
496 make_pair(0x0001D4D0, UnicodeNameInfo()), // mathematical bold script capitals
497 make_pair(0x0001D4D1, UnicodeNameInfo()),
498 make_pair(0x0001D4D2, UnicodeNameInfo()),
499 make_pair(0x0001D4D3, UnicodeNameInfo()),
500 make_pair(0x0001D4D4, UnicodeNameInfo()),
501 make_pair(0x0001D4D5, UnicodeNameInfo()),
502 make_pair(0x0001D4D6, UnicodeNameInfo()),
503 make_pair(0x0001D4D7, UnicodeNameInfo()),
504 make_pair(0x0001D4D8, UnicodeNameInfo()),
505 make_pair(0x0001D4D9, UnicodeNameInfo()),
506 make_pair(0x0001D4DA, UnicodeNameInfo()),
507 make_pair(0x0001D4DB, UnicodeNameInfo()),
508 make_pair(0x0001D4DC, UnicodeNameInfo()),
509 make_pair(0x0001D4DD, UnicodeNameInfo()),
510 make_pair(0x0001D4DE, UnicodeNameInfo()),
511 make_pair(0x0001D4DF, UnicodeNameInfo()),
512 make_pair(0x0001D4E0, UnicodeNameInfo()),
513 make_pair(0x0001D4E1, UnicodeNameInfo()),
514 make_pair(0x0001D4E2, UnicodeNameInfo()),
515 make_pair(0x0001D4E3, UnicodeNameInfo()),
516 make_pair(0x0001D4E4, UnicodeNameInfo()),
517 make_pair(0x0001D4E5, UnicodeNameInfo()),
518 make_pair(0x0001D4E6, UnicodeNameInfo()),
519 make_pair(0x0001D4E7, UnicodeNameInfo()),
520 make_pair(0x0001D4E8, UnicodeNameInfo()),
521 make_pair(0x0001D4E9, UnicodeNameInfo()),
522 make_pair(0x0001D504, UnicodeNameInfo(L"Afr")),
523 make_pair(0x0001D505, UnicodeNameInfo(L"Bfr")),
524 make_pair(0x0001D507, UnicodeNameInfo(L"Dfr")),
525 make_pair(0x0001D508, UnicodeNameInfo(L"Efr")),
526 make_pair(0x0001D509, UnicodeNameInfo(L"Ffr")),
527 make_pair(0x0001D50A, UnicodeNameInfo(L"Gfr")),
528 make_pair(0x0001D50D, UnicodeNameInfo(L"Jfr")),
529 make_pair(0x0001D50E, UnicodeNameInfo(L"Kfr")),
530 make_pair(0x0001D50F, UnicodeNameInfo(L"Lfr")),
531 make_pair(0x0001D510, UnicodeNameInfo(L"Mfr")),
532 make_pair(0x0001D511, UnicodeNameInfo(L"Nfr")),
533 make_pair(0x0001D512, UnicodeNameInfo(L"Ofr")),
534 make_pair(0x0001D513, UnicodeNameInfo(L"Pfr")),
535 make_pair(0x0001D514, UnicodeNameInfo(L"Qfr")),
536 make_pair(0x0001D516, UnicodeNameInfo(L"Sfr")),
537 make_pair(0x0001D517, UnicodeNameInfo(L"Tfr")),
538 make_pair(0x0001D518, UnicodeNameInfo(L"Ufr")),
539 make_pair(0x0001D519, UnicodeNameInfo(L"Vfr")),
540 make_pair(0x0001D51A, UnicodeNameInfo(L"Wfr")),
541 make_pair(0x0001D51B, UnicodeNameInfo(L"Xfr")),
542 make_pair(0x0001D51C, UnicodeNameInfo(L"Yfr")),
543 make_pair(0x0001D51E, UnicodeNameInfo(L"afr")),
544 make_pair(0x0001D51F, UnicodeNameInfo(L"bfr")),
545 make_pair(0x0001D520, UnicodeNameInfo(L"cfr")),
546 make_pair(0x0001D521, UnicodeNameInfo(L"dfr")),
547 make_pair(0x0001D522, UnicodeNameInfo(L"efr")),
548 make_pair(0x0001D523, UnicodeNameInfo(L"ffr")),
549 make_pair(0x0001D524, UnicodeNameInfo(L"gfr")),
550 make_pair(0x0001D525, UnicodeNameInfo(L"hfr")),
551 make_pair(0x0001D526, UnicodeNameInfo(L"ifr")),
552 make_pair(0x0001D527, UnicodeNameInfo(L"jfr")),
553 make_pair(0x0001D528, UnicodeNameInfo(L"kfr")),
554 make_pair(0x0001D529, UnicodeNameInfo(L"lfr")),
555 make_pair(0x0001D52A, UnicodeNameInfo(L"mfr")),
556 make_pair(0x0001D52B, UnicodeNameInfo(L"nfr")),
557 make_pair(0x0001D52C, UnicodeNameInfo(L"ofr")),
558 make_pair(0x0001D52D, UnicodeNameInfo(L"pfr")),
559 make_pair(0x0001D52E, UnicodeNameInfo(L"qfr")),
560 make_pair(0x0001D52F, UnicodeNameInfo(L"rfr")),
561 make_pair(0x0001D530, UnicodeNameInfo(L"sfr")),
562 make_pair(0x0001D531, UnicodeNameInfo(L"tfr")),
563 make_pair(0x0001D532, UnicodeNameInfo(L"ufr")),
564 make_pair(0x0001D533, UnicodeNameInfo(L"vfr")),
565 make_pair(0x0001D534, UnicodeNameInfo(L"wfr")),
566 make_pair(0x0001D535, UnicodeNameInfo(L"xfr")),
567 make_pair(0x0001D536, UnicodeNameInfo(L"yfr")),
568 make_pair(0x0001D537, UnicodeNameInfo(L"zfr")),
569 make_pair(0x0001D538, UnicodeNameInfo(L"Aopf")),
570 make_pair(0x0001D539, UnicodeNameInfo(L"Bopf")),
571 make_pair(0x0001D53B, UnicodeNameInfo(L"Dopf")),
572 make_pair(0x0001D53C, UnicodeNameInfo(L"Eopf")),
573 make_pair(0x0001D53D, UnicodeNameInfo(L"Fopf")),
574 make_pair(0x0001D53E, UnicodeNameInfo(L"Gopf")),
575 make_pair(0x0001D540, UnicodeNameInfo(L"Iopf")),
576 make_pair(0x0001D541, UnicodeNameInfo(L"Jopf")),
577 make_pair(0x0001D542, UnicodeNameInfo(L"Kopf")),
578 make_pair(0x0001D543, UnicodeNameInfo(L"Lopf")),
579 make_pair(0x0001D544, UnicodeNameInfo(L"Mopf")),
580 make_pair(0x0001D546, UnicodeNameInfo(L"Oopf")),
581 make_pair(0x0001D54A, UnicodeNameInfo(L"Sopf")),
582 make_pair(0x0001D54B, UnicodeNameInfo(L"Topf")),
583 make_pair(0x0001D54C, UnicodeNameInfo(L"Uopf")),
584 make_pair(0x0001D54D, UnicodeNameInfo(L"Vopf")),
585 make_pair(0x0001D54E, UnicodeNameInfo(L"Wopf")),
586 make_pair(0x0001D54F, UnicodeNameInfo(L"Xopf")),
587 make_pair(0x0001D550, UnicodeNameInfo(L"Yopf")),
588 make_pair(0x0001D55C, UnicodeNameInfo(L"kopf")),
589 make_pair(0x0001D6A5, UnicodeNameInfo())
590 };
591
592 wishful_hash_map<uint32_t, UnicodeNameInfo> gUnicodeNameTable(
593 gUnicodeNameArray,
594 END_ARRAY(gUnicodeNameArray)
595 );
596
597
// FIX:
// Combining characters still need to be researched and handled
// deliberately. In particular, it is unclear whether the current strategy
// works for *named* entities together with combining characters.
602
603
604 // XmlEncode() handles conversion of non-ASCII characters to entities.
605 // It uses the "options" parameter and gUnicodeNameTable to decide how to
606 // translate each character.
XmlEncode(const wstring & input,const EncodingOptions & options)607 wstring XmlEncode(
608 const wstring& input,
609 const EncodingOptions& options
610 )
611 {
612 wostringstream os;
613 #ifdef WCHAR_T_IS_16BIT
614 wchar_t surrogate_upper = 0;
615 #endif
616
617 for (wstring::const_iterator
618 ptr = input.begin(); ptr != input.end(); ptr++
619 )
620 {
621 if (*ptr == L'&')
622 os << L"&";
623 else if (*ptr == L'<')
624 os << L"<";
625 else if (*ptr == L'>')
626 os << L">";
627 else if (*ptr <= 0x7F)
628 os << *ptr;
629 #ifdef WCHAR_T_IS_16BIT
630 else if (static_cast<wchar_t>(0xD800) <= *ptr &&
631 *ptr < static_cast<wchar_t>(0xDC00)) {
632 surrogate_upper = *ptr;
633 continue;
634 }
635 #endif
636 else
637 {
638 uint32_t chara = (uint32_t)*ptr;
639 #ifdef WCHAR_T_IS_16BIT
640 if (0xDC00 <= chara && chara < 0xDF00) {
641 if (surrogate_upper == 0) {
642 continue;
643 }
644 chara &= 0x3FF;
645 chara |= ((uint32_t)surrogate_upper & 0x000003FF) << 10;
646 chara += 0x00010000;
647 }
648 #endif
649 wishful_hash_map<uint32_t, UnicodeNameInfo>::const_iterator
650 search = gUnicodeNameTable.find(chara);
651
652 if (search == gUnicodeNameTable.end())
653 {
654 if (options.mOtherEncodingRaw) {
655 #ifdef WCHAR_T_IS_16BIT
656 if (surrogate_upper)
657 os << surrogate_upper;
658 #endif
659 os << *ptr;
660 }
661 else
662 os << L"&#x" << hex << chara << L";";
663 }
664 else
665 {
666 EncodingOptions::MathmlEncoding encoding
667 = options.mMathmlEncoding;
668
669 // Deal with plane-1 characters.
670 if (!options.mAllowPlane1 && chara >= 0x10000 &&
671 (
672 encoding == EncodingOptions::cMathmlEncodingNumeric
673 ||
674 encoding == EncodingOptions::cMathmlEncodingRaw
675 )
676 )
677 {
678 encoding = EncodingOptions::cMathmlEncodingShort;
679 }
680
681 // Notice the missing "break"s in this switch statement.
682 // We are falling back on other encoding methods if certain
683 // ones aren't available.
684 switch (encoding)
685 {
686 case EncodingOptions::cMathmlEncodingLong:
687 if (!search->second.mLongName.empty())
688 {
689 os << L"&" << search->second.mLongName << L";";
690 break;
691 }
692
693 case EncodingOptions::cMathmlEncodingShort:
694 if (!search->second.mShortName.empty())
695 {
696 os << L"&" << search->second.mShortName << L";";
697 break;
698 }
699
700 case EncodingOptions::cMathmlEncodingNumeric:
701 os << L"&#x" << hex << chara << L";";
702 break;
703
704 case EncodingOptions::cMathmlEncodingRaw:
705 #ifdef WCHAR_T_IS_16BIT
706 if (surrogate_upper)
707 os << surrogate_upper;
708 #endif
709 os << *ptr;
710 break;
711 }
712
713 }
714 }
715 #ifdef WCHAR_T_IS_16BIT
716 surrogate_upper = 0;
717 #endif
718 }
719
720 return os.str();
721 }
722
723 }
724
725 // end of file @@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
726