1 /*
2 blahtex: a TeX to MathML converter designed with MediaWiki in mind
3 blahtexml: an extension of blahtex with XML processing in mind
4 http://gva.noekeon.org/blahtexml
5 
6 Copyright (c) 2006, David Harvey
7 All rights reserved.
8 
9 Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met:
10 
11     * Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer.
12     * Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution.
13     * Neither the names of the authors nor the names of their affiliation may be used to endorse or promote products derived from this software without specific prior written permission.
14 
15 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
16 */
17 
18 #include <iomanip>
19 #include <sstream>
20 #include <map>
21 #include <stdint.h>
22 #include "XmlEncode.h"
23 
24 using namespace std;
25 
26 namespace blahtex
27 {
28 
// Holds the MathML names known for one character. Either name may be
// empty, meaning that no name of that kind was supplied for the character.
struct UnicodeNameInfo
{
    // Short name (e.g. L"nbsp"); empty if none was supplied.
    wstring mShortName;

    // Long name (e.g. L"NonBreakingSpace"); empty if none was supplied.
    wstring mLongName;

    // Creates an entry with both names empty.
    UnicodeNameInfo()
    { }

    // Creates an entry with only a short name; the long name stays empty.
    // Intentionally left non-explicit so that any existing implicit
    // conversions from wstring keep compiling.
    UnicodeNameInfo(
        const wstring& shortName
    ) :
        mShortName(shortName)
    { }

    // Creates an entry with both a short and a long name.
    UnicodeNameInfo(
        const wstring& shortName,
        const wstring& longName
    ) :
        mShortName(shortName),
        mLongName(longName)
    { }
};
51 
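// This table lists the characters (almost all of them non-ASCII) that
// blahtex can give names to. Each entry pairs a character code with an
// optional short MathML name and an optional long MathML name; entries
// built with UnicodeNameInfo() have neither.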
54 pair<uint32_t, UnicodeNameInfo> gUnicodeNameArray[] =
55 {
56     make_pair(0x00000060, UnicodeNameInfo(L"grave", L"DiacriticalGrave")),
57     make_pair(0x000000A0, UnicodeNameInfo(L"nbsp", L"NonBreakingSpace")),
58     make_pair(0x000000A5, UnicodeNameInfo(L"yen")),
59     make_pair(0x000000A7, UnicodeNameInfo(L"sect")),
60     make_pair(0x000000AC, UnicodeNameInfo(L"not")),
61     make_pair(0x000000AE, UnicodeNameInfo(L"reg", L"circledR")),
62     make_pair(0x000000AF, UnicodeNameInfo(L"macr", L"OverBar")),
63     make_pair(0x000000B1, UnicodeNameInfo(L"pm", L"PlusMinus")),
64     make_pair(0x000000B4, UnicodeNameInfo(L"acute", L"DiacriticalAcute")),
65     make_pair(0x000000B6, UnicodeNameInfo(L"para")),
66     make_pair(0x000000B7, UnicodeNameInfo(L"middot", L"CenterDot")),
67     make_pair(0x000000C5, UnicodeNameInfo(L"Aring")),
68     make_pair(0x000000D7, UnicodeNameInfo(L"times")),
69     make_pair(0x000000D8, UnicodeNameInfo(L"Oslash")),
70     make_pair(0x000000E5, UnicodeNameInfo(L"aring")),
71     make_pair(0x000000F0, UnicodeNameInfo(L"eth")),
72     make_pair(0x000000F7, UnicodeNameInfo(L"div", L"divide")),
73     make_pair(0x00000127, UnicodeNameInfo(L"hstrok")),
74     make_pair(0x00000131, UnicodeNameInfo(L"imath")),
75     make_pair(0x000002C7, UnicodeNameInfo(L"caron", L"Hacek")),
76     make_pair(0x000002D8, UnicodeNameInfo(L"breve", L"Breve")),
77     make_pair(0x000002DC, UnicodeNameInfo(L"tilde", L"DiacriticalTilde")),
78     make_pair(0x00000338, UnicodeNameInfo()),     // FIX: combining character that needs some thought
79     make_pair(0x00000393, UnicodeNameInfo(L"Gamma")),
80     make_pair(0x00000394, UnicodeNameInfo(L"Delta")),
81     make_pair(0x00000398, UnicodeNameInfo(L"Theta")),
82     make_pair(0x0000039B, UnicodeNameInfo(L"Lambda")),
83     make_pair(0x0000039E, UnicodeNameInfo(L"Xi")),
84     make_pair(0x000003A0, UnicodeNameInfo(L"Pi")),
85     make_pair(0x000003A3, UnicodeNameInfo(L"Sigma")),
86     make_pair(0x000003A5, UnicodeNameInfo(L"Upsilon")),
87     make_pair(0x000003A6, UnicodeNameInfo(L"Phi")),
88     make_pair(0x000003A8, UnicodeNameInfo(L"Psi")),
89     make_pair(0x000003A9, UnicodeNameInfo(L"Omega")),
90     make_pair(0x000003B1, UnicodeNameInfo(L"alpha")),
91     make_pair(0x000003B2, UnicodeNameInfo(L"beta")),
92     make_pair(0x000003B3, UnicodeNameInfo(L"gamma")),
93     make_pair(0x000003B4, UnicodeNameInfo(L"delta")),
94     make_pair(0x000003B5, UnicodeNameInfo(L"epsiv", L"varepsilon")),
95     make_pair(0x000003B6, UnicodeNameInfo(L"zeta")),
96     make_pair(0x000003B7, UnicodeNameInfo(L"eta")),
97     make_pair(0x000003B8, UnicodeNameInfo(L"theta")),
98     make_pair(0x000003B9, UnicodeNameInfo(L"iota")),
99     make_pair(0x000003BA, UnicodeNameInfo(L"kappa")),
100     make_pair(0x000003BB, UnicodeNameInfo(L"lambda")),
101     make_pair(0x000003BC, UnicodeNameInfo(L"mu")),
102     make_pair(0x000003BD, UnicodeNameInfo(L"nu")),
103     make_pair(0x000003BE, UnicodeNameInfo(L"xi")),
104     make_pair(0x000003C0, UnicodeNameInfo(L"pi")),
105     make_pair(0x000003C1, UnicodeNameInfo(L"rho")),
106     make_pair(0x000003C2, UnicodeNameInfo(L"sigmav", L"varsigma")),
107     make_pair(0x000003C3, UnicodeNameInfo(L"sigma")),
108     make_pair(0x000003C4, UnicodeNameInfo(L"tau")),
109     make_pair(0x000003C5, UnicodeNameInfo(L"upsi", L"upsilon")),
110 #if 0
111     // FIX: note Firefox 1.5 has &phi; and &varphi; around the wrong
112     // way, so better to stick with numeric codes for 0x3C6 and 0x3D5.
113     // See mozilla bug 321438.
114     make_pair(0x000003C6, UnicodeNameInfo(L"phiv", L"varphi")),
115     make_pair(0x000003D5, UnicodeNameInfo(L"phi", L"straightphi")),
116 #endif
117     make_pair(0x000003C7, UnicodeNameInfo(L"chi")),
118     make_pair(0x000003C8, UnicodeNameInfo(L"psi")),
119     make_pair(0x000003C9, UnicodeNameInfo(L"omega")),
120     make_pair(0x000003D1, UnicodeNameInfo(L"thetav", L"vartheta")),
121     make_pair(0x000003D6, UnicodeNameInfo(L"piv", L"varpi")),
122     make_pair(0x000003DD, UnicodeNameInfo(L"gammad", L"digamma")),
123     make_pair(0x000003F0, UnicodeNameInfo(L"kappav", L"varkappa")),
124     make_pair(0x000003F1, UnicodeNameInfo(L"rhov", L"varrho")),
125     make_pair(0x000003F5, UnicodeNameInfo(L"epsi", L"straightepsilon")),
126     make_pair(0x000003F6, UnicodeNameInfo(L"bepsi", L"backepsilon")),
127     make_pair(0x00002020, UnicodeNameInfo(L"dagger")),
128     make_pair(0x00002021, UnicodeNameInfo(L"Dagger", L"ddagger")),
129     make_pair(0x00002022, UnicodeNameInfo(L"bull", L"bullet")),
130     make_pair(0x00002026, UnicodeNameInfo(L"hellip")),
131     make_pair(0x00002032, UnicodeNameInfo(L"prime")),
132     make_pair(0x00002035, UnicodeNameInfo(L"bprime", L"backprime")),
133     make_pair(0x00002102, UnicodeNameInfo(L"Copf", L"complexes")),
134     make_pair(0x0000210B, UnicodeNameInfo(L"Hscr", L"HilbertSpace")),
135     make_pair(0x0000210C, UnicodeNameInfo(L"Hfr", L"Poincareplane")),
136     make_pair(0x0000210D, UnicodeNameInfo(L"Hopf", L"quaternions")),
137     make_pair(0x0000210F, UnicodeNameInfo(L"hbar", L"planck")),
138     make_pair(0x00002110, UnicodeNameInfo(L"Iscr", L"imagline")),
139     make_pair(0x00002111, UnicodeNameInfo(L"Im", L"imagpart")),
140     make_pair(0x00002112, UnicodeNameInfo(L"Lscr", L"Laplacetrf")),
141     make_pair(0x00002113, UnicodeNameInfo(L"ell")),
142     make_pair(0x00002118, UnicodeNameInfo(L"wp", L"weierp")),
143     make_pair(0x00002119, UnicodeNameInfo(L"Popf", L"primes")),
144     make_pair(0x0000211A, UnicodeNameInfo(L"Qopf", L"rationals")),
145     make_pair(0x0000211B, UnicodeNameInfo(L"Rscr", L"realine")),
146     make_pair(0x0000211C, UnicodeNameInfo(L"Re", L"realpart")),
147     make_pair(0x0000211D, UnicodeNameInfo(L"Ropf", L"reals")),
148     make_pair(0x00002124, UnicodeNameInfo(L"Zopf", L"integers")),
149     make_pair(0x00002127, UnicodeNameInfo(L"mho")),
150     make_pair(0x00002128, UnicodeNameInfo(L"Zfr", L"zeetrf")),
151     make_pair(0x0000212C, UnicodeNameInfo(L"Bscr", L"Bernoullis")),
152     make_pair(0x0000212D, UnicodeNameInfo(L"Cfr", L"Cayleys")),
153     make_pair(0x00002130, UnicodeNameInfo(L"Escr", L"expectation")),
154     make_pair(0x00002131, UnicodeNameInfo(L"Fscr", L"Fouriertrf")),
155     make_pair(0x00002133, UnicodeNameInfo(L"Mscr", L"Mellintrf")),
156     make_pair(0x00002135, UnicodeNameInfo(L"aleph")),
157     make_pair(0x00002136, UnicodeNameInfo(L"beth")),
158     make_pair(0x00002137, UnicodeNameInfo(L"gimel")),
159     make_pair(0x00002138, UnicodeNameInfo(L"daleth")),
160     make_pair(0x00002190, UnicodeNameInfo(L"larr", L"LeftArrow")),
161     make_pair(0x00002191, UnicodeNameInfo(L"uarr", L"UpArrow")),
162     make_pair(0x00002192, UnicodeNameInfo(L"rarr", L"RightArrow")),
163     make_pair(0x00002193, UnicodeNameInfo(L"darr", L"DownArrow")),
164     make_pair(0x00002194, UnicodeNameInfo(L"harr", L"LeftRightArrow")),
165     make_pair(0x00002195, UnicodeNameInfo(L"varr", L"UpDownArrow")),
166     make_pair(0x00002196, UnicodeNameInfo(L"nwarr", L"UpperLeftArrow")),
167     make_pair(0x00002197, UnicodeNameInfo(L"nearr", L"UpperRightArrow")),
168     make_pair(0x00002198, UnicodeNameInfo(L"searr", L"LowerRightArrow")),
169     make_pair(0x00002199, UnicodeNameInfo(L"swarr", L"LowerLeftArrow")),
170     make_pair(0x0000219A, UnicodeNameInfo(L"nlarr", L"nleftarrow")),
171     make_pair(0x0000219B, UnicodeNameInfo(L"nrarr", L"nrightarrow")),
172     make_pair(0x0000219D, UnicodeNameInfo(L"rarrw", L"rightsquigarrow")),
173     make_pair(0x0000219E, UnicodeNameInfo(L"Larr", L"twoheadleftarrow")),
174     make_pair(0x000021A0, UnicodeNameInfo(L"Rarr", L"twoheadrightarrow")),
175     make_pair(0x000021A2, UnicodeNameInfo(L"larrtl", L"leftarrowtail")),
176     make_pair(0x000021A3, UnicodeNameInfo(L"rarrtl", L"rightarrowtail")),
177     make_pair(0x000021A6, UnicodeNameInfo(L"map", L"RightTeeArrow")),
178     make_pair(0x000021A9, UnicodeNameInfo(L"larrhk", L"hookleftarrow")),
179     make_pair(0x000021AA, UnicodeNameInfo(L"rarrhk", L"hookrightarrow")),
180     make_pair(0x000021AB, UnicodeNameInfo(L"larrlp", L"looparrowleft")),
181     make_pair(0x000021AC, UnicodeNameInfo(L"rarrlp", L"looparrowright")),
182     make_pair(0x000021AD, UnicodeNameInfo(L"harrw", L"leftrightsquigarrow")),
183     make_pair(0x000021AE, UnicodeNameInfo(L"nharr", L"nleftrightarrow")),
184     make_pair(0x000021B0, UnicodeNameInfo(L"lsh", L"Lsh")),
185     make_pair(0x000021B1, UnicodeNameInfo(L"rsh", L"Rsh")),
186     make_pair(0x000021B6, UnicodeNameInfo(L"cularr", L"curvearrowleft")),
187     make_pair(0x000021B7, UnicodeNameInfo(L"curarr", L"curvearrowright")),
188     make_pair(0x000021BA, UnicodeNameInfo(L"olarr", L"circlearrowleft")),
189     make_pair(0x000021BB, UnicodeNameInfo(L"orarr", L"circlearrowright")),
190     make_pair(0x000021BC, UnicodeNameInfo(L"lharu", L"leftharpoonup")),
191     make_pair(0x000021BD, UnicodeNameInfo(L"lhard", L"leftharpoondown")),
192     make_pair(0x000021BE, UnicodeNameInfo(L"uharr", L"upharpoonright")),
193     make_pair(0x000021BF, UnicodeNameInfo(L"uharl", L"upharpoonleft")),
194     make_pair(0x000021C0, UnicodeNameInfo(L"rharu", L"rightharpoonup")),
195     make_pair(0x000021C1, UnicodeNameInfo(L"rhard", L"rightharpoondown")),
196     make_pair(0x000021C2, UnicodeNameInfo(L"dharr", L"downharpoonright")),
197     make_pair(0x000021C3, UnicodeNameInfo(L"dharl", L"downharpoonleft")),
198     make_pair(0x000021C4, UnicodeNameInfo(L"rlarr", L"RightArrowLeftArrow")),
199     make_pair(0x000021C6, UnicodeNameInfo(L"lrarr", L"LeftArrowRightArrow")),
200     make_pair(0x000021C7, UnicodeNameInfo(L"llarr", L"leftleftarrows")),
201     make_pair(0x000021C8, UnicodeNameInfo(L"uuarr", L"upuparrows")),
202     make_pair(0x000021C9, UnicodeNameInfo(L"rrarr", L"rightrightarrows")),
203     make_pair(0x000021CA, UnicodeNameInfo(L"ddarr", L"downdownarrows")),
204     make_pair(0x000021CB, UnicodeNameInfo(L"lrhar", L"ReverseEquilibrium")),
205     make_pair(0x000021CC, UnicodeNameInfo(L"rlhar", L"Equilibrium")),
206     make_pair(0x000021CD, UnicodeNameInfo(L"nlArr", L"nLeftarrow")),
207     make_pair(0x000021CE, UnicodeNameInfo(L"nhArr", L"nLeftrightarrow")),
208     make_pair(0x000021CF, UnicodeNameInfo(L"nrArr", L"nRightarrow")),
209     make_pair(0x000021D0, UnicodeNameInfo(L"lArr", L"DoubleLeftArrow")),
210     make_pair(0x000021D1, UnicodeNameInfo(L"uArr", L"DoubleUpArrow")),
211     make_pair(0x000021D2, UnicodeNameInfo(L"rArr", L"DoubleRightArrow")),
212     make_pair(0x000021D3, UnicodeNameInfo(L"dArr", L"DoubleDownArrow")),
213     make_pair(0x000021D4, UnicodeNameInfo(L"hArr", L"DoubleLeftRightArrow")),
214     make_pair(0x000021D5, UnicodeNameInfo(L"vArr", L"DoubleUpDownArrow")),
215     make_pair(0x000021DA, UnicodeNameInfo(L"lAarr", L"Lleftarrow")),
216     make_pair(0x000021DB, UnicodeNameInfo(L"rAarr", L"Rrightarrow")),
217     make_pair(0x000021DD, UnicodeNameInfo(L"zigrarr")),
218     make_pair(0x00002200, UnicodeNameInfo(L"forall", L"ForAll")),
219     make_pair(0x00002201, UnicodeNameInfo(L"comp", L"complement")),
220     make_pair(0x00002202, UnicodeNameInfo(L"part", L"PartialD")),
221     make_pair(0x00002203, UnicodeNameInfo(L"exist", L"Exists")),
222     make_pair(0x00002204, UnicodeNameInfo(L"nexist", L"NotExists")),
223     make_pair(0x00002205, UnicodeNameInfo(L"empty", L"emptyset")),
224     make_pair(0x00002207, UnicodeNameInfo(L"nabla", L"Del")),
225     make_pair(0x00002208, UnicodeNameInfo(L"in", L"Element")),
226     make_pair(0x00002209, UnicodeNameInfo(L"notin", L"NotElement")),
227     make_pair(0x0000220B, UnicodeNameInfo(L"ni", L"ReverseElement")),
228     make_pair(0x0000220C, UnicodeNameInfo(L"notni", L"NotReverseElement")),
229     make_pair(0x0000220F, UnicodeNameInfo(L"prod", L"Product")),
230     make_pair(0x00002210, UnicodeNameInfo(L"coprod", L"Coproduct")),
231     make_pair(0x00002211, UnicodeNameInfo(L"sum", L"Sum")),
232     make_pair(0x00002213, UnicodeNameInfo(L"mp", L"MinusPlus")),
233     make_pair(0x00002214, UnicodeNameInfo(L"dotplus")),
234     make_pair(0x00002216, UnicodeNameInfo(L"setmn", L"Backslash")),
235     make_pair(0x00002218, UnicodeNameInfo(L"compfn", L"SmallCircle")),
236     make_pair(0x0000221A, UnicodeNameInfo(L"radic", L"Sqrt")),
237     make_pair(0x0000221D, UnicodeNameInfo(L"prop", L"Proportional")),
238     make_pair(0x0000221E, UnicodeNameInfo(L"infin")),
239     make_pair(0x00002220, UnicodeNameInfo(L"ang", L"angle")),
240     make_pair(0x00002221, UnicodeNameInfo(L"angmsd", L"measuredangle")),
241     make_pair(0x00002222, UnicodeNameInfo(L"angsph")),
242     make_pair(0x00002223, UnicodeNameInfo(L"mid", L"VerticalBar")),
243     make_pair(0x00002224, UnicodeNameInfo(L"nmid", L"NotVerticalBar")),
244     make_pair(0x00002225, UnicodeNameInfo(L"par", L"DoubleVerticalBar")),
245     make_pair(0x00002226, UnicodeNameInfo(L"npar", L"NotDoubleVerticalBar")),
246     make_pair(0x00002227, UnicodeNameInfo(L"and", L"wedge")),
247     make_pair(0x00002228, UnicodeNameInfo(L"or", L"vee")),
248     make_pair(0x00002229, UnicodeNameInfo(L"cap")),
249     make_pair(0x0000222A, UnicodeNameInfo(L"cup")),
250     make_pair(0x0000222B, UnicodeNameInfo(L"int", L"Integral")),
251     make_pair(0x0000222C, UnicodeNameInfo(L"Int")),
252     make_pair(0x0000222D, UnicodeNameInfo(L"tint", L"iiint")),
253     make_pair(0x0000222E, UnicodeNameInfo(L"conint", L"ContourIntegral")),
254     make_pair(0x00002234, UnicodeNameInfo(L"there4", L"Therefore")),
255     make_pair(0x00002235, UnicodeNameInfo(L"becaus", L"Because")),
256     make_pair(0x0000223C, UnicodeNameInfo(L"sim", L"Tilde")),
257     make_pair(0x0000223D, UnicodeNameInfo(L"bsim", L"backsim")),
258     make_pair(0x00002240, UnicodeNameInfo(L"wr", L"VerticalTilde")),
259     make_pair(0x00002241, UnicodeNameInfo(L"nsim", L"NotTilde")),
260     make_pair(0x00002242, UnicodeNameInfo(L"esim", L"EqualTilde")),
261     make_pair(0x00002243, UnicodeNameInfo(L"sime", L"TildeEqual")),
262     make_pair(0x00002244, UnicodeNameInfo(L"nsime", L"NotTildeEqual")),
263     make_pair(0x00002245, UnicodeNameInfo(L"cong", L"TildeFullEqual")),
264     make_pair(0x00002247, UnicodeNameInfo(L"ncong", L"NotTildeFullEqual")),
265     make_pair(0x00002248, UnicodeNameInfo(L"ap", L"TildeTilde")),
266     make_pair(0x00002249, UnicodeNameInfo(L"nap", L"NotTildeTilde")),
267     make_pair(0x0000224A, UnicodeNameInfo(L"ape", L"approxeq")),
268     make_pair(0x0000224E, UnicodeNameInfo(L"bump", L"HumpDownHump")),
269     make_pair(0x0000224F, UnicodeNameInfo(L"nbump", L"NotHumpDownHump")),
270     make_pair(0x00002250, UnicodeNameInfo(L"esdot", L"DotEqual")),
271     make_pair(0x00002251, UnicodeNameInfo(L"eDot", L"doteqdot")),
272     make_pair(0x00002252, UnicodeNameInfo(L"efDot", L"fallingdotseq")),
273     make_pair(0x00002253, UnicodeNameInfo(L"erDot", L"risingdotseq")),
274     make_pair(0x00002256, UnicodeNameInfo(L"ecir", L"eqcirc")),
275     make_pair(0x00002257, UnicodeNameInfo(L"cire", L"circeq")),
276     make_pair(0x0000225C, UnicodeNameInfo(L"trie", L"triangleq")),
277     make_pair(0x00002260, UnicodeNameInfo(L"ne", L"NotEqual")),
278     make_pair(0x00002261, UnicodeNameInfo(L"equiv", L"Congruent")),
279     make_pair(0x00002262, UnicodeNameInfo(L"nequiv", L"NotCongruent")),
280     make_pair(0x00002264, UnicodeNameInfo(L"le", L"leq")),
281     make_pair(0x00002265, UnicodeNameInfo(L"ge", L"GreaterEqual")),
282     make_pair(0x00002266, UnicodeNameInfo(L"lE", L"LessFullEqual")),
283     make_pair(0x00002267, UnicodeNameInfo(L"gE", L"GreaterFullEqual")),
284     make_pair(0x00002268, UnicodeNameInfo(L"lnE", L"lneqq")),
285     make_pair(0x00002269, UnicodeNameInfo(L"gnE", L"gneqq")),
286     make_pair(0x0000226A, UnicodeNameInfo(L"Lt", L"NestedLessLess")),
287     make_pair(0x0000226B, UnicodeNameInfo(L"Gt", L"NestedGreaterGreater")),
288     make_pair(0x0000226C, UnicodeNameInfo(L"twixt", L"between")),
289     make_pair(0x0000226E, UnicodeNameInfo(L"nlt", L"NotLess")),
290     make_pair(0x0000226F, UnicodeNameInfo(L"ngt", L"NotGreater")),
291     make_pair(0x00002270, UnicodeNameInfo(L"nle", L"NotLessEqual")),
292     make_pair(0x00002271, UnicodeNameInfo(L"nge", L"NotGreaterEqual")),
293     make_pair(0x00002272, UnicodeNameInfo(L"lsim", L"LessTilde")),
294     make_pair(0x00002273, UnicodeNameInfo(L"gsim", L"GreaterTilde")),
295     make_pair(0x00002276, UnicodeNameInfo(L"lg", L"LessGreater")),
296     make_pair(0x00002277, UnicodeNameInfo(L"gl", L"GreaterLess")),
297     make_pair(0x0000227A, UnicodeNameInfo(L"pr", L"Precedes")),
298     make_pair(0x0000227B, UnicodeNameInfo(L"sc", L"Succeeds")),
299     make_pair(0x0000227C, UnicodeNameInfo(L"prcue", L"PrecedesSlantEqual")),
300     make_pair(0x0000227D, UnicodeNameInfo(L"sccue", L"SucceedsSlantEqual")),
301     make_pair(0x0000227E, UnicodeNameInfo(L"prsim", L"PrecedesTilde")),
302     make_pair(0x0000227F, UnicodeNameInfo(L"scsim", L"SucceedsTilde")),
303     make_pair(0x00002280, UnicodeNameInfo(L"npr", L"NotPrecedes")),
304     make_pair(0x00002281, UnicodeNameInfo(L"nsc", L"NotSucceeds")),
305     make_pair(0x00002282, UnicodeNameInfo(L"sub", L"subset")),
306     make_pair(0x00002283, UnicodeNameInfo(L"sup", L"supset")),
307     make_pair(0x00002284, UnicodeNameInfo(L"nsub")),
308     make_pair(0x00002285, UnicodeNameInfo(L"nsup")),
309     make_pair(0x00002286, UnicodeNameInfo(L"sube", L"SubsetEqual")),
310     make_pair(0x00002287, UnicodeNameInfo(L"supe", L"SupersetEqual")),
311     make_pair(0x00002288, UnicodeNameInfo(L"nsube", L"NotSubsetEqual")),
312     make_pair(0x00002289, UnicodeNameInfo(L"nsupe", L"NotSupersetEqual")),
313     make_pair(0x0000228A, UnicodeNameInfo(L"subne", L"subsetneq")),
314     make_pair(0x0000228B, UnicodeNameInfo(L"supne", L"supsetneq")),
315     make_pair(0x0000228E, UnicodeNameInfo(L"uplus", L"UnionPlus")),
316     make_pair(0x0000228F, UnicodeNameInfo(L"sqsub", L"SquareSubset")),
317     make_pair(0x00002290, UnicodeNameInfo(L"sqsup", L"SquareSuperset")),
318     make_pair(0x00002291, UnicodeNameInfo(L"sqsube", L"SquareSubsetEqual")),
319     make_pair(0x00002292, UnicodeNameInfo(L"sqsupe", L"SquareSupersetEqual")),
320     make_pair(0x00002293, UnicodeNameInfo(L"sqcap", L"SquareIntersection")),
321     make_pair(0x00002294, UnicodeNameInfo(L"sqcup", L"SquareUnion")),
322     make_pair(0x00002295, UnicodeNameInfo(L"oplus", L"CirclePlus")),
323     make_pair(0x00002296, UnicodeNameInfo(L"ominus", L"CircleMinus")),
324     make_pair(0x00002297, UnicodeNameInfo(L"otimes", L"CircleTimes")),
325     make_pair(0x00002298, UnicodeNameInfo(L"osol")),
326     make_pair(0x00002299, UnicodeNameInfo(L"odot", L"CircleDot")),
327     make_pair(0x0000229A, UnicodeNameInfo(L"ocir", L"circledcirc")),
328     make_pair(0x0000229B, UnicodeNameInfo(L"oast", L"circledast")),
329     make_pair(0x0000229D, UnicodeNameInfo(L"odash", L"circleddash")),
330     make_pair(0x0000229E, UnicodeNameInfo(L"plusb", L"boxplus")),
331     make_pair(0x0000229F, UnicodeNameInfo(L"minusb", L"boxminus")),
332     make_pair(0x000022A0, UnicodeNameInfo(L"timesb", L"boxtimes")),
333     make_pair(0x000022A1, UnicodeNameInfo(L"sdotb", L"dotsquare")),
334     make_pair(0x000022A2, UnicodeNameInfo(L"vdash", L"RightTee")),
335     make_pair(0x000022A3, UnicodeNameInfo(L"dashv", L"LeftTee")),
336     make_pair(0x000022A4, UnicodeNameInfo(L"top", L"DownTee")),
337     make_pair(0x000022A5, UnicodeNameInfo(L"bot", L"UpTee")),
338     make_pair(0x000022A7, UnicodeNameInfo(L"models")),
339     make_pair(0x000022A8, UnicodeNameInfo(L"vDash", L"DoubleRightTee")),
340     make_pair(0x000022A9, UnicodeNameInfo(L"Vdash")),
341     make_pair(0x000022AA, UnicodeNameInfo(L"Vvdash")),
342     make_pair(0x000022AC, UnicodeNameInfo(L"nvdash")),
343     make_pair(0x000022AD, UnicodeNameInfo(L"nvDash")),
344     make_pair(0x000022AE, UnicodeNameInfo(L"nVdash")),
345     make_pair(0x000022AF, UnicodeNameInfo(L"nVDash")),
346     make_pair(0x000022B2, UnicodeNameInfo(L"vltri", L"LeftTriangle")),
347     make_pair(0x000022B3, UnicodeNameInfo(L"vrtri", L"RightTriangle")),
348     make_pair(0x000022B4, UnicodeNameInfo(L"ltrie", L"LeftTriangleEqual")),
349     make_pair(0x000022B5, UnicodeNameInfo(L"rtrie", L"RightTriangleEqual")),
350     make_pair(0x000022B8, UnicodeNameInfo(L"mumap", L"multimap")),
351     make_pair(0x000022BA, UnicodeNameInfo(L"intcal", L"intercal")),
352     make_pair(0x000022BB, UnicodeNameInfo(L"veebar")),
353     make_pair(0x000022C0, UnicodeNameInfo(L"xwedge", L"Wedge")),
354     make_pair(0x000022C1, UnicodeNameInfo(L"xvee", L"Vee")),
355     make_pair(0x000022C2, UnicodeNameInfo(L"xcap", L"Intersection")),
356     make_pair(0x000022C3, UnicodeNameInfo(L"xcup", L"Union")),
357     make_pair(0x000022C4, UnicodeNameInfo(L"diam", L"Diamond")),
358     make_pair(0x000022C5, UnicodeNameInfo(L"sdot")),
359     make_pair(0x000022C6, UnicodeNameInfo(L"Star")),
360     make_pair(0x000022C7, UnicodeNameInfo(L"divonx", L"divideontimes")),
361     make_pair(0x000022C8, UnicodeNameInfo(L"bowtie")),
362     make_pair(0x000022C9, UnicodeNameInfo(L"ltimes")),
363     make_pair(0x000022CA, UnicodeNameInfo(L"rtimes")),
364     make_pair(0x000022CB, UnicodeNameInfo(L"lthree", L"leftthreetimes")),
365     make_pair(0x000022CC, UnicodeNameInfo(L"rthree", L"rightthreetimes")),
366     make_pair(0x000022CD, UnicodeNameInfo(L"bsime", L"backsimeq")),
367     make_pair(0x000022CE, UnicodeNameInfo(L"cuvee", L"curlyvee")),
368     make_pair(0x000022CF, UnicodeNameInfo(L"cuwed", L"curlywedge")),
369     make_pair(0x000022D0, UnicodeNameInfo(L"Sub", L"Subset")),
370     make_pair(0x000022D1, UnicodeNameInfo(L"Sup", L"Supset")),
371     make_pair(0x000022D2, UnicodeNameInfo(L"Cap")),
372     make_pair(0x000022D3, UnicodeNameInfo(L"Cup")),
373     make_pair(0x000022D4, UnicodeNameInfo(L"fork", L"pitchfork")),
374     make_pair(0x000022D6, UnicodeNameInfo(L"ltdot", L"lessdot")),
375     make_pair(0x000022D7, UnicodeNameInfo(L"gtdot", L"gtrdot")),
376     make_pair(0x000022D8, UnicodeNameInfo(L"Ll")),
377     make_pair(0x000022D9, UnicodeNameInfo(L"Gg")),
378     make_pair(0x000022DA, UnicodeNameInfo(L"leg", L"LessEqualGreater")),
379     make_pair(0x000022DB, UnicodeNameInfo(L"gel", L"GreaterEqualLess")),
380     make_pair(0x000022DE, UnicodeNameInfo(L"cuepr", L"curlyeqprec")),
381     make_pair(0x000022DF, UnicodeNameInfo(L"cuesc", L"curlyeqsucc")),
382     make_pair(0x000022E2, UnicodeNameInfo(L"nsqsube", L"NotSquareSubsetEqual")),
383     make_pair(0x000022E3, UnicodeNameInfo(L"nsqsupe", L"NotSquareSupersetEqual")),
384     make_pair(0x000022E6, UnicodeNameInfo(L"lnsim")),
385     make_pair(0x000022E7, UnicodeNameInfo(L"gnsim")),
386     make_pair(0x000022E8, UnicodeNameInfo(L"prnsim", L"precnsim")),
387     make_pair(0x000022E9, UnicodeNameInfo(L"scnsim", L"succnsim")),
388     make_pair(0x000022EA, UnicodeNameInfo(L"nltri", L"NotLeftTriangle")),
389     make_pair(0x000022EB, UnicodeNameInfo(L"nrtri", L"NotRightTriangle")),
390     make_pair(0x000022EC, UnicodeNameInfo(L"nltrie", L"NotLeftTriangleEqual")),
391     make_pair(0x000022ED, UnicodeNameInfo(L"nrtrie", L"NotRightTriangleEqual")),
392     make_pair(0x000022EE, UnicodeNameInfo(L"vellip")),
393     make_pair(0x000022EF, UnicodeNameInfo(L"ctdot")),
394     make_pair(0x000022F1, UnicodeNameInfo(L"dtdot")),
395     make_pair(0x00002305, UnicodeNameInfo(L"barwed", L"barwedge")),
396     make_pair(0x00002306, UnicodeNameInfo(L"Barwed", L"doublebarwedge")),
397     make_pair(0x00002308, UnicodeNameInfo(L"lceil", L"LeftCeiling")),
398     make_pair(0x00002309, UnicodeNameInfo(L"rceil", L"RightCeiling")),
399     make_pair(0x0000230A, UnicodeNameInfo(L"lfloor", L"LeftFloor")),
400     make_pair(0x0000230B, UnicodeNameInfo(L"rfloor", L"RightFloor")),
401     make_pair(0x0000231C, UnicodeNameInfo(L"ulcorn", L"ulcorner")),
402     make_pair(0x0000231D, UnicodeNameInfo(L"urcorn", L"urcorner")),
403     make_pair(0x0000231E, UnicodeNameInfo(L"dlcorn", L"llcorner")),
404     make_pair(0x0000231F, UnicodeNameInfo(L"drcorn", L"lrcorner")),
405     make_pair(0x00002322, UnicodeNameInfo(L"frown", L"sfrown")),
406     make_pair(0x00002323, UnicodeNameInfo(L"smile", L"ssmile")),
407     make_pair(0x00002329, UnicodeNameInfo(L"lang", L"LeftAngleBracket")),
408     make_pair(0x0000232A, UnicodeNameInfo(L"rang", L"RightAngleBracket")),
409     make_pair(0x000023B5, UnicodeNameInfo(L"bbrk", L"UnderBracket")),
410     make_pair(0x000024C8, UnicodeNameInfo(L"oS", L"circledS")),
411     make_pair(0x000025A1, UnicodeNameInfo(L"squ", L"Square")),
412     make_pair(0x000025B3, UnicodeNameInfo(L"xutri", L"bigtriangleup")),
413     make_pair(0x000025B4, UnicodeNameInfo(L"utrif", L"blacktriangle")),
414     make_pair(0x000025B5, UnicodeNameInfo(L"utri", L"triangle")),
415     make_pair(0x000025B6, UnicodeNameInfo()),
416     make_pair(0x000025B9, UnicodeNameInfo(L"rtri", L"triangleright")),
417     make_pair(0x000025BD, UnicodeNameInfo(L"xdtri", L"bigtriangledown")),
418     make_pair(0x000025BE, UnicodeNameInfo(L"dtrif", L"blacktriangledown")),
419     make_pair(0x000025BF, UnicodeNameInfo(L"dtri", L"triangledown")),
420     make_pair(0x000025C0, UnicodeNameInfo()),
421     make_pair(0x000025C3, UnicodeNameInfo(L"ltri", L"triangleleft")),
422     make_pair(0x000025CA, UnicodeNameInfo(L"loz", L"lozenge")),
423     make_pair(0x000025EF, UnicodeNameInfo(L"xcirc", L"bigcirc")),
424     make_pair(0x000025FC, UnicodeNameInfo(L"FilledSmallSquare")),
425     make_pair(0x00002605, UnicodeNameInfo(L"starf", L"bigstar")),
426     make_pair(0x00002660, UnicodeNameInfo(L"spades", L"spadesuit")),
427     make_pair(0x00002663, UnicodeNameInfo(L"clubs", L"clubsuit")),
428     make_pair(0x00002665, UnicodeNameInfo(L"hearts", L"heartsuit")),
429     make_pair(0x00002666, UnicodeNameInfo(L"diams", L"diamondsuit")),
430     make_pair(0x0000266D, UnicodeNameInfo(L"flat")),
431     make_pair(0x0000266E, UnicodeNameInfo(L"natur", L"natural")),
432     make_pair(0x0000266F, UnicodeNameInfo(L"sharp")),
433     make_pair(0x00002713, UnicodeNameInfo(L"check", L"checkmark")),
434     make_pair(0x00002720, UnicodeNameInfo(L"malt", L"maltese")),
435     make_pair(0x000027F5, UnicodeNameInfo(L"xlarr", L"LongLeftArrow")),
436     make_pair(0x000027F6, UnicodeNameInfo(L"xrarr", L"LongRightArrow")),
437     make_pair(0x000027F7, UnicodeNameInfo(L"xharr", L"LongLeftRightArrow")),
438     make_pair(0x000027F8, UnicodeNameInfo(L"xlArr", L"DoubleLongLeftArrow")),
439     make_pair(0x000027F9, UnicodeNameInfo(L"xrArr", L"DoubleLongRightArrow")),
440     make_pair(0x000027FA, UnicodeNameInfo(L"xhArr", L"DoubleLongLeftRightArrow")),
441     make_pair(0x000027FC, UnicodeNameInfo(L"xmap", L"longMapsto")),
442     make_pair(0x0000290E, UnicodeNameInfo(L"lBarr")),
443     make_pair(0x0000290F, UnicodeNameInfo(L"rBarr", L"dbkarow")),
444     make_pair(0x000029EB, UnicodeNameInfo(L"lozf", L"blacklozenge")),
445     make_pair(0x00002A00, UnicodeNameInfo(L"xodot", L"bigodot")),
446     make_pair(0x00002A01, UnicodeNameInfo(L"xoplus", L"bigoplus")),
447     make_pair(0x00002A02, UnicodeNameInfo(L"xotime", L"bigotimes")),
448     make_pair(0x00002A04, UnicodeNameInfo(L"xuplus", L"biguplus")),
449     make_pair(0x00002A06, UnicodeNameInfo(L"xsqcup", L"bigsqcup")),
450     make_pair(0x00002A0C, UnicodeNameInfo(L"qint", L"iiiint")),
451     make_pair(0x00002A2F, UnicodeNameInfo(L"Cross")),
452     make_pair(0x00002A3F, UnicodeNameInfo(L"amalg")),
453     make_pair(0x00002A7D, UnicodeNameInfo(L"les", L"LessSlantEqual")),
454     make_pair(0x00002A7E, UnicodeNameInfo(L"ges", L"GreaterSlantEqual")),
455     make_pair(0x00002A85, UnicodeNameInfo(L"lap", L"lessapprox")),
456     make_pair(0x00002A86, UnicodeNameInfo(L"gap", L"gtrapprox")),
457     make_pair(0x00002A89, UnicodeNameInfo(L"lnap", L"lnapprox")),
458     make_pair(0x00002A8A, UnicodeNameInfo(L"gnap", L"gnapprox")),
459     make_pair(0x00002A8B, UnicodeNameInfo(L"lEg", L"lesseqqgtr")),
460     make_pair(0x00002A8C, UnicodeNameInfo(L"gEl", L"gtreqqless")),
461     make_pair(0x00002A95, UnicodeNameInfo(L"els", L"eqslantless")),
462     make_pair(0x00002A96, UnicodeNameInfo(L"egs", L"eqslantgtr")),
463     make_pair(0x00002AAF, UnicodeNameInfo(L"pre", L"PrecedesEqual")),
464     make_pair(0x00002AB0, UnicodeNameInfo(L"sce", L"SucceedsEqual")),
465     make_pair(0x00002AB5, UnicodeNameInfo(L"prnE", L"precneqq")),
466     make_pair(0x00002AB6, UnicodeNameInfo(L"scnE", L"succneqq")),
467     make_pair(0x00002AB7, UnicodeNameInfo(L"prap", L"precapprox")),
468     make_pair(0x00002AB8, UnicodeNameInfo(L"scap", L"succapprox")),
469     make_pair(0x00002AB9, UnicodeNameInfo(L"prnap", L"precnapprox")),
470     make_pair(0x00002ABA, UnicodeNameInfo(L"scnap", L"succnapprox")),
471     make_pair(0x00002AC5, UnicodeNameInfo(L"subE", L"subseteqq")),
472     make_pair(0x00002AC6, UnicodeNameInfo(L"supE", L"supseteqq")),
473     make_pair(0x00002ACB, UnicodeNameInfo(L"subnE", L"subsetneqq")),
474     make_pair(0x00002ACC, UnicodeNameInfo(L"supnE", L"supsetneqq")),
475     make_pair(0x0000FE00, UnicodeNameInfo()),        // FIX: think about this combining character...
476     make_pair(0x0000FE37, UnicodeNameInfo(L"OverBrace")),
477     make_pair(0x0000FE38, UnicodeNameInfo(L"UnderBrace")),
478     make_pair(0x0001D49C, UnicodeNameInfo(L"Ascr")),
479     make_pair(0x0001D49E, UnicodeNameInfo(L"Cscr")),
480     make_pair(0x0001D49F, UnicodeNameInfo(L"Dscr")),
481     make_pair(0x0001D4A2, UnicodeNameInfo(L"Gscr")),
482     make_pair(0x0001D4A5, UnicodeNameInfo(L"Jscr")),
483     make_pair(0x0001D4A6, UnicodeNameInfo(L"Kscr")),
484     make_pair(0x0001D4A9, UnicodeNameInfo(L"Nscr")),
485     make_pair(0x0001D4AA, UnicodeNameInfo(L"Oscr")),
486     make_pair(0x0001D4AB, UnicodeNameInfo(L"Pscr")),
487     make_pair(0x0001D4AC, UnicodeNameInfo(L"Qscr")),
488     make_pair(0x0001D4AE, UnicodeNameInfo(L"Sscr")),
489     make_pair(0x0001D4AF, UnicodeNameInfo(L"Tscr")),
490     make_pair(0x0001D4B0, UnicodeNameInfo(L"Uscr")),
491     make_pair(0x0001D4B1, UnicodeNameInfo(L"Vscr")),
492     make_pair(0x0001D4B2, UnicodeNameInfo(L"Wscr")),
493     make_pair(0x0001D4B3, UnicodeNameInfo(L"Xscr")),
494     make_pair(0x0001D4B4, UnicodeNameInfo(L"Yscr")),
495     make_pair(0x0001D4B5, UnicodeNameInfo(L"Zscr")),
496     make_pair(0x0001D4D0, UnicodeNameInfo()),    // mathematical bold script capitals
497     make_pair(0x0001D4D1, UnicodeNameInfo()),
498     make_pair(0x0001D4D2, UnicodeNameInfo()),
499     make_pair(0x0001D4D3, UnicodeNameInfo()),
500     make_pair(0x0001D4D4, UnicodeNameInfo()),
501     make_pair(0x0001D4D5, UnicodeNameInfo()),
502     make_pair(0x0001D4D6, UnicodeNameInfo()),
503     make_pair(0x0001D4D7, UnicodeNameInfo()),
504     make_pair(0x0001D4D8, UnicodeNameInfo()),
505     make_pair(0x0001D4D9, UnicodeNameInfo()),
506     make_pair(0x0001D4DA, UnicodeNameInfo()),
507     make_pair(0x0001D4DB, UnicodeNameInfo()),
508     make_pair(0x0001D4DC, UnicodeNameInfo()),
509     make_pair(0x0001D4DD, UnicodeNameInfo()),
510     make_pair(0x0001D4DE, UnicodeNameInfo()),
511     make_pair(0x0001D4DF, UnicodeNameInfo()),
512     make_pair(0x0001D4E0, UnicodeNameInfo()),
513     make_pair(0x0001D4E1, UnicodeNameInfo()),
514     make_pair(0x0001D4E2, UnicodeNameInfo()),
515     make_pair(0x0001D4E3, UnicodeNameInfo()),
516     make_pair(0x0001D4E4, UnicodeNameInfo()),
517     make_pair(0x0001D4E5, UnicodeNameInfo()),
518     make_pair(0x0001D4E6, UnicodeNameInfo()),
519     make_pair(0x0001D4E7, UnicodeNameInfo()),
520     make_pair(0x0001D4E8, UnicodeNameInfo()),
521     make_pair(0x0001D4E9, UnicodeNameInfo()),
522     make_pair(0x0001D504, UnicodeNameInfo(L"Afr")),
523     make_pair(0x0001D505, UnicodeNameInfo(L"Bfr")),
524     make_pair(0x0001D507, UnicodeNameInfo(L"Dfr")),
525     make_pair(0x0001D508, UnicodeNameInfo(L"Efr")),
526     make_pair(0x0001D509, UnicodeNameInfo(L"Ffr")),
527     make_pair(0x0001D50A, UnicodeNameInfo(L"Gfr")),
528     make_pair(0x0001D50D, UnicodeNameInfo(L"Jfr")),
529     make_pair(0x0001D50E, UnicodeNameInfo(L"Kfr")),
530     make_pair(0x0001D50F, UnicodeNameInfo(L"Lfr")),
531     make_pair(0x0001D510, UnicodeNameInfo(L"Mfr")),
532     make_pair(0x0001D511, UnicodeNameInfo(L"Nfr")),
533     make_pair(0x0001D512, UnicodeNameInfo(L"Ofr")),
534     make_pair(0x0001D513, UnicodeNameInfo(L"Pfr")),
535     make_pair(0x0001D514, UnicodeNameInfo(L"Qfr")),
536     make_pair(0x0001D516, UnicodeNameInfo(L"Sfr")),
537     make_pair(0x0001D517, UnicodeNameInfo(L"Tfr")),
538     make_pair(0x0001D518, UnicodeNameInfo(L"Ufr")),
539     make_pair(0x0001D519, UnicodeNameInfo(L"Vfr")),
540     make_pair(0x0001D51A, UnicodeNameInfo(L"Wfr")),
541     make_pair(0x0001D51B, UnicodeNameInfo(L"Xfr")),
542     make_pair(0x0001D51C, UnicodeNameInfo(L"Yfr")),
543     make_pair(0x0001D51E, UnicodeNameInfo(L"afr")),
544     make_pair(0x0001D51F, UnicodeNameInfo(L"bfr")),
545     make_pair(0x0001D520, UnicodeNameInfo(L"cfr")),
546     make_pair(0x0001D521, UnicodeNameInfo(L"dfr")),
547     make_pair(0x0001D522, UnicodeNameInfo(L"efr")),
548     make_pair(0x0001D523, UnicodeNameInfo(L"ffr")),
549     make_pair(0x0001D524, UnicodeNameInfo(L"gfr")),
550     make_pair(0x0001D525, UnicodeNameInfo(L"hfr")),
551     make_pair(0x0001D526, UnicodeNameInfo(L"ifr")),
552     make_pair(0x0001D527, UnicodeNameInfo(L"jfr")),
553     make_pair(0x0001D528, UnicodeNameInfo(L"kfr")),
554     make_pair(0x0001D529, UnicodeNameInfo(L"lfr")),
555     make_pair(0x0001D52A, UnicodeNameInfo(L"mfr")),
556     make_pair(0x0001D52B, UnicodeNameInfo(L"nfr")),
557     make_pair(0x0001D52C, UnicodeNameInfo(L"ofr")),
558     make_pair(0x0001D52D, UnicodeNameInfo(L"pfr")),
559     make_pair(0x0001D52E, UnicodeNameInfo(L"qfr")),
560     make_pair(0x0001D52F, UnicodeNameInfo(L"rfr")),
561     make_pair(0x0001D530, UnicodeNameInfo(L"sfr")),
562     make_pair(0x0001D531, UnicodeNameInfo(L"tfr")),
563     make_pair(0x0001D532, UnicodeNameInfo(L"ufr")),
564     make_pair(0x0001D533, UnicodeNameInfo(L"vfr")),
565     make_pair(0x0001D534, UnicodeNameInfo(L"wfr")),
566     make_pair(0x0001D535, UnicodeNameInfo(L"xfr")),
567     make_pair(0x0001D536, UnicodeNameInfo(L"yfr")),
568     make_pair(0x0001D537, UnicodeNameInfo(L"zfr")),
569     make_pair(0x0001D538, UnicodeNameInfo(L"Aopf")),
570     make_pair(0x0001D539, UnicodeNameInfo(L"Bopf")),
571     make_pair(0x0001D53B, UnicodeNameInfo(L"Dopf")),
572     make_pair(0x0001D53C, UnicodeNameInfo(L"Eopf")),
573     make_pair(0x0001D53D, UnicodeNameInfo(L"Fopf")),
574     make_pair(0x0001D53E, UnicodeNameInfo(L"Gopf")),
575     make_pair(0x0001D540, UnicodeNameInfo(L"Iopf")),
576     make_pair(0x0001D541, UnicodeNameInfo(L"Jopf")),
577     make_pair(0x0001D542, UnicodeNameInfo(L"Kopf")),
578     make_pair(0x0001D543, UnicodeNameInfo(L"Lopf")),
579     make_pair(0x0001D544, UnicodeNameInfo(L"Mopf")),
580     make_pair(0x0001D546, UnicodeNameInfo(L"Oopf")),
581     make_pair(0x0001D54A, UnicodeNameInfo(L"Sopf")),
582     make_pair(0x0001D54B, UnicodeNameInfo(L"Topf")),
583     make_pair(0x0001D54C, UnicodeNameInfo(L"Uopf")),
584     make_pair(0x0001D54D, UnicodeNameInfo(L"Vopf")),
585     make_pair(0x0001D54E, UnicodeNameInfo(L"Wopf")),
586     make_pair(0x0001D54F, UnicodeNameInfo(L"Xopf")),
587     make_pair(0x0001D550, UnicodeNameInfo(L"Yopf")),
588     make_pair(0x0001D55C, UnicodeNameInfo(L"kopf")),
589     make_pair(0x0001D6A5, UnicodeNameInfo())
590 };
591 
592 wishful_hash_map<uint32_t, UnicodeNameInfo> gUnicodeNameTable(
593     gUnicodeNameArray,
594     END_ARRAY(gUnicodeNameArray)
595 );
596 
597 
598 // FIX:
599 // Need to read about and think about combining characters.
600 // In particular, does the current strategy work for *named* entities
601 // and combining characters? I'm not sure.
602 
603 
604 // XmlEncode() handles conversion of non-ASCII characters to entities.
605 // It uses the "options" parameter and gUnicodeNameTable to decide how to
606 // translate each character.
XmlEncode(const wstring & input,const EncodingOptions & options)607 wstring XmlEncode(
608     const wstring& input,
609     const EncodingOptions& options
610 )
611 {
612     wostringstream os;
613 #ifdef WCHAR_T_IS_16BIT
614     wchar_t surrogate_upper = 0;
615 #endif
616 
617     for (wstring::const_iterator
618         ptr = input.begin(); ptr != input.end(); ptr++
619     )
620     {
621         if (*ptr == L'&')
622             os << L"&amp;";
623         else if (*ptr == L'<')
624             os << L"&lt;";
625         else if (*ptr == L'>')
626             os << L"&gt;";
627         else if (*ptr <= 0x7F)
628             os << *ptr;
629 #ifdef WCHAR_T_IS_16BIT
630         else if (static_cast<wchar_t>(0xD800) <= *ptr &&
631                  *ptr < static_cast<wchar_t>(0xDC00)) {
632             surrogate_upper = *ptr;
633             continue;
634         }
635 #endif
636         else
637         {
638             uint32_t chara = (uint32_t)*ptr;
639 #ifdef WCHAR_T_IS_16BIT
640             if (0xDC00 <= chara && chara < 0xDF00) {
641                 if (surrogate_upper == 0) {
642                     continue;
643                 }
644                 chara &= 0x3FF;
645                 chara |= ((uint32_t)surrogate_upper & 0x000003FF) << 10;
646                 chara += 0x00010000;
647             }
648 #endif
649             wishful_hash_map<uint32_t, UnicodeNameInfo>::const_iterator
650                 search = gUnicodeNameTable.find(chara);
651 
652             if (search == gUnicodeNameTable.end())
653             {
654                 if (options.mOtherEncodingRaw) {
655 #ifdef WCHAR_T_IS_16BIT
656                     if (surrogate_upper)
657                         os << surrogate_upper;
658 #endif
659                     os << *ptr;
660                 }
661                 else
662                     os << L"&#x" << hex << chara << L";";
663             }
664             else
665             {
666                 EncodingOptions::MathmlEncoding encoding
667                     = options.mMathmlEncoding;
668 
669                 // Deal with plane-1 characters.
670                 if (!options.mAllowPlane1 && chara >= 0x10000 &&
671                     (
672                         encoding == EncodingOptions::cMathmlEncodingNumeric
673                         ||
674                         encoding == EncodingOptions::cMathmlEncodingRaw
675                     )
676                 )
677                 {
678                     encoding = EncodingOptions::cMathmlEncodingShort;
679                 }
680 
681                 // Notice the missing "break"s in this switch statement.
682                 // We are falling back on other encoding methods if certain
683                 // ones aren't available.
684                 switch (encoding)
685                 {
686                     case EncodingOptions::cMathmlEncodingLong:
687                         if (!search->second.mLongName.empty())
688                         {
689                             os << L"&" << search->second.mLongName << L";";
690                             break;
691                         }
692 
693                     case EncodingOptions::cMathmlEncodingShort:
694                         if (!search->second.mShortName.empty())
695                         {
696                             os << L"&" << search->second.mShortName << L";";
697                             break;
698                         }
699 
700                     case EncodingOptions::cMathmlEncodingNumeric:
701                         os << L"&#x" << hex << chara << L";";
702                         break;
703 
704                     case EncodingOptions::cMathmlEncodingRaw:
705 #ifdef WCHAR_T_IS_16BIT
706                         if (surrogate_upper)
707                             os << surrogate_upper;
708 #endif
709                         os << *ptr;
710                         break;
711                 }
712 
713             }
714         }
715 #ifdef WCHAR_T_IS_16BIT
716         surrogate_upper = 0;
717 #endif
718     }
719 
720     return os.str();
721 }
722 
723 }
724 
725 // end of file @@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
726