1 /* wvWare
2  * Copyright (C) Caolan McNamara, Dom Lachowicz, and others
3  *
4  * This program is free software; you can redistribute it and/or
5  * modify it under the terms of the GNU General Public License
6  * as published by the Free Software Foundation; either version 2
7  * of the License, or (at your option) any later version.
8  *
9  * This program is distributed in the hope that it will be useful,
10  * but WITHOUT ANY WARRANTY; without even the implied warranty of
11  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
12  * GNU General Public License for more details.
13  *
14  * You should have received a copy of the GNU General Public License
15  * along with this program; if not, write to the Free Software
16  * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA
17  * 02111-1307, USA.
18  */
19 
20 #ifdef HAVE_CONFIG_H
21 #include "config.h"
22 #endif
23 
24 #include <stdlib.h>
25 #include <stdio.h>
26 #include <string.h>
27 #include <errno.h>
28 #include "wv.h"
29 #include <glib.h>
30 
31 int (*wvConvertUnicodeToEntity) (U16 char16) = NULL;
32 
33 /* enough word docs use the cp1252 encoding and enough iconv
34  * implementations don't include it that it's worth special-
35  * casing */
36 static const U16 cp1252_to_ucs2_table[] = {
37   0x0000, 0x0001, 0x0002, 0x0003, 0x0004, 0x0005, 0x0006, 0x0007
38 , 0x0008, 0x0009, 0x000A, 0x000B, 0x000C, 0x000D, 0x000E, 0x000F
39 , 0x0010, 0x0011, 0x0012, 0x0013, 0x0014, 0x0015, 0x0016, 0x0017
40 , 0x0018, 0x0019, 0x001A, 0x001B, 0x001C, 0x001D, 0x001E, 0x001F
41 , 0x0020, 0x0021, 0x0022, 0x0023, 0x0024, 0x0025, 0x0026, 0x0027
42 , 0x0028, 0x0029, 0x002A, 0x002B, 0x002C, 0x002D, 0x002E, 0x002F
43 , 0x0030, 0x0031, 0x0032, 0x0033, 0x0034, 0x0035, 0x0036, 0x0037
44 , 0x0038, 0x0039, 0x003A, 0x003B, 0x003C, 0x003D, 0x003E, 0x003F
45 , 0x0040, 0x0041, 0x0042, 0x0043, 0x0044, 0x0045, 0x0046, 0x0047
46 , 0x0048, 0x0049, 0x004A, 0x004B, 0x004C, 0x004D, 0x004E, 0x004F
47 , 0x0050, 0x0051, 0x0052, 0x0053, 0x0054, 0x0055, 0x0056, 0x0057
48 , 0x0058, 0x0059, 0x005A, 0x005B, 0x005C, 0x005D, 0x005E, 0x005F
49 , 0x0060, 0x0061, 0x0062, 0x0063, 0x0064, 0x0065, 0x0066, 0x0067
50 , 0x0068, 0x0069, 0x006A, 0x006B, 0x006C, 0x006D, 0x006E, 0x006F
51 , 0x0070, 0x0071, 0x0072, 0x0073, 0x0074, 0x0075, 0x0076, 0x0077
52 , 0x0078, 0x0079, 0x007A, 0x007B, 0x007C, 0x007D, 0x007E, 0x007F
53 , 0x20AC, 0x0000, 0x201A, 0x0192, 0x201E, 0x2026, 0x2020, 0x2021
54 , 0x02C6, 0x2030, 0x0160, 0x2039, 0x0152, 0x0000, 0x017D, 0x0000
55 , 0x0000, 0x2018, 0x2019, 0x201C, 0x201D, 0x2022, 0x2013, 0x2014
56 , 0x02DC, 0x2122, 0x0161, 0x203A, 0x0153, 0x0000, 0x017E, 0x0178
57 , 0x00A0, 0x00A1, 0x00A2, 0x00A3, 0x00A4, 0x00A5, 0x00A6, 0x00A7
58 , 0x00A8, 0x00A9, 0x00AA, 0x00AB, 0x00AC, 0x00AD, 0x00AE, 0x00AF
59 , 0x00B0, 0x00B1, 0x00B2, 0x00B3, 0x00B4, 0x00B5, 0x00B6, 0x00B7
60 , 0x00B8, 0x00B9, 0x00BA, 0x00BB, 0x00BC, 0x00BD, 0x00BE, 0x00BF
61 , 0x00C0, 0x00C1, 0x00C2, 0x00C3, 0x00C4, 0x00C5, 0x00C6, 0x00C7
62 , 0x00C8, 0x00C9, 0x00CA, 0x00CB, 0x00CC, 0x00CD, 0x00CE, 0x00CF
63 , 0x00D0, 0x00D1, 0x00D2, 0x00D3, 0x00D4, 0x00D5, 0x00D6, 0x00D7
64 , 0x00D8, 0x00D9, 0x00DA, 0x00DB, 0x00DC, 0x00DD, 0x00DE, 0x00DF
65 , 0x00E0, 0x00E1, 0x00E2, 0x00E3, 0x00E4, 0x00E5, 0x00E6, 0x00E7
66 , 0x00E8, 0x00E9, 0x00EA, 0x00EB, 0x00EC, 0x00ED, 0x00EE, 0x00EF
67 , 0x00F0, 0x00F1, 0x00F2, 0x00F3, 0x00F4, 0x00F5, 0x00F6, 0x00F7
68 , 0x00F8, 0x00F9, 0x00FA, 0x00FB, 0x00FC, 0x00FD, 0x00FE, 0x00FF
69 };
70 
71 U16
wvnLocaleToLIDConverter(U8 nLocale)72 wvnLocaleToLIDConverter (U8 nLocale)
73 {
74     switch (nLocale)
75       {
76 #if 0
77 	/* case 0: */		/* ANSI_CHARSET */
78 	/* case 1: */		/* DEFAULT_CHARSET */
79 	/* case 2: */		/* SYMBOL_CHARSET */
80 #endif
81 	  case 77:			/* MAC_CHARSET */
82 	  return (0xFFF);	/* This number is a hack */
83 	  case 128:			/* SHIFTJIS_CHARSET */
84 	  return (0x411);	/* Japanese */
85 	  case 129:			/* HANGEUL_CHARSET */
86 	  return (0x412);	/* Korean */
87 	  case 130:			/* JOHAB_CHARSET */
88 	  return (0x812);	/* Korean (Johab) */
89 	  case 134:			/* GB2312_CHARSET - Chinese Simplified */
90 	  return (0x804);	/* China PRC - And others!! */
91 	  case 136:			/* CHINESEBIG5_CHARSET - Chinese Traditional */
92 	  return (0x404);	/* Taiwan - And others!! */
93 	  case 161:			/* GREEK_CHARSET */
94 	  return (0x408);	/* Greek */
95 	  case 162:			/* TURKISH_CHARSET */
96 	  return (0x41f);	/* Turkish */
97 	  case 163:			/* VIETNAMESE_CHARSET */
98 	  return (0x42a);	/* Vietnamese */
99 	  case 177:			/* HEBREW_CHARSET */
100 	  return (0x40d);	/* Hebrew */
101 	  case 178:			/* ARABIC_CHARSET */
102 	  return (0x01);	/* Arabic */
103 	  case 186:			/* BALTIC_CHARSET */
104 	  return (0x425);	/* Estonian - And others!! */
105 	  case 204:			/* RUSSIAN_CHARSET */
106 	  return (0x419);	/* Russian - And others!! */
107 	  case 222:			/* THAI_CHARSET */
108 	  return (0x41e);	/* Thai */
109 	  case 238:			/* EASTEUROPE_CHARSET */
110 	  return (0x405);	/* Czech - And many others!! */
111 
112 #if 0
113 	  /* case 255: */		/* OEM_CHARSET */
114 #endif
115 
116       default:
117 	  return (0x0);
118       }
119     return (0x0);
120 }
121 
122 int
wvOutputTextChar(U16 eachchar,U8 chartype,wvParseStruct * ps,CHP * achp)123 wvOutputTextChar (U16 eachchar, U8 chartype, wvParseStruct * ps, CHP * achp)
124 {
125     U16 lid = 0;
126 
127     wvVersion v = wvQuerySupported (&ps->fib, NULL);
128 
129     /* testing adding a language */
130 
131     /* For version <= WORD7, The charset used could
132      * depend on the font's charset.
133      */
134     if ((v <= WORD7) && (!ps->fib.fFarEast))
135       {
136 	  FFN currentfont;
137 
138 	  if (ps->fonts.ffn == NULL)
139           {
140 	      lid = 0;
141 	  }
142 	  else
143           {
144 	  	currentfont = ps->fonts.ffn[achp->ftc];
145 	  	/* Return 0 if no match */
146 	  	lid = wvnLocaleToLIDConverter (currentfont.chs);
147 	  }
148       }
149 
150     if ((v > WORD6) && !lid)
151 	lid = achp->lidDefault;
152 
153     /* No lidDefault for ver < WORD6 */
154     if (lid == 0x400 || lid == 0)
155 	lid = ps->fib.lid;
156 
157     /* end testing adding a language */
158 
159     if (achp->fSpec)
160       {
161 	  /*
162 	     if the character is still one of the special ones then call this other
163 	     handler
164 	     instead
165 	   */
166 	  if (ps->scharhandler)
167 	      return ((*(ps->scharhandler)) (ps, eachchar, achp));
168       }
169     else
170       {
171 	  /* Most Chars go through this baby */
172 	  if (ps->charhandler)
173 	    {
174 		if (!((v == WORD7 || v == WORD6) && ps->fib.fFarEast))
175 		    if (v <= WORD7)
176 		      {
177 			  /* versions <= 7 do not use unicode. versions >= 8 always do */
178 			  /* versions 7 and 6 use unicode iff the far-east flag is set */
179 			  chartype = 1;
180 		      }
181 
182 		return ((*(ps->charhandler)) (ps, eachchar, chartype, lid));
183 	    }
184       }
185     wvError (("No CharHandler registered, programmer error\n"));
186     return (0);
187 }
188 
189 void
wvOutputHtmlChar(U16 eachchar,U8 chartype,char * outputtype,U16 lid)190 wvOutputHtmlChar (U16 eachchar, U8 chartype, char *outputtype, U16 lid)
191 {
192     if (chartype)
193 	eachchar = wvHandleCodePage (eachchar, lid);
194     wvOutputFromUnicode (eachchar, outputtype);
195 }
196 
/*
 * Expands to a block that RETURNS from the enclosing function: the result is
 * `name` when g_iconv_open() accepts it as a charset, `fallbackname`
 * otherwise.  The probe runs once per expansion site; the choice is kept in
 * a function-local static afterwards.
 */
#define CPNAME_OR_FALLBACK(name,fallbackname)                   \
{                                                               \
    static char *cpname = NULL;                                 \
    if (cpname == NULL)                                         \
      {                                                         \
        GIConv probe = g_iconv_open (name, name);               \
        if (probe != (GIConv) -1)                               \
          {                                                     \
            g_iconv_close (probe);                              \
            cpname = name;                                      \
          }                                                     \
        else                                                    \
          {                                                     \
            cpname = fallbackname;                              \
          }                                                     \
      }                                                         \
    return cpname;                                              \
}
215 
216 typedef struct {
217 	const char * language_tag ;
218 	U16 lid ;
219 } wvLanguageId ;
220 
221 static const wvLanguageId mLanguageIds[] =
222 	{
223 		{ "-none-", 0x0000 }, /* none (language neutral) */
224 		{ "-none-", 0x0400 }, /* none */
225 		{ "af-ZA", 0x0436 }, /* Afrikaans */
226 		{ "am", 0x045e }, /* Amharic */
227 		{ "sq-AL", 0x041c }, /* Albanian */
228 		{ "ar-SA", 0x0401 }, /* Arabic (Saudi) */
229 		{ "ar-IQ", 0x0801 }, /* Arabic (Iraq) */
230 		{ "ar-EG", 0x0c01 }, /* Arabic (Egypt) */
231 		{ "ar-LY", 0x1001 }, /* Arabic (Libya) */
232 		{ "ar-DZ", 0x1401 }, /* Arabic (Algeria) */
233 		{ "ar-MA", 0x1801 }, /* Arabic (Morocco) */
234 		{ "ar-TN", 0x1c01 }, /* Arabic (Tunisia) */
235 		{ "ar-OM", 0x2001 }, /* Arabic (Oman) */
236 		{ "ar-YE", 0x2401 }, /* Arabic (Yemen) */
237 		{ "ar-SY", 0x2801 }, /* Arabic (Syria) */
238 		{ "ar-JO", 0x2c01 }, /* Arabic (Jordan) */
239 		{ "ar-LB", 0x3001 }, /* Arabic (Lebanon) */
240 		{ "ar-KW", 0x3401 }, /* Arabic (Kuwait) */
241 		{ "ar-AE", 0x3801 }, /* Arabic (United Arab Emirates) */
242 		{ "ar-BH", 0x3c01 }, /* Arabic (Bahrain) */
243 		{ "ar-QA", 0x4001 }, /* Arabic (Qatar) */
244 		{ "as", 0x044d }, /* Assamese */
245 		{ "az", 0x042c }, /* Azerbaijani */
246 		{ "hy-AM", 0x042b }, /* Armenian */
247 		{ "az", 0x044c }, /* Azeri (Latin) az- */
248 		{ "az", 0x082c }, /* Azeri (Cyrillic) az- */
249 		{ "eu-ES", 0x042d }, /* Basque */
250 		{ "be-BY", 0x0423 }, /* Belarussian */
251 		{ "bn", 0x0445 }, /* Bengali bn- */
252 		{ "bg-BG", 0x0402 }, /* Bulgarian */
253 		{ "ca-ES", 0x0403 }, /* Catalan */
254 		{ "zh-TW", 0x0404 }, /* Chinese (Taiwan) */
255 		{ "zh-CN", 0x0804 }, /* Chinese (PRC) */
256 		{ "zh-HK", 0x0c04 }, /* Chinese (Hong Kong) */
257 		{ "zh-SG", 0x1004 }, /* Chinese (Singapore) */
258 		{ "ch-MO", 0x1404 }, /* Chinese (Macau SAR) */
259 		{ "hr-HR", 0x041a }, /* Croatian */
260 		{ "cs-CZ", 0x0405 }, /* Czech */
261 		{ "da-DK", 0x0406 }, /* Danish */
262 		{ "div", 0x465 }, /* Divehi div-*/
263 		{ "nl-NL", 0x0413 }, /* Dutch (Netherlands) */
264 		{ "nl-BE", 0x0813 }, /* Dutch (Belgium) */
265 		{ "en-US", 0x0409 }, /* English (USA) */
266 		{ "en-GB", 0x0809 }, /* English (UK) */
267 		{ "en-AU", 0x0c09 }, /* English (Australia) */
268 		{ "en-CA", 0x1009 }, /* English (Canada) */
269 		{ "en-NZ", 0x1409 }, /* English (New Zealand) */
270 		{ "en-IE", 0x1809 }, /* English (Ireland) */
271 		{ "en-ZA", 0x1c09 }, /* English (South Africa) */
272 		{ "en-JM", 0x2009 }, /* English (Jamaica) */
273 		{ "en", 0x2409 }, /* English (Caribbean) */
274 		{ "en-BZ", 0x2809 }, /* English (Belize) */
275 		{ "en-TT", 0x2c09 }, /* English (Trinidad) */
276 		{ "en-ZW", 0x3009 }, /* English (Zimbabwe) */
277 		{ "en-PH", 0x3409 }, /* English (Phillipines) */
278 		{ "et-EE", 0x0425 }, /* Estonian */
279 		{ "fo", 0x0438 }, /* Faeroese fo- */
280 		{ "fa-IR", 0x0429 }, /* Farsi */
281 		{ "fi-FI", 0x040b }, /* Finnish */
282 		{ "fr-FR", 0x040c }, /* French (France) */
283 		{ "fr-BE", 0x080c }, /* French (Belgium) */
284 		{ "fr-CA", 0x0c0c }, /* French (Canada) */
285 		{ "fr-CH", 0x100c }, /* French (Switzerland) */
286 		{ "fr-LU", 0x140c }, /* French (Luxembourg) */
287 		{ "fr-MC", 0x180c }, /* French (Monaco) */
288 		{ "gl", 0x0456 }, /* Galician gl- */
289 		{ "ga-IE", 0x083c }, /* Irish Gaelic */
290 		{ "gd-GB", 0x100c }, /* Scottish Gaelic */
291 		{ "ka-GE", 0x0437 }, /* Georgian */
292 		{ "de-DE", 0x0407 }, /* German (Germany) */
293 		{ "de-CH", 0x0807 }, /* German (Switzerland) */
294 		{ "de-AT", 0x0c07 }, /* German (Austria) */
295 		{ "de-LU", 0x1007 }, /* German (Luxembourg) */
296 		{ "de-LI", 0x1407 }, /* German (Liechtenstein) */
297 		{ "el-GR", 0x0408 }, /* Greek */
298 		{ "gu", 0x0447 }, /* Gujarati gu- */
299 		{ "ha", 0x0468 }, /* Hausa */
300 		{ "he-IL", 0x040d }, /* Hebrew */
301 		{ "hi-IN", 0x0439 }, /* Hindi */
302 		{ "hu-HU", 0x040e }, /* Hungarian */
303 		{ "is-IS", 0x040f }, /* Icelandic */
304 		{ "id-ID", 0x0421 }, /* Indonesian */
305 		{ "iu", 0x045d }, /* Inkutitut */
306 		{ "it-IT", 0x0410 }, /* Italian (Italy) */
307 		{ "it-CH", 0x0810 }, /* Italian (Switzerland) */
308 		{ "ja-JP", 0x0411}, /* Japanese */
309 		{ "kn", 0x044b }, /* Kannada kn- */
310 		{ "ks", 0x0860 }, /* Kashmiri (India) ks- */
311 		{ "kk", 0x043f }, /* Kazakh kk- */
312 		{ "kok", 0x0457 }, /* Konkani kok- */
313 		{ "ko-KR", 0x0412 }, /* Korean */
314 		{ "ko", 0x0812 }, /* Korean (Johab) ko- */
315 		{ "kir", 0x0440 }, /* Kyrgyz */
316 		{ "la", 0x0476 }, /* Latin */
317 		{ "lo", 0x0454 }, /* Laothian */
318 		{ "lv-LV", 0x0426 }, /* Latvian */
319 		{ "lt-LT", 0x0427 }, /* Lithuanian */
320 		{ "lt-LT", 0x0827 }, /* Lithuanian (Classic) */
321 		{ "mk", 0x042f }, /* FYRO Macedonian */
322 		{ "my-MY", 0x043e }, /* Malaysian */
323 		{ "my-BN", 0x083e }, /* Malay Brunei Darussalam */
324 		{ "ml", 0x044c }, /* Malayalam ml- */
325 		{ "mr", 0x044e }, /* Marathi mr- */
326 		{ "mt", 0x043a }, /* Maltese */
327 		{ "mo", 0x0450 }, /* Mongolian */
328 		{ "ne-NP", 0x0461 }, /* Napali (Nepal) */
329 		{ "ne-IN", 0x0861 }, /* Nepali (India) */
330 		{ "nb-NO", 0x0414 }, /* Norwegian (Bokmai) */
331 		{ "nn-NO", 0x0814 }, /* Norwegian (Nynorsk) */
332 		{ "or", 0x0448 }, /* Oriya or- */
333 		{ "om", 0x0472 }, /* Oromo (Afan, Galla) */
334 		{ "pl-PL", 0x0415 }, /* Polish */
335 		{ "pt-BR", 0x0416 }, /* Portuguese (Brazil) */
336 		{ "pt-PT", 0x0816 }, /* Portuguese (Portugal) */
337 		{ "pa", 0x0446 }, /* Punjabi pa- */
338 		{ "ps", 0x0463 }, /* Pashto (Pushto) */
339 		{ "rm", 0x0417 }, /* Rhaeto-Romanic rm- */
340 		{ "ro-RO", 0x0418 }, /* Romanian */
341 		{ "ro-MD", 0x0818 }, /* Romanian (Moldova) */
342 		{ "ru-RU", 0x0419 }, /* Russian */
343 		{ "ru-MD", 0x0819 }, /* Russian (Moldova) */
344 		{ "se", 0x043b }, /* Sami (Lappish) se- */
345 		{ "sa", 0x044f }, /* Sanskrit sa- */
346 		{ "sr", 0x0c1a }, /* Serbian (Cyrillic) sr- */
347 		{ "sr", 0x081a }, /* Serbian (Latin) sr- */
348 		{ "sd", 0x0459 }, /* Sindhi sd- */
349 		{ "sk-SK", 0x041b }, /* Slovak */
350 		{ "sl-SI", 0x0424 }, /* Slovenian */
351 		{ "wen", 0x042e }, /* Sorbian wen- */
352 		{ "so", 0x0477 }, /* Somali */
353 		{ "es-ES", 0x040a }, /* Spanish (Spain, Traditional) */
354 		{ "es-MX", 0x080a }, /* Spanish (Mexico) */
355 		{ "es-ES", 0x0c0a }, /* Spanish (Modern) */
356 		{ "es-GT", 0x100a }, /* Spanish (Guatemala) */
357 		{ "es-CR", 0x140a }, /* Spanish (Costa Rica) */
358 		{ "es-PA", 0x180a }, /* Spanish (Panama) */
359 		{ "es-DO", 0x1c0a }, /* Spanish (Dominican Republic) */
360 		{ "es-VE", 0x200a }, /* Spanish (Venezuela) */
361 		{ "es-CO", 0x240a }, /* Spanish (Colombia) */
362 		{ "es-PE", 0x280a }, /* Spanish (Peru) */
363 		{ "es-AR", 0x2c0a }, /* Spanish (Argentina) */
364 		{ "es-EC", 0x300a }, /* Spanish (Ecuador) */
365 		{ "es-CL", 0x340a }, /* Spanish (Chile) */
366 		{ "es-UY", 0x380a }, /* Spanish (Uruguay) */
367 		{ "es-PY", 0x3c0a }, /* Spanish (Paraguay) */
368 		{ "es-BO", 0x400a }, /* Spanish (Bolivia) */
369 		{ "es-SV", 0x440a }, /* Spanish (El Salvador) */
370 		{ "es-HN", 0x480a }, /* Spanish (Honduras) */
371 		{ "es-NI", 0x4c0a }, /* Spanish (Nicaragua) */
372 		{ "es-PR", 0x500a }, /* Spanish (Puerto Rico) */
373 		{ "sx", 0x0430 }, /* Sutu */
374 		{ "sw", 0x0441 }, /* Swahili (Kiswahili/Kenya) */
375 		{ "sv-SE", 0x041d }, /* Swedish */
376 		{ "sv-FI", 0x081d }, /* Swedish (Finland) */
377 		{ "ta", 0x0449 }, /* Tamil ta- */
378 		{ "tt", 0x0444 }, /* Tatar (Tatarstan) tt- */
379 		{ "te", 0x044a }, /* Telugu te- */
380 		{ "th-TH", 0x041e }, /* Thai */
381 		{ "ts", 0x0431 }, /* Tsonga ts- */
382 		{ "tn", 0x0432 }, /* Tswana tn- */
383 		{ "tr-TR", 0x041f }, /* Turkish */
384 		{ "tl", 0x0464 }, /* Tagalog */
385 		{ "tg", 0x0428 }, /* Tajik */
386 		{ "bo", 0x0451 }, /* Tibetan */
387 		{ "ti", 0x0473 }, /* Tigrinya */
388 		{ "uk-UA", 0x0422 }, /* Ukrainian */
389 		{ "ur-PK", 0x0420 }, /* Urdu (Pakistan) */
390 		{ "ur-IN", 0x0820 }, /* Urdu (India) */
391 		{ "uz", 0x0443 }, /* Uzbek (Latin) uz- */
392 		{ "uz", 0x0843 }, /* Uzbek (Cyrillic) uz- */
393 		{ "ven", 0x0433 }, /* Venda ven- */
394 		{ "vi-VN", 0x042a }, /* Vietnamese */
395 		{ "cy-GB", 0x0452 }, /* Welsh */
396 		{ "xh", 0x0434 }, /* Xhosa xh */
397 		{ "yi", 0x043d }, /* Yiddish yi- */
398 		{ "yo", 0x046a }, /* Yoruba */
399 		{ "zu", 0x0435 }, /* Zulu zu- */
400 		{ "en-US", 0x0800 } /* Default */
401 	};
402 
403 #define NrMappings (sizeof(mLanguageIds)/sizeof(mLanguageIds[0]))
404 
wvLangToLIDConverter(const char * lang)405 U16 wvLangToLIDConverter ( const char * lang )
406 {
407 	unsigned int i = 0 ;
408 
409 	if (!lang)
410 		return 0x0400;   /* return -none- */
411 
412 	for ( i = 0 ; i < NrMappings ; i++ )
413 		if (!strcmp (lang, mLanguageIds[i].language_tag))
414 			return mLanguageIds[i].lid ;
415 
416 	return 0x0400 ;   /* return -none- */
417 }
418 
419 const char *
wvLIDToLangConverter(U16 lid)420 wvLIDToLangConverter (U16 lid)
421 {
422 	unsigned int i = 0 ;
423 
424 	if ( lid == 0 ) /* language netural */
425 		return "-none-" ;
426 
427 	for ( i = 0 ; i < NrMappings ; i++ )
428 		if ( mLanguageIds[i].lid == lid )
429 			return mLanguageIds[i].language_tag ;
430 
431 	return "-none-"; /* default */
432 }
433 
434 static int
wvIsCP1252(U16 lid)435 wvIsCP1252 (U16 lid)
436 {
437   switch (lid & 0xff)
438     {
439       case 0x03:		/*Catalan */
440       case 0x06:		/*Danish */
441       case 0x07:		/*German */
442       case 0x09:		/*English */
443       case 0x0a:		/*Spanish */
444       case 0x0b:		/*Finnish */
445       case 0x0c:		/*French */
446       case 0x0f:		/*Icelandic */
447       case 0x10:		/*Italian */
448       case 0x13:		/*Dutch */
449       case 0x14:		/*Norwegian */
450       case 0x16:		/*Portuguese */
451       case 0x17:		/*Rhaeto-Romanic */
452       case 0x1d:		/*Swedish */
453       case 0x21:		/*Bahasa Indonesian */
454       case 0x2d:		/*Basque */
455       case 0x36:		/*Afrikaans */
456       case 0x38:		/*Faeroese */
457       case 0x3E:		/*Malaysian / Malay */
458       case 0x41:		/*Swahili */
459 	return 1;
460 
461       case 0x1a:		/*Serbian, Croatian, (Bosnian?) */
462 	switch (lid)
463 	  {
464 	  case 0x041a:		/*Croatian */
465 	  case 0x081a:		/*Serbian (Latin) */
466 	    return 1;
467 	  }
468 
469     default:
470       return 0;
471     }
472 
473   return 0;
474 }
475 
476 const char *
wvLIDToCodePageConverter(U16 lid)477 wvLIDToCodePageConverter (U16 lid)
478 {
479     if (lid == 0x0FFF)	/*Macintosh Hack */
480 	  return ("MACINTOSH");
481     else if (wvIsCP1252 (lid))
482       return ("CP1252");
483 
484     switch (lid & 0xff)
485 	  {
486       case 0x01:		/*Arabic */
487 	  return ("CP1256");
488       case 0x02:		/*Bulgarian */
489 	  return ("CP1251");
490       case 0x04:		/*Chinese */
491       switch (lid)
492 		{
493 #if 0
494       	case 0x1404:		/*Chinese (Macau SAR) */
495 #endif
496       	case 0x0c04:		/*Chinese (Hong Kong SAR, PRC) */
497 	  	CPNAME_OR_FALLBACK ("CP950", "BIG5-HKSCS");
498       	case 0x0804:		/*Chinese (PRC) */
499 	  	CPNAME_OR_FALLBACK ("CP936", "GBK");
500 #if 0
501       	case 0x1004:		/*Chinese (Singapore) */
502 #endif
503       	case 0x0404:		/*Chinese (Taiwan) */
504 	  	CPNAME_OR_FALLBACK ("CP950", "BIG5");
505 		}
506       case 0x05:		/*Czech */
507 	  return ("CP1250");
508       case 0x08:		/*Greek */
509 	  return ("CP1253");
510       case 0x0d:		/*Hebrew */
511 	  return ("CP1255");
512       case 0x0e:		/*Hungarian */
513 	  return ("CP1250");
514       case 0x11:		/*Japanese */
515 	  return ("CP932");
516       case 0x12:		/*Korean */
517       switch (lid)
518 		{
519       	case 0x0812:		/*Korean (Johab) */
520 	  	return ("CP1361");
521       	case 0x0412:		/*Korean */
522 	  	return ("CP949");
523 		}
524       case 0x15:		/*Polish */
525 	  return ("CP1250");
526       case 0x18:		/*Romanian */
527 	  return ("CP1250");
528       case 0x19:		/*Russian */
529 	  return ("CP1251");
530       case 0x1a:		/*Serbian, Croatian, (Bosnian?) */
531       switch (lid)
532 		{
533       	case 0x0c1a:		/*Serbian (Cyrillic) */
534 	  	return ("CP1251");
535 		}
536       case 0x1b:		/*Slovak */
537 	  return ("CP1250");
538       case 0x1c:		/*Albanian */
539 	  return ("CP1251");
540       case 0x1e:		/*Thai */
541 	  return ("CP874");
542       case 0x1f:		/*Turkish */
543 	  return ("CP1254");
544       case 0x20:		/*Urdu. This is Unicode only. */
545 	  return ("CP0");
546       case 0x22:		/*Ukrainian */
547 	  return ("CP1251");
548       case 0x23:		/*Byelorussian / Belarusian */
549 	  return ("CP1251");
550       case 0x24:		/*Slovenian */
551 	  return ("CP1250");
552       case 0x25:		/*Estonian */
553 	  return ("CP1257");
554       case 0x26:		/*Latvian */
555 	  return ("CP1257");
556       case 0x27:		/*Lithuanian */
557 	  return ("CP1257");
558       case 0x29:		/*Farsi / Persian. This is Unicode only. */
559 	  return ("CP0");
560       case 0x2a:		/*Vietnamese */
561 	  return ("CP1258");
562       case 0x2b:		/*Windows 2000: Armenian. This is Unicode only. */
563 	  return ("CP0");
564       case 0x2c:		/*Azeri */
565       switch (lid)
566 		{
567       	case 0x082c:		/*Azeri (Cyrillic) */
568 	  	return ("CP1251");
569 #if 0
570       	case 0x042c:		/*Azeri (Latin) */
571 #endif
572 		}
573       case 0x2f:		/*Macedonian */
574 	  return ("CP1251");
575 #if 0
576       case 0x30:		/*Sutu */
577 #endif
578       case 0x37:		/*Windows 2000: Georgian. This is Unicode only. */
579 	  return ("CP0");
580       case 0x39:		/*Windows 2000: Hindi. This is Unicode only. */
581 	  return ("CP0");
582 #if 0
583       case 0x3f:		/*Kazakh */
584 #endif
585       case 0x43:		/*Uzbek */
586       switch (lid)
587 		{
588       	case 0x0843:		/*Uzbek (Cyrillic) */
589 	  	return ("CP1251");
590 #if 0
591       	case 0x0443:		/*Uzbek (Latin) */
592 #endif
593 		}
594 #if 0
595       case 0x44:		/*Tatar */
596 #endif
597       case 0x45:		/*Windows 2000: Bengali. This is Unicode only. */
598       case 0x46:		/*Windows 2000: Punjabi. This is Unicode only. */
599       case 0x47:		/*Windows 2000: Gujarati. This is Unicode only. */
600       case 0x48:		/*Windows 2000: Oriya. This is Unicode only. */
601       case 0x49:		/*Windows 2000: Tamil. This is Unicode only. */
602       case 0x4a:		/*Windows 2000: Telugu. This is Unicode only. */
603       case 0x4b:		/*Windows 2000: Kannada. This is Unicode only. */
604       case 0x4c:		/*Windows 2000: Malayalam. This is Unicode only. */
605       case 0x4d:		/*Windows 2000: Assamese. This is Unicode only. */
606       case 0x4e:		/*Windows 2000: Marathi. This is Unicode only. */
607       case 0x4f:		/*Windows 2000: Sanskrit. This is Unicode only. */
608 	  return ("CP0");
609       case 0x55:		/*Myanmar / Burmese. This is Unicode only. */
610 	  return ("CP0");
611       case 0x57:		/*Windows 2000: Konkani. This is Unicode only. */
612 	  return ("CP0");
613 #if 0
614       case 0x58:		/*Manipuri */
615       case 0x59:		/*Sindhi */
616       case 0x60:		/*Kashmiri (India) */
617 #endif
618       case 0x61:		/*Windows 2000: Nepali (India). This is Unicode only. */
619 	  return ("CP0");
620       };
621 
622 	/* TODO output a warning since this is a guess */
623     return ("CP1252");
624 }
625 
626 static U32
swap_iconv(U16 lid)627 swap_iconv (U16 lid)
628 {
629     GIConv handle;
630     char f_code[33];		/* From CCSID                           */
631     char t_code[33];		/* To CCSID                             */
632     const char *codepage;
633     size_t ibuflen, obuflen;
634 
635     U8 buffer[2];
636     U8 buffer2[2];
637 
638     gchar *ibuf, *obuf;
639 
640     /* do a bit of caching */
641     static U16 lastlid = -1;
642     static U32 ret = -1;
643 
644     /* shortcut */
645     if (ret != -1 && lastlid == lid)
646 	return ret;
647 
648     ibuf = buffer;
649     obuf = buffer2;
650 
651     lastlid = lid;
652     codepage = wvLIDToCodePageConverter (lid);
653 
654     memset (f_code, '\0', 33);
655     memset (t_code, '\0', 33);
656 
657     strcpy (f_code, codepage);
658     strcpy (t_code, "UCS-2");
659 
660     handle = g_iconv_open (t_code, f_code);
661     if (handle == (GIConv)-1)
662             return 0;
663 
664     buffer[0] = 0x20 & 0xff;
665     buffer[1] = 0;
666 
667     ibuflen = obuflen = 2;
668 
669     g_iconv (handle, &ibuf, &ibuflen, &obuf, &obuflen);
670 
671     g_iconv_close (handle);
672 
673     ret = *(U16 *) buffer2 != 0x20;
674     return ret;
675 }
676 
677 U16
wvHandleCodePage(U16 eachchar,U16 lid)678 wvHandleCodePage (U16 eachchar, U16 lid)
679 {
680     char f_code[33];		/* From CCSID                           */
681     char t_code[33];		/* To CCSID                             */
682     const char *codepage;
683     GIConv g_iconv_handle;	/* Conversion Descriptor returned       */
684     /* from g_iconv_open() function           */
685     size_t ibuflen;		/* Length of input buffer               */
686     size_t obuflen;		/* Length of output buffer              */
687 
688     gchar *ibuf;
689     gchar *obuf;			/* Buffer for converted characters      */
690     U8 *p;
691     U8 buffer[2];
692     U8 buffer2[2];
693 
694     U16 rtn;
695 
696     if (wvIsCP1252 (lid) && eachchar <= 0xFF)
697       {
698 	return cp1252_to_ucs2_table[eachchar];
699       }
700 
701     if (eachchar > 0xff)
702       {
703 	  buffer[0] = (char) (eachchar >> 8);
704 	  buffer[1] = (char) eachchar & 0xff;
705       }
706     else
707       {
708 	  buffer[0] = eachchar & 0xff;
709 	  buffer[1] = 0;
710       }
711 
712     ibuf = buffer;
713     obuf = buffer2;
714 
715     codepage = wvLIDToCodePageConverter (lid);
716 
717     /* All reserved positions of from code (last 12 characters) and to code   */
718     /* (last 19 characters) must be set to hexadecimal zeros.                 */
719 
720     memset (f_code, '\0', 33);
721     memset (t_code, '\0', 33);
722 
723     strcpy (f_code, codepage);
724     strcpy (t_code, "UCS-2");
725 
726     g_iconv_handle = g_iconv_open (t_code, f_code);
727     if (g_iconv_handle == (GIConv) - 1)
728       {
729 	  wvError (
730 		   ("g_iconv_open fail: %d, cannot convert %s to unicode\n",
731 		    errno, codepage));
732 	  return ('?');
733       }
734 
735     ibuflen = obuflen = 2;
736     p = obuf;
737 
738     g_iconv (g_iconv_handle, &ibuf, &ibuflen, &obuf, &obuflen);
739 
740     /* We might have double byte char here. */
741 
742     if (swap_iconv (lid))
743       {
744 	  rtn = (U16) buffer2[0] << 8;
745 	  rtn |= (U16) buffer2[1];
746       }
747     else
748       {
749 	  rtn = *(U16 *) buffer2;
750       }
751 
752     g_iconv_close (g_iconv_handle);
753 
754     return (rtn);
755 }
756 
757 void
wvOutputFromUnicode(U16 eachchar,char * outputtype)758 wvOutputFromUnicode (U16 eachchar, char *outputtype)
759 {
760     static char cached_outputtype[33];	/* Last outputtype                  */
761     static GIConv g_iconv_handle = (GIConv)-1;	/* Cached iconv descriptor          */
762     static int need_swapping;
763     gchar *ibuf, *obuf;
764     size_t ibuflen, obuflen, len, count, i;
765     U8 buffer[2], buffer2[5];
766 
767     if ((wvConvertUnicodeToEntity != NULL)
768 	&& wvConvertUnicodeToEntity (eachchar))
769 	return;
770 
771     if ((g_iconv_handle == (GIConv)-1) || strcmp (cached_outputtype, outputtype) != 0)
772       {
773 	  if ((g_iconv_handle != (GIConv)-1))
774 	      g_iconv_close (g_iconv_handle);
775 
776 	  g_iconv_handle = g_iconv_open (outputtype, "UCS-2");
777 	  if (g_iconv_handle == (GIConv) - 1)
778 	    {
779 		wvError (
780 			 ("g_iconv_open fail: %d, cannot convert %s to %s\n",
781 			  errno, "UCS-2", outputtype));
782 		printf ("?");
783 		return;
784 	    }
785 
786 	  /* safe to cache the output type here */
787 	  strcpy (cached_outputtype, outputtype);
788 
789 	  /* Determining if unicode biteorder is swapped (glibc < 2.2) */
790 	  need_swapping = 1;
791 
792 	  buffer[0] = 0x20;
793 	  buffer[1] = 0;
794 	  ibuf = buffer;
795 	  obuf = buffer2;
796 	  ibuflen = 2;
797 	  obuflen = 5;
798 
799 	  count = g_iconv (g_iconv_handle, &ibuf, &ibuflen, &obuf, &obuflen);
800 	  if (count >= 0)
801 	      need_swapping = buffer2[0] != 0x20;
802       }
803 
804     if (need_swapping)
805       {
806 	  buffer[0] = (eachchar >> 8) & 0x00ff;
807 	  buffer[1] = eachchar & 0x00ff;
808       }
809     else
810       {
811 	  buffer[0] = eachchar & 0x00ff;
812 	  buffer[1] = (eachchar >> 8) & 0x00ff;
813       }
814 
815     ibuf = buffer;
816     obuf = buffer2;
817 
818     ibuflen = 2;
819     len = obuflen = 5;
820 
821     count = g_iconv (g_iconv_handle, &ibuf, &ibuflen, &obuf, &obuflen);
822     if (count == (size_t) - 1)
823       {
824 	  wvError (("iconv failed errno: %d, char: 0x%X, %s -> %s\n",
825 		    errno, eachchar, "UCS-2", outputtype));
826 
827 	  /* I'm torn here - do i just announce the failure, continue, or copy over to the other buffer? */
828 
829 	  /* errno is usually 84 (illegal byte sequence)
830 	     should i reverse the bytes and try again? */
831 	  printf ("%c", ibuf[1]);
832       }
833     else
834       {
835 	  len = len - obuflen;
836 
837 	  for (i = 0; i < len; i++)
838 	      printf ("%c", buffer2[i]);
839       }
840 }
841 
842 int
wvHandleElement(wvParseStruct * ps,wvTag tag,void * props,int dirty)843 wvHandleElement (wvParseStruct * ps, wvTag tag, void *props, int dirty)
844 {
845     if (ps->elehandler)
846 	return ((*(ps->elehandler)) (ps, tag, props, dirty));
847     wvError (("No element handler registered!!\n"));
848     return (0);
849 }
850 
851 int
wvHandleDocument(wvParseStruct * ps,wvTag tag)852 wvHandleDocument (wvParseStruct * ps, wvTag tag)
853 {
854     if (ps->dochandler)
855 	return ((*(ps->dochandler)) (ps, tag));
856     wvError (("No dochandler!!\n"));
857     return (0);
858 }
859 
860 void
wvSetCharHandler(wvParseStruct * ps,int (* proc)(wvParseStruct *,U16,U8,U16))861 wvSetCharHandler (wvParseStruct * ps,
862 		  int (*proc) (wvParseStruct *, U16, U8, U16))
863 {
864     ps->charhandler = proc;
865 }
866 
867 void
wvSetSpecialCharHandler(wvParseStruct * ps,int (* proc)(wvParseStruct *,U16,CHP *))868 wvSetSpecialCharHandler (wvParseStruct * ps,
869 			 int (*proc) (wvParseStruct *, U16, CHP *))
870 {
871     ps->scharhandler = proc;
872 }
873 
874 void
wvSetElementHandler(wvParseStruct * ps,int (* proc)(wvParseStruct *,wvTag,void *,int))875 wvSetElementHandler (wvParseStruct * ps,
876 		     int (*proc) (wvParseStruct *, wvTag, void *, int))
877 {
878     ps->elehandler = proc;
879 }
880 
881 void
wvSetDocumentHandler(wvParseStruct * ps,int (* proc)(wvParseStruct *,wvTag))882 wvSetDocumentHandler (wvParseStruct * ps,
883 		      int (*proc) (wvParseStruct *, wvTag))
884 {
885     ps->dochandler = proc;
886 }
887 
888 int
wvConvertUnicodeToLaTeX(U16 char16)889 wvConvertUnicodeToLaTeX (U16 char16)
890 {
891     /*
892        german and scandinavian characters, MV 1.7.2000
893        See man iso_8859_1
894 
895        This requires the inputencoding latin1 package,
896        see latin1.def. Chars in range 160...255 are just
897        put through as these are legal iso-8859-1 symbols.
898        (see above)
899 
900        Best way to do it until LaTeX is Unicode enabled
901        (Omega project).
902        -- MV 4.7.2000
903 
904        We use a separate if-statement here ... the 'case range'
905        construct is gcc specific :-(  -- MV 13/07/2000
906      */
907 
908     if ((char16 >= 0xa0) && (char16 <= 0xff))
909       {
910 	  switch (char16)
911 	    {
912 	    case 0xa0:
913 		printf ("\\ ");	/* hard space */
914 		return (1);
915 
916 		/* Fix up these as math characters: */
917 	    case 0xb1:
918 		printf ("$\\pm$");
919 		return (1);
920 	    case 0xb2:
921 		printf ("$\\mathtwosuperior$");
922 		return (1);
923 	    case 0xb3:
924 		printf ("$\\maththreesuperior$");
925 		return (1);
926 	    case 0xb5:
927 		printf ("$\\mu$");
928 		return (1);
929 	    case 0xb9:
930 		printf ("$\\mathonesuperior$");
931 		return (1);
932 	    case 0xd7:
933 		printf ("$\\times$");
934 		return (1);
935 	    }
936 	  printf ("%c", char16);
937 	  return (1);
938       }
939     switch (char16)
940       {
941       case 37:
942 	  printf ("\\%%");
943 	  return (1);
944       case 10:
945       case 11:
946 	  printf ("\\\\\n");
947 	  return (1);
948       case 31:			/* non-required hyphen */
949 	  printf ("\\-");
950 	  return (1);
951       case 30:			/* non-breaking hyphen */
952 	  printf ("-");
953 	  return (1);
954 
955 	  /* case 45: minus/hyphen, pass through */
956 
957       case 12:
958 	  printf("\\newpage\n");
959 	  return (1);
960       case 13:
961       case 14:
962       case 7:
963 	  return (1);
964       case 9:
965 	  printf ("\\hfill{}");	/* tab -- horrible cludge */
966 	  return (1);
967       case 0xf020:
968 	  printf (" ");		/* Mac specialty ? MV 10.10.2000 */
969 	  return (1);
970       case 0xf02c:
971 	  printf (",");		/* Mac */
972 	  return (1);
973       case 0xf028:
974 	  printf ("(");		/* Mac */
975 	  return (1);
976 
977       case 34:
978 	  printf ("\"");
979 	  return (1);
980       case 35:
981 	  printf ("\\#");	/* MV 14.8.2000 */
982 	  return (1);
983       case 36:
984 	  printf ("\\$");	/* MV 14.8.2000 */
985 	  return (1);
986       case 38:
987 	  printf ("\\&");	/* MV 1.7.2000 */
988 	  return (1);
989       case 92:
990 	  printf ("$\\backslash$");	/* MV 23.9.2000 */
991 	  return (1);
992       case 94:
993 	  printf ("\\^");	/* MV 13.9.2000 */
994 	  return (1);
995       case 95:
996 	  printf ("\\_");	/* MV 13.9.2000 */
997 	  return (1);
998       case 60:
999 	  printf ("<");
1000 	  return (1);
1001       case 0xf03e:		/* Mac */
1002       case 62:
1003 	  printf (">");
1004 	  return (1);
1005 
1006       case 0xF8E7:
1007 	  /* without this, things should work in theory, but not for me */
1008 	  printf ("_");
1009 	  return (1);
1010 
1011 	  /* Added some new Unicode characters. It's probably difficult
1012 	     to write these characters in AbiWord, though ... :(
1013 	     -- 2000-08-11 huftis@bigfoot.com */
1014 
1015       case 0x0100:
1016 	  printf ("\\=A");	/* A with macron */
1017 	  return (1);
1018       case 0x0101:
1019 	  printf ("\\=a");	/* a with macron */
1020 	  return (1);
1021       case 0x0102:
1022 	  printf ("\\u{A}");	/* A with breve */
1023 	  return (1);
1024       case 0x0103:
1025 	  printf ("\\u{a}");	/* a with breve */
1026 	  return (1);
1027       case 0x0104:
1028 	  printf ("\\k{A}");	/* A with ogonek */
1029 	  return (1);
1030       case 0x0105:
1031 	  printf ("\\k{a}");	/* a with ogonek */
1032 	  return (1);
1033       case 0x0106:
1034 	  printf ("\\'C");	/* C with acute */
1035 	  return (1);
1036       case 0x0107:
1037 	  printf ("\\'c");	/* c with acute */
1038 	  return (1);
1039       case 0x0108:
1040 	  printf ("\\^C");	/* C with circumflex */
1041 	  return (1);
1042       case 0x0109:
1043 	  printf ("\\^c");	/* c with circumflex */
1044 	  return (1);
1045       case 0x010A:
1046 	  printf ("\\.C");	/* C with dot above */
1047 	  return (1);
1048       case 0x010B:
1049 	  printf ("\\.c");	/* c with dot above */
1050 	  return (1);
1051       case 0x010C:
1052 	  printf ("\\v{C}");	/* C with caron */
1053 	  return (1);
1054       case 0x010D:
1055 	  printf ("\\v{c}");	/* c with caron */
1056 	  return (1);
1057       case 0x010E:
1058 	  printf ("\\v{D}");	/* D with caron */
1059 	  return (1);
1060       case 0x010F:
1061 	  printf ("\\v{d}");	/* d with caron */
1062 	  return (1);
1063       case 0x0110:
1064 	  printf ("\\DJ{}");	/* D with stroke */
1065 	  return (1);
1066       case 0x0111:
1067 	  printf ("\\dj{}");	/* d with stroke */
1068 	  return (1);
1069       case 0x0112:
1070 	  printf ("\\=E");	/* E with macron */
1071 	  return (1);
1072       case 0x0113:
1073 	  printf ("\\=e");	/* e with macron */
1074 	  return (1);
1075       case 0x0114:
1076 	  printf ("\\u{E}");	/* E with breve */
1077 	  return (1);
1078       case 0x0115:
1079 	  printf ("\\u{e}");	/* e with breve */
1080 	  return (1);
1081       case 0x0116:
1082 	  printf ("\\.E");	/* E with dot above */
1083 	  return (1);
1084       case 0x0117:
1085 	  printf ("\\.e");	/* e with dot above */
1086 	  return (1);
1087       case 0x0118:
1088 	  printf ("\\k{E}");	/* E with ogonek */
1089 	  return (1);
1090       case 0x0119:
1091 	  printf ("\\k{e}");	/* e with ogonek */
1092 	  return (1);
1093       case 0x011A:
1094 	  printf ("\\v{E}");	/* E with caron */
1095 	  return (1);
1096       case 0x011B:
1097 	  printf ("\\v{e}");	/* e with caron */
1098 	  return (1);
1099       case 0x011C:
1100 	  printf ("\\^G");	/* G with circumflex */
1101 	  return (1);
1102       case 0x011D:
1103 	  printf ("\\^g");	/* g with circumflex */
1104 	  return (1);
1105       case 0x011E:
1106 	  printf ("\\u{G}");	/* G with breve */
1107 	  return (1);
1108       case 0x011F:
1109 	  printf ("\\u{g}");	/* g with breve */
1110 	  return (1);
1111       case 0x0120:
1112 	  printf ("\\.G");	/* G with dot above */
1113 	  return (1);
1114       case 0x0121:
1115 	  printf ("\\u{g}");	/* g with dot above */
1116 	  return (1);
1117       case 0x0122:
1118 	  printf ("^H");	/* H with circumflex */
1119 	  return (1);
1120       case 0x0123:
1121 	  printf ("^h");	/* h with circumflex */
1122 	  return (1);
1123 
1124       case 0x0128:
1125 	  printf ("\\~I");	/* I with tilde */
1126 	  return (1);
1127       case 0x0129:
1128 	  printf ("\\~{\\i}");	/* i with tilde (dotless) */
1129 	  return (1);
1130       case 0x012A:
1131 	  printf ("\\=I");	/* I with macron */
1132 	  return (1);
1133       case 0x012B:
1134 	  printf ("\\={\\i}");	/* i with macron (dotless) */
1135 	  return (1);
1136       case 0x012C:
1137 	  printf ("\\u{I}");	/* I with breve */
1138 	  return (1);
1139       case 0x012D:
1140 	  printf ("\\u{\\i}");	/* i with breve */
1141 	  return (1);
1142 
1143       case 0x0130:
1144 	  printf ("\\.I");	/* I with dot above */
1145 	  return (1);
1146       case 0x0131:
1147 	  printf ("\\i{}");	/* dotless i */
1148 	  return (1);
1149       case 0x0132:
1150 	  printf ("IJ");	/* IJ ligature */
1151 	  return (1);
1152       case 0x0133:
1153 	  printf ("ij");	/* ij ligature  */
1154 	  return (1);
1155       case 0x0134:
1156 	  printf ("\\^J");	/* J with circumflex (dotless) */
1157 	  return (1);
1158       case 0x0135:
1159 	  printf ("\\^{\\j}");	/* j with circumflex (dotless) */
1160 	  return (1);
1161       case 0x0136:
1162 	  printf ("\\c{K}");	/* K with cedilla */
1163 	  return (1);
1164       case 0x0137:
1165 	  printf ("\\c{k}");	/* k with cedilla */
1166 	  return (1);
1167 
1168       case 0x0138:
1169 	  printf ("k");		/* NOTE: Not the correct character (kra), but similar */
1170 	  return (1);
1171 
1172       case 0x0139:
1173 	  printf ("\\'L");	/* L with acute */
1174 	  return (1);
1175       case 0x013A:
1176 	  printf ("\\'l");	/* l with acute  */
1177 	  return (1);
1178       case 0x013B:
1179 	  printf ("\\c{L}");	/* L with cedilla */
1180 	  return (1);
1181       case 0x013C:
1182 	  printf ("\\c{l}");	/* l with cedilla */
1183 	  return (1);
1184       case 0x013D:
1185 	  printf ("\\v{L}");	/* L with caron */
1186 	  return (1);
1187       case 0x013E:
1188 	  printf ("\\v{l}");	/* l with caron */
1189 	  return (1);
1190 
1191       case 0x0141:
1192 	  printf ("\\L{}");	/* L with stroke */
1193 	  return (1);
1194       case 0x0142:
1195 	  printf ("\\l{}");	/* l with stroke  */
1196 	  return (1);
1197       case 0x0143:
1198 	  printf ("\\'N");	/* N with acute */
1199 	  return (1);
1200       case 0x0144:
1201 	  printf ("\\'n");	/* n with acute */
1202 	  return (1);
1203       case 0x0145:
1204 	  printf ("\\c{N}");	/* N with cedilla */
1205 	  return (1);
1206       case 0x0146:
1207 	  printf ("\\c{n}");	/* n with cedilla */
1208 	  return (1);
1209       case 0x0147:
1210 	  printf ("\\v{N}");	/* N with caron */
1211 	  return (1);
1212       case 0x0148:
1213 	  printf ("\\v{n}");	/* n with caron */
1214 	  return (1);
1215       case 0x0149:
1216 	  printf ("'n");	/* n preceed with apostroph  */
1217 	  return (1);
1218       case 0x014A:
1219 	  printf ("\\NG{}");	/* ENG character */
1220 	  return (1);
1221       case 0x014B:
1222 	  printf ("\\ng{}");	/* eng character */
1223 	  return (1);
1224       case 0x014C:
1225 	  printf ("\\=O");	/* O with macron */
1226 	  return (1);
1227       case 0x014D:
1228 	  printf ("\\=o");	/* o with macron */
1229 	  return (1);
1230       case 0x014E:
1231 	  printf ("\\u{O}");	/* O with breve */
1232 	  return (1);
1233       case 0x014F:
1234 	  printf ("\\u{o}");	/* o with breve */
1235 	  return (1);
1236       case 0x0150:
1237 	  printf ("\\H{O}");	/* O with double acute */
1238 	  return (1);
1239       case 0x0151:
1240 	  printf ("\\H{o}");	/* o with double acute */
1241 	  return (1);
1242       case 0x0152:
1243 	  printf ("\\OE{}");	/* OE ligature */
1244 	  return (1);
1245       case 0x0153:
1246 	  printf ("\\oe{}");	/* oe ligature */
1247 	  return (1);
1248       case 0x0154:
1249 	  printf ("\\'R");	/* R with acute */
1250 	  return (1);
1251       case 0x0155:
1252 	  printf ("\\'r");	/* r with acute */
1253 	  return (1);
1254       case 0x0156:
1255 	  printf ("\\c{R}");	/* R with cedilla */
1256 	  return (1);
1257       case 0x0157:
1258 	  printf ("\\c{r}");	/* r with cedilla */
1259 	  return (1);
1260       case 0x0158:
1261 	  printf ("\\v{R}");	/* R with caron */
1262 	  return (1);
1263       case 0x0159:
1264 	  printf ("\\v{r}");	/* r with caron */
1265 	  return (1);
1266       case 0x015A:
1267 	  printf ("\\'S");	/* S with acute */
1268 	  return (1);
1269       case 0x015B:
1270 	  printf ("\\'s");	/* s with acute */
1271 	  return (1);
1272       case 0x015C:
1273 	  printf ("\\^S");	/* S with circumflex */
1274 	  return (1);
1275       case 0x015D:
1276 	  printf ("\\^s");	/* c with circumflex */
1277 	  return (1);
1278       case 0x015E:
1279 	  printf ("\\c{S}");	/* S with cedilla */
1280 	  return (1);
1281       case 0x015F:
1282 	  printf ("\\c{s}");	/* s with cedilla */
1283 	  return (1);
1284       case 0x0160:
1285 	  printf ("\\v{S}");	/* S with caron */
1286 	  return (1);
1287       case 0x0161:
1288 	  printf ("\\v{s}");	/* s with caron */
1289 	  return (1);
1290       case 0x0162:
1291 	  printf ("\\c{T}");	/* T with cedilla */
1292 	  return (1);
1293       case 0x0163:
1294 	  printf ("\\c{t}");	/* t with cedilla */
1295 	  return (1);
1296       case 0x0164:
1297 	  printf ("\\v{T}");	/* T with caron */
1298 	  return (1);
1299       case 0x0165:
1300 	  printf ("\\v{t}");	/* t with caron */
1301 	  return (1);
1302 
1303       case 0x0168:
1304 	  printf ("\\~U");	/* U with tilde */
1305 	  return (1);
1306       case 0x0169:
1307 	  printf ("\\~u");	/* u with tilde */
1308 	  return (1);
1309       case 0x016A:
1310 	  printf ("\\=U");	/* U with macron */
1311 	  return (1);
1312 
1313 	  /* Greek (thanks Petr Vanicek!): */
1314       case 0x0391:
1315 	  printf ("$A$");
1316 	  return (1);
1317       case 0x0392:
1318 	  printf ("$B$");
1319 	  return (1);
1320       case 0x0393:
1321 	  printf ("$\\Gamma$");
1322 	  return (1);
1323       case 0xf044:		/* Mac ? */
1324       case 0x2206:		/* Mac */
1325       case 0x0394:
1326 	  printf ("$\\Delta$");
1327 	  return (1);
1328       case 0x0395:
1329 	  printf ("$E$");
1330 	  return (1);
1331       case 0x0396:
1332 	  printf ("$Z$");
1333 	  return (1);
1334       case 0x0397:
1335 	  printf ("$H$");
1336 	  return (1);
1337       case 0x0398:
1338 	  printf ("$\\Theta$");
1339 	  return (1);
1340       case 0x0399:
1341 	  printf ("$I$");
1342 	  return (1);
1343       case 0x039a:
1344 	  printf ("$K$");
1345 	  return (1);
1346       case 0x039b:
1347 	  printf ("$\\Lambda$");
1348 	  return (1);
1349       case 0xf04d:		/* Mac? */
1350       case 0x039c:
1351 	  printf ("$M$");
1352 	  return (1);
1353       case 0x039d:
1354 	  printf ("$N$");
1355 	  return (1);
1356       case 0x039e:
1357 	  printf ("$\\Xi$");
1358 	  return (1);
1359       case 0x039f:
1360 	  printf ("$O$");	/* Omicron */
1361 	  return (1);
1362       case 0x03a0:
1363 	  printf ("$\\Pi$");
1364 	  return (1);
1365       case 0x03a1:
1366 	  printf ("$R$");
1367 	  return (1);
1368 
1369       case 0x03a3:
1370 	  printf ("$\\Sigma$");
1371 	  return (1);
1372       case 0x03a4:
1373 	  printf ("$T$");
1374 	  return (1);
1375       case 0x03a5:
1376 	  printf ("$Y$");
1377 	  return (1);
1378       case 0x03a6:
1379 	  printf ("$\\Phi$");
1380 	  return (1);
1381       case 0x03a7:
1382 	  printf ("$X$");	/* Chi */
1383 	  return (1);
1384       case 0x03a8:
1385 	  printf ("$\\Psi$");
1386 	  return (1);
1387       case 0x2126:		/* Mac */
1388       case 0x03a9:
1389 	  printf ("$\\Omega$");
1390 	  return (1);
1391 
1392 	  /* ...and lower case: */
1393 
1394       case 0x03b1:
1395 	  printf ("$\\alpha$");
1396 	  return (1);
1397       case 0x03b2:
1398 	  printf ("$\\beta$");
1399 	  return (1);
1400       case 0xf067:		/* Mac */
1401       case 0x03b3:
1402 	  printf ("$\\gamma$");
1403 	  return (1);
1404       case 0xf064:		/* Mac */
1405       case 0x03b4:
1406 	  printf ("$\\delta$");
1407 	  return (1);
1408       case 0x03b5:
1409 	  printf ("$\\epsilon$");
1410 	  return (1);
1411       case 0xf04e:		/* Mac? variant? */
1412       case 0xf07a:		/* Mac? */
1413       case 0x03b6:
1414 	  printf ("$\\zeta$");
1415 	  return (1);
1416       case 0x03b7:
1417 	  printf ("$\\eta$");
1418 	  return (1);
1419       case 0x03b8:
1420 	  printf ("$\\theta$");
1421 	  return (1);
1422       case 0x03b9:
1423 	  printf ("$\\iota$");
1424 	  return (1);
1425       case 0x03ba:
1426 	  printf ("$\\kappa$");
1427 	  return (1);
1428       case 0xf06c:		/* Mac? */
1429       case 0x03bb:
1430 	  printf ("$\\lambda$");
1431 	  return (1);
1432       case 0x03bc:
1433 	  printf ("$\\mu$");
1434 	  return (1);
1435       case 0x03bd:
1436 	  printf ("$\\nu$");
1437 	  return (1);
1438       case 0x03be:
1439 	  printf ("$\\xi$");
1440 	  return (1);
1441       case 0x03bf:
1442 	  printf ("$o$");	/* omicron */
1443 	  return (1);
1444       case 0x03c0:
1445 	  printf ("$\\pi$");
1446 	  return (1);
1447       case 0xf072:		/* Mac */
1448 	  printf ("$\\varrho$");
1449 	  return (1);
1450       case 0x03c1:
1451 	  printf ("$\\rho$");
1452 	  return (1);
1453       case 0xf073:		/* Mac */
1454       case 0x03c3:
1455 	  printf ("$\\sigma$");
1456 	  return (1);
1457       case 0x03c4:
1458 	  printf ("$\\tau$");
1459 	  return (1);
1460       case 0x03c5:
1461 	  printf ("$\\upsilon$");
1462 	  return (1);
1463       case 0x03c6:
1464 	  printf ("$\\phi$");
1465 	  return (1);
1466       case 0x03c7:
1467 	  printf ("$\\chi$");
1468 	  return (1);
1469       case 0x03c8:
1470 	  printf ("$\\psi$");
1471 	  return (1);
1472       case 0x03c9:
1473 	  printf ("$\\omega$");
1474 	  return (1);
1475       case 0xf06a:		/* Mac? */
1476       case 0x03d5:
1477 	  printf ("$\\varphi$");	/* ? */
1478 	  return (1);
1479 
1480 	  /* More math, typical inline: */
1481       case 0x2111:
1482 	  printf ("$\\Im$");
1483 	  return (1);
1484       case 0x2118:
1485 	  printf ("$\\wp$");	/* Weierstrass p */
1486 	  return (1);
1487       case 0x211c:
1488 	  printf ("$\\Re$");
1489 	  return (1);
1490       case 0x2135:
1491 	  printf ("$\\aleph$");
1492 	  return (1);
1493 
1494       case 0x2190:
1495 	  printf ("$\\leftarrow$");
1496 	  return (1);
1497       case 0x2191:
1498 	  printf ("$\\uparrow$");
1499 	  return (1);
1500       case 0xf0ae:		/* Mac */
1501       case 0x2192:
1502 	  printf ("$\\rightarrow$");
1503 	  return (1);
1504       case 0x2193:
1505 	  printf ("$\\downarrow$");
1506 	  return (1);
1507       case 0x21d0:
1508 	  printf ("$\\Leftarrow$");
1509 	  return (1);
1510       case 0x21d1:
1511 	  printf ("$\\Uparrow$");
1512 	  return (1);
1513       case 0x21d2:
1514 	  printf ("$\\Rightarrow$");
1515 	  return (1);
1516       case 0x21d3:
1517 	  printf ("$\\Downarrow$");
1518 	  return (1);
1519       case 0x21d4:
1520 	  printf ("$\\Leftrightarrow$");
1521 	  return (1);
1522 
1523       case 0x2200:
1524 	  printf ("$\\forall$");
1525 	  return (1);
1526       case 0xf0b6:		/* Mac */
1527       case 0x2202:
1528 	  printf ("$\\partial$");
1529 	  return (1);
1530       case 0x2203:
1531 	  printf ("$\\exists$");
1532 	  return (1);
1533       case 0x2205:
1534 	  printf ("$\\emptyset$");
1535 	  return (1);
1536       case 0x2207:
1537 	  printf ("$\\nabla$");
1538 	  return (1);
1539       case 0x2208:
1540 	  printf ("$\\in$");	/* element of */
1541 	  return (1);
1542       case 0x2209:
1543 	  printf ("$\\notin$");	/* not an element of */
1544 	  return (1);
1545       case 0x220b:
1546 	  printf ("$\\ni$");	/* contains as member */
1547 	  return (1);
1548       case 0x221a:
1549 	  printf ("$\\surd$");	/* sq root */
1550 	  return (1);
1551       case 0x2212:
1552 	  printf ("$-$");	/* minus */
1553 	  return (1);
1554       case 0x221d:
1555 	  printf ("$\\propto$");
1556 	  return (1);
1557       case 0x221e:
1558 	  printf ("$\\infty$");
1559 	  return (1);
1560       case 0x2220:
1561 	  printf ("$\\angle$");
1562 	  return (1);
1563       case 0x2227:
1564 	  printf ("$\\land$");	/* logical and */
1565 	  return (1);
1566       case 0x2228:
1567 	  printf ("$\\lor$");	/* logical or */
1568 	  return (1);
1569       case 0x2229:
1570 	  printf ("$\\cap$");	/* intersection */
1571 	  return (1);
1572       case 0x222a:
1573 	  printf ("$\\cup$");	/* union */
1574 	  return (1);
1575       case 0x223c:
1576 	  printf ("$\\sim$");	/* similar to  */
1577 	  return (1);
1578       case 0x2248:
1579 	  printf ("$\\approx$");
1580 	  return (1);
1581       case 0x2261:
1582 	  printf ("$\\equiv$");
1583 	  return (1);
1584       case 0x2260:
1585 	  printf ("$\\neq$");
1586 	  return (1);
1587       case 0x2264:
1588 	  printf ("$\\leq$");
1589 	  return (1);
1590       case 0xf0b3:		/* Mac? */
1591       case 0x2265:
1592 	  printf ("$\\geq$");
1593 	  return (1);
1594       case 0x2282:
1595 	  printf ("$\\subset$");
1596 	  return (1);
1597       case 0x2283:
1598 	  printf ("$\\supset$");
1599 	  return (1);
1600       case 0x2284:
1601 	  printf ("$\\notsubset$");
1602 	  return (1);
1603       case 0x2286:
1604 	  printf ("$\\subseteq$");
1605 	  return (1);
1606       case 0x2287:
1607 	  printf ("$\\supseteq$");
1608 	  return (1);
1609       case 0x2295:
1610 	  printf ("$\\oplus$");	/* circled plus */
1611 	  return (1);
1612       case 0x2297:
1613 	  printf ("$\\otimes$");
1614 	  return (1);
1615       case 0x22a5:
1616 	  printf ("$\\perp$");	/* perpendicular */
1617 	  return (1);
1618 
1619 
1620 
1621 
1622       case 0x2660:
1623 	  printf ("$\\spadesuit$");
1624 	  return (1);
1625       case 0x2663:
1626 	  printf ("$\\clubsuit$");
1627 	  return (1);
1628       case 0x2665:
1629 	  printf ("$\\heartsuit$");
1630 	  return (1);
1631       case 0x2666:
1632 	  printf ("$\\diamondsuit$");
1633 	  return (1);
1634 
1635 
1636       case 0x01C7:
1637 	  printf ("LJ");	/* the LJ letter */
1638 	  return (1);
1639       case 0x01C8:
1640 	  printf ("Lj");	/* the Lj letter */
1641 	  return (1);
1642       case 0x01C9:
1643 	  printf ("lj");	/* the lj letter */
1644 	  return (1);
1645       case 0x01CA:
1646 	  printf ("NJ");	/* the NJ letter */
1647 	  return (1);
1648       case 0x01CB:
1649 	  printf ("Nj");	/* the Nj letter */
1650 	  return (1);
1651       case 0x01CC:
1652 	  printf ("nj");	/* the nj letter */
1653 	  return (1);
1654       case 0x01CD:
1655 	  printf ("\\v{A}");	/* A with caron */
1656 	  return (1);
1657       case 0x01CE:
1658 	  printf ("\\v{a}");	/* a with caron */
1659 	  return (1);
1660       case 0x01CF:
1661 	  printf ("\\v{I}");	/* I with caron */
1662 	  return (1);
1663       case 0x01D0:
1664 	  printf ("\\v{\\i}");	/* i with caron (dotless) */
1665 	  return (1);
1666       case 0x01D1:
1667 	  printf ("\\v{O}");	/* O with caron */
1668 	  return (1);
1669       case 0x01D2:
1670 	  printf ("\\v{o}");	/* o with caron */
1671 	  return (1);
1672       case 0x01D3:
1673 	  printf ("\\v{U}");	/* U with caron */
1674 	  return (1);
1675       case 0x01D4:
1676 	  printf ("\\v{u}");	/* u with caron */
1677 	  return (1);
1678 
1679       case 0x01E6:
1680 	  printf ("\\v{G}");	/* G with caron */
1681 	  return (1);
1682       case 0x01E7:
1683 	  printf ("\\v{g}");	/* g with caron */
1684 	  return (1);
1685       case 0x01E8:
1686 	  printf ("\\v{K}");	/* K with caron */
1687 	  return (1);
1688       case 0x01E9:
1689 	  printf ("\\v{k}");	/* k with caron */
1690 	  return (1);
1691 
1692 
1693       case 0x01F0:
1694 	  printf ("\\v{\\j}");	/* j with caron (dotless) */
1695 	  return (1);
1696       case 0x01F1:
1697 	  printf ("DZ");	/* the DZ letter */
1698 	  return (1);
1699       case 0x01F2:
1700 	  printf ("Dz");	/* the Dz letter */
1701 	  return (1);
1702       case 0x01F3:
1703 	  printf ("dz");	/* the dz letter */
1704 	  return (1);
1705       case 0x01F4:
1706 	  printf ("\\'G");	/* G with acute */
1707 	  return (1);
1708       case 0x01F5:
1709 	  printf ("\\'g");	/* g with acute */
1710 	  return (1);
1711 
1712       case 0x01FA:
1713 	  printf ("\\'{\\AA}");	/* � with acute */
1714 	  return (1);
1715       case 0x01FB:
1716 	  printf ("\\'{\\aa}");	/* � with acute */
1717 	  return (1);
1718       case 0x01FC:
1719 	  printf ("\\'{\\AE}");	/* � with acute */
1720 	  return (1);
1721       case 0x01FD:
1722 	  printf ("\\'{\\ae}");	/* � with acute */
1723 	  return (1);
1724       case 0x01FE:
1725 	  printf ("\\'{\\O}");	/* � with acute */
1726 	  return (1);
1727       case 0x01FF:
1728 	  printf ("\\'{\\o}");	/* � with acute */
1729 	  return (1);
1730 
1731       case 0x2010:
1732 	  printf ("-");		/* hyphen */
1733 	  return (1);
1734       case 0x2011:
1735 	  printf ("-");		/* non-breaking hyphen (is there a way to get this in LaTeX?) */
1736 	  return (1);
1737       case 0x2012:
1738 	  printf ("--");	/* figure dash (similar to en-dash) */
1739 	  return (1);
1740       case 0x2013:
1741 	  /*
1742 	     soft-hyphen? Or en-dash? I find that making
1743 	     this a soft-hyphen works very well, but makes
1744 	     the occasional "hard" word-connection hyphen
1745 	     (like the "-" in roller-coaster) disappear.
1746 	     (Are these actually en-dashes? Dunno.)
1747 	     How does MS Word distinguish between the 0x2013's
1748 	     that signify soft hyphens and those that signify
1749 	     word-connection hyphens? wvware should be able
1750 	     to as well. -- MV 8.7.2000
1751 
1752 	     U+2013 is the en-dash character and not a soft
1753 	     hyphen. Soft hyphen is U+00AD. Changing to
1754 	     "--". -- 2000-08-11 huftis@bigfoot.com
1755 	   */
1756 	  printf ("--");
1757 	  return (1);
1758 
1759       case 0x016B:
1760 	  printf ("\\=u");	/* u with macron */
1761 	  return (1);
1762       case 0x016C:
1763 	  printf ("\\u{U}");	/* U with breve */
1764 	  return (1);
1765       case 0x016D:
1766 	  printf ("\\u{u}");	/* u with breve */
1767 	  return (1);
1768       case 0x016E:
1769 	  printf ("\\r{U}");	/* U with ring above */
1770 	  return (1);
1771       case 0x016F:
1772 	  printf ("\\r{u}");	/* u with ring above */
1773 	  return (1);
1774       case 0x0170:
1775 	  printf ("\\H{U}");	/* U with double acute */
1776 	  return (1);
1777       case 0x0171:
1778 	  printf ("\\H{u}");	/* u with double acute */
1779 	  return (1);
1780 
1781       case 0x0174:
1782 	  printf ("\\^W");	/* W with circumflex */
1783 	  return (1);
1784       case 0x0175:
1785 	  printf ("\\^w");	/* w with circumflex */
1786 	  return (1);
1787       case 0x0176:
1788 	  printf ("\\^Y");	/* Y with circumflex */
1789 	  return (1);
1790       case 0x0177:
1791 	  printf ("\\^y");	/* y with circumflex */
1792 	  return (1);
1793       case 0x0178:
1794 	  printf ("\\\"Y");	/* Y with diaeresis */
1795 	  return (1);
1796       case 0x0179:
1797 	  printf ("\\'Z");	/* Z with acute */
1798 	  return (1);
1799       case 0x017A:
1800 	  printf ("\\'z");	/* z with acute */
1801 	  return (1);
1802       case 0x017B:
1803 	  printf ("\\.Z");	/* Z with dot above */
1804 	  return (1);
1805       case 0x017C:
1806 	  printf ("\\.z");	/* z with dot above */
1807 	  return (1);
1808       case 0x017D:
1809 	  printf ("\\v{Z}");	/* Z with caron */
1810 	  return (1);
1811       case 0x017E:
1812 	  printf ("\\v{z}");	/* z with caron */
1813 	  return (1);
1814 
1815 	  /* Windows specials (MV 4.7.2000). More could be added.
1816 	     See http://www.hut.fi/u/jkorpela/www/windows-chars.html
1817 	   */
1818 
1819       case 0x2014:
1820 	  printf ("---");	/* em-dash */
1821 	  return (1);
1822       case 0x2018:
1823 	  printf ("`");		/* left single quote, Win */
1824 	  return (1);
1825       case 0x2019:
1826 	  printf ("'");		/* Right single quote, Win */
1827 	  return (1);
1828       case 0x201A:
1829 	  printf ("\\quotesinglbase{}");	/* single low 99 quotation mark */
1830 	  return (1);
1831       case 0x201C:
1832 	  printf ("``");	/* inverted double quotation mark */
1833 	  return (1);
1834       case 0x201D:
1835 	  printf ("''");	/* double q.m. */
1836 	  return (1);
1837       case 0x201E:
1838 	  printf ("\\quotedblbase{}");	/* double low 99 quotation mark */
1839 	  return (1);
1840       case 0x2020:
1841 	  printf ("\\dag{}");	/* dagger */
1842 	  return (1);
1843       case 0x2021:
1844 	  printf ("\\ddag{}");	/* double dagger */
1845 	  return (1);
1846       case 0x25cf:		/* FilledCircle */
1847       case 0x2022:
1848 	  printf ("$\\bullet$");	/* bullet */
1849 	  return (1);
1850       case 0x2023:
1851 	  printf ("$\\bullet$");	/* NOTE: Not a real triangular bullet */
1852 	  return (1);
1853 
1854       case 0x2024:
1855 	  printf (".");		/* One dot leader (for use in TOCs) */
1856 	  return (1);
1857       case 0x2025:
1858 	  printf ("..");	/* Two dot leader (for use in TOCs) */
1859 	  return (1);
1860       case 0x2026:
1861 	  printf ("\\ldots");	/* ellipsis */
1862 	  return (1);
1863 
1864       case 0x2039:
1865 	  printf ("\\guilsinglleft{}");	/* single left angle quotation mark */
1866 	  return (1);
1867       case 0x203A:
1868 	  printf ("\\guilsinglright{}");	/* single right angle quotation mark */
1869 	  return (1);
1870 
1871       case 0x203C:
1872 	  printf ("!!");	/* double exclamation mark */
1873 	  return (1);
1874 
1875       case 0x2215:
1876 	  printf ("$/$");	/* Division slash */
1877 	  return (1);
1878 
1879       case 0x2030:
1880 	  printf ("o/oo");
1881 	  return (1);
1882 
1883       case 0x20ac:
1884 	  printf ("\\euro");
1885 	  /* No known implementation ;-)
1886 
1887 	     Shouldn't we use the package 'eurofont'?
1888 	     -- 2000-08-15 huftis@bigfoot.com
1889 	   */
1890 	  return (1);
1891 
1892       case 0x2160:
1893 	  printf ("I");		/* Roman numeral I */
1894 	  return (1);
1895       case 0x2161:
1896 	  printf ("II");	/* Roman numeral II */
1897 	  return (1);
1898       case 0x2162:
1899 	  printf ("III");	/* Roman numeral III */
1900 	  return (1);
1901       case 0x2163:
1902 	  printf ("IV");	/* Roman numeral IV */
1903 	  return (1);
1904       case 0x2164:
1905 	  printf ("V");		/* Roman numeral V */
1906 	  return (1);
1907       case 0x2165:
1908 	  printf ("VI");	/* Roman numeral VI */
1909 	  return (1);
1910       case 0x2166:
1911 	  printf ("VII");	/* Roman numeral VII */
1912 	  return (1);
1913       case 0x2167:
1914 	  printf ("VIII");	/* Roman numeral VIII */
1915 	  return (1);
1916       case 0x2168:
1917 	  printf ("IX");	/* Roman numeral IX */
1918 	  return (1);
1919       case 0x2169:
1920 	  printf ("X");		/* Roman numeral X */
1921 	  return (1);
1922       case 0x216A:
1923 	  printf ("XI");	/* Roman numeral XI */
1924 	  return (1);
1925       case 0x216B:
1926 	  printf ("XII");	/* Roman numeral XII */
1927 	  return (1);
1928       case 0x216C:
1929 	  printf ("L");		/* Roman numeral L */
1930 	  return (1);
1931       case 0x216D:
1932 	  printf ("C");		/* Roman numeral C */
1933 	  return (1);
1934       case 0x216E:
1935 	  printf ("D");		/* Roman numeral D */
1936 	  return (1);
1937       case 0x216F:
1938 	  printf ("M");		/* Roman numeral M */
1939 	  return (1);
1940       case 0x2170:
1941 	  printf ("i");		/* Roman numeral i */
1942 	  return (1);
1943       case 0x2171:
1944 	  printf ("ii");	/* Roman numeral ii */
1945 	  return (1);
1946       case 0x2172:
1947 	  printf ("iii");	/* Roman numeral iii */
1948 	  return (1);
1949       case 0x2173:
1950 	  printf ("iv");	/* Roman numeral iv */
1951 	  return (1);
1952       case 0x2174:
1953 	  printf ("v");		/* Roman numeral v */
1954 	  return (1);
1955       case 0x2175:
1956 	  printf ("vi");	/* Roman numeral vi */
1957 	  return (1);
1958       case 0x2176:
1959 	  printf ("vii");	/* Roman numeral vii */
1960 	  return (1);
1961       case 0x2177:
1962 	  printf ("viii");	/* Roman numeral viii */
1963 	  return (1);
1964       case 0x2178:
1965 	  printf ("ix");	/* Roman numeral ix */
1966 	  return (1);
1967       case 0x2179:
1968 	  printf ("x");		/* Roman numeral x */
1969 	  return (1);
1970       case 0x217A:
1971 	  printf ("xi");	/* Roman numeral xi */
1972 	  return (1);
1973       case 0x217B:
1974 	  printf ("xiii");	/* Roman numeral xii */
1975 	  return (1);
1976       case 0x217C:
1977 	  printf ("l");		/* Roman numeral l */
1978 	  return (1);
1979       case 0x217D:
1980 	  printf ("c");		/* Roman numeral c */
1981 	  return (1);
1982       case 0x217E:
1983 	  printf ("d");		/* Roman numeral d */
1984 	  return (1);
1985       case 0x217F:
1986 	  printf ("m");		/* Roman numeral m */
1987 	  return (1);
1988 
1989       }
1990     /* Debugging aid: */
1991     if (char16 >= 0x80)
1992       {
1993 	printf ("[%x]", char16);
1994 	return (1);
1995       }
1996     return (0);
1997 }
1998 
1999 int
wvConvertUnicodeToHtml(U16 char16)2000 wvConvertUnicodeToHtml (U16 char16)
2001 {
2002     switch (char16)
2003       {
2004       case 11:
2005 	  printf ("<br>");
2006 	  return (1);
2007       case 31:        /* non-required hyphen */
2008 	  printf("&shy;"); /*vladimir@lukianov.name HTML 4.01 spec*/
2009 	  return (1);
2010       case 30:
2011       case 45:
2012       case 0x2013:
2013 	  printf ("-");		/* en-dash */
2014 	  return (1);
2015       case 12:
2016       case 13:
2017       case 14:
2018       case 7:
2019 	  return (1);
2020       case 34:
2021 	  printf ("&quot;");
2022 	  return (1);
2023       case 38:
2024 	  printf ("&amp;");
2025 	  return (1);
2026       case 60:
2027 	  printf ("&lt;");
2028 	  return (1);
2029       case 62:
2030 	  printf ("&gt;");
2031 	  return (1);
2032 	  /*
2033 	     german characters, im assured that this is the right way to handle them
2034 	     by Markus Schulte <markus@dom.de>
2035 
2036 	     As the output encoding for HTML was chosen as UTF-8,
2037 	     we don't need &Auml; etc. etc. I removed all but sz
2038 	     -- MV 6.4.2000
2039 	   */
2040 
2041       case 0xdf:
2042 	  printf ("&szlig;");
2043 	  return (1);
2044 	  /* end german characters */
2045       case 0x2026:
2046 #if 0
2047 /*
2048 this just looks awful in netscape 4.5, so im going to do a very foolish
2049 thing and just put ... instead of this
2050 */
2051 	  printf ("&#133;");
2052 /*is there a proper html name for ... &ellipse;? Yes, &hellip; -- MV */
2053 #endif
2054 	  printf ("&hellip;");
2055 	  return (1);
2056       case 0x2019:
2057 	  printf ("'");
2058 	  return (1);
2059       case 0x2215:
2060 	  printf ("/");
2061 	  return (1);
2062       case 0xF8E7:		/* without this, things should work in theory, but not for me */
2063 	  printf ("_");
2064 	  return (1);
2065       case 0x2018:
2066 	  printf ("`");
2067 	  return (1);
2068 
2069 	  /* Windows specials (MV): */
2070       case 0x0160:
2071 	  printf ("&Scaron;");
2072 	  return (1);
2073       case 0x0161:
2074 	  printf ("&scaron;");
2075 	  return (1);
2076       case 0x2014:
2077 	  printf ("&mdash;");
2078 	  return (1);
2079       case 0x201c:
2080 	  printf ("&ldquo;");	/* inverted double quotation mark */
2081 	  return (1);
2082       case 0x201d:
2083 	  printf ("&rdquo;");	/* double q.m. */
2084 	  return (1);
2085       case 0x201e:
2086 	  printf ("&bdquo;");	/* below double q.m. */
2087 	  return (1);
2088       case 0x2020:
2089 	  printf ("&dagger;");
2090 	  return (1);
2091       case 0x2021:
2092 	  printf ("&Dagger;");
2093 	  return (1);
2094       case 0x2022:
2095 	  printf ("&bull;");
2096 	  return (1);
2097       case 0x0152:
2098 	  printf ("&OElig;");
2099 	  return (1);
2100       case 0x0153:
2101 	  printf ("&oelig;");
2102 	  return (1);
2103       case 0x0178:
2104 	  printf ("&Yuml;");
2105 	  return (1);
2106       case 0x2030:
2107 	  printf ("&permil;");
2108 	  return (1);
2109       case 0x20ac:
2110 	  printf ("&euro;");
2111 	  return (1);
2112 
2113 	  /* Mac specials (MV): */
2114 
2115       case 0xf020:
2116 	  printf (" ");
2117 	  return (1);
2118       case 0xf02c:
2119 	  printf (",");
2120 	  return (1);
2121       case 0xf028:
2122 	  printf ("(");
2123 	  return (1);
2124 
2125       case 0xf03e:
2126 	  printf ("&gt;");
2127 	  return (1);
2128       case 0xf067:
2129 	  printf ("&gamma;");
2130 	  return (1);
2131       case 0xf064:
2132 	  printf ("&delta;");
2133 	  return (1);
2134       case 0xf072:
2135 	  printf ("&rho;");
2136 	  return (1);
2137       case 0xf073:
2138 	  printf ("&sigma;");
2139 	  return (1);
2140       case 0xf0ae:
2141 	  printf ("&rarr;");	/* right arrow */
2142 	  return (1);
2143       case 0xf0b6:
2144 	  printf ("&part;");	/* partial deriv. */
2145 	  return (1);
2146       case 0xf0b3:
2147 	  printf ("&ge;");
2148 	  return (1);
2149 
2150       }
2151     /* Debugging aid: */
2152     /* if (char16 >= 0x100) printf("[%x]", char16); */
2153     return (0);
2154 }
2155 
2156 
2157 
2158 int
wvConvertUnicodeToXml(U16 char16)2159 wvConvertUnicodeToXml (U16 char16)
2160 {
2161     switch (char16)
2162       {
2163       case 11:
2164 	  printf ("<br />");
2165 	  return (1);
2166 
2167       case 30:
2168       case 31:
2169       case 12:
2170       case 13:
2171       case 14:
2172       case 7:
2173 	  return (1);
2174 
2175       /* Much simpler here, because XML supports only a few entities */
2176       case 34:
2177 	  printf ("&quot;");
2178 	  return (1);
2179       case 38:
2180 	  printf ("&amp;");
2181 	  return (1);
2182       case 39:
2183 	  printf ("&apos;");
2184 	  return (1);
2185       case 60:
2186 	  printf ("&lt;");
2187 	  return (1);
2188       case 62:
2189 	  printf ("&gt;");
2190 	  return (1);
2191       }
2192 
2193     return (0);
2194 }
2195 
/*
 * Bounded string copy that always NUL-terminates.
 *
 * d: destination buffer of capacity n bytes.
 * n: total size of d, including room for the terminating NUL.
 * s: NUL-terminated source string.
 *
 * At most n-1 characters of s are copied; the remainder of d (up to and
 * including d[n-1]) is zero-filled.  When n is 0 the buffer has no room
 * even for a terminator, so d is left untouched.
 *
 * Returns d.
 */
char *str_copy(char *d, size_t n, char *s)
{
    /* Without this guard d[n-1] would index outside the buffer. */
    if (n == 0)
	return d;

    strncpy(d, s, n - 1);
    d[n - 1] = 0;
    return d;
}
2202 
/*
 * Bounded string append that always NUL-terminates.
 *
 * d: destination buffer of capacity n bytes holding a string to extend.
 * n: total size of d, including room for the terminating NUL.
 * s: NUL-terminated string to append.
 *
 * Appends as much of s as fits so that d never holds more than n-1
 * characters plus the terminator.  If d already contains n or more
 * characters (no NUL within its first n bytes) nothing is appended and d
 * is truncated to n-1 characters.  When n is 0, d is left untouched.
 *
 * Returns d.
 */
char *str_append(char *d, size_t n, char *s)
{
    char *end;
    size_t used;

    if (n == 0)
	return d;

    /* Look for the terminator inside the buffer only; a plain strlen()
     * could run past the end, and "n - strlen(d) - 1" would wrap around
     * to a huge count for strncat(). */
    end = memchr(d, 0, n);
    if (end == NULL)
      {
	d[n - 1] = 0;
	return d;
      }

    used = (size_t) (end - d);	/* used <= n-1, so the subtraction below cannot wrap */
    strncat(d, s, n - used - 1);
    d[n - 1] = 0;
    return d;
}
2210 
/* Copy string s into the char array d via str_copy(), using sizeof(d) as
 * the capacity.  d must therefore be a real array, not a pointer. */
#define BUF_COPY(d,s) str_copy(d,sizeof(d),s)
2212 
2213 char *
wvConvertStylename(char * stylename,char * outputtype)2214 wvConvertStylename(char *stylename, char *outputtype)
2215 {
2216     static char cached_outputtype[36];
2217     static GIConv g_iconv_handle = (GIConv)-1;
2218     /**FIXME: 100 is just the size of stylename[] from wv.h**/
2219     static char buffer[100];
2220     char *ibuf, *obuf;
2221     size_t ibuflen, obuflen, len;
2222 
2223     /* Destroy */
2224     if(!outputtype)
2225     {
2226 	if ((g_iconv_handle != (GIConv)-1))
2227 	    g_iconv_close(g_iconv_handle);
2228 	return NULL;
2229     }
2230 
2231     /* Initialize */
2232     if(!g_iconv_handle || strcmp(cached_outputtype, outputtype))
2233     {
2234 	if ((g_iconv_handle != (GIConv)-1))
2235 	    g_iconv_close(g_iconv_handle);
2236 
2237 	/**FIXME: don�t know if ISO-8859-1 is really the correct
2238 	 **charset for style names with eg umlauts.             **/
2239 	g_iconv_handle = g_iconv_open(outputtype, "ISO-8859-1");
2240 	if(g_iconv_handle == (GIConv)-1)
2241 	{
2242 	    wvError(("g_iconv_open fail: %d, cannot convert %s to %s\n",
2243 		     errno, "ISO-8859-1", outputtype));
2244 	    return stylename;
2245 	}
2246 
2247 	BUF_COPY(cached_outputtype, outputtype);
2248     }
2249 
2250     /* Convert */
2251     ibuf    = stylename;
2252     ibuflen = strlen(stylename);
2253     obuf    = buffer;
2254     obuflen = sizeof(buffer) - 1;
2255     len     = g_iconv (g_iconv_handle, &ibuf, &ibuflen, &obuf, &obuflen);
2256     *obuf   = 0;
2257     if(len == -1)
2258     {
2259 	wvError(("wvConfig.c: can�t iconv()\n"));
2260 	return stylename;
2261     }
2262 
2263     return buffer;
2264 }
2265