1 /* wvWare
2 * Copyright (C) Caolan McNamara, Dom Lachowicz, and others
3 *
4 * This program is free software; you can redistribute it and/or
5 * modify it under the terms of the GNU General Public License
6 * as published by the Free Software Foundation; either version 2
7 * of the License, or (at your option) any later version.
8 *
9 * This program is distributed in the hope that it will be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 * GNU General Public License for more details.
13 *
14 * You should have received a copy of the GNU General Public License
15 * along with this program; if not, write to the Free Software
16 * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA
17 * 02111-1307, USA.
18 */
19
20 #ifdef HAVE_CONFIG_H
21 #include "config.h"
22 #endif
23
24 #include <stdlib.h>
25 #include <stdio.h>
26 #include <string.h>
27 #include <errno.h>
28 #include "wv.h"
29 #include <glib.h>
30
31 int (*wvConvertUnicodeToEntity) (U16 char16) = NULL;
32
33 /* enough word docs use the cp1252 encoding and enough iconv
34 * implementations don't include it that it's worth special-
35 * casing */
36 static const U16 cp1252_to_ucs2_table[] = {
37 0x0000, 0x0001, 0x0002, 0x0003, 0x0004, 0x0005, 0x0006, 0x0007
38 , 0x0008, 0x0009, 0x000A, 0x000B, 0x000C, 0x000D, 0x000E, 0x000F
39 , 0x0010, 0x0011, 0x0012, 0x0013, 0x0014, 0x0015, 0x0016, 0x0017
40 , 0x0018, 0x0019, 0x001A, 0x001B, 0x001C, 0x001D, 0x001E, 0x001F
41 , 0x0020, 0x0021, 0x0022, 0x0023, 0x0024, 0x0025, 0x0026, 0x0027
42 , 0x0028, 0x0029, 0x002A, 0x002B, 0x002C, 0x002D, 0x002E, 0x002F
43 , 0x0030, 0x0031, 0x0032, 0x0033, 0x0034, 0x0035, 0x0036, 0x0037
44 , 0x0038, 0x0039, 0x003A, 0x003B, 0x003C, 0x003D, 0x003E, 0x003F
45 , 0x0040, 0x0041, 0x0042, 0x0043, 0x0044, 0x0045, 0x0046, 0x0047
46 , 0x0048, 0x0049, 0x004A, 0x004B, 0x004C, 0x004D, 0x004E, 0x004F
47 , 0x0050, 0x0051, 0x0052, 0x0053, 0x0054, 0x0055, 0x0056, 0x0057
48 , 0x0058, 0x0059, 0x005A, 0x005B, 0x005C, 0x005D, 0x005E, 0x005F
49 , 0x0060, 0x0061, 0x0062, 0x0063, 0x0064, 0x0065, 0x0066, 0x0067
50 , 0x0068, 0x0069, 0x006A, 0x006B, 0x006C, 0x006D, 0x006E, 0x006F
51 , 0x0070, 0x0071, 0x0072, 0x0073, 0x0074, 0x0075, 0x0076, 0x0077
52 , 0x0078, 0x0079, 0x007A, 0x007B, 0x007C, 0x007D, 0x007E, 0x007F
53 , 0x20AC, 0x0000, 0x201A, 0x0192, 0x201E, 0x2026, 0x2020, 0x2021
54 , 0x02C6, 0x2030, 0x0160, 0x2039, 0x0152, 0x0000, 0x017D, 0x0000
55 , 0x0000, 0x2018, 0x2019, 0x201C, 0x201D, 0x2022, 0x2013, 0x2014
56 , 0x02DC, 0x2122, 0x0161, 0x203A, 0x0153, 0x0000, 0x017E, 0x0178
57 , 0x00A0, 0x00A1, 0x00A2, 0x00A3, 0x00A4, 0x00A5, 0x00A6, 0x00A7
58 , 0x00A8, 0x00A9, 0x00AA, 0x00AB, 0x00AC, 0x00AD, 0x00AE, 0x00AF
59 , 0x00B0, 0x00B1, 0x00B2, 0x00B3, 0x00B4, 0x00B5, 0x00B6, 0x00B7
60 , 0x00B8, 0x00B9, 0x00BA, 0x00BB, 0x00BC, 0x00BD, 0x00BE, 0x00BF
61 , 0x00C0, 0x00C1, 0x00C2, 0x00C3, 0x00C4, 0x00C5, 0x00C6, 0x00C7
62 , 0x00C8, 0x00C9, 0x00CA, 0x00CB, 0x00CC, 0x00CD, 0x00CE, 0x00CF
63 , 0x00D0, 0x00D1, 0x00D2, 0x00D3, 0x00D4, 0x00D5, 0x00D6, 0x00D7
64 , 0x00D8, 0x00D9, 0x00DA, 0x00DB, 0x00DC, 0x00DD, 0x00DE, 0x00DF
65 , 0x00E0, 0x00E1, 0x00E2, 0x00E3, 0x00E4, 0x00E5, 0x00E6, 0x00E7
66 , 0x00E8, 0x00E9, 0x00EA, 0x00EB, 0x00EC, 0x00ED, 0x00EE, 0x00EF
67 , 0x00F0, 0x00F1, 0x00F2, 0x00F3, 0x00F4, 0x00F5, 0x00F6, 0x00F7
68 , 0x00F8, 0x00F9, 0x00FA, 0x00FB, 0x00FC, 0x00FD, 0x00FE, 0x00FF
69 };
70
71 U16
wvnLocaleToLIDConverter(U8 nLocale)72 wvnLocaleToLIDConverter (U8 nLocale)
73 {
74 switch (nLocale)
75 {
76 #if 0
77 /* case 0: */ /* ANSI_CHARSET */
78 /* case 1: */ /* DEFAULT_CHARSET */
79 /* case 2: */ /* SYMBOL_CHARSET */
80 #endif
81 case 77: /* MAC_CHARSET */
82 return (0xFFF); /* This number is a hack */
83 case 128: /* SHIFTJIS_CHARSET */
84 return (0x411); /* Japanese */
85 case 129: /* HANGEUL_CHARSET */
86 return (0x412); /* Korean */
87 case 130: /* JOHAB_CHARSET */
88 return (0x812); /* Korean (Johab) */
89 case 134: /* GB2312_CHARSET - Chinese Simplified */
90 return (0x804); /* China PRC - And others!! */
91 case 136: /* CHINESEBIG5_CHARSET - Chinese Traditional */
92 return (0x404); /* Taiwan - And others!! */
93 case 161: /* GREEK_CHARSET */
94 return (0x408); /* Greek */
95 case 162: /* TURKISH_CHARSET */
96 return (0x41f); /* Turkish */
97 case 163: /* VIETNAMESE_CHARSET */
98 return (0x42a); /* Vietnamese */
99 case 177: /* HEBREW_CHARSET */
100 return (0x40d); /* Hebrew */
101 case 178: /* ARABIC_CHARSET */
102 return (0x01); /* Arabic */
103 case 186: /* BALTIC_CHARSET */
104 return (0x425); /* Estonian - And others!! */
105 case 204: /* RUSSIAN_CHARSET */
106 return (0x419); /* Russian - And others!! */
107 case 222: /* THAI_CHARSET */
108 return (0x41e); /* Thai */
109 case 238: /* EASTEUROPE_CHARSET */
110 return (0x405); /* Czech - And many others!! */
111
112 #if 0
113 /* case 255: */ /* OEM_CHARSET */
114 #endif
115
116 default:
117 return (0x0);
118 }
119 return (0x0);
120 }
121
122 int
wvOutputTextChar(U16 eachchar,U8 chartype,wvParseStruct * ps,CHP * achp)123 wvOutputTextChar (U16 eachchar, U8 chartype, wvParseStruct * ps, CHP * achp)
124 {
125 U16 lid = 0;
126
127 wvVersion v = wvQuerySupported (&ps->fib, NULL);
128
129 /* testing adding a language */
130
131 /* For version <= WORD7, The charset used could
132 * depend on the font's charset.
133 */
134 if ((v <= WORD7) && (!ps->fib.fFarEast))
135 {
136 FFN currentfont;
137
138 if (ps->fonts.ffn == NULL)
139 {
140 lid = 0;
141 }
142 else
143 {
144 currentfont = ps->fonts.ffn[achp->ftc];
145 /* Return 0 if no match */
146 lid = wvnLocaleToLIDConverter (currentfont.chs);
147 }
148 }
149
150 if ((v > WORD6) && !lid)
151 lid = achp->lidDefault;
152
153 /* No lidDefault for ver < WORD6 */
154 if (lid == 0x400 || lid == 0)
155 lid = ps->fib.lid;
156
157 /* end testing adding a language */
158
159 if (achp->fSpec)
160 {
161 /*
162 if the character is still one of the special ones then call this other
163 handler
164 instead
165 */
166 if (ps->scharhandler)
167 return ((*(ps->scharhandler)) (ps, eachchar, achp));
168 }
169 else
170 {
171 /* Most Chars go through this baby */
172 if (ps->charhandler)
173 {
174 if (!((v == WORD7 || v == WORD6) && ps->fib.fFarEast))
175 if (v <= WORD7)
176 {
177 /* versions <= 7 do not use unicode. versions >= 8 always do */
178 /* versions 7 and 6 use unicode iff the far-east flag is set */
179 chartype = 1;
180 }
181
182 return ((*(ps->charhandler)) (ps, eachchar, chartype, lid));
183 }
184 }
185 wvError (("No CharHandler registered, programmer error\n"));
186 return (0);
187 }
188
189 void
wvOutputHtmlChar(U16 eachchar,U8 chartype,char * outputtype,U16 lid)190 wvOutputHtmlChar (U16 eachchar, U8 chartype, char *outputtype, U16 lid)
191 {
192 if (chartype)
193 eachchar = wvHandleCodePage (eachchar, lid);
194 wvOutputFromUnicode (eachchar, outputtype);
195 }
196
/* Return `name` from the enclosing function if g_iconv_open() accepts it as
 * an encoding name, otherwise return `fallbackname`.
 *
 * The probe runs once per expansion site; the chosen name is remembered in
 * a function-local static.  The macro always executes `return`, so it must
 * be used inside a function returning `const char *`.  Both arguments are
 * expected to be string literals (the remembered pointer outlives the
 * call).  The body is wrapped in do/while(0) so that
 * `CPNAME_OR_FALLBACK (a, b);` is a single statement. */
#define CPNAME_OR_FALLBACK(name,fallbackname) \
do \
{ \
    static const char *cpname = NULL; \
    if (!cpname) \
    { \
        GIConv cd = g_iconv_open ((name), (name)); \
        if (cd == (GIConv) -1) \
        { \
            cpname = (fallbackname); \
        } \
        else \
        { \
            cpname = (name); \
            g_iconv_close (cd); \
        } \
    } \
    return cpname; \
} while (0)
215
216 typedef struct {
217 const char * language_tag ;
218 U16 lid ;
219 } wvLanguageId ;
220
221 static const wvLanguageId mLanguageIds[] =
222 {
223 { "-none-", 0x0000 }, /* none (language neutral) */
224 { "-none-", 0x0400 }, /* none */
225 { "af-ZA", 0x0436 }, /* Afrikaans */
226 { "am", 0x045e }, /* Amharic */
227 { "sq-AL", 0x041c }, /* Albanian */
228 { "ar-SA", 0x0401 }, /* Arabic (Saudi) */
229 { "ar-IQ", 0x0801 }, /* Arabic (Iraq) */
230 { "ar-EG", 0x0c01 }, /* Arabic (Egypt) */
231 { "ar-LY", 0x1001 }, /* Arabic (Libya) */
232 { "ar-DZ", 0x1401 }, /* Arabic (Algeria) */
233 { "ar-MA", 0x1801 }, /* Arabic (Morocco) */
234 { "ar-TN", 0x1c01 }, /* Arabic (Tunisia) */
235 { "ar-OM", 0x2001 }, /* Arabic (Oman) */
236 { "ar-YE", 0x2401 }, /* Arabic (Yemen) */
237 { "ar-SY", 0x2801 }, /* Arabic (Syria) */
238 { "ar-JO", 0x2c01 }, /* Arabic (Jordan) */
239 { "ar-LB", 0x3001 }, /* Arabic (Lebanon) */
240 { "ar-KW", 0x3401 }, /* Arabic (Kuwait) */
241 { "ar-AE", 0x3801 }, /* Arabic (United Arab Emirates) */
242 { "ar-BH", 0x3c01 }, /* Arabic (Bahrain) */
243 { "ar-QA", 0x4001 }, /* Arabic (Qatar) */
244 { "as", 0x044d }, /* Assamese */
245 { "az", 0x042c }, /* Azerbaijani */
246 { "hy-AM", 0x042b }, /* Armenian */
247 { "az", 0x044c }, /* Azeri (Latin) az- */
248 { "az", 0x082c }, /* Azeri (Cyrillic) az- */
249 { "eu-ES", 0x042d }, /* Basque */
250 { "be-BY", 0x0423 }, /* Belarussian */
251 { "bn", 0x0445 }, /* Bengali bn- */
252 { "bg-BG", 0x0402 }, /* Bulgarian */
253 { "ca-ES", 0x0403 }, /* Catalan */
254 { "zh-TW", 0x0404 }, /* Chinese (Taiwan) */
255 { "zh-CN", 0x0804 }, /* Chinese (PRC) */
256 { "zh-HK", 0x0c04 }, /* Chinese (Hong Kong) */
257 { "zh-SG", 0x1004 }, /* Chinese (Singapore) */
258 { "ch-MO", 0x1404 }, /* Chinese (Macau SAR) */
259 { "hr-HR", 0x041a }, /* Croatian */
260 { "cs-CZ", 0x0405 }, /* Czech */
261 { "da-DK", 0x0406 }, /* Danish */
262 { "div", 0x465 }, /* Divehi div-*/
263 { "nl-NL", 0x0413 }, /* Dutch (Netherlands) */
264 { "nl-BE", 0x0813 }, /* Dutch (Belgium) */
265 { "en-US", 0x0409 }, /* English (USA) */
266 { "en-GB", 0x0809 }, /* English (UK) */
267 { "en-AU", 0x0c09 }, /* English (Australia) */
268 { "en-CA", 0x1009 }, /* English (Canada) */
269 { "en-NZ", 0x1409 }, /* English (New Zealand) */
270 { "en-IE", 0x1809 }, /* English (Ireland) */
271 { "en-ZA", 0x1c09 }, /* English (South Africa) */
272 { "en-JM", 0x2009 }, /* English (Jamaica) */
273 { "en", 0x2409 }, /* English (Caribbean) */
274 { "en-BZ", 0x2809 }, /* English (Belize) */
275 { "en-TT", 0x2c09 }, /* English (Trinidad) */
276 { "en-ZW", 0x3009 }, /* English (Zimbabwe) */
277 { "en-PH", 0x3409 }, /* English (Phillipines) */
278 { "et-EE", 0x0425 }, /* Estonian */
279 { "fo", 0x0438 }, /* Faeroese fo- */
280 { "fa-IR", 0x0429 }, /* Farsi */
281 { "fi-FI", 0x040b }, /* Finnish */
282 { "fr-FR", 0x040c }, /* French (France) */
283 { "fr-BE", 0x080c }, /* French (Belgium) */
284 { "fr-CA", 0x0c0c }, /* French (Canada) */
285 { "fr-CH", 0x100c }, /* French (Switzerland) */
286 { "fr-LU", 0x140c }, /* French (Luxembourg) */
287 { "fr-MC", 0x180c }, /* French (Monaco) */
288 { "gl", 0x0456 }, /* Galician gl- */
289 { "ga-IE", 0x083c }, /* Irish Gaelic */
290 { "gd-GB", 0x100c }, /* Scottish Gaelic */
291 { "ka-GE", 0x0437 }, /* Georgian */
292 { "de-DE", 0x0407 }, /* German (Germany) */
293 { "de-CH", 0x0807 }, /* German (Switzerland) */
294 { "de-AT", 0x0c07 }, /* German (Austria) */
295 { "de-LU", 0x1007 }, /* German (Luxembourg) */
296 { "de-LI", 0x1407 }, /* German (Liechtenstein) */
297 { "el-GR", 0x0408 }, /* Greek */
298 { "gu", 0x0447 }, /* Gujarati gu- */
299 { "ha", 0x0468 }, /* Hausa */
300 { "he-IL", 0x040d }, /* Hebrew */
301 { "hi-IN", 0x0439 }, /* Hindi */
302 { "hu-HU", 0x040e }, /* Hungarian */
303 { "is-IS", 0x040f }, /* Icelandic */
304 { "id-ID", 0x0421 }, /* Indonesian */
305 { "iu", 0x045d }, /* Inkutitut */
306 { "it-IT", 0x0410 }, /* Italian (Italy) */
307 { "it-CH", 0x0810 }, /* Italian (Switzerland) */
308 { "ja-JP", 0x0411}, /* Japanese */
309 { "kn", 0x044b }, /* Kannada kn- */
310 { "ks", 0x0860 }, /* Kashmiri (India) ks- */
311 { "kk", 0x043f }, /* Kazakh kk- */
312 { "kok", 0x0457 }, /* Konkani kok- */
313 { "ko-KR", 0x0412 }, /* Korean */
314 { "ko", 0x0812 }, /* Korean (Johab) ko- */
315 { "kir", 0x0440 }, /* Kyrgyz */
316 { "la", 0x0476 }, /* Latin */
317 { "lo", 0x0454 }, /* Laothian */
318 { "lv-LV", 0x0426 }, /* Latvian */
319 { "lt-LT", 0x0427 }, /* Lithuanian */
320 { "lt-LT", 0x0827 }, /* Lithuanian (Classic) */
321 { "mk", 0x042f }, /* FYRO Macedonian */
322 { "my-MY", 0x043e }, /* Malaysian */
323 { "my-BN", 0x083e }, /* Malay Brunei Darussalam */
324 { "ml", 0x044c }, /* Malayalam ml- */
325 { "mr", 0x044e }, /* Marathi mr- */
326 { "mt", 0x043a }, /* Maltese */
327 { "mo", 0x0450 }, /* Mongolian */
328 { "ne-NP", 0x0461 }, /* Napali (Nepal) */
329 { "ne-IN", 0x0861 }, /* Nepali (India) */
330 { "nb-NO", 0x0414 }, /* Norwegian (Bokmai) */
331 { "nn-NO", 0x0814 }, /* Norwegian (Nynorsk) */
332 { "or", 0x0448 }, /* Oriya or- */
333 { "om", 0x0472 }, /* Oromo (Afan, Galla) */
334 { "pl-PL", 0x0415 }, /* Polish */
335 { "pt-BR", 0x0416 }, /* Portuguese (Brazil) */
336 { "pt-PT", 0x0816 }, /* Portuguese (Portugal) */
337 { "pa", 0x0446 }, /* Punjabi pa- */
338 { "ps", 0x0463 }, /* Pashto (Pushto) */
339 { "rm", 0x0417 }, /* Rhaeto-Romanic rm- */
340 { "ro-RO", 0x0418 }, /* Romanian */
341 { "ro-MD", 0x0818 }, /* Romanian (Moldova) */
342 { "ru-RU", 0x0419 }, /* Russian */
343 { "ru-MD", 0x0819 }, /* Russian (Moldova) */
344 { "se", 0x043b }, /* Sami (Lappish) se- */
345 { "sa", 0x044f }, /* Sanskrit sa- */
346 { "sr", 0x0c1a }, /* Serbian (Cyrillic) sr- */
347 { "sr", 0x081a }, /* Serbian (Latin) sr- */
348 { "sd", 0x0459 }, /* Sindhi sd- */
349 { "sk-SK", 0x041b }, /* Slovak */
350 { "sl-SI", 0x0424 }, /* Slovenian */
351 { "wen", 0x042e }, /* Sorbian wen- */
352 { "so", 0x0477 }, /* Somali */
353 { "es-ES", 0x040a }, /* Spanish (Spain, Traditional) */
354 { "es-MX", 0x080a }, /* Spanish (Mexico) */
355 { "es-ES", 0x0c0a }, /* Spanish (Modern) */
356 { "es-GT", 0x100a }, /* Spanish (Guatemala) */
357 { "es-CR", 0x140a }, /* Spanish (Costa Rica) */
358 { "es-PA", 0x180a }, /* Spanish (Panama) */
359 { "es-DO", 0x1c0a }, /* Spanish (Dominican Republic) */
360 { "es-VE", 0x200a }, /* Spanish (Venezuela) */
361 { "es-CO", 0x240a }, /* Spanish (Colombia) */
362 { "es-PE", 0x280a }, /* Spanish (Peru) */
363 { "es-AR", 0x2c0a }, /* Spanish (Argentina) */
364 { "es-EC", 0x300a }, /* Spanish (Ecuador) */
365 { "es-CL", 0x340a }, /* Spanish (Chile) */
366 { "es-UY", 0x380a }, /* Spanish (Uruguay) */
367 { "es-PY", 0x3c0a }, /* Spanish (Paraguay) */
368 { "es-BO", 0x400a }, /* Spanish (Bolivia) */
369 { "es-SV", 0x440a }, /* Spanish (El Salvador) */
370 { "es-HN", 0x480a }, /* Spanish (Honduras) */
371 { "es-NI", 0x4c0a }, /* Spanish (Nicaragua) */
372 { "es-PR", 0x500a }, /* Spanish (Puerto Rico) */
373 { "sx", 0x0430 }, /* Sutu */
374 { "sw", 0x0441 }, /* Swahili (Kiswahili/Kenya) */
375 { "sv-SE", 0x041d }, /* Swedish */
376 { "sv-FI", 0x081d }, /* Swedish (Finland) */
377 { "ta", 0x0449 }, /* Tamil ta- */
378 { "tt", 0x0444 }, /* Tatar (Tatarstan) tt- */
379 { "te", 0x044a }, /* Telugu te- */
380 { "th-TH", 0x041e }, /* Thai */
381 { "ts", 0x0431 }, /* Tsonga ts- */
382 { "tn", 0x0432 }, /* Tswana tn- */
383 { "tr-TR", 0x041f }, /* Turkish */
384 { "tl", 0x0464 }, /* Tagalog */
385 { "tg", 0x0428 }, /* Tajik */
386 { "bo", 0x0451 }, /* Tibetan */
387 { "ti", 0x0473 }, /* Tigrinya */
388 { "uk-UA", 0x0422 }, /* Ukrainian */
389 { "ur-PK", 0x0420 }, /* Urdu (Pakistan) */
390 { "ur-IN", 0x0820 }, /* Urdu (India) */
391 { "uz", 0x0443 }, /* Uzbek (Latin) uz- */
392 { "uz", 0x0843 }, /* Uzbek (Cyrillic) uz- */
393 { "ven", 0x0433 }, /* Venda ven- */
394 { "vi-VN", 0x042a }, /* Vietnamese */
395 { "cy-GB", 0x0452 }, /* Welsh */
396 { "xh", 0x0434 }, /* Xhosa xh */
397 { "yi", 0x043d }, /* Yiddish yi- */
398 { "yo", 0x046a }, /* Yoruba */
399 { "zu", 0x0435 }, /* Zulu zu- */
400 { "en-US", 0x0800 } /* Default */
401 };
402
403 #define NrMappings (sizeof(mLanguageIds)/sizeof(mLanguageIds[0]))
404
wvLangToLIDConverter(const char * lang)405 U16 wvLangToLIDConverter ( const char * lang )
406 {
407 unsigned int i = 0 ;
408
409 if (!lang)
410 return 0x0400; /* return -none- */
411
412 for ( i = 0 ; i < NrMappings ; i++ )
413 if (!strcmp (lang, mLanguageIds[i].language_tag))
414 return mLanguageIds[i].lid ;
415
416 return 0x0400 ; /* return -none- */
417 }
418
419 const char *
wvLIDToLangConverter(U16 lid)420 wvLIDToLangConverter (U16 lid)
421 {
422 unsigned int i = 0 ;
423
424 if ( lid == 0 ) /* language netural */
425 return "-none-" ;
426
427 for ( i = 0 ; i < NrMappings ; i++ )
428 if ( mLanguageIds[i].lid == lid )
429 return mLanguageIds[i].language_tag ;
430
431 return "-none-"; /* default */
432 }
433
434 static int
wvIsCP1252(U16 lid)435 wvIsCP1252 (U16 lid)
436 {
437 switch (lid & 0xff)
438 {
439 case 0x03: /*Catalan */
440 case 0x06: /*Danish */
441 case 0x07: /*German */
442 case 0x09: /*English */
443 case 0x0a: /*Spanish */
444 case 0x0b: /*Finnish */
445 case 0x0c: /*French */
446 case 0x0f: /*Icelandic */
447 case 0x10: /*Italian */
448 case 0x13: /*Dutch */
449 case 0x14: /*Norwegian */
450 case 0x16: /*Portuguese */
451 case 0x17: /*Rhaeto-Romanic */
452 case 0x1d: /*Swedish */
453 case 0x21: /*Bahasa Indonesian */
454 case 0x2d: /*Basque */
455 case 0x36: /*Afrikaans */
456 case 0x38: /*Faeroese */
457 case 0x3E: /*Malaysian / Malay */
458 case 0x41: /*Swahili */
459 return 1;
460
461 case 0x1a: /*Serbian, Croatian, (Bosnian?) */
462 switch (lid)
463 {
464 case 0x041a: /*Croatian */
465 case 0x081a: /*Serbian (Latin) */
466 return 1;
467 }
468
469 default:
470 return 0;
471 }
472
473 return 0;
474 }
475
476 const char *
wvLIDToCodePageConverter(U16 lid)477 wvLIDToCodePageConverter (U16 lid)
478 {
479 if (lid == 0x0FFF) /*Macintosh Hack */
480 return ("MACINTOSH");
481 else if (wvIsCP1252 (lid))
482 return ("CP1252");
483
484 switch (lid & 0xff)
485 {
486 case 0x01: /*Arabic */
487 return ("CP1256");
488 case 0x02: /*Bulgarian */
489 return ("CP1251");
490 case 0x04: /*Chinese */
491 switch (lid)
492 {
493 #if 0
494 case 0x1404: /*Chinese (Macau SAR) */
495 #endif
496 case 0x0c04: /*Chinese (Hong Kong SAR, PRC) */
497 CPNAME_OR_FALLBACK ("CP950", "BIG5-HKSCS");
498 case 0x0804: /*Chinese (PRC) */
499 CPNAME_OR_FALLBACK ("CP936", "GBK");
500 #if 0
501 case 0x1004: /*Chinese (Singapore) */
502 #endif
503 case 0x0404: /*Chinese (Taiwan) */
504 CPNAME_OR_FALLBACK ("CP950", "BIG5");
505 }
506 case 0x05: /*Czech */
507 return ("CP1250");
508 case 0x08: /*Greek */
509 return ("CP1253");
510 case 0x0d: /*Hebrew */
511 return ("CP1255");
512 case 0x0e: /*Hungarian */
513 return ("CP1250");
514 case 0x11: /*Japanese */
515 return ("CP932");
516 case 0x12: /*Korean */
517 switch (lid)
518 {
519 case 0x0812: /*Korean (Johab) */
520 return ("CP1361");
521 case 0x0412: /*Korean */
522 return ("CP949");
523 }
524 case 0x15: /*Polish */
525 return ("CP1250");
526 case 0x18: /*Romanian */
527 return ("CP1250");
528 case 0x19: /*Russian */
529 return ("CP1251");
530 case 0x1a: /*Serbian, Croatian, (Bosnian?) */
531 switch (lid)
532 {
533 case 0x0c1a: /*Serbian (Cyrillic) */
534 return ("CP1251");
535 }
536 case 0x1b: /*Slovak */
537 return ("CP1250");
538 case 0x1c: /*Albanian */
539 return ("CP1251");
540 case 0x1e: /*Thai */
541 return ("CP874");
542 case 0x1f: /*Turkish */
543 return ("CP1254");
544 case 0x20: /*Urdu. This is Unicode only. */
545 return ("CP0");
546 case 0x22: /*Ukrainian */
547 return ("CP1251");
548 case 0x23: /*Byelorussian / Belarusian */
549 return ("CP1251");
550 case 0x24: /*Slovenian */
551 return ("CP1250");
552 case 0x25: /*Estonian */
553 return ("CP1257");
554 case 0x26: /*Latvian */
555 return ("CP1257");
556 case 0x27: /*Lithuanian */
557 return ("CP1257");
558 case 0x29: /*Farsi / Persian. This is Unicode only. */
559 return ("CP0");
560 case 0x2a: /*Vietnamese */
561 return ("CP1258");
562 case 0x2b: /*Windows 2000: Armenian. This is Unicode only. */
563 return ("CP0");
564 case 0x2c: /*Azeri */
565 switch (lid)
566 {
567 case 0x082c: /*Azeri (Cyrillic) */
568 return ("CP1251");
569 #if 0
570 case 0x042c: /*Azeri (Latin) */
571 #endif
572 }
573 case 0x2f: /*Macedonian */
574 return ("CP1251");
575 #if 0
576 case 0x30: /*Sutu */
577 #endif
578 case 0x37: /*Windows 2000: Georgian. This is Unicode only. */
579 return ("CP0");
580 case 0x39: /*Windows 2000: Hindi. This is Unicode only. */
581 return ("CP0");
582 #if 0
583 case 0x3f: /*Kazakh */
584 #endif
585 case 0x43: /*Uzbek */
586 switch (lid)
587 {
588 case 0x0843: /*Uzbek (Cyrillic) */
589 return ("CP1251");
590 #if 0
591 case 0x0443: /*Uzbek (Latin) */
592 #endif
593 }
594 #if 0
595 case 0x44: /*Tatar */
596 #endif
597 case 0x45: /*Windows 2000: Bengali. This is Unicode only. */
598 case 0x46: /*Windows 2000: Punjabi. This is Unicode only. */
599 case 0x47: /*Windows 2000: Gujarati. This is Unicode only. */
600 case 0x48: /*Windows 2000: Oriya. This is Unicode only. */
601 case 0x49: /*Windows 2000: Tamil. This is Unicode only. */
602 case 0x4a: /*Windows 2000: Telugu. This is Unicode only. */
603 case 0x4b: /*Windows 2000: Kannada. This is Unicode only. */
604 case 0x4c: /*Windows 2000: Malayalam. This is Unicode only. */
605 case 0x4d: /*Windows 2000: Assamese. This is Unicode only. */
606 case 0x4e: /*Windows 2000: Marathi. This is Unicode only. */
607 case 0x4f: /*Windows 2000: Sanskrit. This is Unicode only. */
608 return ("CP0");
609 case 0x55: /*Myanmar / Burmese. This is Unicode only. */
610 return ("CP0");
611 case 0x57: /*Windows 2000: Konkani. This is Unicode only. */
612 return ("CP0");
613 #if 0
614 case 0x58: /*Manipuri */
615 case 0x59: /*Sindhi */
616 case 0x60: /*Kashmiri (India) */
617 #endif
618 case 0x61: /*Windows 2000: Nepali (India). This is Unicode only. */
619 return ("CP0");
620 };
621
622 /* TODO output a warning since this is a guess */
623 return ("CP1252");
624 }
625
626 static U32
swap_iconv(U16 lid)627 swap_iconv (U16 lid)
628 {
629 GIConv handle;
630 char f_code[33]; /* From CCSID */
631 char t_code[33]; /* To CCSID */
632 const char *codepage;
633 size_t ibuflen, obuflen;
634
635 U8 buffer[2];
636 U8 buffer2[2];
637
638 gchar *ibuf, *obuf;
639
640 /* do a bit of caching */
641 static U16 lastlid = -1;
642 static U32 ret = -1;
643
644 /* shortcut */
645 if (ret != -1 && lastlid == lid)
646 return ret;
647
648 ibuf = buffer;
649 obuf = buffer2;
650
651 lastlid = lid;
652 codepage = wvLIDToCodePageConverter (lid);
653
654 memset (f_code, '\0', 33);
655 memset (t_code, '\0', 33);
656
657 strcpy (f_code, codepage);
658 strcpy (t_code, "UCS-2");
659
660 handle = g_iconv_open (t_code, f_code);
661 if (handle == (GIConv)-1)
662 return 0;
663
664 buffer[0] = 0x20 & 0xff;
665 buffer[1] = 0;
666
667 ibuflen = obuflen = 2;
668
669 g_iconv (handle, &ibuf, &ibuflen, &obuf, &obuflen);
670
671 g_iconv_close (handle);
672
673 ret = *(U16 *) buffer2 != 0x20;
674 return ret;
675 }
676
677 U16
wvHandleCodePage(U16 eachchar,U16 lid)678 wvHandleCodePage (U16 eachchar, U16 lid)
679 {
680 char f_code[33]; /* From CCSID */
681 char t_code[33]; /* To CCSID */
682 const char *codepage;
683 GIConv g_iconv_handle; /* Conversion Descriptor returned */
684 /* from g_iconv_open() function */
685 size_t ibuflen; /* Length of input buffer */
686 size_t obuflen; /* Length of output buffer */
687
688 gchar *ibuf;
689 gchar *obuf; /* Buffer for converted characters */
690 U8 *p;
691 U8 buffer[2];
692 U8 buffer2[2];
693
694 U16 rtn;
695
696 if (wvIsCP1252 (lid) && eachchar <= 0xFF)
697 {
698 return cp1252_to_ucs2_table[eachchar];
699 }
700
701 if (eachchar > 0xff)
702 {
703 buffer[0] = (char) (eachchar >> 8);
704 buffer[1] = (char) eachchar & 0xff;
705 }
706 else
707 {
708 buffer[0] = eachchar & 0xff;
709 buffer[1] = 0;
710 }
711
712 ibuf = buffer;
713 obuf = buffer2;
714
715 codepage = wvLIDToCodePageConverter (lid);
716
717 /* All reserved positions of from code (last 12 characters) and to code */
718 /* (last 19 characters) must be set to hexadecimal zeros. */
719
720 memset (f_code, '\0', 33);
721 memset (t_code, '\0', 33);
722
723 strcpy (f_code, codepage);
724 strcpy (t_code, "UCS-2");
725
726 g_iconv_handle = g_iconv_open (t_code, f_code);
727 if (g_iconv_handle == (GIConv) - 1)
728 {
729 wvError (
730 ("g_iconv_open fail: %d, cannot convert %s to unicode\n",
731 errno, codepage));
732 return ('?');
733 }
734
735 ibuflen = obuflen = 2;
736 p = obuf;
737
738 g_iconv (g_iconv_handle, &ibuf, &ibuflen, &obuf, &obuflen);
739
740 /* We might have double byte char here. */
741
742 if (swap_iconv (lid))
743 {
744 rtn = (U16) buffer2[0] << 8;
745 rtn |= (U16) buffer2[1];
746 }
747 else
748 {
749 rtn = *(U16 *) buffer2;
750 }
751
752 g_iconv_close (g_iconv_handle);
753
754 return (rtn);
755 }
756
757 void
wvOutputFromUnicode(U16 eachchar,char * outputtype)758 wvOutputFromUnicode (U16 eachchar, char *outputtype)
759 {
760 static char cached_outputtype[33]; /* Last outputtype */
761 static GIConv g_iconv_handle = (GIConv)-1; /* Cached iconv descriptor */
762 static int need_swapping;
763 gchar *ibuf, *obuf;
764 size_t ibuflen, obuflen, len, count, i;
765 U8 buffer[2], buffer2[5];
766
767 if ((wvConvertUnicodeToEntity != NULL)
768 && wvConvertUnicodeToEntity (eachchar))
769 return;
770
771 if ((g_iconv_handle == (GIConv)-1) || strcmp (cached_outputtype, outputtype) != 0)
772 {
773 if ((g_iconv_handle != (GIConv)-1))
774 g_iconv_close (g_iconv_handle);
775
776 g_iconv_handle = g_iconv_open (outputtype, "UCS-2");
777 if (g_iconv_handle == (GIConv) - 1)
778 {
779 wvError (
780 ("g_iconv_open fail: %d, cannot convert %s to %s\n",
781 errno, "UCS-2", outputtype));
782 printf ("?");
783 return;
784 }
785
786 /* safe to cache the output type here */
787 strcpy (cached_outputtype, outputtype);
788
789 /* Determining if unicode biteorder is swapped (glibc < 2.2) */
790 need_swapping = 1;
791
792 buffer[0] = 0x20;
793 buffer[1] = 0;
794 ibuf = buffer;
795 obuf = buffer2;
796 ibuflen = 2;
797 obuflen = 5;
798
799 count = g_iconv (g_iconv_handle, &ibuf, &ibuflen, &obuf, &obuflen);
800 if (count >= 0)
801 need_swapping = buffer2[0] != 0x20;
802 }
803
804 if (need_swapping)
805 {
806 buffer[0] = (eachchar >> 8) & 0x00ff;
807 buffer[1] = eachchar & 0x00ff;
808 }
809 else
810 {
811 buffer[0] = eachchar & 0x00ff;
812 buffer[1] = (eachchar >> 8) & 0x00ff;
813 }
814
815 ibuf = buffer;
816 obuf = buffer2;
817
818 ibuflen = 2;
819 len = obuflen = 5;
820
821 count = g_iconv (g_iconv_handle, &ibuf, &ibuflen, &obuf, &obuflen);
822 if (count == (size_t) - 1)
823 {
824 wvError (("iconv failed errno: %d, char: 0x%X, %s -> %s\n",
825 errno, eachchar, "UCS-2", outputtype));
826
827 /* I'm torn here - do i just announce the failure, continue, or copy over to the other buffer? */
828
829 /* errno is usually 84 (illegal byte sequence)
830 should i reverse the bytes and try again? */
831 printf ("%c", ibuf[1]);
832 }
833 else
834 {
835 len = len - obuflen;
836
837 for (i = 0; i < len; i++)
838 printf ("%c", buffer2[i]);
839 }
840 }
841
842 int
wvHandleElement(wvParseStruct * ps,wvTag tag,void * props,int dirty)843 wvHandleElement (wvParseStruct * ps, wvTag tag, void *props, int dirty)
844 {
845 if (ps->elehandler)
846 return ((*(ps->elehandler)) (ps, tag, props, dirty));
847 wvError (("No element handler registered!!\n"));
848 return (0);
849 }
850
851 int
wvHandleDocument(wvParseStruct * ps,wvTag tag)852 wvHandleDocument (wvParseStruct * ps, wvTag tag)
853 {
854 if (ps->dochandler)
855 return ((*(ps->dochandler)) (ps, tag));
856 wvError (("No dochandler!!\n"));
857 return (0);
858 }
859
860 void
wvSetCharHandler(wvParseStruct * ps,int (* proc)(wvParseStruct *,U16,U8,U16))861 wvSetCharHandler (wvParseStruct * ps,
862 int (*proc) (wvParseStruct *, U16, U8, U16))
863 {
864 ps->charhandler = proc;
865 }
866
867 void
wvSetSpecialCharHandler(wvParseStruct * ps,int (* proc)(wvParseStruct *,U16,CHP *))868 wvSetSpecialCharHandler (wvParseStruct * ps,
869 int (*proc) (wvParseStruct *, U16, CHP *))
870 {
871 ps->scharhandler = proc;
872 }
873
874 void
wvSetElementHandler(wvParseStruct * ps,int (* proc)(wvParseStruct *,wvTag,void *,int))875 wvSetElementHandler (wvParseStruct * ps,
876 int (*proc) (wvParseStruct *, wvTag, void *, int))
877 {
878 ps->elehandler = proc;
879 }
880
881 void
wvSetDocumentHandler(wvParseStruct * ps,int (* proc)(wvParseStruct *,wvTag))882 wvSetDocumentHandler (wvParseStruct * ps,
883 int (*proc) (wvParseStruct *, wvTag))
884 {
885 ps->dochandler = proc;
886 }
887
888 int
wvConvertUnicodeToLaTeX(U16 char16)889 wvConvertUnicodeToLaTeX (U16 char16)
890 {
891 /*
892 german and scandinavian characters, MV 1.7.2000
893 See man iso_8859_1
894
895 This requires the inputencoding latin1 package,
896 see latin1.def. Chars in range 160...255 are just
897 put through as these are legal iso-8859-1 symbols.
898 (see above)
899
900 Best way to do it until LaTeX is Unicode enabled
901 (Omega project).
902 -- MV 4.7.2000
903
904 We use a separate if-statement here ... the 'case range'
905 construct is gcc specific :-( -- MV 13/07/2000
906 */
907
908 if ((char16 >= 0xa0) && (char16 <= 0xff))
909 {
910 switch (char16)
911 {
912 case 0xa0:
913 printf ("\\ "); /* hard space */
914 return (1);
915
916 /* Fix up these as math characters: */
917 case 0xb1:
918 printf ("$\\pm$");
919 return (1);
920 case 0xb2:
921 printf ("$\\mathtwosuperior$");
922 return (1);
923 case 0xb3:
924 printf ("$\\maththreesuperior$");
925 return (1);
926 case 0xb5:
927 printf ("$\\mu$");
928 return (1);
929 case 0xb9:
930 printf ("$\\mathonesuperior$");
931 return (1);
932 case 0xd7:
933 printf ("$\\times$");
934 return (1);
935 }
936 printf ("%c", char16);
937 return (1);
938 }
939 switch (char16)
940 {
941 case 37:
942 printf ("\\%%");
943 return (1);
944 case 10:
945 case 11:
946 printf ("\\\\\n");
947 return (1);
948 case 31: /* non-required hyphen */
949 printf ("\\-");
950 return (1);
951 case 30: /* non-breaking hyphen */
952 printf ("-");
953 return (1);
954
955 /* case 45: minus/hyphen, pass through */
956
957 case 12:
958 printf("\\newpage\n");
959 return (1);
960 case 13:
961 case 14:
962 case 7:
963 return (1);
964 case 9:
965 printf ("\\hfill{}"); /* tab -- horrible cludge */
966 return (1);
967 case 0xf020:
968 printf (" "); /* Mac specialty ? MV 10.10.2000 */
969 return (1);
970 case 0xf02c:
971 printf (","); /* Mac */
972 return (1);
973 case 0xf028:
974 printf ("("); /* Mac */
975 return (1);
976
977 case 34:
978 printf ("\"");
979 return (1);
980 case 35:
981 printf ("\\#"); /* MV 14.8.2000 */
982 return (1);
983 case 36:
984 printf ("\\$"); /* MV 14.8.2000 */
985 return (1);
986 case 38:
987 printf ("\\&"); /* MV 1.7.2000 */
988 return (1);
989 case 92:
990 printf ("$\\backslash$"); /* MV 23.9.2000 */
991 return (1);
992 case 94:
993 printf ("\\^"); /* MV 13.9.2000 */
994 return (1);
995 case 95:
996 printf ("\\_"); /* MV 13.9.2000 */
997 return (1);
998 case 60:
999 printf ("<");
1000 return (1);
1001 case 0xf03e: /* Mac */
1002 case 62:
1003 printf (">");
1004 return (1);
1005
1006 case 0xF8E7:
1007 /* without this, things should work in theory, but not for me */
1008 printf ("_");
1009 return (1);
1010
1011 /* Added some new Unicode characters. It's probably difficult
1012 to write these characters in AbiWord, though ... :(
1013 -- 2000-08-11 huftis@bigfoot.com */
1014
1015 case 0x0100:
1016 printf ("\\=A"); /* A with macron */
1017 return (1);
1018 case 0x0101:
1019 printf ("\\=a"); /* a with macron */
1020 return (1);
1021 case 0x0102:
1022 printf ("\\u{A}"); /* A with breve */
1023 return (1);
1024 case 0x0103:
1025 printf ("\\u{a}"); /* a with breve */
1026 return (1);
1027 case 0x0104:
1028 printf ("\\k{A}"); /* A with ogonek */
1029 return (1);
1030 case 0x0105:
1031 printf ("\\k{a}"); /* a with ogonek */
1032 return (1);
1033 case 0x0106:
1034 printf ("\\'C"); /* C with acute */
1035 return (1);
1036 case 0x0107:
1037 printf ("\\'c"); /* c with acute */
1038 return (1);
1039 case 0x0108:
1040 printf ("\\^C"); /* C with circumflex */
1041 return (1);
1042 case 0x0109:
1043 printf ("\\^c"); /* c with circumflex */
1044 return (1);
1045 case 0x010A:
1046 printf ("\\.C"); /* C with dot above */
1047 return (1);
1048 case 0x010B:
1049 printf ("\\.c"); /* c with dot above */
1050 return (1);
1051 case 0x010C:
1052 printf ("\\v{C}"); /* C with caron */
1053 return (1);
1054 case 0x010D:
1055 printf ("\\v{c}"); /* c with caron */
1056 return (1);
1057 case 0x010E:
1058 printf ("\\v{D}"); /* D with caron */
1059 return (1);
1060 case 0x010F:
1061 printf ("\\v{d}"); /* d with caron */
1062 return (1);
1063 case 0x0110:
1064 printf ("\\DJ{}"); /* D with stroke */
1065 return (1);
1066 case 0x0111:
1067 printf ("\\dj{}"); /* d with stroke */
1068 return (1);
1069 case 0x0112:
1070 printf ("\\=E"); /* E with macron */
1071 return (1);
1072 case 0x0113:
1073 printf ("\\=e"); /* e with macron */
1074 return (1);
1075 case 0x0114:
1076 printf ("\\u{E}"); /* E with breve */
1077 return (1);
1078 case 0x0115:
1079 printf ("\\u{e}"); /* e with breve */
1080 return (1);
1081 case 0x0116:
1082 printf ("\\.E"); /* E with dot above */
1083 return (1);
1084 case 0x0117:
1085 printf ("\\.e"); /* e with dot above */
1086 return (1);
1087 case 0x0118:
1088 printf ("\\k{E}"); /* E with ogonek */
1089 return (1);
1090 case 0x0119:
1091 printf ("\\k{e}"); /* e with ogonek */
1092 return (1);
1093 case 0x011A:
1094 printf ("\\v{E}"); /* E with caron */
1095 return (1);
1096 case 0x011B:
1097 printf ("\\v{e}"); /* e with caron */
1098 return (1);
1099 case 0x011C:
1100 printf ("\\^G"); /* G with circumflex */
1101 return (1);
1102 case 0x011D:
1103 printf ("\\^g"); /* g with circumflex */
1104 return (1);
1105 case 0x011E:
1106 printf ("\\u{G}"); /* G with breve */
1107 return (1);
1108 case 0x011F:
1109 printf ("\\u{g}"); /* g with breve */
1110 return (1);
1111 case 0x0120:
1112 printf ("\\.G"); /* G with dot above */
1113 return (1);
1114 case 0x0121:
1115 printf ("\\u{g}"); /* g with dot above */
1116 return (1);
1117 case 0x0122:
1118 printf ("^H"); /* H with circumflex */
1119 return (1);
1120 case 0x0123:
1121 printf ("^h"); /* h with circumflex */
1122 return (1);
1123
1124 case 0x0128:
1125 printf ("\\~I"); /* I with tilde */
1126 return (1);
1127 case 0x0129:
1128 printf ("\\~{\\i}"); /* i with tilde (dotless) */
1129 return (1);
1130 case 0x012A:
1131 printf ("\\=I"); /* I with macron */
1132 return (1);
1133 case 0x012B:
1134 printf ("\\={\\i}"); /* i with macron (dotless) */
1135 return (1);
1136 case 0x012C:
1137 printf ("\\u{I}"); /* I with breve */
1138 return (1);
1139 case 0x012D:
1140 printf ("\\u{\\i}"); /* i with breve */
1141 return (1);
1142
1143 case 0x0130:
1144 printf ("\\.I"); /* I with dot above */
1145 return (1);
1146 case 0x0131:
1147 printf ("\\i{}"); /* dotless i */
1148 return (1);
1149 case 0x0132:
1150 printf ("IJ"); /* IJ ligature */
1151 return (1);
1152 case 0x0133:
1153 printf ("ij"); /* ij ligature */
1154 return (1);
1155 case 0x0134:
1156 printf ("\\^J"); /* J with circumflex (dotless) */
1157 return (1);
1158 case 0x0135:
1159 printf ("\\^{\\j}"); /* j with circumflex (dotless) */
1160 return (1);
1161 case 0x0136:
1162 printf ("\\c{K}"); /* K with cedilla */
1163 return (1);
1164 case 0x0137:
1165 printf ("\\c{k}"); /* k with cedilla */
1166 return (1);
1167
1168 case 0x0138:
1169 printf ("k"); /* NOTE: Not the correct character (kra), but similar */
1170 return (1);
1171
1172 case 0x0139:
1173 printf ("\\'L"); /* L with acute */
1174 return (1);
1175 case 0x013A:
1176 printf ("\\'l"); /* l with acute */
1177 return (1);
1178 case 0x013B:
1179 printf ("\\c{L}"); /* L with cedilla */
1180 return (1);
1181 case 0x013C:
1182 printf ("\\c{l}"); /* l with cedilla */
1183 return (1);
1184 case 0x013D:
1185 printf ("\\v{L}"); /* L with caron */
1186 return (1);
1187 case 0x013E:
1188 printf ("\\v{l}"); /* l with caron */
1189 return (1);
1190
1191 case 0x0141:
1192 printf ("\\L{}"); /* L with stroke */
1193 return (1);
1194 case 0x0142:
1195 printf ("\\l{}"); /* l with stroke */
1196 return (1);
1197 case 0x0143:
1198 printf ("\\'N"); /* N with acute */
1199 return (1);
1200 case 0x0144:
1201 printf ("\\'n"); /* n with acute */
1202 return (1);
1203 case 0x0145:
1204 printf ("\\c{N}"); /* N with cedilla */
1205 return (1);
1206 case 0x0146:
1207 printf ("\\c{n}"); /* n with cedilla */
1208 return (1);
1209 case 0x0147:
1210 printf ("\\v{N}"); /* N with caron */
1211 return (1);
1212 case 0x0148:
1213 printf ("\\v{n}"); /* n with caron */
1214 return (1);
1215 case 0x0149:
1216 printf ("'n"); /* n preceed with apostroph */
1217 return (1);
1218 case 0x014A:
1219 printf ("\\NG{}"); /* ENG character */
1220 return (1);
1221 case 0x014B:
1222 printf ("\\ng{}"); /* eng character */
1223 return (1);
1224 case 0x014C:
1225 printf ("\\=O"); /* O with macron */
1226 return (1);
1227 case 0x014D:
1228 printf ("\\=o"); /* o with macron */
1229 return (1);
1230 case 0x014E:
1231 printf ("\\u{O}"); /* O with breve */
1232 return (1);
1233 case 0x014F:
1234 printf ("\\u{o}"); /* o with breve */
1235 return (1);
1236 case 0x0150:
1237 printf ("\\H{O}"); /* O with double acute */
1238 return (1);
1239 case 0x0151:
1240 printf ("\\H{o}"); /* o with double acute */
1241 return (1);
1242 case 0x0152:
1243 printf ("\\OE{}"); /* OE ligature */
1244 return (1);
1245 case 0x0153:
1246 printf ("\\oe{}"); /* oe ligature */
1247 return (1);
1248 case 0x0154:
1249 printf ("\\'R"); /* R with acute */
1250 return (1);
1251 case 0x0155:
1252 printf ("\\'r"); /* r with acute */
1253 return (1);
1254 case 0x0156:
1255 printf ("\\c{R}"); /* R with cedilla */
1256 return (1);
1257 case 0x0157:
1258 printf ("\\c{r}"); /* r with cedilla */
1259 return (1);
1260 case 0x0158:
1261 printf ("\\v{R}"); /* R with caron */
1262 return (1);
1263 case 0x0159:
1264 printf ("\\v{r}"); /* r with caron */
1265 return (1);
1266 case 0x015A:
1267 printf ("\\'S"); /* S with acute */
1268 return (1);
1269 case 0x015B:
1270 printf ("\\'s"); /* s with acute */
1271 return (1);
1272 case 0x015C:
1273 printf ("\\^S"); /* S with circumflex */
1274 return (1);
1275 case 0x015D:
1276 printf ("\\^s"); /* c with circumflex */
1277 return (1);
1278 case 0x015E:
1279 printf ("\\c{S}"); /* S with cedilla */
1280 return (1);
1281 case 0x015F:
1282 printf ("\\c{s}"); /* s with cedilla */
1283 return (1);
1284 case 0x0160:
1285 printf ("\\v{S}"); /* S with caron */
1286 return (1);
1287 case 0x0161:
1288 printf ("\\v{s}"); /* s with caron */
1289 return (1);
1290 case 0x0162:
1291 printf ("\\c{T}"); /* T with cedilla */
1292 return (1);
1293 case 0x0163:
1294 printf ("\\c{t}"); /* t with cedilla */
1295 return (1);
1296 case 0x0164:
1297 printf ("\\v{T}"); /* T with caron */
1298 return (1);
1299 case 0x0165:
1300 printf ("\\v{t}"); /* t with caron */
1301 return (1);
1302
1303 case 0x0168:
1304 printf ("\\~U"); /* U with tilde */
1305 return (1);
1306 case 0x0169:
1307 printf ("\\~u"); /* u with tilde */
1308 return (1);
1309 case 0x016A:
1310 printf ("\\=U"); /* U with macron */
1311 return (1);
1312
1313 /* Greek (thanks Petr Vanicek!): */
1314 case 0x0391:
1315 printf ("$A$");
1316 return (1);
1317 case 0x0392:
1318 printf ("$B$");
1319 return (1);
1320 case 0x0393:
1321 printf ("$\\Gamma$");
1322 return (1);
1323 case 0xf044: /* Mac ? */
1324 case 0x2206: /* Mac */
1325 case 0x0394:
1326 printf ("$\\Delta$");
1327 return (1);
1328 case 0x0395:
1329 printf ("$E$");
1330 return (1);
1331 case 0x0396:
1332 printf ("$Z$");
1333 return (1);
1334 case 0x0397:
1335 printf ("$H$");
1336 return (1);
1337 case 0x0398:
1338 printf ("$\\Theta$");
1339 return (1);
1340 case 0x0399:
1341 printf ("$I$");
1342 return (1);
1343 case 0x039a:
1344 printf ("$K$");
1345 return (1);
1346 case 0x039b:
1347 printf ("$\\Lambda$");
1348 return (1);
1349 case 0xf04d: /* Mac? */
1350 case 0x039c:
1351 printf ("$M$");
1352 return (1);
1353 case 0x039d:
1354 printf ("$N$");
1355 return (1);
1356 case 0x039e:
1357 printf ("$\\Xi$");
1358 return (1);
1359 case 0x039f:
1360 printf ("$O$"); /* Omicron */
1361 return (1);
1362 case 0x03a0:
1363 printf ("$\\Pi$");
1364 return (1);
1365 case 0x03a1:
1366 printf ("$R$");
1367 return (1);
1368
1369 case 0x03a3:
1370 printf ("$\\Sigma$");
1371 return (1);
1372 case 0x03a4:
1373 printf ("$T$");
1374 return (1);
1375 case 0x03a5:
1376 printf ("$Y$");
1377 return (1);
1378 case 0x03a6:
1379 printf ("$\\Phi$");
1380 return (1);
1381 case 0x03a7:
1382 printf ("$X$"); /* Chi */
1383 return (1);
1384 case 0x03a8:
1385 printf ("$\\Psi$");
1386 return (1);
1387 case 0x2126: /* Mac */
1388 case 0x03a9:
1389 printf ("$\\Omega$");
1390 return (1);
1391
1392 /* ...and lower case: */
1393
1394 case 0x03b1:
1395 printf ("$\\alpha$");
1396 return (1);
1397 case 0x03b2:
1398 printf ("$\\beta$");
1399 return (1);
1400 case 0xf067: /* Mac */
1401 case 0x03b3:
1402 printf ("$\\gamma$");
1403 return (1);
1404 case 0xf064: /* Mac */
1405 case 0x03b4:
1406 printf ("$\\delta$");
1407 return (1);
1408 case 0x03b5:
1409 printf ("$\\epsilon$");
1410 return (1);
1411 case 0xf04e: /* Mac? variant? */
1412 case 0xf07a: /* Mac? */
1413 case 0x03b6:
1414 printf ("$\\zeta$");
1415 return (1);
1416 case 0x03b7:
1417 printf ("$\\eta$");
1418 return (1);
1419 case 0x03b8:
1420 printf ("$\\theta$");
1421 return (1);
1422 case 0x03b9:
1423 printf ("$\\iota$");
1424 return (1);
1425 case 0x03ba:
1426 printf ("$\\kappa$");
1427 return (1);
1428 case 0xf06c: /* Mac? */
1429 case 0x03bb:
1430 printf ("$\\lambda$");
1431 return (1);
1432 case 0x03bc:
1433 printf ("$\\mu$");
1434 return (1);
1435 case 0x03bd:
1436 printf ("$\\nu$");
1437 return (1);
1438 case 0x03be:
1439 printf ("$\\xi$");
1440 return (1);
1441 case 0x03bf:
1442 printf ("$o$"); /* omicron */
1443 return (1);
1444 case 0x03c0:
1445 printf ("$\\pi$");
1446 return (1);
1447 case 0xf072: /* Mac */
1448 printf ("$\\varrho$");
1449 return (1);
1450 case 0x03c1:
1451 printf ("$\\rho$");
1452 return (1);
1453 case 0xf073: /* Mac */
1454 case 0x03c3:
1455 printf ("$\\sigma$");
1456 return (1);
1457 case 0x03c4:
1458 printf ("$\\tau$");
1459 return (1);
1460 case 0x03c5:
1461 printf ("$\\upsilon$");
1462 return (1);
1463 case 0x03c6:
1464 printf ("$\\phi$");
1465 return (1);
1466 case 0x03c7:
1467 printf ("$\\chi$");
1468 return (1);
1469 case 0x03c8:
1470 printf ("$\\psi$");
1471 return (1);
1472 case 0x03c9:
1473 printf ("$\\omega$");
1474 return (1);
1475 case 0xf06a: /* Mac? */
1476 case 0x03d5:
1477 printf ("$\\varphi$"); /* ? */
1478 return (1);
1479
1480 /* More math, typical inline: */
1481 case 0x2111:
1482 printf ("$\\Im$");
1483 return (1);
1484 case 0x2118:
1485 printf ("$\\wp$"); /* Weierstrass p */
1486 return (1);
1487 case 0x211c:
1488 printf ("$\\Re$");
1489 return (1);
1490 case 0x2135:
1491 printf ("$\\aleph$");
1492 return (1);
1493
1494 case 0x2190:
1495 printf ("$\\leftarrow$");
1496 return (1);
1497 case 0x2191:
1498 printf ("$\\uparrow$");
1499 return (1);
1500 case 0xf0ae: /* Mac */
1501 case 0x2192:
1502 printf ("$\\rightarrow$");
1503 return (1);
1504 case 0x2193:
1505 printf ("$\\downarrow$");
1506 return (1);
1507 case 0x21d0:
1508 printf ("$\\Leftarrow$");
1509 return (1);
1510 case 0x21d1:
1511 printf ("$\\Uparrow$");
1512 return (1);
1513 case 0x21d2:
1514 printf ("$\\Rightarrow$");
1515 return (1);
1516 case 0x21d3:
1517 printf ("$\\Downarrow$");
1518 return (1);
1519 case 0x21d4:
1520 printf ("$\\Leftrightarrow$");
1521 return (1);
1522
1523 case 0x2200:
1524 printf ("$\\forall$");
1525 return (1);
1526 case 0xf0b6: /* Mac */
1527 case 0x2202:
1528 printf ("$\\partial$");
1529 return (1);
1530 case 0x2203:
1531 printf ("$\\exists$");
1532 return (1);
1533 case 0x2205:
1534 printf ("$\\emptyset$");
1535 return (1);
1536 case 0x2207:
1537 printf ("$\\nabla$");
1538 return (1);
1539 case 0x2208:
1540 printf ("$\\in$"); /* element of */
1541 return (1);
1542 case 0x2209:
1543 printf ("$\\notin$"); /* not an element of */
1544 return (1);
1545 case 0x220b:
1546 printf ("$\\ni$"); /* contains as member */
1547 return (1);
1548 case 0x221a:
1549 printf ("$\\surd$"); /* sq root */
1550 return (1);
1551 case 0x2212:
1552 printf ("$-$"); /* minus */
1553 return (1);
1554 case 0x221d:
1555 printf ("$\\propto$");
1556 return (1);
1557 case 0x221e:
1558 printf ("$\\infty$");
1559 return (1);
1560 case 0x2220:
1561 printf ("$\\angle$");
1562 return (1);
1563 case 0x2227:
1564 printf ("$\\land$"); /* logical and */
1565 return (1);
1566 case 0x2228:
1567 printf ("$\\lor$"); /* logical or */
1568 return (1);
1569 case 0x2229:
1570 printf ("$\\cap$"); /* intersection */
1571 return (1);
1572 case 0x222a:
1573 printf ("$\\cup$"); /* union */
1574 return (1);
1575 case 0x223c:
1576 printf ("$\\sim$"); /* similar to */
1577 return (1);
1578 case 0x2248:
1579 printf ("$\\approx$");
1580 return (1);
1581 case 0x2261:
1582 printf ("$\\equiv$");
1583 return (1);
1584 case 0x2260:
1585 printf ("$\\neq$");
1586 return (1);
1587 case 0x2264:
1588 printf ("$\\leq$");
1589 return (1);
1590 case 0xf0b3: /* Mac? */
1591 case 0x2265:
1592 printf ("$\\geq$");
1593 return (1);
1594 case 0x2282:
1595 printf ("$\\subset$");
1596 return (1);
1597 case 0x2283:
1598 printf ("$\\supset$");
1599 return (1);
1600 case 0x2284:
1601 printf ("$\\notsubset$");
1602 return (1);
1603 case 0x2286:
1604 printf ("$\\subseteq$");
1605 return (1);
1606 case 0x2287:
1607 printf ("$\\supseteq$");
1608 return (1);
1609 case 0x2295:
1610 printf ("$\\oplus$"); /* circled plus */
1611 return (1);
1612 case 0x2297:
1613 printf ("$\\otimes$");
1614 return (1);
1615 case 0x22a5:
1616 printf ("$\\perp$"); /* perpendicular */
1617 return (1);
1618
1619
1620
1621
1622 case 0x2660:
1623 printf ("$\\spadesuit$");
1624 return (1);
1625 case 0x2663:
1626 printf ("$\\clubsuit$");
1627 return (1);
1628 case 0x2665:
1629 printf ("$\\heartsuit$");
1630 return (1);
1631 case 0x2666:
1632 printf ("$\\diamondsuit$");
1633 return (1);
1634
1635
1636 case 0x01C7:
1637 printf ("LJ"); /* the LJ letter */
1638 return (1);
1639 case 0x01C8:
1640 printf ("Lj"); /* the Lj letter */
1641 return (1);
1642 case 0x01C9:
1643 printf ("lj"); /* the lj letter */
1644 return (1);
1645 case 0x01CA:
1646 printf ("NJ"); /* the NJ letter */
1647 return (1);
1648 case 0x01CB:
1649 printf ("Nj"); /* the Nj letter */
1650 return (1);
1651 case 0x01CC:
1652 printf ("nj"); /* the nj letter */
1653 return (1);
1654 case 0x01CD:
1655 printf ("\\v{A}"); /* A with caron */
1656 return (1);
1657 case 0x01CE:
1658 printf ("\\v{a}"); /* a with caron */
1659 return (1);
1660 case 0x01CF:
1661 printf ("\\v{I}"); /* I with caron */
1662 return (1);
1663 case 0x01D0:
1664 printf ("\\v{\\i}"); /* i with caron (dotless) */
1665 return (1);
1666 case 0x01D1:
1667 printf ("\\v{O}"); /* O with caron */
1668 return (1);
1669 case 0x01D2:
1670 printf ("\\v{o}"); /* o with caron */
1671 return (1);
1672 case 0x01D3:
1673 printf ("\\v{U}"); /* U with caron */
1674 return (1);
1675 case 0x01D4:
1676 printf ("\\v{u}"); /* u with caron */
1677 return (1);
1678
1679 case 0x01E6:
1680 printf ("\\v{G}"); /* G with caron */
1681 return (1);
1682 case 0x01E7:
1683 printf ("\\v{g}"); /* g with caron */
1684 return (1);
1685 case 0x01E8:
1686 printf ("\\v{K}"); /* K with caron */
1687 return (1);
1688 case 0x01E9:
1689 printf ("\\v{k}"); /* k with caron */
1690 return (1);
1691
1692
1693 case 0x01F0:
1694 printf ("\\v{\\j}"); /* j with caron (dotless) */
1695 return (1);
1696 case 0x01F1:
1697 printf ("DZ"); /* the DZ letter */
1698 return (1);
1699 case 0x01F2:
1700 printf ("Dz"); /* the Dz letter */
1701 return (1);
1702 case 0x01F3:
1703 printf ("dz"); /* the dz letter */
1704 return (1);
1705 case 0x01F4:
1706 printf ("\\'G"); /* G with acute */
1707 return (1);
1708 case 0x01F5:
1709 printf ("\\'g"); /* g with acute */
1710 return (1);
1711
1712 case 0x01FA:
1713 printf ("\\'{\\AA}"); /* � with acute */
1714 return (1);
1715 case 0x01FB:
1716 printf ("\\'{\\aa}"); /* � with acute */
1717 return (1);
1718 case 0x01FC:
1719 printf ("\\'{\\AE}"); /* � with acute */
1720 return (1);
1721 case 0x01FD:
1722 printf ("\\'{\\ae}"); /* � with acute */
1723 return (1);
1724 case 0x01FE:
1725 printf ("\\'{\\O}"); /* � with acute */
1726 return (1);
1727 case 0x01FF:
1728 printf ("\\'{\\o}"); /* � with acute */
1729 return (1);
1730
1731 case 0x2010:
1732 printf ("-"); /* hyphen */
1733 return (1);
1734 case 0x2011:
1735 printf ("-"); /* non-breaking hyphen (is there a way to get this in LaTeX?) */
1736 return (1);
1737 case 0x2012:
1738 printf ("--"); /* figure dash (similar to en-dash) */
1739 return (1);
1740 case 0x2013:
1741 /*
1742 soft-hyphen? Or en-dash? I find that making
1743 this a soft-hyphen works very well, but makes
1744 the occasional "hard" word-connection hyphen
1745 (like the "-" in roller-coaster) disappear.
1746 (Are these actually en-dashes? Dunno.)
1747 How does MS Word distinguish between the 0x2013's
1748 that signify soft hyphens and those that signify
1749 word-connection hyphens? wvware should be able
1750 to as well. -- MV 8.7.2000
1751
1752 U+2013 is the en-dash character and not a soft
1753 hyphen. Soft hyphen is U+00AD. Changing to
1754 "--". -- 2000-08-11 huftis@bigfoot.com
1755 */
1756 printf ("--");
1757 return (1);
1758
1759 case 0x016B:
1760 printf ("\\=u"); /* u with macron */
1761 return (1);
1762 case 0x016C:
1763 printf ("\\u{U}"); /* U with breve */
1764 return (1);
1765 case 0x016D:
1766 printf ("\\u{u}"); /* u with breve */
1767 return (1);
1768 case 0x016E:
1769 printf ("\\r{U}"); /* U with ring above */
1770 return (1);
1771 case 0x016F:
1772 printf ("\\r{u}"); /* u with ring above */
1773 return (1);
1774 case 0x0170:
1775 printf ("\\H{U}"); /* U with double acute */
1776 return (1);
1777 case 0x0171:
1778 printf ("\\H{u}"); /* u with double acute */
1779 return (1);
1780
1781 case 0x0174:
1782 printf ("\\^W"); /* W with circumflex */
1783 return (1);
1784 case 0x0175:
1785 printf ("\\^w"); /* w with circumflex */
1786 return (1);
1787 case 0x0176:
1788 printf ("\\^Y"); /* Y with circumflex */
1789 return (1);
1790 case 0x0177:
1791 printf ("\\^y"); /* y with circumflex */
1792 return (1);
1793 case 0x0178:
1794 printf ("\\\"Y"); /* Y with diaeresis */
1795 return (1);
1796 case 0x0179:
1797 printf ("\\'Z"); /* Z with acute */
1798 return (1);
1799 case 0x017A:
1800 printf ("\\'z"); /* z with acute */
1801 return (1);
1802 case 0x017B:
1803 printf ("\\.Z"); /* Z with dot above */
1804 return (1);
1805 case 0x017C:
1806 printf ("\\.z"); /* z with dot above */
1807 return (1);
1808 case 0x017D:
1809 printf ("\\v{Z}"); /* Z with caron */
1810 return (1);
1811 case 0x017E:
1812 printf ("\\v{z}"); /* z with caron */
1813 return (1);
1814
1815 /* Windows specials (MV 4.7.2000). More could be added.
1816 See http://www.hut.fi/u/jkorpela/www/windows-chars.html
1817 */
1818
1819 case 0x2014:
1820 printf ("---"); /* em-dash */
1821 return (1);
1822 case 0x2018:
1823 printf ("`"); /* left single quote, Win */
1824 return (1);
1825 case 0x2019:
1826 printf ("'"); /* Right single quote, Win */
1827 return (1);
1828 case 0x201A:
1829 printf ("\\quotesinglbase{}"); /* single low 99 quotation mark */
1830 return (1);
1831 case 0x201C:
1832 printf ("``"); /* inverted double quotation mark */
1833 return (1);
1834 case 0x201D:
1835 printf ("''"); /* double q.m. */
1836 return (1);
1837 case 0x201E:
1838 printf ("\\quotedblbase{}"); /* double low 99 quotation mark */
1839 return (1);
1840 case 0x2020:
1841 printf ("\\dag{}"); /* dagger */
1842 return (1);
1843 case 0x2021:
1844 printf ("\\ddag{}"); /* double dagger */
1845 return (1);
1846 case 0x25cf: /* FilledCircle */
1847 case 0x2022:
1848 printf ("$\\bullet$"); /* bullet */
1849 return (1);
1850 case 0x2023:
1851 printf ("$\\bullet$"); /* NOTE: Not a real triangular bullet */
1852 return (1);
1853
1854 case 0x2024:
1855 printf ("."); /* One dot leader (for use in TOCs) */
1856 return (1);
1857 case 0x2025:
1858 printf (".."); /* Two dot leader (for use in TOCs) */
1859 return (1);
1860 case 0x2026:
1861 printf ("\\ldots"); /* ellipsis */
1862 return (1);
1863
1864 case 0x2039:
1865 printf ("\\guilsinglleft{}"); /* single left angle quotation mark */
1866 return (1);
1867 case 0x203A:
1868 printf ("\\guilsinglright{}"); /* single right angle quotation mark */
1869 return (1);
1870
1871 case 0x203C:
1872 printf ("!!"); /* double exclamation mark */
1873 return (1);
1874
1875 case 0x2215:
1876 printf ("$/$"); /* Division slash */
1877 return (1);
1878
1879 case 0x2030:
1880 printf ("o/oo");
1881 return (1);
1882
1883 case 0x20ac:
1884 printf ("\\euro");
1885 /* No known implementation ;-)
1886
1887 Shouldn't we use the package 'eurofont'?
1888 -- 2000-08-15 huftis@bigfoot.com
1889 */
1890 return (1);
1891
1892 case 0x2160:
1893 printf ("I"); /* Roman numeral I */
1894 return (1);
1895 case 0x2161:
1896 printf ("II"); /* Roman numeral II */
1897 return (1);
1898 case 0x2162:
1899 printf ("III"); /* Roman numeral III */
1900 return (1);
1901 case 0x2163:
1902 printf ("IV"); /* Roman numeral IV */
1903 return (1);
1904 case 0x2164:
1905 printf ("V"); /* Roman numeral V */
1906 return (1);
1907 case 0x2165:
1908 printf ("VI"); /* Roman numeral VI */
1909 return (1);
1910 case 0x2166:
1911 printf ("VII"); /* Roman numeral VII */
1912 return (1);
1913 case 0x2167:
1914 printf ("VIII"); /* Roman numeral VIII */
1915 return (1);
1916 case 0x2168:
1917 printf ("IX"); /* Roman numeral IX */
1918 return (1);
1919 case 0x2169:
1920 printf ("X"); /* Roman numeral X */
1921 return (1);
1922 case 0x216A:
1923 printf ("XI"); /* Roman numeral XI */
1924 return (1);
1925 case 0x216B:
1926 printf ("XII"); /* Roman numeral XII */
1927 return (1);
1928 case 0x216C:
1929 printf ("L"); /* Roman numeral L */
1930 return (1);
1931 case 0x216D:
1932 printf ("C"); /* Roman numeral C */
1933 return (1);
1934 case 0x216E:
1935 printf ("D"); /* Roman numeral D */
1936 return (1);
1937 case 0x216F:
1938 printf ("M"); /* Roman numeral M */
1939 return (1);
1940 case 0x2170:
1941 printf ("i"); /* Roman numeral i */
1942 return (1);
1943 case 0x2171:
1944 printf ("ii"); /* Roman numeral ii */
1945 return (1);
1946 case 0x2172:
1947 printf ("iii"); /* Roman numeral iii */
1948 return (1);
1949 case 0x2173:
1950 printf ("iv"); /* Roman numeral iv */
1951 return (1);
1952 case 0x2174:
1953 printf ("v"); /* Roman numeral v */
1954 return (1);
1955 case 0x2175:
1956 printf ("vi"); /* Roman numeral vi */
1957 return (1);
1958 case 0x2176:
1959 printf ("vii"); /* Roman numeral vii */
1960 return (1);
1961 case 0x2177:
1962 printf ("viii"); /* Roman numeral viii */
1963 return (1);
1964 case 0x2178:
1965 printf ("ix"); /* Roman numeral ix */
1966 return (1);
1967 case 0x2179:
1968 printf ("x"); /* Roman numeral x */
1969 return (1);
1970 case 0x217A:
1971 printf ("xi"); /* Roman numeral xi */
1972 return (1);
1973 case 0x217B:
1974 printf ("xiii"); /* Roman numeral xii */
1975 return (1);
1976 case 0x217C:
1977 printf ("l"); /* Roman numeral l */
1978 return (1);
1979 case 0x217D:
1980 printf ("c"); /* Roman numeral c */
1981 return (1);
1982 case 0x217E:
1983 printf ("d"); /* Roman numeral d */
1984 return (1);
1985 case 0x217F:
1986 printf ("m"); /* Roman numeral m */
1987 return (1);
1988
1989 }
1990 /* Debugging aid: */
1991 if (char16 >= 0x80)
1992 {
1993 printf ("[%x]", char16);
1994 return (1);
1995 }
1996 return (0);
1997 }
1998
1999 int
wvConvertUnicodeToHtml(U16 char16)2000 wvConvertUnicodeToHtml (U16 char16)
2001 {
2002 switch (char16)
2003 {
2004 case 11:
2005 printf ("<br>");
2006 return (1);
2007 case 31: /* non-required hyphen */
2008 printf("­"); /*vladimir@lukianov.name HTML 4.01 spec*/
2009 return (1);
2010 case 30:
2011 case 45:
2012 case 0x2013:
2013 printf ("-"); /* en-dash */
2014 return (1);
2015 case 12:
2016 case 13:
2017 case 14:
2018 case 7:
2019 return (1);
2020 case 34:
2021 printf (""");
2022 return (1);
2023 case 38:
2024 printf ("&");
2025 return (1);
2026 case 60:
2027 printf ("<");
2028 return (1);
2029 case 62:
2030 printf (">");
2031 return (1);
2032 /*
2033 german characters, im assured that this is the right way to handle them
2034 by Markus Schulte <markus@dom.de>
2035
2036 As the output encoding for HTML was chosen as UTF-8,
2037 we don't need Ä etc. etc. I removed all but sz
2038 -- MV 6.4.2000
2039 */
2040
2041 case 0xdf:
2042 printf ("ß");
2043 return (1);
2044 /* end german characters */
2045 case 0x2026:
2046 #if 0
2047 /*
2048 this just looks awful in netscape 4.5, so im going to do a very foolish
2049 thing and just put ... instead of this
2050 */
2051 printf ("…");
2052 /*is there a proper html name for ... &ellipse;? Yes, … -- MV */
2053 #endif
2054 printf ("…");
2055 return (1);
2056 case 0x2019:
2057 printf ("'");
2058 return (1);
2059 case 0x2215:
2060 printf ("/");
2061 return (1);
2062 case 0xF8E7: /* without this, things should work in theory, but not for me */
2063 printf ("_");
2064 return (1);
2065 case 0x2018:
2066 printf ("`");
2067 return (1);
2068
2069 /* Windows specials (MV): */
2070 case 0x0160:
2071 printf ("Š");
2072 return (1);
2073 case 0x0161:
2074 printf ("š");
2075 return (1);
2076 case 0x2014:
2077 printf ("—");
2078 return (1);
2079 case 0x201c:
2080 printf ("“"); /* inverted double quotation mark */
2081 return (1);
2082 case 0x201d:
2083 printf ("”"); /* double q.m. */
2084 return (1);
2085 case 0x201e:
2086 printf ("„"); /* below double q.m. */
2087 return (1);
2088 case 0x2020:
2089 printf ("†");
2090 return (1);
2091 case 0x2021:
2092 printf ("‡");
2093 return (1);
2094 case 0x2022:
2095 printf ("•");
2096 return (1);
2097 case 0x0152:
2098 printf ("Œ");
2099 return (1);
2100 case 0x0153:
2101 printf ("œ");
2102 return (1);
2103 case 0x0178:
2104 printf ("Ÿ");
2105 return (1);
2106 case 0x2030:
2107 printf ("‰");
2108 return (1);
2109 case 0x20ac:
2110 printf ("€");
2111 return (1);
2112
2113 /* Mac specials (MV): */
2114
2115 case 0xf020:
2116 printf (" ");
2117 return (1);
2118 case 0xf02c:
2119 printf (",");
2120 return (1);
2121 case 0xf028:
2122 printf ("(");
2123 return (1);
2124
2125 case 0xf03e:
2126 printf (">");
2127 return (1);
2128 case 0xf067:
2129 printf ("γ");
2130 return (1);
2131 case 0xf064:
2132 printf ("δ");
2133 return (1);
2134 case 0xf072:
2135 printf ("ρ");
2136 return (1);
2137 case 0xf073:
2138 printf ("σ");
2139 return (1);
2140 case 0xf0ae:
2141 printf ("→"); /* right arrow */
2142 return (1);
2143 case 0xf0b6:
2144 printf ("∂"); /* partial deriv. */
2145 return (1);
2146 case 0xf0b3:
2147 printf ("≥");
2148 return (1);
2149
2150 }
2151 /* Debugging aid: */
2152 /* if (char16 >= 0x100) printf("[%x]", char16); */
2153 return (0);
2154 }
2155
2156
2157
2158 int
wvConvertUnicodeToXml(U16 char16)2159 wvConvertUnicodeToXml (U16 char16)
2160 {
2161 switch (char16)
2162 {
2163 case 11:
2164 printf ("<br />");
2165 return (1);
2166
2167 case 30:
2168 case 31:
2169 case 12:
2170 case 13:
2171 case 14:
2172 case 7:
2173 return (1);
2174
2175 /* Much simpler here, because XML supports only a few entities */
2176 case 34:
2177 printf (""");
2178 return (1);
2179 case 38:
2180 printf ("&");
2181 return (1);
2182 case 39:
2183 printf ("'");
2184 return (1);
2185 case 60:
2186 printf ("<");
2187 return (1);
2188 case 62:
2189 printf (">");
2190 return (1);
2191 }
2192
2193 return (0);
2194 }
2195
/*
 * Copy the NUL-terminated string s into the n-byte buffer d, truncating
 * it if it does not fit.
 *
 * For n > 0 the result is always NUL-terminated (and, as with strncpy,
 * the unused tail of d is zero-filled).  For n == 0 nothing is written
 * at all.  Returns d.
 */
char *str_copy(char *d, size_t n, char *s)
{
    /* No room even for the terminator: d[n-1] would be out of bounds. */
    if (n == 0)
        return d;

    strncpy(d, s, n - 1);
    d[n - 1] = 0;
    return d;
}
2202
/*
 * Append the NUL-terminated string s to the string held in the n-byte
 * buffer d, truncating the result so that it fits in n bytes including
 * the terminator.
 *
 * For n > 0, d[n-1] is always set to NUL.  If d holds no terminator
 * within its first n bytes (the buffer is already full), nothing is
 * appended and d is simply cut off at n-1 characters.  For n == 0
 * nothing is written at all.  Returns d.
 */
char *str_append(char *d, size_t n, char *s)
{
    char *end;

    if (n == 0)
        return d;

    /*
     * Look for the current terminator inside the buffer only.  Computing
     * "n - strlen(d) - 1" in a signed int could go negative and then turn
     * into a huge size_t bound for strncat, overflowing d.
     */
    end = memchr(d, 0, n);
    if (end != NULL)
        strncat(d, s, n - (size_t)(end - d) - 1);

    d[n - 1] = 0;
    return d;
}
2210
/*
 * Copy string s into the char array d, using sizeof(d) as the capacity.
 * d must be a real array, not a pointer: sizeof applied to a pointer
 * yields the pointer size, not the size of the buffer it points to.
 */
#define BUF_COPY(d,s) str_copy((d), sizeof(d), (s))
2212
2213 char *
wvConvertStylename(char * stylename,char * outputtype)2214 wvConvertStylename(char *stylename, char *outputtype)
2215 {
2216 static char cached_outputtype[36];
2217 static GIConv g_iconv_handle = (GIConv)-1;
2218 /**FIXME: 100 is just the size of stylename[] from wv.h**/
2219 static char buffer[100];
2220 char *ibuf, *obuf;
2221 size_t ibuflen, obuflen, len;
2222
2223 /* Destroy */
2224 if(!outputtype)
2225 {
2226 if ((g_iconv_handle != (GIConv)-1))
2227 g_iconv_close(g_iconv_handle);
2228 return NULL;
2229 }
2230
2231 /* Initialize */
2232 if(!g_iconv_handle || strcmp(cached_outputtype, outputtype))
2233 {
2234 if ((g_iconv_handle != (GIConv)-1))
2235 g_iconv_close(g_iconv_handle);
2236
2237 /**FIXME: don�t know if ISO-8859-1 is really the correct
2238 **charset for style names with eg umlauts. **/
2239 g_iconv_handle = g_iconv_open(outputtype, "ISO-8859-1");
2240 if(g_iconv_handle == (GIConv)-1)
2241 {
2242 wvError(("g_iconv_open fail: %d, cannot convert %s to %s\n",
2243 errno, "ISO-8859-1", outputtype));
2244 return stylename;
2245 }
2246
2247 BUF_COPY(cached_outputtype, outputtype);
2248 }
2249
2250 /* Convert */
2251 ibuf = stylename;
2252 ibuflen = strlen(stylename);
2253 obuf = buffer;
2254 obuflen = sizeof(buffer) - 1;
2255 len = g_iconv (g_iconv_handle, &ibuf, &ibuflen, &obuf, &obuflen);
2256 *obuf = 0;
2257 if(len == -1)
2258 {
2259 wvError(("wvConfig.c: can�t iconv()\n"));
2260 return stylename;
2261 }
2262
2263 return buffer;
2264 }
2265