1 /*
2 * $LynxId: LYCharSets.c,v 1.71 2021/06/29 22:01:12 tom Exp $
3 */
4 #include <HTUtils.h>
5 #include <HTCJK.h>
6 #include <HTMLDTD.h>
7
8 #include <LYGlobalDefs.h>
9 #include <UCMap.h>
10 #include <UCdomap.h>
11 #include <UCDefs.h>
12 #include <LYCharSets.h>
13 #include <GridText.h>
14 #include <LYCurses.h>
15 #include <LYStrings.h>
16
17 #include <LYLeaks.h>
18
19 HTkcode kanji_code = NOKANJI;
20 BOOLEAN LYHaveCJKCharacterSet = FALSE;
21 BOOLEAN DisplayCharsetMatchLocale = TRUE;
22 BOOL force_old_UCLYhndl_on_reload = FALSE;
23 int forced_UCLYhdnl;
24 int LYNumCharsets = 0; /* Will be initialized later by UC_Register. */
25 int current_char_set = -1; /* will be initialized later in LYMain.c */
26 int linedrawing_char_set = -1;
27 STRING2PTR p_entity_values = NULL; /* Pointer, for HTML_put_entity() */
28
29 /* obsolete and probably not used(???) */
30 /* will be initialized in HTMLUseCharacterSet */
#ifdef USE_CHARSET_CHOICE
/* Data that exists only in builds with USE_CHARSET_CHOICE. */
charset_subset_t charset_subsets[MAXCHARSETS];
BOOL custom_display_charset = FALSE;
BOOL custom_assumed_doc_charset = FALSE;

#ifndef ALL_CHARSETS_IN_O_MENU_SCREEN
/* Additional tables used when ALL_CHARSETS_IN_O_MENU_SCREEN is not defined. */
int display_charset_map[MAXCHARSETS];
int assumed_doc_charset_map[MAXCHARSETS];

/* One slot beyond MAXCHARSETS -- presumably for a NULL terminator; verify. */
const char *display_charset_choices[MAXCHARSETS + 1];
const char *assumed_charset_choices[MAXCHARSETS + 1];
int displayed_display_charset_idx;
#endif /* !ALL_CHARSETS_IN_O_MENU_SCREEN */
#endif /* USE_CHARSET_CHOICE */
45
/*
 * New character sets are now declared with UCInit() in UCdomap.c.
 *
 * INSTRUCTIONS for adding new character sets which do not have Unicode
 * tables are now in UCdomap.h.
 *
 * [We hope you no longer need to correct or extend the old-style mappings
 * below, such as ISO_Latin1[] or SevenBitApproximations[] - this now works
 * via the new chartrans mechanism.  The old tables are kept for compatibility
 * only: we should clean this stuff up, but that is not so easy...]
 *
 * Currently we only declare some of a charset's properties here (such as its
 * MIME name, etc.); this does not include the real mapping.
 *
 * There is a place marked "Add your new character sets HERE" in this file.
 * Make up a character set and add it in the same style as the ISO_Latin1 set
 * below, giving it a unique name.
 *
 * Add the name of the set to LYCharSets.  Similarly add the appropriate
 * information to the tables below:  LYchar_set_names, LYCharSet_UC,
 * LYlowest_eightbit.  These 4 tables all MUST have the same order.  (And this
 * is the order you will see in the Lynx Options Menu, which is why few
 * unicode-based charsets are listed here).
 *
 */
72
/*
 * Entity values -- ISO Latin 1 local representation.
 *
 * One string per entity; each comment gives the entity name first.
 * This MUST match exactly the table referred to in the DTD!
 */
static const char *ISO_Latin1[] =
{
    "\306",			/* AElig  - capital AE diphthong (ligature) */
    "\301",			/* Aacute - capital A, acute accent */
    "\302",			/* Acirc  - capital A, circumflex accent */
    "\300",			/* Agrave - capital A, grave accent */
    "\305",			/* Aring  - capital A, ring */
    "\303",			/* Atilde - capital A, tilde */
    "\304",			/* Auml   - capital A, dieresis or umlaut mark */
    "\307",			/* Ccedil - capital C, cedilla */
    "\320",			/* Dstrok - capital Eth or D with stroke */
    "\320",			/* ETH    - capital Eth, Icelandic */
    "\311",			/* Eacute - capital E, acute accent */
    "\312",			/* Ecirc  - capital E, circumflex accent */
    "\310",			/* Egrave - capital E, grave accent */
    "\313",			/* Euml   - capital E, dieresis or umlaut mark */
    "\315",			/* Iacute - capital I, acute accent */
    "\316",			/* Icirc  - capital I, circumflex accent */
    "\314",			/* Igrave - capital I, grave accent */
    "\317",			/* Iuml   - capital I, dieresis or umlaut mark */
    "\321",			/* Ntilde - capital N, tilde */
    "\323",			/* Oacute - capital O, acute accent */
    "\324",			/* Ocirc  - capital O, circumflex accent */
    "\322",			/* Ograve - capital O, grave accent */
    "\330",			/* Oslash - capital O, slash */
    "\325",			/* Otilde - capital O, tilde */
    "\326",			/* Ouml   - capital O, dieresis or umlaut mark */
    "\336",			/* THORN  - capital THORN, Icelandic */
    "\332",			/* Uacute - capital U, acute accent */
    "\333",			/* Ucirc  - capital U, circumflex accent */
    "\331",			/* Ugrave - capital U, grave accent */
    "\334",			/* Uuml   - capital U, dieresis or umlaut mark */
    "\335",			/* Yacute - capital Y, acute accent */
    "\341",			/* aacute - small a, acute accent */
    "\342",			/* acirc  - small a, circumflex accent */
    "\264",			/* acute  - spacing acute */
    "\346",			/* aelig  - small ae diphthong (ligature) */
    "\340",			/* agrave - small a, grave accent */
    "\046",			/* amp    - ampersand (&) */
    "\345",			/* aring  - small a, ring */
    "\343",			/* atilde - small a, tilde */
    "\344",			/* auml   - small a, dieresis or umlaut mark */
    "\246",			/* brkbar - broken vertical bar */
    "\246",			/* brvbar - broken vertical bar */
    "\347",			/* ccedil - small c, cedilla */
    "\270",			/* cedil  - spacing cedilla */
    "\242",			/* cent   - cent sign */
    "\251",			/* copy   - copyright sign */
    "\244",			/* curren - currency sign */
    "\260",			/* deg    - degree sign */
    "\250",			/* die    - spacing dieresis */
    "\367",			/* divide - division sign */
    "\351",			/* eacute - small e, acute accent */
    "\352",			/* ecirc  - small e, circumflex accent */
    "\350",			/* egrave - small e, grave accent */
    "-",			/* emdash - dash the width of emsp */
    "\002",			/* emsp   - em space, not collapsed; NEVER CHANGE THIS */
    "-",			/* endash - dash the width of ensp */
    "\002",			/* ensp   - en space, not collapsed; NEVER CHANGE THIS */
    "\360",			/* eth    - small eth, Icelandic */
    "\353",			/* euml   - small e, dieresis or umlaut mark */
    "\275",			/* frac12 - fraction 1/2 */
    "\274",			/* frac14 - fraction 1/4 */
    "\276",			/* frac34 - fraction 3/4 */
    "\076",			/* gt     - greater than (>) */
    "\257",			/* hibar  - spacing macron */
    "\355",			/* iacute - small i, acute accent */
    "\356",			/* icirc  - small i, circumflex accent */
    "\241",			/* iexcl  - inverted exclamation mark */
    "\354",			/* igrave - small i, grave accent */
    "\277",			/* iquest - inverted question mark */
    "\357",			/* iuml   - small i, dieresis or umlaut mark */
    "\253",			/* laquo  - angle quotation mark, left */
    "\074",			/* lt     - less than (<) */
    "\257",			/* macr   - spacing macron */
    "-",			/* mdash  - dash the width of emsp */
    "\265",			/* micro  - micro sign */
    "\267",			/* middot - middle dot */
    "\001",			/* nbsp   - non-breaking space; NEVER CHANGE THIS */
    "-",			/* ndash  - dash the width of ensp */
    "\254",			/* not    - negation sign */
    "\361",			/* ntilde - small n, tilde */
    "\363",			/* oacute - small o, acute accent */
    "\364",			/* ocirc  - small o, circumflex accent */
    "\362",			/* ograve - small o, grave accent */
    "\252",			/* ordf   - feminine ordinal indicator */
    "\272",			/* ordm   - masculine ordinal indicator */
    "\370",			/* oslash - small o, slash */
    "\365",			/* otilde - small o, tilde */
    "\366",			/* ouml   - small o, dieresis or umlaut mark */
    "\266",			/* para   - paragraph sign */
    "\261",			/* plusmn - plus-or-minus sign */
    "\243",			/* pound  - pound sign */
    "\042",			/* quot   - quote '"' */
    "\273",			/* raquo  - angle quotation mark, right */
    "\256",			/* reg    - circled R registered sign */
    "\247",			/* sect   - section sign */
    "\007",			/* shy    - soft hyphen; NEVER CHANGE THIS */
    "\271",			/* sup1   - superscript 1 */
    "\262",			/* sup2   - superscript 2 */
    "\263",			/* sup3   - superscript 3 */
    "\337",			/* szlig  - small sharp s, German (sz ligature) */
    "\002",			/* thinsp - thin space, not collapsed; NEVER CHANGE THIS */
    "\376",			/* thorn  - small thorn, Icelandic */
    "\327",			/* times  - multiplication sign */
    "(TM)",			/* trade  - circled TM trade mark sign */
    "\372",			/* uacute - small u, acute accent */
    "\373",			/* ucirc  - small u, circumflex accent */
    "\371",			/* ugrave - small u, grave accent */
    "\250",			/* uml    - spacing dieresis */
    "\374",			/* uuml   - small u, dieresis or umlaut mark */
    "\375",			/* yacute - small y, acute accent */
    "\245",			/* yen    - yen sign */
    "\377",			/* yuml   - small y, dieresis or umlaut mark */
};
192
/*
 * Entity values -- 7 bit character approximations.
 *
 * One string per entity; each comment gives the entity name first.
 * This MUST match exactly the table referred to in the DTD!
 *
 * LY_UML_7BIT(plain, digraph) selects the spelling of the six umlaut
 * vowels: the two-letter digraph when LY_UMLAUT is defined, the plain base
 * letter otherwise.
 */
#ifdef LY_UMLAUT
#define LY_UML_7BIT(plain, digraph) digraph
#else
#define LY_UML_7BIT(plain, digraph) plain
#endif /* LY_UMLAUT */

const char *SevenBitApproximations[] =
{
    "AE",			/* AElig  - capital AE diphthong (ligature) */
    "A",			/* Aacute - capital A, acute accent */
    "A",			/* Acirc  - capital A, circumflex accent */
    "A",			/* Agrave - capital A, grave accent */
    "A",			/* Aring  - capital A, ring */
    "A",			/* Atilde - capital A, tilde */
    LY_UML_7BIT("A", "Ae"),	/* Auml   - capital A, dieresis or umlaut mark */
    "C",			/* Ccedil - capital C, cedilla */
    "Dj",			/* Dstrok - capital D with stroke */
    "DH",			/* ETH    - capital Eth, Icelandic */
    "E",			/* Eacute - capital E, acute accent */
    "E",			/* Ecirc  - capital E, circumflex accent */
    "E",			/* Egrave - capital E, grave accent */
    "E",			/* Euml   - capital E, dieresis or umlaut mark */
    "I",			/* Iacute - capital I, acute accent */
    "I",			/* Icirc  - capital I, circumflex accent */
    "I",			/* Igrave - capital I, grave accent */
    "I",			/* Iuml   - capital I, dieresis or umlaut mark */
    "N",			/* Ntilde - capital N, tilde */
    "O",			/* Oacute - capital O, acute accent */
    "O",			/* Ocirc  - capital O, circumflex accent */
    "O",			/* Ograve - capital O, grave accent */
    "O",			/* Oslash - capital O, slash */
    "O",			/* Otilde - capital O, tilde */
    LY_UML_7BIT("O", "Oe"),	/* Ouml   - capital O, dieresis or umlaut mark */
    "P",			/* THORN  - capital THORN, Icelandic */
    "U",			/* Uacute - capital U, acute accent */
    "U",			/* Ucirc  - capital U, circumflex accent */
    "U",			/* Ugrave - capital U, grave accent */
    LY_UML_7BIT("U", "Ue"),	/* Uuml   - capital U, dieresis or umlaut mark */
    "Y",			/* Yacute - capital Y, acute accent */
    "a",			/* aacute - small a, acute accent */
    "a",			/* acirc  - small a, circumflex accent */
    "'",			/* acute  - spacing acute */
    "ae",			/* aelig  - small ae diphthong (ligature) */
    "`a",			/* agrave - small a, grave accent */
    "&",			/* amp    - ampersand */
    "a",			/* aring  - small a, ring */
    "a",			/* atilde - small a, tilde */
    LY_UML_7BIT("a", "ae"),	/* auml   - small a, dieresis or umlaut mark */
    "|",			/* brkbar - broken vertical bar */
    "|",			/* brvbar - broken vertical bar */
    "c",			/* ccedil - small c, cedilla */
    ",",			/* cedil  - spacing cedilla */
    "-c-",			/* cent   - cent sign */
    "(c)",			/* copy   - copyright sign */
    "CUR",			/* curren - currency sign */
    "DEG",			/* deg    - degree sign */
    "\042",			/* die    - spacing dieresis */
    "/",			/* divide - division sign */
    "e",			/* eacute - small e, acute accent */
    "e",			/* ecirc  - small e, circumflex accent */
    "e",			/* egrave - small e, grave accent */
    "-",			/* emdash - dash the width of emsp */
    "\002",			/* emsp   - NEVER CHANGE THIS */
    "-",			/* endash - dash the width of ensp */
    "\002",			/* ensp   - NEVER CHANGE THIS */
    "dh",			/* eth    - small eth, Icelandic */
    "e",			/* euml   - small e, dieresis or umlaut mark */
    " 1/2",			/* frac12 - fraction 1/2 */
    " 1/4",			/* frac14 - fraction 1/4 */
    " 3/4",			/* frac34 - fraction 3/4 */
    ">",			/* gt     - greater than */
    "-",			/* hibar  - spacing macron */
    "i",			/* iacute - small i, acute accent */
    "i",			/* icirc  - small i, circumflex accent */
    "!",			/* iexcl  - inverted exclamation mark */
    "`i",			/* igrave - small i, grave accent */
    "?",			/* iquest - inverted question mark */
    "i",			/* iuml   - small i, dieresis or umlaut mark */
    "<<",			/* laquo  - angle quotation mark, left */
    "<",			/* lt     - less than */
    "-",			/* macr   - spacing macron */
    "-",			/* mdash  - dash the width of emsp */
    "u",			/* micro  - micro sign */
    ".",			/* middot - middle dot */
    "\001",			/* nbsp   - non-breaking space; NEVER CHANGE THIS */
    "-",			/* ndash  - dash the width of ensp */
    "NOT",			/* not    - negation sign */
    "n",			/* ntilde - small n, tilde */
    "o",			/* oacute - small o, acute accent */
    "o",			/* ocirc  - small o, circumflex accent */
    "o",			/* ograve - small o, grave accent */
    "-a",			/* ordf   - feminine ordinal indicator */
    "-o",			/* ordm   - masculine ordinal indicator */
    "o",			/* oslash - small o, slash */
    "o",			/* otilde - small o, tilde */
    LY_UML_7BIT("o", "oe"),	/* ouml   - small o, dieresis or umlaut mark */
    "P:",			/* para   - paragraph sign */
    "+-",			/* plusmn - plus-or-minus sign */
    "-L-",			/* pound  - pound sign */
    "\"",			/* quot   - quote '"' */
    ">>",			/* raquo  - angle quotation mark, right */
    "(R)",			/* reg    - circled R registered sign */
    "S:",			/* sect   - section sign */
    "\007",			/* shy    - soft hyphen; NEVER CHANGE THIS */
    "^1",			/* sup1   - superscript 1 */
    "^2",			/* sup2   - superscript 2 */
    "^3",			/* sup3   - superscript 3 */
    "ss",			/* szlig  - small sharp s, German (sz ligature) */
    "\002",			/* thinsp - thin space, not collapsed; NEVER CHANGE THIS */
    "p",			/* thorn  - small thorn, Icelandic */
    "*",			/* times  - multiplication sign */
    "(TM)",			/* trade  - circled TM trade mark sign */
    "u",			/* uacute - small u, acute accent */
    "u",			/* ucirc  - small u, circumflex accent */
    "u",			/* ugrave - small u, grave accent */
    "\042",			/* uml    - spacing dieresis */
    LY_UML_7BIT("u", "ue"),	/* uuml   - small u, dieresis or umlaut mark */
    "y",			/* yacute - small y, acute accent */
    "YEN",			/* yen    - yen sign */
    "y",			/* yuml   - small y, dieresis or umlaut mark */
};

#undef LY_UML_7BIT
336
337 /*
338 * Add your new character sets HERE (but only if you can't construct Unicode
339 * tables for them). - FM
340 */
341
342 /*
343 * Add the array name to LYCharSets
344 */
345 STRING2PTR LYCharSets[MAXCHARSETS] =
346 {
347 ISO_Latin1, /* ISO Latin 1 */
348 SevenBitApproximations, /* 7 Bit Approximations */
349 };
350
351 /*
352 * Add the name that the user will see below. The order of LYCharSets and
353 * LYchar_set_names MUST be the same
354 */
355 const char *LYchar_set_names[MAXCHARSETS + 1] =
356 {
357 "Western (ISO-8859-1)",
358 "7 bit approximations (US-ASCII)",
359 (char *) 0
360 };
361
362 /*
363 * Associate additional pieces of info with each of the charsets listed above.
364 * Will be automatically modified (and extended) by charset translations which
365 * are loaded using the chartrans mechanism. Most important piece of info to
366 * put here is a MIME charset name. Used for chartrans (see UCDefs.h). The
367 * order of LYCharSets and LYCharSet_UC MUST be the same.
368 *
369 * Note that most of the charsets added by the new mechanism in src/chrtrans
370 * don't show up here at all. They don't have to.
371 */
372 LYUCcharset LYCharSet_UC[MAXCHARSETS] =
373 {
374 /*
375 * Zero position placeholder and HTMLGetEntityUCValue() reference. - FM
376 */
377 {-1, "iso-8859-1", UCT_ENC_8BIT, 0,
378 UCT_REP_IS_LAT1,
379 UCT_CP_IS_LAT1, UCT_R_LAT1, UCT_R_LAT1},
380
381 /*
382 * Placeholders for Unicode tables. - FM
383 */
384 {-1, "us-ascii", UCT_ENC_7BIT, 0,
385 UCT_REP_SUBSETOF_LAT1,
386 UCT_CP_SUBSETOF_LAT1, UCT_R_ASCII, UCT_R_ASCII},
387
388 };
389
390 /*
391 * Add the code of the the lowest character with the high bit set that can be
392 * directly displayed. The order of LYCharSets and LYlowest_eightbit MUST be
393 * the same.
394 *
395 * (If charset have chartrans unicode table, LYlowest_eightbit will be
396 * verified/modified anyway.)
397 */
398 int LYlowest_eightbit[MAXCHARSETS] =
399 {
400 160, /* ISO Latin 1 */
401 999, /* 7 bit approximations */
402 };
403
404 /*
405 * Function to set the handling of selected character sets based on the current
406 * LYUseDefaultRawMode value. - FM
407 */
HTMLSetCharacterHandling(int i)408 void HTMLSetCharacterHandling(int i)
409 {
410 int chndl = safeUCGetLYhndl_byMIME(UCAssume_MIMEcharset);
411 BOOLEAN LYRawMode_flag = LYRawMode;
412 int UCLYhndl_for_unspec_flag = UCLYhndl_for_unspec;
413
414 if (LYCharSet_UC[i].enc != UCT_ENC_CJK) {
415 HTCJK = NOCJK;
416 kanji_code = NOKANJI;
417 if (i == chndl)
418 LYRawMode = LYUseDefaultRawMode;
419 else
420 LYRawMode = (BOOL) (!LYUseDefaultRawMode);
421
422 HTPassEightBitNum = (BOOL) ((LYCharSet_UC[i].codepoints & UCT_CP_SUPERSETOF_LAT1)
423 || (LYCharSet_UC[i].like8859 & UCT_R_HIGH8BIT));
424
425 if (LYRawMode) {
426 HTPassEightBitRaw = (BOOL) (LYlowest_eightbit[i] <= 160);
427 } else {
428 HTPassEightBitRaw = FALSE;
429 }
430 if (LYRawMode || i == chndl) {
431 HTPassHighCtrlRaw = (BOOL) (LYlowest_eightbit[i] <= 130);
432 } else {
433 HTPassHighCtrlRaw = FALSE;
434 }
435
436 HTPassHighCtrlNum = FALSE;
437
438 } else { /* CJK encoding: */
439 const char *mime = LYCharSet_UC[i].MIMEname;
440
441 if (!strcmp(mime, "euc-cn")) {
442 HTCJK = CHINESE;
443 kanji_code = EUC;
444 } else if (!strcmp(mime, "euc-jp")) {
445 HTCJK = JAPANESE;
446 kanji_code = EUC;
447 } else if (!strcmp(mime, "shift_jis")) {
448 HTCJK = JAPANESE;
449 kanji_code = SJIS;
450 } else if (!strcmp(mime, "euc-kr")) {
451 HTCJK = KOREAN;
452 kanji_code = EUC;
453 } else if (!strcmp(mime, "big5")) {
454 HTCJK = TAIPEI;
455 kanji_code = EUC;
456 }
457
458 /* for any CJK: */
459 if (!LYUseDefaultRawMode)
460 HTCJK = NOCJK;
461 LYRawMode = (BOOL) (IS_CJK_TTY ? TRUE : FALSE);
462 HTPassEightBitRaw = FALSE;
463 HTPassEightBitNum = FALSE;
464 HTPassHighCtrlRaw = (BOOL) (IS_CJK_TTY ? TRUE : FALSE);
465 HTPassHighCtrlNum = FALSE;
466 }
467
468 /*
469 * Comment for coding below:
470 * UCLYhndl_for_unspec is "current" state with LYRawMode, but
471 * UCAssume_MIMEcharset is independent from LYRawMode: holds the history
472 * and may be changed from 'O'ptions menu only. - LP
473 */
474 if (LYRawMode) {
475 UCLYhndl_for_unspec = i; /* UCAssume_MIMEcharset not changed! */
476 } else {
477 if (chndl != i &&
478 (LYCharSet_UC[i].enc != UCT_ENC_CJK ||
479 LYCharSet_UC[chndl].enc != UCT_ENC_CJK)) {
480 UCLYhndl_for_unspec = chndl; /* fall to UCAssume_MIMEcharset */
481 } else {
482 UCLYhndl_for_unspec = LATIN1; /* UCAssume_MIMEcharset not changed! */
483 }
484 }
485
486 #ifdef USE_SLANG
487 if (LYlowest_eightbit[i] > 191) {
488 /*
489 * Higher than this may output cntrl chars to screen. - KW
490 */
491 SLsmg_Display_Eight_Bit = 191;
492 } else {
493 SLsmg_Display_Eight_Bit = LYlowest_eightbit[i];
494 }
495 #endif /* USE_SLANG */
496
497 ena_csi(LYlowest_eightbit[current_char_set] > 155);
498
499 /* some diagnostics */
500 if (TRACE) {
501 if (LYRawMode_flag != LYRawMode)
502 CTRACE((tfp,
503 "HTMLSetCharacterHandling: LYRawMode changed %s -> %s\n",
504 (LYRawMode_flag ? "ON" : "OFF"),
505 (LYRawMode ? "ON" : "OFF")));
506 if (UCLYhndl_for_unspec_flag != UCLYhndl_for_unspec)
507 CTRACE((tfp,
508 "HTMLSetCharacterHandling: UCLYhndl_for_unspec changed %d -> %d\n",
509 UCLYhndl_for_unspec_flag,
510 UCLYhndl_for_unspec));
511 }
512
513 return;
514 }
515
516 /*
517 * Function to set HTCJK based on "in" and "out" charsets.
518 */
Set_HTCJK(const char * inMIMEname,const char * outMIMEname)519 void Set_HTCJK(const char *inMIMEname,
520 const char *outMIMEname)
521 {
522 /* need not check for synonyms: MIMEname's got from LYCharSet_UC */
523
524 if (LYRawMode) {
525 if ((!strcmp(inMIMEname, "euc-jp") ||
526 #ifdef USE_JAPANESEUTF8_SUPPORT
527 !strcmp(inMIMEname, "utf-8") ||
528 #endif
529 !strcmp(inMIMEname, "shift_jis")) &&
530 (!strcmp(outMIMEname, "euc-jp") ||
531 !strcmp(outMIMEname, "shift_jis"))) {
532 HTCJK = JAPANESE;
533 } else if (!strcmp(inMIMEname, "euc-cn") &&
534 !strcmp(outMIMEname, "euc-cn")) {
535 HTCJK = CHINESE;
536 } else if (!strcmp(inMIMEname, "big5") &&
537 !strcmp(outMIMEname, "big5")) {
538 HTCJK = TAIPEI;
539 } else if (!strcmp(inMIMEname, "euc-kr") &&
540 !strcmp(outMIMEname, "euc-kr")) {
541 HTCJK = KOREAN;
542 } else {
543 HTCJK = NOCJK;
544 }
545 } else {
546 HTCJK = NOCJK;
547 }
548 }
549
550 /*
551 * Function to set the LYDefaultRawMode value based on the selected character
552 * set. - FM
553 *
554 * Currently unused: the default value so obvious that LYUseDefaultRawMode
555 * utilized directly by someone's mistake. - LP
556 */
HTMLSetRawModeDefault(int i)557 static void HTMLSetRawModeDefault(int i)
558 {
559 LYDefaultRawMode = (BOOL) (LYCharSet_UC[i].enc == UCT_ENC_CJK);
560 return;
561 }
562
563 /*
564 * Function to set the LYUseDefaultRawMode value based on the selected
565 * character set and the current LYRawMode value. - FM
566 */
HTMLSetUseDefaultRawMode(int i,int modeflag)567 void HTMLSetUseDefaultRawMode(int i,
568 int modeflag)
569 {
570 if (LYCharSet_UC[i].enc != UCT_ENC_CJK) {
571
572 int chndl = safeUCGetLYhndl_byMIME(UCAssume_MIMEcharset);
573
574 if (i == chndl)
575 LYUseDefaultRawMode = (BOOLEAN) modeflag;
576 else
577 LYUseDefaultRawMode = (BOOL) (!modeflag);
578 } else /* CJK encoding: */
579 LYUseDefaultRawMode = (BOOLEAN) modeflag;
580
581 return;
582 }
583
584 /*
585 * Function to set the LYHaveCJKCharacterSet value based on the selected
586 * character set. - FM
587 */
HTMLSetHaveCJKCharacterSet(int i)588 static void HTMLSetHaveCJKCharacterSet(int i)
589 {
590 LYHaveCJKCharacterSet = (BOOL) (LYCharSet_UC[i].enc == UCT_ENC_CJK);
591 return;
592 }
593
594 /*
595 * Function to set the DisplayCharsetMatchLocale value based on the selected
596 * character set. It is used in UPPER8 for 8bit case-insensitive search by
597 * matching def7_uni.tbl images. - LP
598 */
HTMLSetDisplayCharsetMatchLocale(int i)599 static void HTMLSetDisplayCharsetMatchLocale(int i)
600 {
601 BOOLEAN match;
602
603 if (LYHaveCJKCharacterSet) {
604 /*
605 * We have no intention to pass CJK via UCTransChar if that happened.
606 * Let someone from CJK correct this if necessary.
607 */
608 DisplayCharsetMatchLocale = TRUE; /* old-style */
609 return;
610
611 } else if (strncasecomp(LYCharSet_UC[i].MIMEname, "cp", 2) ||
612 strncasecomp(LYCharSet_UC[i].MIMEname, "windows", 7)) {
613 /*
614 * Assume dos/windows displays usually on remote terminal, hence it
615 * rarely matches locale. (In fact, MS Windows codepoints locale are
616 * never seen on UNIX).
617 */
618 match = FALSE;
619 } else {
620 match = TRUE; /* guess, but see below */
621
622 #if !defined(LOCALE)
623 if (LYCharSet_UC[i].enc != UCT_ENC_UTF8)
624 /*
625 * Leave true for utf-8 display - the code doesn't deal very well
626 * with this case. - kw
627 */
628 match = FALSE;
629 #else
630 if (UCForce8bitTOUPPER) {
631 /*
632 * Force disable locale (from lynx.cfg)
633 */
634 match = FALSE;
635 }
636 #endif
637 }
638
639 DisplayCharsetMatchLocale = match;
640 return;
641 }
642
/*
 * lynx 2.8/2.7.2 (and earlier) compatibility code:  the "human-readable"
 * charset names change with time, so the historical names are mapped to MIME
 * names here to keep old lynx.cfg and (especially) .lynxrc files recognized.
 * Please update this table when you change the "fullname" of any present
 * charset.
 */
typedef struct _names_pairs {
    const char *fullname;	/* historical human-readable name */
    const char *MIMEname;	/* MIME charset name it maps to */
} names_pairs;
/* *INDENT-OFF* */
static const names_pairs OLD_charset_names[] =
{
    { "ISO Latin 1",          "iso-8859-1"       },
    { "ISO Latin 2",          "iso-8859-2"       },
    { "WinLatin1 (cp1252)",   "windows-1252"     },
    { "DEC Multinational",    "dec-mcs"          },
    { "Macintosh (8 bit)",    "macintosh"        },
    { "NeXT character set",   "next"             },
    { "KOI8-R Cyrillic",      "koi8-r"           },

    /* CJK */
    { "Chinese",              "euc-cn"           },
    { "Japanese (EUC)",       "euc-jp"           },
    { "Japanese (SJIS)",      "shift_jis"        },
    { "Korean",               "euc-kr"           },
    { "Taipei (Big5)",        "big5"             },

    { "Vietnamese (VISCII)",  "viscii"           },
    { "7 bit approximations", "us-ascii"         },
    { "Transparent",          "x-transparent"    },

    /* DOS code pages */
    { "DosLatinUS (cp437)",   "cp437"            },
    { "IBM PC character set", "cp437"            },
    { "DosLatin1 (cp850)",    "cp850"            },
    { "IBM PC codepage 850",  "cp850"            },
    { "DosLatin2 (cp852)",    "cp852"            },
    { "PC Latin2 CP 852",     "cp852"            },
    { "DosCyrillic (cp866)",  "cp866"            },
    { "DosArabic (cp864)",    "cp864"            },
    { "DosGreek (cp737)",     "cp737"            },
    { "DosBaltRim (cp775)",   "cp775"            },
    { "DosGreek2 (cp869)",    "cp869"            },
    { "DosHebrew (cp862)",    "cp862"            },

    /* Windows code pages */
    { "WinLatin2 (cp1250)",   "windows-1250"     },
    { "WinCyrillic (cp1251)", "windows-1251"     },
    { "WinGreek (cp1253)",    "windows-1253"     },
    { "WinHebrew (cp1255)",   "windows-1255"     },
    { "WinArabic (cp1256)",   "windows-1256"     },
    { "WinBaltRim (cp1257)",  "windows-1257"     },

    /* ISO 8859 family */
    { "ISO Latin 3",          "iso-8859-3"       },
    { "ISO Latin 4",          "iso-8859-4"       },
    { "ISO 8859-5 Cyrillic",  "iso-8859-5"       },
    { "ISO 8859-6 Arabic",    "iso-8859-6"       },
    { "ISO 8859-7 Greek",     "iso-8859-7"       },
    { "ISO 8859-8 Hebrew",    "iso-8859-8"       },
    { "ISO-8859-8-I",         "iso-8859-8"       },
    { "ISO-8859-8-E",         "iso-8859-8"       },
    { "ISO 8859-9 (Latin 5)", "iso-8859-9"       },
    { "ISO 8859-10",          "iso-8859-10"      },

    { "UNICODE UTF 8",        "utf-8"            },
    { "RFC 1345 w/o Intro",   "mnemonic+ascii+0" },
    { "RFC 1345 Mnemonic",    "mnemonic"         },

    { NULL,                   NULL               },	/* terminated with NULL */
};
/* *INDENT-ON* */
705
706 /*
707 * lynx 2.8/2.7.2 compatibility code: read "character_set" parameter from
708 * lynx.cfg and .lynxrc in both MIME name and "human-readable" name (old and
709 * new style). Returns -1 if not recognized.
710 */
UCGetLYhndl_byAnyName(char * value)711 int UCGetLYhndl_byAnyName(char *value)
712 {
713 int i;
714
715 if (value == NULL)
716 return -1;
717
718 LYTrimTrailing(value);
719 CTRACE((tfp, "UCGetLYhndl_byAnyName(%s)\n", value));
720
721 /* search by name */
722 for (i = 0; (i < MAXCHARSETS && LYchar_set_names[i]); i++) {
723 if (!strcmp(value, LYchar_set_names[i])) {
724 return i; /* OK */
725 }
726 }
727
728 /* search by old name from 2.8/2.7.2 version */
729 for (i = 0; (OLD_charset_names[i].fullname); i++) {
730 if (!strcmp(value, OLD_charset_names[i].fullname)) {
731 return UCGetLYhndl_byMIME(OLD_charset_names[i].MIMEname); /* OK */
732 }
733 }
734
735 return UCGetLYhndl_byMIME(value); /* by MIME */
736 }
737
/*
 * Entity names -- Ordered by ISO Latin 1 value.
 * ---------------------------------------------
 * For conversions of DECIMAL escaped entities.
 * Must be in order of ascending value; the first entry stands for decimal
 * value 160, the last one for 255.
 */
static const char *LYEntityNames[] =
{
    /* DECIMAL VALUE, NAME, description */
    /* 160 */ "nbsp",		/* non breaking space */
    /* 161 */ "iexcl",		/* inverted exclamation mark */
    /* 162 */ "cent",		/* cent sign */
    /* 163 */ "pound",		/* pound sign */
    /* 164 */ "curren",		/* currency sign */
    /* 165 */ "yen",		/* yen sign */
    /* 166 */ "brvbar",		/* broken vertical bar, (brkbar) */
    /* 167 */ "sect",		/* section sign */
    /* 168 */ "uml",		/* spacing dieresis */
    /* 169 */ "copy",		/* copyright sign */
    /* 170 */ "ordf",		/* feminine ordinal indicator */
    /* 171 */ "laquo",		/* angle quotation mark, left */
    /* 172 */ "not",		/* negation sign */
    /* 173 */ "shy",		/* soft hyphen */
    /* 174 */ "reg",		/* circled R registered sign */
    /* 175 */ "hibar",		/* spacing macron */
    /* 176 */ "deg",		/* degree sign */
    /* 177 */ "plusmn",		/* plus-or-minus sign */
    /* 178 */ "sup2",		/* superscript 2 */
    /* 179 */ "sup3",		/* superscript 3 */
    /* 180 */ "acute",		/* spacing acute (96) */
    /* 181 */ "micro",		/* micro sign */
    /* 182 */ "para",		/* paragraph sign */
    /* 183 */ "middot",		/* middle dot */
    /* 184 */ "cedil",		/* spacing cedilla */
    /* 185 */ "sup1",		/* superscript 1 */
    /* 186 */ "ordm",		/* masculine ordinal indicator */
    /* 187 */ "raquo",		/* angle quotation mark, right */
    /* 188 */ "frac14",		/* fraction 1/4 */
    /* 189 */ "frac12",		/* fraction 1/2 */
    /* 190 */ "frac34",		/* fraction 3/4 */
    /* 191 */ "iquest",		/* inverted question mark */
    /* 192 */ "Agrave",		/* capital A, grave accent */
    /* 193 */ "Aacute",		/* capital A, acute accent */
    /* 194 */ "Acirc",		/* capital A, circumflex accent */
    /* 195 */ "Atilde",		/* capital A, tilde */
    /* 196 */ "Auml",		/* capital A, dieresis or umlaut mark */
    /* 197 */ "Aring",		/* capital A, ring */
    /* 198 */ "AElig",		/* capital AE diphthong (ligature) */
    /* 199 */ "Ccedil",		/* capital C, cedilla */
    /* 200 */ "Egrave",		/* capital E, grave accent */
    /* 201 */ "Eacute",		/* capital E, acute accent */
    /* 202 */ "Ecirc",		/* capital E, circumflex accent */
    /* 203 */ "Euml",		/* capital E, dieresis or umlaut mark */
    /* 204 */ "Igrave",		/* capital I, grave accent */
    /* 205 */ "Iacute",		/* capital I, acute accent */
    /* 206 */ "Icirc",		/* capital I, circumflex accent */
    /* 207 */ "Iuml",		/* capital I, dieresis or umlaut mark */
    /* 208 */ "ETH",		/* capital Eth, Icelandic (or Latin2 Dstrok) */
    /* 209 */ "Ntilde",		/* capital N, tilde */
    /* 210 */ "Ograve",		/* capital O, grave accent */
    /* 211 */ "Oacute",		/* capital O, acute accent */
    /* 212 */ "Ocirc",		/* capital O, circumflex accent */
    /* 213 */ "Otilde",		/* capital O, tilde */
    /* 214 */ "Ouml",		/* capital O, dieresis or umlaut mark */
    /* 215 */ "times",		/* multiplication sign */
    /* 216 */ "Oslash",		/* capital O, slash */
    /* 217 */ "Ugrave",		/* capital U, grave accent */
    /* 218 */ "Uacute",		/* capital U, acute accent */
    /* 219 */ "Ucirc",		/* capital U, circumflex accent */
    /* 220 */ "Uuml",		/* capital U, dieresis or umlaut mark */
    /* 221 */ "Yacute",		/* capital Y, acute accent */
    /* 222 */ "THORN",		/* capital THORN, Icelandic */
    /* 223 */ "szlig",		/* small sharp s, German (sz ligature) */
    /* 224 */ "agrave",		/* small a, grave accent */
    /* 225 */ "aacute",		/* small a, acute accent */
    /* 226 */ "acirc",		/* small a, circumflex accent */
    /* 227 */ "atilde",		/* small a, tilde */
    /* 228 */ "auml",		/* small a, dieresis or umlaut mark */
    /* 229 */ "aring",		/* small a, ring */
    /* 230 */ "aelig",		/* small ae diphthong (ligature) */
    /* 231 */ "ccedil",		/* small c, cedilla */
    /* 232 */ "egrave",		/* small e, grave accent */
    /* 233 */ "eacute",		/* small e, acute accent */
    /* 234 */ "ecirc",		/* small e, circumflex accent */
    /* 235 */ "euml",		/* small e, dieresis or umlaut mark */
    /* 236 */ "igrave",		/* small i, grave accent */
    /* 237 */ "iacute",		/* small i, acute accent */
    /* 238 */ "icirc",		/* small i, circumflex accent */
    /* 239 */ "iuml",		/* small i, dieresis or umlaut mark */
    /* 240 */ "eth",		/* small eth, Icelandic */
    /* 241 */ "ntilde",		/* small n, tilde */
    /* 242 */ "ograve",		/* small o, grave accent */
    /* 243 */ "oacute",		/* small o, acute accent */
    /* 244 */ "ocirc",		/* small o, circumflex accent */
    /* 245 */ "otilde",		/* small o, tilde */
    /* 246 */ "ouml",		/* small o, dieresis or umlaut mark */
    /* 247 */ "divide",		/* division sign */
    /* 248 */ "oslash",		/* small o, slash */
    /* 249 */ "ugrave",		/* small u, grave accent */
    /* 250 */ "uacute",		/* small u, acute accent */
    /* 251 */ "ucirc",		/* small u, circumflex accent */
    /* 252 */ "uuml",		/* small u, dieresis or umlaut mark */
    /* 253 */ "yacute",		/* small y, acute accent */
    /* 254 */ "thorn",		/* small thorn, Icelandic */
    /* 255 */ "yuml",		/* small y, dieresis or umlaut mark */
};
844
845 /*
846 * Function to return the entity names of ISO-8859-1 8-bit characters. - FM
847 */
HTMLGetEntityName(UCode_t code)848 const char *HTMLGetEntityName(UCode_t code)
849 {
850 #define IntValue code
851 int MaxValue = (TABLESIZE(LYEntityNames) - 1);
852
853 if (IntValue < 0 || IntValue > MaxValue) {
854 return "";
855 }
856
857 return LYEntityNames[IntValue];
858 }
859
860 /*
861 * Function to return the UCode_t (long int) value for entity names. It
862 * returns 0 if not found.
863 *
864 * unicode_entities[] handles all the names from old style entities[] too.
865 * Lynx now calls unicode_entities[] only through this function:
866 * HTMLGetEntityUCValue(). Note, we need not check for special characters here
867 * in function or even before it, we should check them *after* invoking this
868 * function, see put_special_unicodes() in SGML.c.
869 *
870 * In the future we will try to isolate all calls to entities[] in favor of new
871 * unicode-based chartrans scheme. - LP
872 */
HTMLGetEntityUCValue(const char * name)873 UCode_t HTMLGetEntityUCValue(const char *name)
874 {
875 #include <entities.h>
876
877 UCode_t value = 0;
878 size_t i, high, low;
879 int diff = 0;
880 size_t number_of_unicode_entities = TABLESIZE(unicode_entities);
881
882 /*
883 * Make sure we have a non-zero length name. - FM
884 */
885 if (isEmpty(name))
886 return (value);
887
888 /*
889 * Try UC_entity_info unicode_entities[].
890 */
891 for (low = 0, high = number_of_unicode_entities;
892 high > low;
893 diff < 0 ? (low = i + 1) : (high = i)) {
894 /*
895 * Binary search.
896 */
897 i = (low + (high - low) / 2);
898 diff = AS_cmp(unicode_entities[i].name, name); /* Case sensitive! */
899 if (diff == 0) {
900 value = unicode_entities[i].code;
901 break;
902 }
903 }
904 return (value);
905 }
906
907 /*
908 * Original comment -
909 * Assume these are Microsoft code points, inflicted on us by FrontPage. - FM
910 *
911 * MS FrontPage uses syntax like ™ in 128-159 range and doesn't follow
912 * Unicode standards for this area. Windows-1252 codepoints are assumed here.
913 *
914 * However see -
915 * http://www.whatwg.org/specs/web-apps/current-work/multipage/infrastructure.html#character-encodings-0
916 */
LYcp1252ToUnicode(UCode_t code)917 UCode_t LYcp1252ToUnicode(UCode_t code)
918 {
919 if ((code == 1) ||
920 (code > 127 && code < 160)) {
921 switch (code) {
922 case 1:
923 /*
924 * WHITE SMILING FACE
925 */
926 code = 0x263a;
927 break;
928 case 128:
929 /*
930 * EURO currency sign
931 */
932 code = 0x20ac;
933 break;
934 case 130:
935 /*
936 * SINGLE LOW-9 QUOTATION MARK (sbquo)
937 */
938 code = 0x201a;
939 break;
940 case 131:
941 /*
942 * LATIN SMALL LETTER F WITH HOOK
943 */
944 code = 0x192;
945 break;
946 case 132:
947 /*
948 * DOUBLE LOW-9 QUOTATION MARK (bdquo)
949 */
950 code = 0x201e;
951 break;
952 case 133:
953 /*
954 * HORIZONTAL ELLIPSIS (hellip)
955 */
956 code = 0x2026;
957 break;
958 case 134:
959 /*
960 * DAGGER (dagger)
961 */
962 code = 0x2020;
963 break;
964 case 135:
965 /*
966 * DOUBLE DAGGER (Dagger)
967 */
968 code = 0x2021;
969 break;
970 case 136:
971 /*
972 * MODIFIER LETTER CIRCUMFLEX ACCENT
973 */
974 code = 0x2c6;
975 break;
976 case 137:
977 /*
978 * PER MILLE SIGN (permil)
979 */
980 code = 0x2030;
981 break;
982 case 138:
983 /*
984 * LATIN CAPITAL LETTER S WITH CARON
985 */
986 code = 0x160;
987 break;
988 case 139:
989 /*
990 * SINGLE LEFT-POINTING ANGLE QUOTATION MARK (lsaquo)
991 */
992 code = 0x2039;
993 break;
994 case 140:
995 /*
996 * LATIN CAPITAL LIGATURE OE
997 */
998 code = 0x152;
999 break;
1000 case 142:
1001 /*
1002 * LATIN CAPITAL LETTER Z WITH CARON
1003 */
1004 code = 0x17d;
1005 break;
1006 case 145:
1007 /*
1008 * LEFT SINGLE QUOTATION MARK (lsquo)
1009 */
1010 code = 0x2018;
1011 break;
1012 case 146:
1013 /*
1014 * RIGHT SINGLE QUOTATION MARK (rsquo)
1015 */
1016 code = 0x2019;
1017 break;
1018 case 147:
1019 /*
1020 * LEFT DOUBLE QUOTATION MARK (ldquo)
1021 */
1022 code = 0x201c;
1023 break;
1024 case 148:
1025 /*
1026 * RIGHT DOUBLE QUOTATION MARK (rdquo)
1027 */
1028 code = 0x201d;
1029 break;
1030 case 149:
1031 /*
1032 * BULLET (bull)
1033 */
1034 code = 0x2022;
1035 break;
1036 case 150:
1037 /*
1038 * EN DASH (ndash)
1039 */
1040 code = 0x2013;
1041 break;
1042 case 151:
1043 /*
1044 * EM DASH (mdash)
1045 */
1046 code = 0x2014;
1047 break;
1048 case 152:
1049 /*
1050 * SMALL TILDE (tilde)
1051 */
1052 code = 0x02dc;
1053 break;
1054 case 153:
1055 /*
1056 * TRADE MARK SIGN (trade)
1057 */
1058 code = 0x2122;
1059 break;
1060 case 154:
1061 /*
1062 * LATIN SMALL LETTER S WITH CARON
1063 */
1064 code = 0x161;
1065 break;
1066 case 155:
1067 /*
1068 * SINGLE RIGHT-POINTING ANGLE QUOTATION MARK (rsaquo)
1069 */
1070 code = 0x203a;
1071 break;
1072 case 156:
1073 /*
1074 * LATIN SMALL LIGATURE OE
1075 */
1076 code = 0x153;
1077 break;
1078 case 158:
1079 /*
1080 * LATIN SMALL LETTER Z WITH CARON
1081 */
1082 code = 0x17e;
1083 break;
1084 case 159:
1085 /*
1086 * LATIN CAPITAL LETTER Y WITH DIAERESIS
1087 */
1088 code = 0x178;
1089 break;
1090 default:
1091 /*
1092 * Undefined (by convention, use the replacement character).
1093 */
1094 code = UCS_REPL;
1095 break;
1096 }
1097 }
1098 return code;
1099 }
1100
1101 /*
1102 * Function to select a character set and then set the character handling and
1103 * LYHaveCJKCharacterSet flag. - FM
1104 */
HTMLUseCharacterSet(int i)1105 void HTMLUseCharacterSet(int i)
1106 {
1107 HTMLSetRawModeDefault(i);
1108 p_entity_values = LYCharSets[i];
1109 HTMLSetCharacterHandling(i); /* set LYRawMode and CJK attributes */
1110 HTMLSetHaveCJKCharacterSet(i);
1111 HTMLSetDisplayCharsetMatchLocale(i);
1112 return;
1113 }
1114
1115 /*
1116 * Initializer, calls initialization function for the CHARTRANS handling. - KW
1117 */
LYCharSetsDeclared(void)1118 int LYCharSetsDeclared(void)
1119 {
1120 UCInit();
1121
1122 return UCInitialized;
1123 }
1124
1125 #ifdef USE_CHARSET_CHOICE
init_charset_subsets(void)1126 void init_charset_subsets(void)
1127 {
1128 int i, n;
1129 int cur_display = 0;
1130 int cur_assumed = 0;
1131
1132 /* add them to displayed values */
1133 charset_subsets[UCLYhndl_for_unspec].hide_assumed = FALSE;
1134 charset_subsets[current_char_set].hide_display = FALSE;
1135
1136 #ifndef ALL_CHARSETS_IN_O_MENU_SCREEN
1137 /*all this stuff is for supporting old menu screen... */
1138 for (i = 0; i < LYNumCharsets; ++i) {
1139 if (charset_subsets[i].hide_display == FALSE) {
1140 n = cur_display++;
1141 if (i == current_char_set)
1142 displayed_display_charset_idx = n;
1143 display_charset_map[n] = i;
1144 display_charset_choices[n] = LYchar_set_names[i];
1145 }
1146 if (charset_subsets[i].hide_assumed == FALSE) {
1147 n = cur_assumed++;
1148 assumed_doc_charset_map[n] = i;
1149 assumed_charset_choices[n] = LYCharSet_UC[i].MIMEname;
1150 charset_subsets[i].assumed_idx = n;
1151 }
1152 display_charset_choices[cur_display] = NULL;
1153 assumed_charset_choices[cur_assumed] = NULL;
1154 }
1155 #endif
1156 }
1157 #endif /* USE_CHARSET_CHOICE */
1158