1 /* Copyright (c) 1993-2003
2  *      Juergen Weigert (jnweiger@immd4.informatik.uni-erlangen.de)
3  *      Michael Schroeder (mlschroe@immd4.informatik.uni-erlangen.de)
4  * Copyright (c) 1987 Oliver Laumann
5  *
6  * This program is free software; you can redistribute it and/or modify
7  * it under the terms of the GNU General Public License as published by
8  * the Free Software Foundation; either version 3, or (at your option)
9  * any later version.
10  *
11  * This program is distributed in the hope that it will be useful,
12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
14  * GNU General Public License for more details.
15  *
16  * You should have received a copy of the GNU General Public License
17  * along with this program (see the file COPYING); if not, see
18  * https://www.gnu.org/licenses/, or contact Free Software Foundation, Inc.,
19  * 51 Franklin Street, Fifth Floor, Boston, MA  02111-1301  USA
20  *
21  ****************************************************************
22  */
23 
24 #include <sys/types.h>
25 
26 #include "config.h"
27 #include "screen.h"
28 #include "extern.h"
29 
30 #ifdef ENCODINGS
31 
/* Globals defined in other translation units. */
/* 'null' is the sentinel pointer that mline font/fontx arrays are compared
 * against (and reset to) in this file. */
extern unsigned char *null;
extern struct display *display, *displays;
extern struct layer *flayer;

extern char *screenencodings;

#ifdef DW_CHARS
extern int cjkwidth;
#endif

/* Forward declarations of file-local helpers. */
static int  encmatch __P((char *, char *));
# ifdef UTF8
/* single-cell recoding: (code, to_utf flag, font) and (code, target encoding) */
static int   recode_char __P((int, int, int));
static int   recode_char_to_encoding __P((int, int));
static void  comb_tofront __P((int));
#  ifdef DW_CHARS
/* two-cell recoding: the second cell is passed and returned through the int pointer */
static int   recode_char_dw __P((int, int *, int, int));
static int   recode_char_dw_to_encoding __P((int, int *, int));
#  endif
# endif
52 
/* One row of the encodings[] table. */
struct encoding {
  char *name;		/* encoding name, e.g. "eucJP" */
  char *charsets;	/* not referenced in this part of the file; NOTE(review): looks like a charset designation string -- confirm */
  int  deffont;		/* font substituted for font 0 when recoding from this encoding; also tried by recode_char_to_encoding() */
  int  usegr;		/* not referenced in this part of the file */
  int  noc1;		/* not referenced in this part of the file */
  char *fontlist;	/* NUL-terminated list of font bytes tried in order by recode_char_to_encoding(); may be 0 */
};
61 
62 /* big5 font:   ^X */
63 /* KOI8-R font: 96 ! */
64 /* CP1251 font: 96 ? */
65 
/*
 * Table of known encodings.  The recode_*_to_encoding() helpers index
 * this array directly with the encoding number.
 * Columns: name, charsets, deffont, usegr, noc1, fontlist.
 * A deffont with 0x80 or'ed in is treated by recode_char() as a font
 * whose lower half (codes < 128) passes through unchanged.
 */
struct encoding encodings[] = {
  { "C",		0,		0,		0, 0, 0 },
  { "eucJP",		"B\002I\00401",	0,		1, 0, "\002\004I" },
  { "SJIS",		"BIBB01",	0,		1, 1, "\002I" },
  { "eucKR",		"B\003BB01",	0,		1, 0, "\003" },
  { "eucCN",		"B\001BB01",	0,		1, 0, "\001" },
  { "Big5",		"B\030BB01",	0,		1, 0, "\030" },
  { "KOI8-R",		0,		0x80|'!',	0, 1, 0 },
  { "CP1251",		0,		0x80|'?',	0, 1, 0 },
  { "UTF-8",		0,		-1,		0, 0, 0 },
  { "ISO8859-2",	0,		0x80|'B',	0, 0, 0 },
  { "ISO8859-3",	0,		0x80|'C',	0, 0, 0 },
  { "ISO8859-4",	0,		0x80|'D',	0, 0, 0 },
  { "ISO8859-5",	0,		0x80|'L',	0, 0, 0 },
  { "ISO8859-6",	0,		0x80|'G',	0, 0, 0 },
  { "ISO8859-7",	0,		0x80|'F',	0, 0, 0 },
  { "ISO8859-8",	0,		0x80|'H',	0, 0, 0 },
  { "ISO8859-9",	0,		0x80|'M',	0, 0, 0 },
  { "ISO8859-10",	0,		0x80|'V',	0, 0, 0 },
  { "ISO8859-15",	0,		0x80|'b',	0, 0, 0 },
  { "jis",		0,		0,		0, 0, "\002\004I" },
  { "GBK",		"B\031BB01",	0x80|'b',	1, 1, "\031" }
};
89 
90 #ifdef UTF8
91 
/*
 * Built-in font -> Unicode translation tables, stored back to back.
 *
 * Layout, as consumed by InitBuiltinTabs() and recode_char():
 *   { font, 0 }         header: the font number this sub-table belongs to
 *   { code, unicode }   one mapping per row
 *   { 0, 0 }            end of the sub-table
 * A final { 0, 0 } after the last sub-table ends the whole list.
 *
 * A row whose first element has 0x8000 or'ed in closes a range that the
 * preceding row opened: codes from the previous row's code up to
 * (code & 0x7fff) map linearly onto the Unicode values starting at the
 * previous row's value.
 */
static unsigned short builtin_tabs[][2] = {
  { 0x30, 0 },		/* 0: special graphics (line drawing) */
  { 0x005f, 0x25AE },
  { 0x0060, 0x25C6 },
  { 0x0061, 0x2592 },
  { 0x0062, 0x2409 },
  { 0x0063, 0x240C },
  { 0x0064, 0x240D },
  { 0x0065, 0x240A },
  { 0x0066, 0x00B0 },
  { 0x0067, 0x00B1 },
  { 0x0068, 0x2424 },
  { 0x0069, 0x240B },
  { 0x006a, 0x2518 },
  { 0x006b, 0x2510 },
  { 0x006c, 0x250C },
  { 0x006d, 0x2514 },
  { 0x006e, 0x253C },
  { 0x006f, 0x23BA },
  { 0x0070, 0x23BB },
  { 0x0071, 0x2500 },
  { 0x0072, 0x23BC },
  { 0x0073, 0x23BD },
  { 0x0074, 0x251C },
  { 0x0075, 0x2524 },
  { 0x0076, 0x2534 },
  { 0x0077, 0x252C },
  { 0x0078, 0x2502 },
  { 0x0079, 0x2264 },
  { 0x007a, 0x2265 },
  { 0x007b, 0x03C0 },
  { 0x007c, 0x2260 },
  { 0x007d, 0x00A3 },
  { 0x007e, 0x00B7 },
  { 0, 0},

  { 0x34, 0 },		/* 4: Dutch */
  { 0x0023, 0x00a3 },
  { 0x0040, 0x00be },
  { 0x005b, 0x00ff },
  { 0x005c, 0x00bd },
  { 0x005d, 0x007c },
  { 0x007b, 0x00a8 },
  { 0x007c, 0x0066 },
  { 0x007d, 0x00bc },
  { 0x007e, 0x00b4 },
  { 0, 0},

  { 0x35, 0 },		/* 5: Finnish */
  { 0x005b, 0x00c4 },
  { 0x005c, 0x00d6 },
  { 0x005d, 0x00c5 },
  { 0x005e, 0x00dc },
  { 0x0060, 0x00e9 },
  { 0x007b, 0x00e4 },
  { 0x007c, 0x00f6 },
  { 0x007d, 0x00e5 },
  { 0x007e, 0x00fc },
  { 0, 0},

  { 0x36, 0 },		/* 6: Norwegian/Danish */
  { 0x0040, 0x00c4 },
  { 0x005b, 0x00c6 },
  { 0x005c, 0x00d8 },
  { 0x005d, 0x00c5 },
  { 0x005e, 0x00dc },
  { 0x0060, 0x00e4 },
  { 0x007b, 0x00e6 },
  { 0x007c, 0x00f8 },
  { 0x007d, 0x00e5 },
  { 0x007e, 0x00fc },
  { 0, 0},

  { 0x37, 0 },		/* 7: Swedish */
  { 0x0040, 0x00c9 },
  { 0x005b, 0x00c4 },
  { 0x005c, 0x00d6 },
  { 0x005d, 0x00c5 },
  { 0x005e, 0x00dc },
  { 0x0060, 0x00e9 },
  { 0x007b, 0x00e4 },
  { 0x007c, 0x00f6 },
  { 0x007d, 0x00e5 },
  { 0x007e, 0x00fc },
  { 0, 0},

  { 0x3d, 0},		/* =: Swiss */
  { 0x0023, 0x00f9 },
  { 0x0040, 0x00e0 },
  { 0x005b, 0x00e9 },
  { 0x005c, 0x00e7 },
  { 0x005d, 0x00ea },
  { 0x005e, 0x00ee },
  { 0x005f, 0x00e8 },
  { 0x0060, 0x00f4 },
  { 0x007b, 0x00e4 },
  { 0x007c, 0x00f6 },
  { 0x007d, 0x00fc },
  { 0x007e, 0x00fb },
  { 0, 0},

  { 0x41, 0},		/* A: UK */
  { 0x0023, 0x00a3 },
  { 0, 0},

  { 0x4b, 0},		/* K: German */
  { 0x0040, 0x00a7 },
  { 0x005b, 0x00c4 },
  { 0x005c, 0x00d6 },
  { 0x005d, 0x00dc },
  { 0x007b, 0x00e4 },
  { 0x007c, 0x00f6 },
  { 0x007d, 0x00fc },
  { 0x007e, 0x00df },
  { 0, 0},

  { 0x51, 0},		/* Q: French Canadian */
  { 0x0040, 0x00e0 },
  { 0x005b, 0x00e2 },
  { 0x005c, 0x00e7 },
  { 0x005d, 0x00ea },
  { 0x005e, 0x00ee },
  { 0x0060, 0x00f4 },
  { 0x007b, 0x00e9 },
  { 0x007c, 0x00f9 },
  { 0x007d, 0x00e8 },
  { 0x007e, 0x00fb },
  { 0, 0},

  { 0x52, 0},		/* R: French */
  { 0x0023, 0x00a3 },
  { 0x0040, 0x00e0 },
  { 0x005b, 0x00b0 },
  { 0x005c, 0x00e7 },
  { 0x005d, 0x00a7 },
  { 0x007b, 0x00e9 },
  { 0x007c, 0x00f9 },
  { 0x007d, 0x00e8 },
  { 0x007e, 0x00a8 },
  { 0, 0},

  { 0x59, 0},		/* Y: Italian */
  { 0x0023, 0x00a3 },
  { 0x0040, 0x00a7 },
  { 0x005b, 0x00b0 },
  { 0x005c, 0x00e7 },
  { 0x005d, 0x00e9 },
  { 0x0060, 0x00f9 },
  { 0x007b, 0x00e0 },
  { 0x007c, 0x00f2 },
  { 0x007d, 0x00e8 },
  { 0x007e, 0x00ec },
  { 0, 0},

  { 0x5a, 0},		/* Z: Spanish */
  { 0x0023, 0x00a3 },
  { 0x0040, 0x00a7 },
  { 0x005b, 0x00a1 },
  { 0x005c, 0x00d1 },
  { 0x005d, 0x00bf },
  { 0x007b, 0x00b0 },
  { 0x007c, 0x00f1 },
  { 0x007d, 0x00e7 },
  { 0, 0},

  { 0xe2, 0},		/* 96-b: ISO-8859-15 */
  { 0x00a4, 0x20ac },
  { 0x00a6, 0x0160 },
  { 0x00a8, 0x0161 },
  { 0x00b4, 0x017D },
  { 0x00b8, 0x017E },
  { 0x00bc, 0x0152 },
  { 0x00bd, 0x0153 },
  { 0x00be, 0x0178 },
  { 0, 0},

  { 0x4a, 0},		/* J: JIS 0201 Roman */
  { 0x005c, 0x00a5 },
  { 0x007e, 0x203e },
  { 0, 0},

  { 0x49, 0},		/* I: halfwidth katakana */
  { 0x0021, 0xff61 },
  { 0x005f|0x8000, 0xff9f },	/* range end: 0x21..0x5f -> 0xff61..0xff9f */
  { 0, 0},

  { 0, 0}
};
280 
/* Per-font translation table slot. */
struct recodetab
{
  unsigned short (*tab)[2];	/* rows of { font code, unicode }, ended by a { 0, 0 } row; 0 if none */
  int flags;			/* RECODETAB_* bits; 0 makes recode_char() call LoadFontTranslation() */
};

#define RECODETAB_ALLOCED	1	/* not used in this part of the file; NOTE(review): presumably "tab was heap allocated" -- confirm */
#define RECODETAB_BUILTIN	2	/* tab points into builtin_tabs[] (set by InitBuiltinTabs) */
#define RECODETAB_TRIED		4	/* not used in this part of the file; NOTE(review): presumably "loading was already attempted" -- confirm */

/* One slot per font number (0..255). */
static struct recodetab recodetabs[256];
292 
293 void
InitBuiltinTabs()294 InitBuiltinTabs()
295 {
296   unsigned short (*p)[2];
297   for (p = builtin_tabs; (*p)[0]; p++)
298     {
299       recodetabs[(*p)[0]].flags = RECODETAB_BUILTIN;
300       recodetabs[(*p)[0]].tab = p + 1;
301       p++;
302       while((*p)[0])
303 	p++;
304     }
305 }
306 
307 static int
recode_char(c,to_utf,font)308 recode_char(c, to_utf, font)
309 int c, to_utf, font;
310 {
311   int f;
312   unsigned short (*p)[2];
313 
314   if (to_utf)
315     {
316       if (c < 256)
317 	return c;
318       f = (c >> 8) & 0xff;
319       c &= 0xff;
320       /* map aliases to keep the table small */
321       switch (f)
322 	{
323 	  case 'C':
324 	    f ^= ('C' ^ '5');
325 	    break;
326 	  case 'E':
327 	    f ^= ('E' ^ '6');
328 	    break;
329 	  case 'H':
330 	    f ^= ('H' ^ '7');
331 	    break;
332 	  default:
333 	    break;
334 	}
335       p = recodetabs[f].tab;
336       if (p == 0 && recodetabs[f].flags == 0)
337 	{
338 	  LoadFontTranslation(f, 0);
339           p = recodetabs[f].tab;
340 	}
341       if (p)
342         for (; (*p)[0]; p++)
343 	  {
344 	    if ((p[0][0] & 0x8000) && (c <= (p[0][0] & 0x7fff)) && c >= p[-1][0])
345 	      return c - p[-1][0] + p[-1][1];
346 	    if ((*p)[0] == c)
347 	      return (*p)[1];
348 	  }
349       return c & 0xff;	/* map to latin1 */
350     }
351   if (font == -1)
352     {
353       if (c < 256)
354 	return c;	/* latin1 */
355       for (font = 32; font < 128; font++)
356 	{
357 	  p = recodetabs[font].tab;
358 	  if (p)
359 	    for (; (*p)[1]; p++)
360 	      {
361 		if ((p[0][0] & 0x8000) && c <= p[0][1] && c >= p[-1][1])
362 		  return (c - p[-1][1] + p[-1][0]) | (font << 8);
363 	        if ((*p)[1] == c)
364 		  return (*p)[0] | (font << 8);
365 	      }
366 	}
367       return '?';
368     }
369   if (c < 128 && (font & 128) != 0)
370     return c;
371   if (font >= 32)
372     {
373       p = recodetabs[font].tab;
374       if (p == 0 && recodetabs[font].flags == 0)
375 	{
376 	  LoadFontTranslation(font, 0);
377           p = recodetabs[font].tab;
378 	}
379       if (p)
380 	for (; (*p)[1]; p++)
381 	  {
382 	    if ((p[0][0] & 0x8000) && c <= p[0][1] && c >= p[-1][1])
383 	      return (c - p[-1][1] + p[-1][0]) | (font & 128 ? 0 : font << 8);
384 	    if ((*p)[1] == c)
385 	      return (*p)[0] | (font & 128 ? 0 : font << 8);
386 	  }
387     }
388   return -1;
389 }
390 
391 
392 #ifdef DW_CHARS
393 static int
recode_char_dw(c,c2p,to_utf,font)394 recode_char_dw(c, c2p, to_utf, font)
395 int c, *c2p, to_utf, font;
396 {
397   int f;
398   unsigned short (*p)[2];
399 
400   if (to_utf)
401     {
402       f = (c >> 8) & 0xff;
403       c = (c & 255) << 8 | (*c2p & 255);
404       *c2p = 0xffff;
405       p = recodetabs[f].tab;
406       if (p == 0 && recodetabs[f].flags == 0)
407 	{
408 	  LoadFontTranslation(f, 0);
409           p = recodetabs[f].tab;
410 	}
411       if (p)
412         for (; (*p)[0]; p++)
413 	  if ((*p)[0] == c)
414 	    {
415 #ifdef DW_CHARS
416 	      if (!utf8_isdouble((*p)[1]))
417 		*c2p = ' ';
418 #endif
419 	      return (*p)[1];
420 	    }
421       return UCS_REPL_DW;
422     }
423   if (font == -1)
424     {
425       for (font = 0; font < 030; font++)
426 	{
427 	  p = recodetabs[font].tab;
428 	  if (p)
429 	    for (; (*p)[1]; p++)
430 	      if ((*p)[1] == c)
431 		{
432 		  *c2p = ((*p)[0] & 255) | font << 8 | 0x8000;
433 		  return ((*p)[0] >> 8) | font << 8;
434 		}
435 	}
436       *c2p = '?';
437       return '?';
438     }
439   if (font < 32)
440     {
441       p = recodetabs[font].tab;
442       if (p == 0 && recodetabs[font].flags == 0)
443 	{
444 	  LoadFontTranslation(font, 0);
445           p = recodetabs[font].tab;
446 	}
447       if (p)
448 	for (; (*p)[1]; p++)
449 	  if ((*p)[1] == c)
450 	    {
451 	      *c2p = ((*p)[0] & 255) | font << 8 | 0x8000;
452 	      return ((*p)[0] >> 8) | font << 8;
453 	    }
454     }
455   return -1;
456 }
457 #endif
458 
459 static int
recode_char_to_encoding(c,encoding)460 recode_char_to_encoding(c, encoding)
461 int c, encoding;
462 {
463   char *fp;
464   int x;
465 
466   if (encoding == UTF8)
467     return recode_char(c, 1, -1);
468   if ((fp = encodings[encoding].fontlist) != 0)
469     while(*fp)
470       if ((x = recode_char(c, 0, (unsigned char)*fp++)) != -1)
471         return x;
472   if (encodings[encoding].deffont)
473     if ((x = recode_char(c, 0, encodings[encoding].deffont)) != -1)
474       return x;
475   return recode_char(c, 0, -1);
476 }
477 
478 #ifdef DW_CHARS
479 static int
recode_char_dw_to_encoding(c,c2p,encoding)480 recode_char_dw_to_encoding(c, c2p, encoding)
481 int c, *c2p, encoding;
482 {
483   char *fp;
484   int x;
485 
486   if (encoding == UTF8)
487     return recode_char_dw(c, c2p, 1, -1);
488   if ((fp = encodings[encoding].fontlist) != 0)
489     while(*fp)
490       if ((x = recode_char_dw(c, c2p, 0, (unsigned char)*fp++)) != -1)
491         return x;
492   if (encodings[encoding].deffont)
493     if ((x = recode_char_dw(c, c2p, 0, encodings[encoding].deffont)) != -1)
494       return x;
495   return recode_char_dw(c, c2p, 0, -1);
496 }
497 #endif
498 
499 
500 struct mchar *
recode_mchar(mc,from,to)501 recode_mchar(mc, from, to)
502 struct mchar *mc;
503 int from, to;
504 {
505   static struct mchar rmc;
506   int c;
507 
508   debug3("recode_mchar %02x from %d to %d\n", mc->image, from, to);
509   if (from == to || (from != UTF8 && to != UTF8))
510     return mc;
511   rmc = *mc;
512   if (rmc.font == 0 && from != UTF8)
513     rmc.font = encodings[from].deffont;
514   if (rmc.font == 0)	/* latin1 is the same in unicode */
515     return mc;
516   c = rmc.image | (rmc.font << 8);
517   if (from == UTF8)
518     c |= rmc.fontx << 16;
519 #ifdef DW_CHARS
520   if (rmc.mbcs)
521     {
522       int c2 = rmc.mbcs;
523       c = recode_char_dw_to_encoding(c, &c2, to);
524       rmc.mbcs = c2;
525     }
526   else
527 #endif
528     c = recode_char_to_encoding(c, to);
529   rmc.image = c & 255;
530   rmc.font = c >> 8 & 255;
531   if (to == UTF8)
532     rmc.fontx = c >> 16 & 255;
533   return &rmc;
534 }
535 
536 struct mline *
recode_mline(ml,w,from,to)537 recode_mline(ml, w, from, to)
538 struct mline *ml;
539 int w;
540 int from, to;
541 {
542   static int maxlen;
543   static int last;
544   static struct mline rml[2], *rl;
545   int i, c;
546 
547   if (from == to || (from != UTF8 && to != UTF8) || w == 0)
548     return ml;
549   if (ml->font == null && ml->fontx == null && encodings[from].deffont == 0)
550     return ml;
551   if (w > maxlen)
552     {
553       for (i = 0; i < 2; i++)
554 	{
555 	  if (rml[i].image == 0)
556 	    rml[i].image = malloc(w);
557 	  else
558 	    rml[i].image = realloc(rml[i].image, w);
559 	  if (rml[i].font == 0)
560 	    rml[i].font = malloc(w);
561 	  else
562 	    rml[i].font = realloc(rml[i].font, w);
563 	  if (rml[i].fontx == 0)
564 	    rml[i].fontx = malloc(w);
565 	  else
566 	    rml[i].fontx = realloc(rml[i].fontx, w);
567 	  if (rml[i].image == 0 || rml[i].font == 0 || rml[i].fontx == 0)
568 	    {
569 	      maxlen = 0;
570 	      return ml;	/* sorry */
571 	    }
572 	}
573       maxlen = w;
574     }
575 
576   debug("recode_mline: from\n");
577   for (i = 0; i < w; i++)
578     debug1("%c", "0123456789abcdef"[(ml->image[i] >> 4) & 15]);
579   debug("\n");
580   for (i = 0; i < w; i++)
581     debug1("%c", "0123456789abcdef"[(ml->image[i]     ) & 15]);
582   debug("\n");
583   for (i = 0; i < w; i++)
584     debug1("%c", "0123456789abcdef"[(ml->font[i] >> 4) & 15]);
585   debug("\n");
586   for (i = 0; i < w; i++)
587     debug1("%c", "0123456789abcdef"[(ml->font[i]     ) & 15]);
588   debug("\n");
589   for (i = 0; i < w; i++)
590     debug1("%c", "0123456789abcdef"[(ml->fontx[i] >> 4) & 15]);
591   debug("\n");
592   for (i = 0; i < w; i++)
593     debug1("%c", "0123456789abcdef"[(ml->fontx[i]     ) & 15]);
594   debug("\n");
595 
596   rl = rml + last;
597   rl->attr = ml->attr;
598 #ifdef COLOR
599   rl->color = ml->color;
600 # ifdef COLORS256
601   rl->colorx = ml->colorx;
602 # endif
603 #endif
604   for (i = 0; i < w; i++)
605     {
606       c = ml->image[i] | (ml->font[i] << 8);
607       if (from == UTF8)
608 	c |= ml->fontx[i] << 16;
609       if (from != UTF8 && c < 256)
610 	c |= encodings[from].deffont << 8;
611 #ifdef DW_CHARS
612       if ((from != UTF8 && (c & 0x1f00) != 0 && (c & 0xe000) == 0) || (from == UTF8 && utf8_isdouble(c)))
613 	{
614 	  if (i + 1 == w)
615 	    c = '?';
616 	  else
617 	    {
618 	      int c2;
619 	      i++;
620 	      c2 = ml->image[i] | (ml->font[i] << 8);
621 	      c = recode_char_dw_to_encoding(c, &c2, to);
622 	      if (to == UTF8)
623 	        rl->fontx[i - 1]  = c >> 16 & 255;
624 	      rl->font[i - 1]  = c >> 8 & 255;
625 	      rl->image[i - 1] = c      & 255;
626 	      c = c2;
627 	    }
628 	}
629       else
630 #endif
631         c = recode_char_to_encoding(c, to);
632       rl->image[i] = c & 255;
633       rl->font[i] = c >> 8 & 255;
634       if (to == UTF8)
635         rl->fontx[i] = c >> 16 & 255;
636     }
637   last ^= 1;
638   debug("recode_mline: to\n");
639   for (i = 0; i < w; i++)
640     debug1("%c", "0123456789abcdef"[(rl->image[i] >> 4) & 15]);
641   debug("\n");
642   for (i = 0; i < w; i++)
643     debug1("%c", "0123456789abcdef"[(rl->image[i]     ) & 15]);
644   debug("\n");
645   for (i = 0; i < w; i++)
646     debug1("%c", "0123456789abcdef"[(rl->font[i] >> 4) & 15]);
647   debug("\n");
648   for (i = 0; i < w; i++)
649     debug1("%c", "0123456789abcdef"[(rl->font[i]     ) & 15]);
650   debug("\n");
651   for (i = 0; i < w; i++)
652     debug1("%c", "0123456789abcdef"[(rl->fontx[i] >> 4) & 15]);
653   debug("\n");
654   for (i = 0; i < w; i++)
655     debug1("%c", "0123456789abcdef"[(rl->fontx[i]     ) & 15]);
656   debug("\n");
657   return rl;
658 }
659 
/*
 * A combined character made of two parts.  Codes 0xd800..0xdfff are
 * used as indices (code - 0xd800) into combchars[].
 */
struct combchar {
  unsigned int c1;	/* first part; expanded recursively by AddUtf8()/ToUtf8_comb() */
  unsigned int c2;	/* second part, emitted after c1 */
  unsigned int next;	/* not referenced in this part of the file; NOTE(review): presumably list linkage -- confirm */
  unsigned int prev;	/* not referenced in this part of the file */
};
/* Table of combined characters; may be 0, and individual slots may be 0. */
struct combchar **combchars;
667 
668 void
AddUtf8(c)669 AddUtf8(c)
670 int c;
671 {
672   ASSERT(D_encoding == UTF8);
673   if (c >= 0xd800 && c < 0xe000 && combchars && combchars[c - 0xd800])
674     {
675       AddUtf8(combchars[c - 0xd800]->c1);
676       c = combchars[c - 0xd800]->c2;
677     }
678   if (c >= 0x10000)
679     {
680       if (c >= 0x200000)
681 	{
682 	  AddChar((c & 0x3000000) >> 12 ^ 0xf8);
683 	  c = (c & 0xffffff) ^ ((0xf0 ^ 0x80) << 18);
684 	}
685       AddChar((c & 0x1fc0000) >> 18 ^ 0xf0);
686       c = (c & 0x3ffff) ^ ((0xe0 ^ 0x80) << 12);
687     }
688   if (c >= 0x800)
689     {
690       AddChar((c & 0x7f000) >> 12 ^ 0xe0);
691       c = (c & 0x0fff) ^ ((0xc0 ^ 0x80) << 6);
692     }
693   if (c >= 0x80)
694     {
695       AddChar((c & 0x1fc0) >> 6 ^ 0xc0);
696       c = (c & 0x3f) | 0x80;
697     }
698   AddChar(c);
699 }
700 
701 int
ToUtf8_comb(p,c)702 ToUtf8_comb(p, c)
703 char *p;
704 int c;
705 {
706   int l;
707 
708   if (c >= 0xd800 && c < 0xe000 && combchars && combchars[c - 0xd800])
709     {
710       l = ToUtf8_comb(p, combchars[c - 0xd800]->c1);
711       return l + ToUtf8(p ? p + l : 0, combchars[c - 0xd800]->c2);
712     }
713   return ToUtf8(p, c);
714 }
715 
/*
 * Encode character c as UTF-8.
 *
 * p  destination buffer, or 0 to only compute the length.  No
 *    terminating NUL is written.
 * c  character code; sequences of up to five bytes are produced.
 *
 * Returns the number of bytes of the encoding.
 *
 * Each stage emits the lead byte for its length and then rewrites c so
 * that the following, shorter stage produces a continuation byte
 * (10xxxxxx) for the next six bits.
 */
int
ToUtf8(p, c)
char *p;
int c;
{
  int l = 1;
  if (c >= 0x10000)
    {
      if (c >= 0x200000)
	{
	  /* 5-byte form: lead byte is 111110xx with code bits 24..25,
	   * so shift by 24 (a shift by 12 dropped those bits) */
	  if (p)
	    *p++ = (c & 0x3000000) >> 24 ^ 0xf8;
	  l++;
	  c = (c & 0xffffff) ^ ((0xf0 ^ 0x80) << 18);
	}
      if (p)
        *p++ = (c & 0x1fc0000) >> 18 ^ 0xf0;
      l++;
      c = (c & 0x3ffff) ^ ((0xe0 ^ 0x80) << 12);
    }
  if (c >= 0x800)
    {
      if (p)
	*p++ = (c & 0x7f000) >> 12 ^ 0xe0;
      l++;
      c = (c & 0x0fff) | 0x1000;
    }
  if (c >= 0x80)
    {
      if (p)
	*p++ = (c & 0x1fc0) >> 6 ^ 0xc0;
      l++;
      c = (c & 0x3f) | 0x80;
    }
  if (p)
    *p++ = c;
  return l;
}
754 
/*
 * Incremental UTF-8 decoder: feed one input byte per call.
 *
 * c          the next input byte
 * utf8charp  decoder state carried between calls; 0 means that no
 *            sequence is in progress.  It is rewritten on every call.
 *
 * While a sequence is in progress the state holds the bits collected so
 * far below a run of marker bits; every continuation byte shifts the
 * state left by six, and the sequence is complete once bit 31 is clear.
 *
 * returns:
 * -1: need more bytes, sequence not finished
 * -2: corrupt sequence found, redo last char
 *     (the state has been reset, so the same byte can be fed again)
 * >= 0: decoded character
 */
int
FromUtf8(c, utf8charp)
int c, *utf8charp;
{
  int utf8char = *utf8charp;
  if (utf8char)
    {
      /* inside a sequence: only continuation bytes (10xxxxxx) are valid */
      if ((c & 0xc0) != 0x80)
	{
	  *utf8charp = 0;
	  return -2; /* corrupt sequence! */
	}
      else
	c = (c & 0x3f) | (utf8char << 6);	/* NOTE(review): utf8char is negative here; left-shifting a negative int is undefined in ISO C */
      /* bit 30 is clear only in the lead-byte states for 3..6 byte
       * sequences set up below, i.e. this is their first continuation byte */
      if (!(utf8char & 0x40000000))
	{
	  /* check for overlong sequences */
	  /* NOTE(review): the replacement states keep bit 31 set so that the
	   * remaining bytes are still consumed; presumably the final value
	   * then fails the range check below -- confirm */
	  if ((c & 0x820823e0) == 0x80000000)
	    c = 0xfdffffff;
	  else if ((c & 0x020821f0) == 0x02000000)
	    c = 0xfff7ffff;
	  else if ((c & 0x000820f8) == 0x00080000)
	    c = 0xffffd000;
	  else if ((c & 0x0000207c) == 0x00002000)
	    c = 0xffffff70;
	}
    }
  else
    {
      /* new sequence */
      /* bytes below 0x80 fall through all tests and decode to themselves */
      if (c >= 0xfe)
	c = UCS_REPL;
      else if (c >= 0xfc)
	c = (c & 0x01) | 0xbffffffc;	/* 5 bytes to follow */
      else if (c >= 0xf8)
	c = (c & 0x03) | 0xbfffff00;	/* 4 */
      else if (c >= 0xf0)
	c = (c & 0x07) | 0xbfffc000;	/* 3 */
      else if (c >= 0xe0)
	c = (c & 0x0f) | 0xbff00000;	/* 2 */
      else if (c >= 0xc2)
	c = (c & 0x1f) | 0xfc000000;	/* 1 */
      else if (c >= 0xc0)
	c = 0xfdffffff;		/* overlong */
      else if (c >= 0x80)
	c = UCS_REPL;		/* continuation byte without a lead byte */
    }
  /* bit 31 set: still inside a sequence, keep the state and ask for more */
  *utf8charp = utf8char = (c & 0x80000000) ? c : 0;
  if (utf8char)
    return -1;
#if 0
  if (c & 0xffff0000)
    c = UCS_REPL;	/* sorry, only know 16bit Unicode */
#else
  if (c & 0xff800000)
    c = UCS_REPL;	/* sorry, only know 23bit Unicode */
#endif
  /* 0xd800..0xdfff, 0xfffe and 0xffff are rejected */
  if (c >= 0xd800 && (c <= 0xdfff || c == 0xfffe || c == 0xffff))
    c = UCS_REPL;	/* illegal code */
  return c;
}
822 
823 
824 void
WinSwitchEncoding(p,encoding)825 WinSwitchEncoding(p, encoding)
826 struct win *p;
827 int encoding;
828 {
829   int i, j, c;
830   struct mline *ml;
831   struct display *d;
832   struct canvas *cv;
833   struct layer *oldflayer;
834 
835   if ((p->w_encoding == UTF8) == (encoding == UTF8))
836     {
837       p->w_encoding = encoding;
838       return;
839     }
840   oldflayer = flayer;
841   for (d = displays; d; d = d->d_next)
842     for (cv = d->d_cvlist; cv; cv = cv->c_next)
843       if (p == Layer2Window(cv->c_layer))
844 	{
845 	  flayer = cv->c_layer;
846 	  while(flayer->l_next)
847 	    {
848 	      if (oldflayer == flayer)
849 		oldflayer = flayer->l_next;
850 	      ExitOverlayPage();
851 	    }
852 	}
853   flayer = oldflayer;
854   for (j = 0; j < p->w_height + p->w_histheight; j++)
855     {
856 #ifdef COPY_PASTE
857       ml = j < p->w_height ? &p->w_mlines[j] : &p->w_hlines[j - p->w_height];
858 #else
859       ml = &p->w_mlines[j];
860 #endif
861       if (ml->font == null && ml->fontx == 0 && encodings[p->w_encoding].deffont == 0)
862 	continue;
863       for (i = 0; i < p->w_width; i++)
864 	{
865 	  c = ml->image[i] | (ml->font[i] << 8);
866 	  if (p->w_encoding == UTF8)
867 	    c |= ml->fontx[i] << 16;
868 	  if (p->w_encoding != UTF8 && c < 256)
869 	    c |= encodings[p->w_encoding].deffont << 8;
870 	  if (c < 256)
871 	    continue;
872 	  if (ml->font == null)
873 	    {
874 	      if ((ml->font = (unsigned char *)calloc(p->w_width + 1, 1)) == 0)
875 		{
876 		  ml->font = null;
877 		  break;
878 		}
879 	    }
880 #ifdef DW_CHARS
881 	  if ((p->w_encoding != UTF8 && (c & 0x1f00) != 0 && (c & 0xe000) == 0) || (p->w_encoding == UTF8 && utf8_isdouble(c)))
882 	    {
883 	      if (i + 1 == p->w_width)
884 		c = '?';
885 	      else
886 		{
887 		  int c2;
888 		  i++;
889 		  c2 = ml->image[i] | (ml->font[i] << 8) | (ml->fontx[i] << 16);
890 		  c = recode_char_dw_to_encoding(c, &c2, encoding);
891 		  if (encoding == UTF8)
892 		    {
893 		      if (c > 0x10000 && ml->fontx == null)
894 			{
895 			  if ((ml->fontx = (unsigned char *)calloc(p->w_width + 1, 1)) == 0)
896 			    {
897 			      ml->fontx = null;
898 			      break;
899 			    }
900 			}
901 		      ml->fontx[i - 1]  = c >> 16 & 255;
902 		    }
903 		  else
904 		    ml->fontx = null;
905 		  ml->font[i - 1]  = c >> 8 & 255;
906 		  ml->image[i - 1] = c      & 255;
907 		  c = c2;
908 		}
909 	    }
910 	  else
911 #endif
912 	    c = recode_char_to_encoding(c, encoding);
913 	  ml->image[i] = c & 255;
914 	  ml->font[i] = c >> 8 & 255;
915 	  if (encoding == UTF8)
916 	    {
917 	      if (c > 0x10000 && ml->fontx == null)
918 		{
919 		  if ((ml->fontx = (unsigned char *)calloc(p->w_width + 1, 1)) == 0)
920 		    {
921 		      ml->fontx = null;
922 		      break;
923 		    }
924 		}
925 	      ml->fontx[i]  = c >> 16 & 255;
926 	    }
927 	  else
928 	    ml->fontx = null;
929 	}
930     }
931   p->w_encoding = encoding;
932   return;
933 }
934 
935 #ifdef DW_CHARS
/* Closed range [first, last] of character codes. */
struct interval {
  int first;
  int last;
};

/*
 * Binary search in a table of intervals sorted in ascending order.
 * max is the index of the last table entry.
 * Returns 1 if ucs lies inside one of the intervals, 0 otherwise.
 */
static int bisearch(int ucs, const struct interval *table, int max)
{
  int lo = 0;
  int hi = max;

  /* quick reject: outside the span covered by the whole table */
  if (ucs < table[0].first || ucs > table[hi].last)
    return 0;

  while (lo <= hi)
    {
      int mid = (lo + hi) / 2;
      const struct interval *iv = &table[mid];

      if (ucs > iv->last)
	lo = mid + 1;
      else if (ucs < iv->first)
	hi = mid - 1;
      else
	return 1;
    }
  return 0;
}
960 
961 int
utf8_isdouble(c)962 utf8_isdouble(c)
963 int c;
964 {
965   /* A sorted list of intervals of ambiguous width characters generated by
966    * https://github.com/GNOME/glib/blob/glib-2-50/glib/gen-unicode-tables.pl */
967   static const struct interval ambiguous[] = {
968     {0x00A1, 0x00A1},
969     {0x00A4, 0x00A4},
970     {0x00A7, 0x00A8},
971     {0x00AA, 0x00AA},
972     {0x00AD, 0x00AE},
973     {0x00B0, 0x00B4},
974     {0x00B6, 0x00BA},
975     {0x00BC, 0x00BF},
976     {0x00C6, 0x00C6},
977     {0x00D0, 0x00D0},
978     {0x00D7, 0x00D8},
979     {0x00DE, 0x00E1},
980     {0x00E6, 0x00E6},
981     {0x00E8, 0x00EA},
982     {0x00EC, 0x00ED},
983     {0x00F0, 0x00F0},
984     {0x00F2, 0x00F3},
985     {0x00F7, 0x00FA},
986     {0x00FC, 0x00FC},
987     {0x00FE, 0x00FE},
988     {0x0101, 0x0101},
989     {0x0111, 0x0111},
990     {0x0113, 0x0113},
991     {0x011B, 0x011B},
992     {0x0126, 0x0127},
993     {0x012B, 0x012B},
994     {0x0131, 0x0133},
995     {0x0138, 0x0138},
996     {0x013F, 0x0142},
997     {0x0144, 0x0144},
998     {0x0148, 0x014B},
999     {0x014D, 0x014D},
1000     {0x0152, 0x0153},
1001     {0x0166, 0x0167},
1002     {0x016B, 0x016B},
1003     {0x01CE, 0x01CE},
1004     {0x01D0, 0x01D0},
1005     {0x01D2, 0x01D2},
1006     {0x01D4, 0x01D4},
1007     {0x01D6, 0x01D6},
1008     {0x01D8, 0x01D8},
1009     {0x01DA, 0x01DA},
1010     {0x01DC, 0x01DC},
1011     {0x0251, 0x0251},
1012     {0x0261, 0x0261},
1013     {0x02C4, 0x02C4},
1014     {0x02C7, 0x02C7},
1015     {0x02C9, 0x02CB},
1016     {0x02CD, 0x02CD},
1017     {0x02D0, 0x02D0},
1018     {0x02D8, 0x02DB},
1019     {0x02DD, 0x02DD},
1020     {0x02DF, 0x02DF},
1021     {0x0300, 0x036F},
1022     {0x0391, 0x03A1},
1023     {0x03A3, 0x03A9},
1024     {0x03B1, 0x03C1},
1025     {0x03C3, 0x03C9},
1026     {0x0401, 0x0401},
1027     {0x0410, 0x044F},
1028     {0x0451, 0x0451},
1029     {0x2010, 0x2010},
1030     {0x2013, 0x2016},
1031     {0x2018, 0x2019},
1032     {0x201C, 0x201D},
1033     {0x2020, 0x2022},
1034     {0x2024, 0x2027},
1035     {0x2030, 0x2030},
1036     {0x2032, 0x2033},
1037     {0x2035, 0x2035},
1038     {0x203B, 0x203B},
1039     {0x203E, 0x203E},
1040     {0x2074, 0x2074},
1041     {0x207F, 0x207F},
1042     {0x2081, 0x2084},
1043     {0x20AC, 0x20AC},
1044     {0x2103, 0x2103},
1045     {0x2105, 0x2105},
1046     {0x2109, 0x2109},
1047     {0x2113, 0x2113},
1048     {0x2116, 0x2116},
1049     {0x2121, 0x2122},
1050     {0x2126, 0x2126},
1051     {0x212B, 0x212B},
1052     {0x2153, 0x2154},
1053     {0x215B, 0x215E},
1054     {0x2160, 0x216B},
1055     {0x2170, 0x2179},
1056     {0x2189, 0x2189},
1057     {0x2190, 0x2199},
1058     {0x21B8, 0x21B9},
1059     {0x21D2, 0x21D2},
1060     {0x21D4, 0x21D4},
1061     {0x21E7, 0x21E7},
1062     {0x2200, 0x2200},
1063     {0x2202, 0x2203},
1064     {0x2207, 0x2208},
1065     {0x220B, 0x220B},
1066     {0x220F, 0x220F},
1067     {0x2211, 0x2211},
1068     {0x2215, 0x2215},
1069     {0x221A, 0x221A},
1070     {0x221D, 0x2220},
1071     {0x2223, 0x2223},
1072     {0x2225, 0x2225},
1073     {0x2227, 0x222C},
1074     {0x222E, 0x222E},
1075     {0x2234, 0x2237},
1076     {0x223C, 0x223D},
1077     {0x2248, 0x2248},
1078     {0x224C, 0x224C},
1079     {0x2252, 0x2252},
1080     {0x2260, 0x2261},
1081     {0x2264, 0x2267},
1082     {0x226A, 0x226B},
1083     {0x226E, 0x226F},
1084     {0x2282, 0x2283},
1085     {0x2286, 0x2287},
1086     {0x2295, 0x2295},
1087     {0x2299, 0x2299},
1088     {0x22A5, 0x22A5},
1089     {0x22BF, 0x22BF},
1090     {0x2312, 0x2312},
1091     {0x2460, 0x24E9},
1092     {0x24EB, 0x254B},
1093     {0x2550, 0x2573},
1094     {0x2580, 0x258F},
1095     {0x2592, 0x2595},
1096     {0x25A0, 0x25A1},
1097     {0x25A3, 0x25A9},
1098     {0x25B2, 0x25B3},
1099     {0x25B6, 0x25B7},
1100     {0x25BC, 0x25BD},
1101     {0x25C0, 0x25C1},
1102     {0x25C6, 0x25C8},
1103     {0x25CB, 0x25CB},
1104     {0x25CE, 0x25D1},
1105     {0x25E2, 0x25E5},
1106     {0x25EF, 0x25EF},
1107     {0x2605, 0x2606},
1108     {0x2609, 0x2609},
1109     {0x260E, 0x260F},
1110     {0x261C, 0x261C},
1111     {0x261E, 0x261E},
1112     {0x2640, 0x2640},
1113     {0x2642, 0x2642},
1114     {0x2660, 0x2661},
1115     {0x2663, 0x2665},
1116     {0x2667, 0x266A},
1117     {0x266C, 0x266D},
1118     {0x266F, 0x266F},
1119     {0x269E, 0x269F},
1120     {0x26BF, 0x26BF},
1121     {0x26C6, 0x26CD},
1122     {0x26CF, 0x26D3},
1123     {0x26D5, 0x26E1},
1124     {0x26E3, 0x26E3},
1125     {0x26E8, 0x26E9},
1126     {0x26EB, 0x26F1},
1127     {0x26F4, 0x26F4},
1128     {0x26F6, 0x26F9},
1129     {0x26FB, 0x26FC},
1130     {0x26FE, 0x26FF},
1131     {0x273D, 0x273D},
1132     {0x2776, 0x277F},
1133     {0x2B56, 0x2B59},
1134     {0x3248, 0x324F},
1135     {0xE000, 0xF8FF},
1136     {0xFE00, 0xFE0F},
1137     {0xFFFD, 0xFFFD},
1138     {0x1F100, 0x1F10A},
1139     {0x1F110, 0x1F12D},
1140     {0x1F130, 0x1F169},
1141     {0x1F170, 0x1F18D},
1142     {0x1F18F, 0x1F190},
1143     {0x1F19B, 0x1F1AC},
1144     {0xE0100, 0xE01EF},
1145     {0xF0000, 0xFFFFD},
1146     {0x100000, 0x10FFFD},
1147   };
1148   /* A sorted list of intervals of double width characters generated by
1149    * https://github.com/GNOME/glib/blob/glib-2-50/glib/gen-unicode-tables.pl */
1150   static const struct interval wide[] = {
1151     {0x1100, 0x115F},
1152     {0x231A, 0x231B},
1153     {0x2329, 0x232A},
1154     {0x23E9, 0x23EC},
1155     {0x23F0, 0x23F0},
1156     {0x23F3, 0x23F3},
1157     {0x25FD, 0x25FE},
1158     {0x2614, 0x2615},
1159     {0x2648, 0x2653},
1160     {0x267F, 0x267F},
1161     {0x2693, 0x2693},
1162     {0x26A1, 0x26A1},
1163     {0x26AA, 0x26AB},
1164     {0x26BD, 0x26BE},
1165     {0x26C4, 0x26C5},
1166     {0x26CE, 0x26CE},
1167     {0x26D4, 0x26D4},
1168     {0x26EA, 0x26EA},
1169     {0x26F2, 0x26F3},
1170     {0x26F5, 0x26F5},
1171     {0x26FA, 0x26FA},
1172     {0x26FD, 0x26FD},
1173     {0x2705, 0x2705},
1174     {0x270A, 0x270B},
1175     {0x2728, 0x2728},
1176     {0x274C, 0x274C},
1177     {0x274E, 0x274E},
1178     {0x2753, 0x2755},
1179     {0x2757, 0x2757},
1180     {0x2795, 0x2797},
1181     {0x27B0, 0x27B0},
1182     {0x27BF, 0x27BF},
1183     {0x2B1B, 0x2B1C},
1184     {0x2B50, 0x2B50},
1185     {0x2B55, 0x2B55},
1186     {0x2E80, 0x2E99},
1187     {0x2E9B, 0x2EF3},
1188     {0x2F00, 0x2FD5},
1189     {0x2FF0, 0x2FFB},
1190     {0x3000, 0x303E},
1191     {0x3041, 0x3096},
1192     {0x3099, 0x30FF},
1193     {0x3105, 0x312F},
1194     {0x3131, 0x318E},
1195     {0x3190, 0x31BA},
1196     {0x31C0, 0x31E3},
1197     {0x31F0, 0x321E},
1198     {0x3220, 0x3247},
1199     {0x3250, 0x4DBF},
1200     {0x4E00, 0xA48C},
1201     {0xA490, 0xA4C6},
1202     {0xA960, 0xA97C},
1203     {0xAC00, 0xD7A3},
1204     {0xF900, 0xFAFF},
1205     {0xFE10, 0xFE19},
1206     {0xFE30, 0xFE52},
1207     {0xFE54, 0xFE66},
1208     {0xFE68, 0xFE6B},
1209     {0xFF01, 0xFF60},
1210     {0xFFE0, 0xFFE6},
1211     {0x16FE0, 0x16FE3},
1212     {0x17000, 0x187F7},
1213     {0x18800, 0x18AF2},
1214     {0x1B000, 0x1B11E},
1215     {0x1B150, 0x1B152},
1216     {0x1B164, 0x1B167},
1217     {0x1B170, 0x1B2FB},
1218     {0x1F004, 0x1F004},
1219     {0x1F0CF, 0x1F0CF},
1220     {0x1F18E, 0x1F18E},
1221     {0x1F191, 0x1F19A},
1222     {0x1F200, 0x1F202},
1223     {0x1F210, 0x1F23B},
1224     {0x1F240, 0x1F248},
1225     {0x1F250, 0x1F251},
1226     {0x1F260, 0x1F265},
1227     {0x1F300, 0x1F320},
1228     {0x1F32D, 0x1F335},
1229     {0x1F337, 0x1F37C},
1230     {0x1F37E, 0x1F393},
1231     {0x1F3A0, 0x1F3CA},
1232     {0x1F3CF, 0x1F3D3},
1233     {0x1F3E0, 0x1F3F0},
1234     {0x1F3F4, 0x1F3F4},
1235     {0x1F3F8, 0x1F43E},
1236     {0x1F440, 0x1F440},
1237     {0x1F442, 0x1F4FC},
1238     {0x1F4FF, 0x1F53D},
1239     {0x1F54B, 0x1F54E},
1240     {0x1F550, 0x1F567},
1241     {0x1F57A, 0x1F57A},
1242     {0x1F595, 0x1F596},
1243     {0x1F5A4, 0x1F5A4},
1244     {0x1F5FB, 0x1F64F},
1245     {0x1F680, 0x1F6C5},
1246     {0x1F6CC, 0x1F6CC},
1247     {0x1F6D0, 0x1F6D2},
1248     {0x1F6D5, 0x1F6D5},
1249     {0x1F6EB, 0x1F6EC},
1250     {0x1F6F4, 0x1F6FA},
1251     {0x1F7E0, 0x1F7EB},
1252     {0x1F90D, 0x1F971},
1253     {0x1F973, 0x1F976},
1254     {0x1F97A, 0x1F9A2},
1255     {0x1F9A5, 0x1F9AA},
1256     {0x1F9AE, 0x1F9CA},
1257     {0x1F9CD, 0x1F9FF},
1258     {0x1FA70, 0x1FA73},
1259     {0x1FA78, 0x1FA7A},
1260     {0x1FA80, 0x1FA82},
1261     {0x1FA90, 0x1FA95},
1262     {0x20000, 0x2FFFD},
1263     {0x30000, 0x3FFFD},
1264   };
1265 
1266   if (c >= 0xdf00 && c <= 0xdfff)
1267 	return 1;	/* dw comining sequence */
1268   return ((bisearch(c, wide, sizeof(wide) / sizeof(struct interval) - 1)) ||
1269           (cjkwidth &&
1270            bisearch(c, ambiguous,
1271 	            sizeof(ambiguous) / sizeof(struct interval) - 1)));
1272 }
1273 #endif
1274 
1275 int
utf8_iscomb(c)1276 utf8_iscomb(c)
1277 int c;
1278 {
1279   /* taken from Markus Kuhn's wcwidth */
1280   static const struct interval combining[] = {
1281     { 0x0300, 0x036F }, { 0x0483, 0x0486 }, { 0x0488, 0x0489 },
1282     { 0x0591, 0x05BD }, { 0x05BF, 0x05BF }, { 0x05C1, 0x05C2 },
1283     { 0x05C4, 0x05C5 }, { 0x05C7, 0x05C7 }, { 0x0600, 0x0603 },
1284     { 0x0610, 0x0615 }, { 0x064B, 0x065E }, { 0x0670, 0x0670 },
1285     { 0x06D6, 0x06E4 }, { 0x06E7, 0x06E8 }, { 0x06EA, 0x06ED },
1286     { 0x070F, 0x070F }, { 0x0711, 0x0711 }, { 0x0730, 0x074A },
1287     { 0x07A6, 0x07B0 }, { 0x07EB, 0x07F3 }, { 0x0901, 0x0902 },
1288     { 0x093C, 0x093C }, { 0x0941, 0x0948 }, { 0x094D, 0x094D },
1289     { 0x0951, 0x0954 }, { 0x0962, 0x0963 }, { 0x0981, 0x0981 },
1290     { 0x09BC, 0x09BC }, { 0x09C1, 0x09C4 }, { 0x09CD, 0x09CD },
1291     { 0x09E2, 0x09E3 }, { 0x0A01, 0x0A02 }, { 0x0A3C, 0x0A3C },
1292     { 0x0A41, 0x0A42 }, { 0x0A47, 0x0A48 }, { 0x0A4B, 0x0A4D },
1293     { 0x0A70, 0x0A71 }, { 0x0A81, 0x0A82 }, { 0x0ABC, 0x0ABC },
1294     { 0x0AC1, 0x0AC5 }, { 0x0AC7, 0x0AC8 }, { 0x0ACD, 0x0ACD },
1295     { 0x0AE2, 0x0AE3 }, { 0x0B01, 0x0B01 }, { 0x0B3C, 0x0B3C },
1296     { 0x0B3F, 0x0B3F }, { 0x0B41, 0x0B43 }, { 0x0B4D, 0x0B4D },
1297     { 0x0B56, 0x0B56 }, { 0x0B82, 0x0B82 }, { 0x0BC0, 0x0BC0 },
1298     { 0x0BCD, 0x0BCD }, { 0x0C3E, 0x0C40 }, { 0x0C46, 0x0C48 },
1299     { 0x0C4A, 0x0C4D }, { 0x0C55, 0x0C56 }, { 0x0CBC, 0x0CBC },
1300     { 0x0CBF, 0x0CBF }, { 0x0CC6, 0x0CC6 }, { 0x0CCC, 0x0CCD },
1301     { 0x0CE2, 0x0CE3 }, { 0x0D41, 0x0D43 }, { 0x0D4D, 0x0D4D },
1302     { 0x0DCA, 0x0DCA }, { 0x0DD2, 0x0DD4 }, { 0x0DD6, 0x0DD6 },
1303     { 0x0E31, 0x0E31 }, { 0x0E34, 0x0E3A }, { 0x0E47, 0x0E4E },
1304     { 0x0EB1, 0x0EB1 }, { 0x0EB4, 0x0EB9 }, { 0x0EBB, 0x0EBC },
1305     { 0x0EC8, 0x0ECD }, { 0x0F18, 0x0F19 }, { 0x0F35, 0x0F35 },
1306     { 0x0F37, 0x0F37 }, { 0x0F39, 0x0F39 }, { 0x0F71, 0x0F7E },
1307     { 0x0F80, 0x0F84 }, { 0x0F86, 0x0F87 }, { 0x0F90, 0x0F97 },
1308     { 0x0F99, 0x0FBC }, { 0x0FC6, 0x0FC6 }, { 0x102D, 0x1030 },
1309     { 0x1032, 0x1032 }, { 0x1036, 0x1037 }, { 0x1039, 0x1039 },
1310     { 0x1058, 0x1059 }, { 0x1160, 0x11FF }, { 0x135F, 0x135F },
1311     { 0x1712, 0x1714 }, { 0x1732, 0x1734 }, { 0x1752, 0x1753 },
1312     { 0x1772, 0x1773 }, { 0x17B4, 0x17B5 }, { 0x17B7, 0x17BD },
1313     { 0x17C6, 0x17C6 }, { 0x17C9, 0x17D3 }, { 0x17DD, 0x17DD },
1314     { 0x180B, 0x180D }, { 0x18A9, 0x18A9 }, { 0x1920, 0x1922 },
1315     { 0x1927, 0x1928 }, { 0x1932, 0x1932 }, { 0x1939, 0x193B },
1316     { 0x1A17, 0x1A18 }, { 0x1B00, 0x1B03 }, { 0x1B34, 0x1B34 },
1317     { 0x1B36, 0x1B3A }, { 0x1B3C, 0x1B3C }, { 0x1B42, 0x1B42 },
1318     { 0x1B6B, 0x1B73 }, { 0x1DC0, 0x1DCA }, { 0x1DFE, 0x1DFF },
1319     { 0x200B, 0x200F }, { 0x202A, 0x202E }, { 0x2060, 0x2063 },
1320     { 0x206A, 0x206F }, { 0x20D0, 0x20EF }, { 0x302A, 0x302F },
1321     { 0x3099, 0x309A }, { 0xA806, 0xA806 }, { 0xA80B, 0xA80B },
1322     { 0xA825, 0xA826 }, { 0xFB1E, 0xFB1E }, { 0xFE00, 0xFE0F },
1323     { 0xFE20, 0xFE23 }, { 0xFEFF, 0xFEFF }, { 0xFFF9, 0xFFFB },
1324     { 0x10A01, 0x10A03 }, { 0x10A05, 0x10A06 }, { 0x10A0C, 0x10A0F },
1325     { 0x10A38, 0x10A3A }, { 0x10A3F, 0x10A3F }, { 0x1D167, 0x1D169 },
1326     { 0x1D173, 0x1D182 }, { 0x1D185, 0x1D18B }, { 0x1D1AA, 0x1D1AD },
1327     { 0x1D242, 0x1D244 }, { 0xE0001, 0xE0001 }, { 0xE0020, 0xE007F },
1328     { 0xE0100, 0xE01EF }
1329   };
1330 
1331   return bisearch(c, combining, sizeof(combining) / sizeof(struct interval) - 1);
1332 }
1333 
1334 static void
comb_tofront(i)1335 comb_tofront( i)
1336 int i;
1337 {
1338   for (;;)
1339     {
1340       int root = i >= 0x700 ? 0x801 : 0x800;
1341       debug1("bring to front: %x\n", i);
1342       combchars[combchars[i]->prev]->next = combchars[i]->next;
1343       combchars[combchars[i]->next]->prev = combchars[i]->prev;
1344       combchars[i]->next = combchars[root]->next;
1345       combchars[i]->prev = root;
1346       combchars[combchars[root]->next]->prev = i;
1347       combchars[root]->next = i;
1348       i = combchars[i]->c1;
1349       if (i < 0xd800 || i >= 0xe000)
1350 	return;
1351       i -= 0xd800;
1352     }
1353 }
1354 
1355 void
utf8_handle_comb(c,mc)1356 utf8_handle_comb(c, mc)
1357 int c;
1358 struct mchar *mc;
1359 {
1360   int root, i, c1;
1361   int isdouble;
1362 
1363   c1 = mc->image | (mc->font << 8) | mc->fontx << 16;
1364   isdouble = c1 >= 0x1100 && utf8_isdouble(c1);
1365   if (!combchars)
1366     {
1367       combchars = (struct combchar **)calloc(0x802, sizeof(struct combchar *));
1368       if (!combchars)
1369 	return;
1370       combchars[0x800] = (struct combchar *)malloc(sizeof(struct combchar));
1371       combchars[0x801] = (struct combchar *)malloc(sizeof(struct combchar));
1372       if (!combchars[0x800] || !combchars[0x801])
1373 	{
1374 	  if (combchars[0x800])
1375 	    free(combchars[0x800]);
1376 	  if (combchars[0x801])
1377 	    free(combchars[0x801]);
1378 	  free(combchars);
1379 	  return;
1380 	}
1381       combchars[0x800]->c1 = 0x000;
1382       combchars[0x800]->c2 = 0x700;
1383       combchars[0x800]->next = 0x800;
1384       combchars[0x800]->prev = 0x800;
1385       combchars[0x801]->c1 = 0x700;
1386       combchars[0x801]->c2 = 0x800;
1387       combchars[0x801]->next = 0x801;
1388       combchars[0x801]->prev = 0x801;
1389     }
1390   root = isdouble ? 0x801 : 0x800;
1391   for (i = combchars[root]->c1; i < combchars[root]->c2; i++)
1392     {
1393       if (!combchars[i])
1394 	break;
1395       if (combchars[i]->c1 == c1 && combchars[i]->c2 == c)
1396 	break;
1397     }
1398   if (i == combchars[root]->c2)
1399     {
1400       /* full, recycle old entry */
1401       if (c1 >= 0xd800 && c1 < 0xe000)
1402         comb_tofront(c1 - 0xd800);
1403       i = combchars[root]->prev;
1404       if (i == 0x800 || i == 0x801 || c1 == i + 0xd800)
1405 	{
1406 	  /* completely full, can't recycle */
1407 	  debug("utf8_handle_comp: completely full!\n");
1408 	  mc->image = '?';
1409 	  mc->font  = 0;
1410 	  return;
1411 	}
1412       /* FIXME: delete old char from all buffers */
1413     }
1414   else if (!combchars[i])
1415     {
1416       combchars[i] = (struct combchar *)malloc(sizeof(struct combchar));
1417       if (!combchars[i])
1418 	return;
1419       combchars[i]->prev = i;
1420       combchars[i]->next = i;
1421     }
1422   combchars[i]->c1 = c1;
1423   combchars[i]->c2 = c;
1424   mc->image = i & 0xff;
1425   mc->font  = (i >> 8) + 0xd8;
1426   mc->fontx = 0;
1427   debug3("combinig char %x %x -> %x\n", c1, c, i + 0xd800);
1428   comb_tofront(i);
1429 }
1430 
1431 #else /* !UTF8 */
1432 
1433 void
WinSwitchEncoding(p,encoding)1434 WinSwitchEncoding(p, encoding)
1435 struct win *p;
1436 int encoding;
1437 {
1438   p->w_encoding = encoding;
1439   return;
1440 }
1441 
1442 #endif /* UTF8 */
1443 
/*
 * Compare two encoding names, ignoring case and every character that
 * is not an ASCII letter or digit (so "UTF-8" matches "utf8").
 *
 * Returns 1 when all letters/digits of s1 have been matched against
 * s2 in order, i.e. reaching the end of s1 counts as a match even if
 * s2 has more characters left.  Returns 0 on the first mismatch or
 * when s2 ends while s1 still has a letter/digit to match.
 */
static int
encmatch(s1, s2)
char *s1;
char *s2;
{
  int c1, c2;

  do
    {
      c1 = (unsigned char)*s1;
      if (c1 >= 'A' && c1 <= 'Z')
        c1 += 'a' - 'A';
      if (!(c1 >= 'a' && c1 <= 'z') && !(c1 >= '0' && c1 <= '9'))
        {
          /* separator, or the terminating NUL (the loop test ends then) */
          s1++;
          continue;
        }
      c2 = (unsigned char)*s2;
      if (c2 >= 'A' && c2 <= 'Z')
        c2 += 'a' - 'A';
      if (!(c2 >= 'a' && c2 <= 'z') && !(c2 >= '0' && c2 <= '9'))
        {
          /* s2 is exhausted but s1 is not: never step over the NUL,
           * that would read beyond the end of the string */
          if (c2 == 0)
            return 0;
          s2++;
          continue;
        }
      if (c1 != c2)
        return 0;
      s1++;
      s2++;
    }
  while (c1);
  return 1;
}
1476 
1477 int
FindEncoding(name)1478 FindEncoding(name)
1479 char *name;
1480 {
1481   int encoding;
1482 
1483   debug1("FindEncoding %s\n", name);
1484   if (name == 0 || *name == 0)
1485     return 0;
1486   if (encmatch(name, "euc"))
1487     name = "eucJP";
1488   if (encmatch(name, "off") || encmatch(name, "iso8859-1"))
1489     return 0;
1490 #ifndef UTF8
1491   if (encmatch(name, "UTF-8"))
1492     return -1;
1493 #endif
1494   for (encoding = 0; encoding < (int)(sizeof(encodings)/sizeof(*encodings)); encoding++)
1495     if (encmatch(name, encodings[encoding].name))
1496       {
1497 #ifdef UTF8
1498 	LoadFontTranslationsForEncoding(encoding);
1499 #endif
1500         return encoding;
1501       }
1502   return -1;
1503 }
1504 
1505 char *
EncodingName(encoding)1506 EncodingName(encoding)
1507 int encoding;
1508 {
1509   if (encoding >= (int)(sizeof(encodings)/sizeof(*encodings)))
1510     return 0;
1511   return encodings[encoding].name;
1512 }
1513 
1514 int
EncodingDefFont(encoding)1515 EncodingDefFont(encoding)
1516 int encoding;
1517 {
1518   return encodings[encoding].deffont;
1519 }
1520 
1521 void
ResetEncoding(p)1522 ResetEncoding(p)
1523 struct win *p;
1524 {
1525   char *c;
1526   int encoding = p->w_encoding;
1527 
1528   c = encodings[encoding].charsets;
1529   if (c)
1530     SetCharsets(p, c);
1531 #ifdef UTF8
1532   LoadFontTranslationsForEncoding(encoding);
1533 #endif
1534   if (encodings[encoding].usegr)
1535     {
1536       p->w_gr = 2;
1537       p->w_FontE = encodings[encoding].charsets[1];
1538     }
1539   else
1540     p->w_FontE = 0;
1541   if (encodings[encoding].noc1)
1542     p->w_c1 = 0;
1543 }
1544 
1545 /* decoded char: 32-bit <fontx><font><c2><c>
1546  * fontx: non-bmp utf8
1547  * c2: multi-byte character
1548  * font is always zero for utf8
1549  * returns: -1 need more bytes
1550  *          -2 decode error
1551  */
1552 
1553 
1554 int
DecodeChar(c,encoding,statep)1555 DecodeChar(c, encoding, statep)
1556 int c;
1557 int encoding;
1558 int *statep;
1559 {
1560   int t;
1561 
1562   debug2("Decoding char %02x for encoding %d\n", c, encoding);
1563 #ifdef UTF8
1564   if (encoding == UTF8)
1565     {
1566       c = FromUtf8(c, statep);
1567       if (c >= 0x10000)
1568 	c = (c & 0x7f0000) << 8 | (c & 0xffff);
1569       return c;
1570     }
1571 #endif
1572   if (encoding == SJIS)
1573     {
1574       if (!*statep)
1575 	{
1576 	  if ((0x81 <= c && c <= 0x9f) || (0xe0 <= c && c <= 0xef))
1577 	    {
1578 	      *statep = c;
1579 	      return -1;
1580 	    }
1581 	  if (c < 0x80)
1582 	    return c;
1583 	  return c | (KANA << 16);
1584 	}
1585       t = c;
1586       c = *statep;
1587       *statep = 0;
1588       if (0x40 <= t && t <= 0xfc && t != 0x7f)
1589 	{
1590 	  if (c <= 0x9f)
1591 	    c = (c - 0x81) * 2 + 0x21;
1592 	  else
1593 	    c = (c - 0xc1) * 2 + 0x21;
1594 	  if (t <= 0x7e)
1595 	    t -= 0x1f;
1596 	  else if (t <= 0x9e)
1597 	    t -= 0x20;
1598 	  else
1599 	     t -= 0x7e, c++;
1600 	  return (c << 8) | t | (KANJI << 16);
1601 	}
1602       return t;
1603     }
1604   if (encoding == EUC_JP || encoding == EUC_KR || encoding == EUC_CN)
1605     {
1606       if (!*statep)
1607 	{
1608 	  if (c & 0x80)
1609 	    {
1610 	      *statep = c;
1611 	      return -1;
1612 	    }
1613 	  return c;
1614 	}
1615       t = c;
1616       c = *statep;
1617       *statep = 0;
1618       if (encoding == EUC_JP)
1619 	{
1620 	  if (c == 0x8e)
1621 	    return t | (KANA << 16);
1622 	  if (c == 0x8f)
1623 	    {
1624 	      *statep = t | (KANJI0212 << 8);
1625 	      return -1;
1626 	    }
1627 	}
1628       c &= 0xff7f;
1629       t &= 0x7f;
1630       c = c << 8 | t;
1631       if (encoding == EUC_KR)
1632 	return c | (3 << 16);
1633       if (encoding == EUC_CN)
1634 	return c | (1 << 16);
1635       if (c & (KANJI0212 << 16))
1636         return c;
1637       else
1638         return c | (KANJI << 16);
1639     }
1640   if (encoding == BIG5 || encoding == GBK)
1641     {
1642       if (!*statep)
1643 	{
1644 	  if (c & 0x80)
1645 	    {
1646 	      if (encoding == GBK && c == 0x80)
1647 		return 0xa4 | (('b'|0x80) << 16);
1648 	      *statep = c;
1649 	      return -1;
1650 	    }
1651 	  return c;
1652 	}
1653       t = c;
1654       c = *statep;
1655       *statep = 0;
1656       c &= 0x7f;
1657       return c << 8 | t | (encoding == BIG5  ? 030 << 16 : 031 << 16);
1658     }
1659   return c | (encodings[encoding].deffont << 16);
1660 }
1661 
1662 int
EncodeChar(bp,c,encoding,fontp)1663 EncodeChar(bp, c, encoding, fontp)
1664 char *bp;
1665 int c;
1666 int encoding;
1667 int *fontp;
1668 {
1669   int t, f, l;
1670 
1671   debug2("Encoding char %02x for encoding %d\n", c, encoding);
1672   if (c == -1 && fontp)
1673     {
1674       if (*fontp == 0)
1675 	return 0;
1676       if (bp)
1677 	{
1678 	  *bp++ = 033;
1679 	  *bp++ = '(';
1680 	  *bp++ = 'B';
1681 	}
1682       return 3;
1683     }
1684   f = (c >> 16) & 0xff;
1685 
1686 #ifdef UTF8
1687   if (encoding == UTF8)
1688     {
1689       if (f)
1690 	{
1691 # ifdef DW_CHARS
1692 	  if (is_dw_font(f))
1693 	    {
1694 	      int c2 = c & 0xff;
1695 	      c = (c >> 8 & 0xff) | (f << 8);
1696 	      c = recode_char_dw_to_encoding(c, &c2, encoding);
1697 	    }
1698 	  else
1699 # endif
1700 	    {
1701 	      c = (c & 0xff) | (f << 8);
1702 	      c = recode_char_to_encoding(c, encoding);
1703 	    }
1704         }
1705       return ToUtf8(bp, c);
1706     }
1707   if (f == 0 && (c & 0x7f00ff00) != 0)	/* is_utf8? */
1708     {
1709       if (c >= 0x10000)
1710 	c = (c & 0x7f0000) >> 8 | (c & 0xffff);
1711 # ifdef DW_CHARS
1712       if (utf8_isdouble(c))
1713 	{
1714 	  int c2 = 0xffff;
1715 	  c = recode_char_dw_to_encoding(c, &c2, encoding);
1716 	  c = (c << 8) | (c2 & 0xff);
1717 	}
1718       else
1719 # endif
1720 	{
1721 	  c = recode_char_to_encoding(c, encoding);
1722 	  c = ((c & 0xff00) << 8) | (c & 0xff);
1723 	}
1724       debug1("Encode: char mapped from utf8 to %x\n", c);
1725       f = c >> 16;
1726     }
1727 #endif
1728   if (f & 0x80)		/* map special 96-fonts to latin1 */
1729     f = 0;
1730 
1731   if (encoding == SJIS)
1732     {
1733       if (f == KANA)
1734         c = (c & 0xff) | 0x80;
1735       else if (f == KANJI)
1736 	{
1737 	  if (!bp)
1738 	    return 2;
1739 	  t = c & 0xff;
1740 	  c = (c >> 8) & 0xff;
1741 	  t += (c & 1) ? ((t <= 0x5f) ? 0x1f : 0x20) : 0x7e;
1742 	  c = (c - 0x21) / 2 + ((c < 0x5f) ? 0x81 : 0xc1);
1743 	  *bp++ = c;
1744 	  *bp++ = t;
1745 	  return 2;
1746 	}
1747     }
1748   if (encoding == EUC)
1749     {
1750       if (f == KANA)
1751 	{
1752 	  if (bp)
1753 	    {
1754 	      *bp++ = 0x8e;
1755 	      *bp++ = c;
1756 	    }
1757 	  return 2;
1758 	}
1759       if (f == KANJI)
1760 	{
1761 	  if (bp)
1762 	    {
1763 	      *bp++ = (c >> 8) | 0x80;
1764 	      *bp++ = c | 0x80;
1765 	    }
1766 	  return 2;
1767 	}
1768       if (f == KANJI0212)
1769 	{
1770 	  if (bp)
1771 	    {
1772 	      *bp++ = 0x8f;
1773 	      *bp++ = c >> 8;
1774 	      *bp++ = c;
1775 	    }
1776 	  return 3;
1777 	}
1778     }
1779   if ((encoding == EUC_KR && f == 3) || (encoding == EUC_CN && f == 1))
1780     {
1781       if (bp)
1782 	{
1783 	  *bp++ = (c >> 8) | 0x80;
1784 	  *bp++ = c | 0x80;
1785 	}
1786       return 2;
1787     }
1788   if ((encoding == BIG5 && f == 030) || (encoding == GBK && f == 031))
1789     {
1790       if (bp)
1791 	{
1792 	  *bp++ = (c >> 8) | 0x80;
1793 	  *bp++ = c;
1794 	}
1795       return 2;
1796     }
1797   if (encoding == GBK && f == 0 && c == 0xa4)
1798     c = 0x80;
1799 
1800   l = 0;
1801   if (fontp && f != *fontp)
1802     {
1803       *fontp = f;
1804       if (f && f < ' ')
1805 	{
1806 	  if (bp)
1807 	   {
1808 	     *bp++ = 033;
1809 	     *bp++ = '$';
1810 	     if (f > 2)
1811 	       *bp++ = '(';
1812 	     *bp++ = '@' + f;
1813 	   }
1814 	  l += f > 2 ? 4 : 3;
1815 	}
1816       else if (f < 128)
1817 	{
1818 	  if (f == 0)
1819 	    f = 'B';
1820 	  if (bp)
1821 	    {
1822 	      *bp++ = 033;
1823 	      *bp++ = '(';
1824 	      *bp++ = f;
1825 	    }
1826 	  l += 3;
1827 	}
1828     }
1829   if (c & 0xff00)
1830     {
1831       if (bp)
1832 	*bp++ = c >> 8;
1833       l++;
1834     }
1835   if (bp)
1836     *bp++ = c;
1837   return l + 1;
1838 }
1839 
1840 int
CanEncodeFont(encoding,f)1841 CanEncodeFont(encoding, f)
1842 int encoding, f;
1843 {
1844   switch(encoding)
1845     {
1846 #ifdef UTF8
1847     case UTF8:
1848       return 1;
1849 #endif
1850     case SJIS:
1851       return f == KANJI || f == KANA;
1852     case EUC:
1853       return f == KANJI || f == KANA || f == KANJI0212;
1854     case EUC_KR:
1855       return f == 3;
1856     case EUC_CN:
1857       return f == 1;
1858     case BIG5:
1859       return f == 030;
1860     case GBK:
1861       return f == 031;
1862     default:
1863       break;
1864     }
1865   return 0;
1866 }
1867 
1868 #ifdef DW_CHARS
1869 int
PrepareEncodedChar(c)1870 PrepareEncodedChar(c)
1871 int c;
1872 {
1873   int encoding;
1874   int t = 0;
1875   int f;
1876 
1877   encoding = D_encoding;
1878   f = D_rend.font;
1879   t = D_mbcs;
1880   if (encoding == SJIS)
1881     {
1882       if (f == KANA)
1883         return c | 0x80;
1884       else if (f == KANJI)
1885 	{
1886 	  t += (c & 1) ? ((t <= 0x5f) ? 0x1f : 0x20) : 0x7e;
1887 	  c = (c - 0x21) / 2 + ((c < 0x5f) ? 0x81 : 0xc1);
1888 	  D_mbcs = t;
1889 	}
1890       return c;
1891     }
1892   if (encoding == EUC)
1893     {
1894       if (f == KANA)
1895 	{
1896 	  AddChar(0x8e);
1897 	  return c | 0x80;
1898 	}
1899       if (f == KANJI)
1900 	{
1901 	  D_mbcs = t | 0x80;
1902 	  return c | 0x80;
1903 	}
1904       if (f == KANJI0212)
1905 	{
1906 	  AddChar(0x8f);
1907 	  D_mbcs = t | 0x80;
1908 	  return c | 0x80;
1909 	}
1910     }
1911   if ((encoding == EUC_KR && f == 3) || (encoding == EUC_CN && f == 1))
1912     {
1913       D_mbcs = t | 0x80;
1914       return c | 0x80;
1915     }
1916   if ((encoding == BIG5 && f == 030) || (encoding == GBK && f == 031))
1917     return c | 0x80;
1918   return c;
1919 }
1920 #endif
1921 
/*
 * Convert flen bytes at fbuf from encoding fenc to encoding tenc.
 * The result is written to tbuf; if tbuf is null nothing is written
 * and only the length is computed.  A final EncodeChar(-1) call is
 * appended so that a font designated along the way is reset.
 * Returns the number of output bytes.
 */
int
RecodeBuf(fbuf, flen, fenc, tenc, tbuf)
unsigned char *fbuf;
int flen;
int fenc, tenc;
unsigned char *tbuf;
{
  int pos, ch;
  int out = 0;
  int state = 0, font = 0;

  for (pos = 0; pos < flen; pos++)
    {
      ch = DecodeChar(fbuf[pos], fenc, &state);
      if (ch >= 0)
        out += EncodeChar(tbuf ? (char *)tbuf + out : 0, ch, tenc, &font);
      else if (ch == -2)
        pos--;		/* decode error: feed the same byte again */
    }
  out += EncodeChar(tbuf ? (char *)tbuf + out : 0, -1, tenc, &font);
  return out;
}
1945 
1946 #ifdef UTF8
1947 int
ContainsSpecialDeffont(ml,xs,xe,encoding)1948 ContainsSpecialDeffont(ml, xs, xe, encoding)
1949 struct mline *ml;
1950 int xs, xe;
1951 int encoding;
1952 {
1953   unsigned char *f, *i;
1954   int c, x, dx;
1955 
1956   if (encoding == UTF8 || encodings[encoding].deffont == 0)
1957     return 0;
1958   i = ml->image + xs;
1959   f = ml->font + xs;
1960   dx = xe - xs + 1;
1961   while (dx-- > 0)
1962     {
1963       if (*f++)
1964 	continue;
1965       c = *i++;
1966       x = recode_char_to_encoding(c | (encodings[encoding].deffont << 8), UTF8);
1967       if (c != x)
1968 	{
1969 	  debug2("ContainsSpecialDeffont: yes %02x != %02x\n", c, x);
1970 	  return 1;
1971 	}
1972     }
1973   debug("ContainsSpecialDeffont: no\n");
1974   return 0;
1975 }
1976 
1977 
1978 int
LoadFontTranslation(font,file)1979 LoadFontTranslation(font, file)
1980 int font;
1981 char *file;
1982 {
1983   char buf[1024], *myfile;
1984   FILE *f;
1985   int i;
1986   int fo;
1987   int x, u, c, ok;
1988   unsigned short (*p)[2], (*tab)[2];
1989 
1990   myfile = file;
1991   if (myfile == 0)
1992     {
1993       if (font == 0 || screenencodings == 0)
1994 	return -1;
1995       if (strlen(screenencodings) > sizeof(buf) - 10)
1996 	return -1;
1997       sprintf(buf, "%s/%02x", screenencodings, font & 0xff);
1998       myfile = buf;
1999     }
2000   debug1("LoadFontTranslation: trying %s\n", myfile);
2001   if ((f = secfopen(myfile, "r")) == 0)
2002     return -1;
2003   i = ok = 0;
2004   for (;;)
2005     {
2006       for(; i < 12; i++)
2007 	if (getc(f) != "ScreenI2UTF8"[i])
2008 	  break;
2009       if (getc(f) != 0)		/* format */
2010 	break;
2011       fo = getc(f);		/* id */
2012       if (fo == EOF)
2013 	break;
2014       if (font != -1 && font != fo)
2015 	break;
2016       i = getc(f);
2017       x = getc(f);
2018       if (x == EOF)
2019 	break;
2020       i = i << 8 | x;
2021       getc(f);
2022       while ((x = getc(f)) && x != EOF)
2023 	getc(f); 	/* skip font name (padded to 2 bytes) */
2024       if ((p = malloc(sizeof(*p) * (i + 1))) == 0)
2025 	break;
2026       tab = p;
2027       while(i > 0)
2028 	{
2029 	  x = getc(f);
2030 	  x = x << 8 | getc(f);
2031 	  u = getc(f);
2032 	  c = getc(f);
2033 	  u = u << 8 | c;
2034 	  if (c == EOF)
2035 	    break;
2036 	  (*p)[0] = x;
2037 	  (*p)[1] = u;
2038 	  p++;
2039 	  i--;
2040 	}
2041       (*p)[0] = 0;
2042       (*p)[1] = 0;
2043       if (i || (tab[0][0] & 0x8000))
2044 	{
2045 	  free(tab);
2046 	  break;
2047 	}
2048       if (recodetabs[fo].tab && (recodetabs[fo].flags & RECODETAB_ALLOCED) != 0)
2049 	free(recodetabs[fo].tab);
2050       recodetabs[fo].tab = tab;
2051       recodetabs[fo].flags = RECODETAB_ALLOCED;
2052       debug1("Successful load of recodetab %02x\n", fo);
2053       c = getc(f);
2054       if (c == EOF)
2055 	{
2056 	  ok = 1;
2057 	  break;
2058 	}
2059       if (c != 'S')
2060 	break;
2061       i = 1;
2062     }
2063   fclose(f);
2064   if (font != -1 && file == 0 && recodetabs[font].flags == 0)
2065     recodetabs[font].flags = RECODETAB_TRIED;
2066   return ok ? 0 : -1;
2067 }
2068 
2069 void
LoadFontTranslationsForEncoding(encoding)2070 LoadFontTranslationsForEncoding(encoding)
2071 int encoding;
2072 {
2073   char *c;
2074   int f;
2075 
2076   debug1("LoadFontTranslationsForEncoding: encoding %d\n", encoding);
2077   if ((c = encodings[encoding].fontlist) != 0)
2078     while ((f = (unsigned char)*c++) != 0)
2079       if (recodetabs[f].flags == 0)
2080 	  LoadFontTranslation(f, 0);
2081   f = encodings[encoding].deffont;
2082   if (f > 0 && recodetabs[f].flags == 0)
2083     LoadFontTranslation(f, 0);
2084 }
2085 
2086 #endif /* UTF8 */
2087 
2088 #else /* !ENCODINGS */
2089 
/*
 * Reduced EncodeChar for builds without ENCODINGS: it only encodes
 * font changes, for copy/paste mode.
 *
 * The font is taken from c >> 16 (0 for c == -1).  If fontp is given
 * and the font differs from *fontp, *fontp is updated and a
 * designating escape sequence is produced.  Unless c is -1 the
 * character byte(s) follow.  With a null bp nothing is written.
 * The encoding argument is not used.
 * Returns the number of bytes produced.
 */
int
EncodeChar(bp, c, encoding, fontp)
char *bp;
int c;
int encoding;
int *fontp;
{
  int font = 0;
  int len = 0;

  if (c != -1)
    font = c >> 16;

  if (fontp && *fontp != font)
    {
      *fontp = font;
      if (font != 0 && font < ' ')
        {
          int longform = font > 2;

          if (bp)
            {
              *bp++ = 033;
              *bp++ = '$';
              if (longform)
                *bp++ = '(';
              *bp++ = '@' + font;
            }
          len += longform ? 4 : 3;
        }
      else if (font < 128)
        {
          if (bp)
            {
              *bp++ = 033;
              *bp++ = '(';
              *bp++ = font ? font : 'B';	/* font 0 is designated as 'B' */
            }
          len += 3;
        }
    }

  if (c == -1)
    return len;

  if (c & 0xff00)
    {
      if (bp)
        *bp++ = c >> 8;
      len++;
    }
  if (bp)
    *bp++ = c;
  return len + 1;
}
2143 
2144 #endif /* ENCODINGS */
2145