1 /*
2     $Id: unicode.c 2551 2021-03-20 01:14:37Z soci $
3 
4     This program is free software; you can redistribute it and/or modify
5     it under the terms of the GNU General Public License as published by
6     the Free Software Foundation; either version 2 of the License, or
7     (at your option) any later version.
8 
9     This program is distributed in the hope that it will be useful,
10     but WITHOUT ANY WARRANTY; without even the implied warranty of
11     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
12     GNU General Public License for more details.
13 
14     You should have received a copy of the GNU General Public License along
15     with this program; if not, write to the Free Software Foundation, Inc.,
16     51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
17 
18 */
19 #include "unicode.h"
20 #include "wchar.h"
21 #include "wctype.h"
22 #include <ctype.h>
23 #include <string.h>
24 #include "error.h"
25 #include "unicodedata.h"
26 #include "str.h"
27 #include "console.h"
28 
/* Option bits passed to udecompose() in its "options" argument. */
enum {
    U_CASEFOLD = 1, /* use the casefold mapping of a character when it has one */
    U_COMPAT = 2    /* also follow decompositions flagged pr_compat */
};
30 
utf8in(const uint8_t * c,uchar_t * out)31 FAST_CALL unsigned int utf8in(const uint8_t *c, uchar_t *out) { /* only for internal use with validated utf-8! */
32     unsigned int i, j;
33     uchar_t ch = c[0];
34 
35     if (ch < 0xe0) {
36         *out = (ch << 6) ^ c[1] ^ 0x3080;
37         return 2;
38     }
39     if (ch < 0xf0) {
40         ch ^= 0xe0;i = 3;
41     } else if (ch < 0xf8) {
42         ch ^= 0xf0;i = 4;
43     } else if (ch < 0xfc) {
44         ch ^= 0xf8;i = 5;
45     } else {
46         ch ^= 0xfc;i = 6;
47     }
48 
49     for (j = 1;j < i; j++) {
50         ch = (ch << 6) ^ c[j] ^ 0x80;
51     }
52     *out = ch;
53     return i;
54 }
55 
utf8out(uchar_t i,uint8_t * c)56 FAST_CALL unsigned int utf8out(uchar_t i, uint8_t *c) {
57     if (i < 0x800) {
58         c[0] = (uint8_t)(0xc0 | (i >> 6));
59         c[1] = (uint8_t)(0x80 | (i & 0x3f));
60         return 2;
61     }
62     if (i < 0x10000) {
63         c[0] = (uint8_t)(0xe0 | (i >> 12));
64         c[1] = (uint8_t)(0x80 | ((i >> 6) & 0x3f));
65         c[2] = (uint8_t)(0x80 | (i & 0x3f));
66         return 3;
67     }
68     if (i < 0x200000) {
69         c[0] = (uint8_t)(0xf0 | (i >> 18));
70         c[1] = (uint8_t)(0x80 | ((i >> 12) & 0x3f));
71         c[2] = (uint8_t)(0x80 | ((i >> 6) & 0x3f));
72         c[3] = (uint8_t)(0x80 | (i & 0x3f));
73         return 4;
74     }
75     if (i < 0x4000000) {
76         c[0] = (uint8_t)(0xf8 | (i >> 24));
77         c[1] = (uint8_t)(0x80 | ((i >> 18) & 0x3f));
78         c[2] = (uint8_t)(0x80 | ((i >> 12) & 0x3f));
79         c[3] = (uint8_t)(0x80 | ((i >> 6) & 0x3f));
80         c[4] = (uint8_t)(0x80 | (i & 0x3f));
81         return 5;
82     }
83     if ((i & ~(uchar_t)0x7fffffff) != 0) return 0;
84     c[0] = (uint8_t)(0xfc | (i >> 30));
85     c[1] = (uint8_t)(0x80 | ((i >> 24) & 0x3f));
86     c[2] = (uint8_t)(0x80 | ((i >> 18) & 0x3f));
87     c[3] = (uint8_t)(0x80 | ((i >> 12) & 0x3f));
88     c[4] = (uint8_t)(0x80 | ((i >> 6) & 0x3f));
89     c[5] = (uint8_t)(0x80 | (i & 0x3f));
90     return 6;
91 }
92 
/*
 * Number of bytes needed for i in the multi-byte encoding: 2 below 0x800,
 * then one more byte at each of 0x800, 0x10000, 0x200000 and 0x4000000.
 */
static inline unsigned int utf8outlen(uchar_t i) {
    unsigned int len = 2;

    if (i >= 0x800) len++;
    if (i >= 0x10000) len++;
    if (i >= 0x200000) len++;
    if (i >= 0x4000000) len++;
    return len;
}
100 
extend_ubuff(struct ubuff_s * d)101 MUST_CHECK bool extend_ubuff(struct ubuff_s *d) {
102     uint32_t len = d->len + 16;
103     uchar_t *data;
104     if (len < 16 || ((size_t)len + 0) > SIZE_MAX / sizeof *data) return true;
105     data = (uchar_t *)realloc(d->data, len * sizeof *data);
106     if (data == NULL) return true;
107     d->data = data;
108     d->len = len;
109     return false;
110 }
111 
udecompose(uchar_t ch,struct ubuff_s * d,int options)112 static MUST_CHECK bool udecompose(uchar_t ch, struct ubuff_s *d, int options) {
113     const struct properties_s *prop;
114     if (ch >= 0xac00 && ch <= 0xd7a3) {
115         uchar_t ht, hs = ch - 0xac00;
116         if (d->p + 3 > d->len && extend_ubuff(d)) return true;
117         d->data[d->p++] = 0x1100 + hs / 588;
118         d->data[d->p++] = 0x1161 + (hs % 588) / 28;
119         ht = hs % 28;
120         if (ht != 0) {
121             d->data[d->p++] = 0x11a7 + ht;
122         }
123         return false;
124     }
125     prop = uget_property(ch);
126     if ((options & U_CASEFOLD) != 0 && prop->casefold != 0) {
127         if (prop->casefold > 0) {
128             if (d->p >= d->len && extend_ubuff(d)) return true;
129             d->data[d->p++] = (uint16_t)prop->casefold;
130             return false;
131         }
132         if (prop->casefold > -16384) {
133             const int16_t *p;
134             for (p = &usequences[-prop->casefold];; p++) {
135                 if (d->p >= d->len && extend_ubuff(d)) return true;
136                 d->data[d->p++] = (uint16_t)abs(*p);
137                 if (*p < 0) return false;
138             }
139         } else {
140             const int32_t *p;
141             for (p = &usequences2[-prop->casefold - 16384];; p++) {
142                 if (d->p >= d->len && extend_ubuff(d)) return true;
143                 d->data[d->p++] = (uint32_t)abs(*p);
144                 if (*p < 0) return false;
145             }
146         }
147     }
148     if (prop->decompose != 0) {
149         if ((prop->property & pr_compat) == 0 || (options & U_COMPAT) != 0) {
150             if (prop->decompose > 0) {
151                 return udecompose((uint16_t)prop->decompose, d, options);
152             }
153             if (prop->decompose > -16384) {
154                 const int16_t *p;
155                 for (p = &usequences[-prop->decompose];; p++) {
156                     uchar_t ch2 = (uint16_t)abs(*p);
157                     if (ch2 < 0x80 || (uint16_t)(ch2 - 0x300) < 0x40U) {
158                         if (d->p >= d->len && extend_ubuff(d)) return true;
159                         d->data[d->p++] = ch2;
160                     } else if (udecompose(ch2, d, options)) return true;
161                     if (*p < 0) return false;
162                 }
163             } else {
164                 const int32_t *p;
165                 for (p = &usequences2[-prop->decompose - 16384];; p++) {
166                     if (udecompose((uint32_t)abs(*p), d, options)) return true;
167                     if (*p < 0) return false;
168                 }
169             }
170         }
171     }
172     if (d->p >= d->len && extend_ubuff(d)) return true;
173     d->data[d->p++] = ch;
174     return false;
175 }
176 
unormalize(struct ubuff_s * d)177 static void unormalize(struct ubuff_s *d) {
178     uint32_t pos, max;
179     if (d->p < 2) return;
180     pos = 0;
181     max = d->p - 1;
182     while (pos < max) {
183         uchar_t ch2 = d->data[pos + 1];
184         if (ch2 >= 0x300) {
185             uint8_t cc2 = uget_property(ch2)->combclass;
186             if (cc2 != 0) {
187                 uchar_t ch1 = d->data[pos];
188                 uint8_t cc1 = uget_property(ch1)->combclass;
189                 if (cc1 > cc2) {
190                     d->data[pos] = ch2;
191                     d->data[pos + 1] = ch1;
192                     if (pos != 0) {
193                         pos--;
194                         continue;
195                     }
196                 }
197             }
198         }
199         pos++;
200     }
201 }
202 
ucompose(const struct ubuff_s * buff,struct ubuff_s * d)203 static MUST_CHECK bool ucompose(const struct ubuff_s *buff, struct ubuff_s *d) {
204     const struct properties_s *prop, *sprop = NULL;
205     uchar_t ch;
206     int mclass = -1;
207     uint32_t i, sp = ~(uint32_t)0;
208     d->p = 0;
209     for (i = 0; i < buff->p; i++) {
210         ch = buff->data[i];
211         prop = uget_property(ch);
212         if (sp != ~(uint32_t)0 && prop->combclass > mclass) {
213             uchar_t sc = d->data[sp];
214             if (sc >= 0xac00) {
215                 uchar_t hs = sc - 0xac00;
216                 if (hs < 588*19 && (hs % 28) == 0) {
217                     if (ch >= 0x11a7 && ch < 0x11a7 + 28) {
218                         d->data[sp] = sc + ch - 0x11a7;
219                         sprop = NULL;
220                         continue;
221                     }
222                 }
223             } else if (sc >= 0x1100 && sc < 0x1100 + 19 && ch >= 0x1161 && ch < 0x1161 + 21) {
224                 d->data[sp] = 0xac00 + (ch - 0x1161 + (sc - 0x1100) * 21) * 28;
225                 sprop = NULL;
226                 continue;
227             }
228             if (sprop == NULL) sprop = uget_property(sc);
229             if (sprop->base >= 0 && prop->diar >= 0) {
230                 int16_t comp = ucomposing[sprop->base + prop->diar];
231                 if (comp != 0) {
232                     d->data[sp] = (comp > 0) ? (uint16_t)comp : ucomposed[-comp];
233                     sprop = NULL;
234                     continue;
235                 }
236             }
237         }
238         if (prop->combclass != 0) {
239             if (prop->combclass > mclass) {
240                 mclass = prop->combclass;
241             }
242         } else {
243             sp = d->p;
244             sprop = prop;
245             mclass = -1;
246         }
247         if (d->p >= d->len && extend_ubuff(d)) return true;
248         d->data[d->p++] = ch;
249     }
250     return false;
251 }
252 
unfc(struct ubuff_s * b)253 MUST_CHECK bool unfc(struct ubuff_s *b) {
254     uint32_t i;
255     static struct ubuff_s dbuf;
256     if (b == NULL) {
257         free(dbuf.data);
258         return false;
259     }
260     for (dbuf.p = i = 0; i < b->p; i++) {
261         if (udecompose(b->data[i], &dbuf, 0)) return true;
262     }
263     unormalize(&dbuf);
264     return ucompose(&dbuf, b);
265 }
266 
unfkc(str_t * s1,const str_t * s2,int mode)267 MUST_CHECK bool unfkc(str_t *s1, const str_t *s2, int mode) {
268     const uint8_t *d;
269     uint8_t *s;
270     size_t i, l;
271     uint32_t j;
272     static struct ubuff_s dbuf, dbuf2;
273     if (s2 == NULL) {
274         free(dbuf.data);
275         free(dbuf2.data);
276         return false;
277     }
278     mode = ((mode != 0) ? U_CASEFOLD : 0) | U_COMPAT;
279     d = s2->data;
280     dbuf.p = 0;
281     for (i = 0; i < s2->len;) {
282         uchar_t ch = d[i];
283         if ((ch & 0x80) != 0) {
284             i += utf8in(d + i, &ch);
285             if (udecompose(ch, &dbuf, mode)) return true;
286             continue;
287         }
288         if ((mode & U_CASEFOLD) != 0 && ch >= 'A' && ch <= 'Z') ch |= 0x20;
289         if (dbuf.p >= dbuf.len && extend_ubuff(&dbuf)) return true;
290         dbuf.data[dbuf.p++] = ch;
291         i++;
292     }
293     unormalize(&dbuf);
294     if (ucompose(&dbuf, &dbuf2)) return true;
295     l = 0;
296     for (j = 0; j < dbuf2.p; j++) {
297         uchar_t ch = dbuf2.data[j];
298         l += (ch != 0 && ch < 0x80) ? 1 : utf8outlen(ch);
299     }
300     s = (uint8_t *)s1->data;
301     if (l > s1->len) {
302         s = (uint8_t *)realloc(s, l);
303         if (s == NULL) return true;
304         s1->data = s;
305     }
306     s1->len = l;
307     for (j = 0; j < dbuf2.p; j++) {
308         uchar_t ch = dbuf2.data[j];
309         if (ch != 0 && ch < 0x80) {
310             *s++ = (uint8_t)ch;
311             continue;
312         }
313         s += utf8out(ch, s);
314     }
315     return false;
316 }
317 
argv_print(const char * line,FILE * f)318 size_t argv_print(const char *line, FILE *f) {
319     size_t len = 0;
320 #ifdef _WIN32
321     size_t i = 0, back;
322     bool quote = false, space = false;
323 
324     for (;;i++) {
325         switch (line[i]) {
326         case '%':
327         case '"': quote = true; if (!space) continue; break;
328         case ' ': space = true; if (!quote) continue; break;
329         case 0: break;
330         default: continue;
331         }
332         break;
333     }
334 
335     if (space) {
336         if (quote) {len++;putc('^', f);}
337         len++;putc('"', f);
338     }
339     i = 0; back = 0;
340     for (;;) {
341         int ch = line[i];
342         if ((ch & 0x80) != 0) {
343             uchar_t ch2 = (uint8_t)ch;
344             unsigned int ln = utf8in((const uint8_t *)line + i, &ch2);
345             if (iswprint((wint_t)ch2) != 0) {
346                 int ln2;
347                 char tmp[64];
348                 memcpy(tmp, line + i, ln);
349                 tmp[ln] = 0;
350                 ln2 = fwprintf(f, L"%S", tmp);
351                 if (ln2 > 0) {
352                     i += ln;
353                     len += (unsigned int)ln2;
354                     continue;
355                 }
356             }
357             i += ln;
358             len++;putc('?', f);
359             continue;
360         }
361         if (ch == 0) break;
362 
363         if (ch == '\\') {
364             back++;
365             i++;
366             len++;putc('\\', f);
367             continue;
368         }
369         if (!space || quote) {
370             if (strchr("()%!^<>&|\"", ch) != NULL) {
371                 if (ch == '"') {
372                     while ((back--) != 0) {len++;putc('\\', f);}
373                     len++;putc('\\', f);
374                 }
375                 len++;putc('^', f);
376             }
377         } else {
378             if (ch == '%') {
379                 len++;putc('^', f);
380             }
381         }
382         back = 0;
383 
384         i++;
385         if (isprint(ch) == 0) ch = '?';
386         len++;putc(ch, f);
387     }
388     if (space) {
389         while ((back--) != 0) {len++;putc('\\', f);}
390         if (quote) {len++;putc('^', f);}
391         len++;putc('"', f);
392     }
393 #else
394     size_t i;
395     bool quote = false;
396 
397     for (i = 0;line[i] != 0;i++) {
398         if (line[i] == '!') break;
399         if (strchr(" \"$&()*;<>'?[\\]`{|}", line[i]) != NULL) quote = true;
400     }
401     if (line[i] != 0) quote = false;
402     if (quote) {len++;putc('"', f);}
403     else {
404         switch (line[0]) {
405         case '~':
406         case '#': len++;putc('\\', f); break;
407         }
408     }
409     i = 0;
410     for (;;) {
411         int ch = line[i];
412         if ((ch & 0x80) != 0) {
413             uchar_t ch2 = (uint8_t)ch;
414             int ln2;
415             i += utf8in((const uint8_t *)line + i, &ch2);
416             if (iswprint((wint_t)ch2) != 0) {
417                 mbstate_t ps;
418                 char temp[64];
419                 size_t ln;
420                 memset(&ps, 0, sizeof ps);
421                 ln = wcrtomb(temp, (wchar_t)ch2, &ps);
422                 if (ln != (size_t)-1) {
423                     len += fwrite(temp, ln, 1, f);
424                     continue;
425                 }
426             }
427             ln2 = fprintf(f, ch2 < 0x10000 ? "$'\\u%" PRIx32 "'" : "$'\\U%" PRIx32 "'", ch2);
428             if (ln2 > 0) len += (size_t)ln2;
429             continue;
430         }
431         if (ch == 0) break;
432 
433         if (quote) {
434             if (strchr("$`\"\\", ch) != NULL) {len++;putc('\\', f);}
435         } else {
436             if (strchr(" !\"$&()*;<>'?[\\]`{|}", ch) != NULL) {
437                 len++;putc('\\', f);
438             }
439         }
440 
441         i++;
442         if (isprint(ch) == 0) {
443             int ln = fprintf(f, "$'\\x%x'", ch);
444             if (ln > 0) len += (size_t)ln;
445             continue;
446         }
447         len++;putc(ch, f);
448     }
449     if (quote) {len++;putc('"', f);}
450 #endif
451     return len;
452 }
453 
makefile_print(const char * line,FILE * f)454 size_t makefile_print(const char *line, FILE *f) {
455     size_t len = 0, i = 0, bl = 0;
456 
457     for (;;) {
458         int ch = line[i];
459         if ((ch & 0x80) != 0) {
460             uchar_t ch2 = (uint8_t)ch;
461 #ifdef _WIN32
462             unsigned int ln = utf8in((const uint8_t *)line + i, &ch2);
463             if (iswprint((wint_t)ch2) != 0) {
464                 int ln2;
465                 char tmp[64];
466                 memcpy(tmp, line + i, ln);
467                 tmp[ln] = 0;
468                 ln2 = fwprintf(f, L"%S", tmp);
469                 if (ln2 > 0) {
470                     i += ln;
471                     len += (unsigned int)ln2;
472                     bl = 0;
473                     continue;
474                 }
475             }
476             i += ln;
477 #else
478             i += utf8in((const uint8_t *)line + i, &ch2);
479             if (iswprint((wint_t)ch2) != 0) {
480                 mbstate_t ps;
481                 char temp[64];
482                 size_t ln;
483                 memset(&ps, 0, sizeof ps);
484                 ln = wcrtomb(temp, (wchar_t)ch2, &ps);
485                 if (ln != (size_t)-1) {
486                     len += fwrite(temp, ln, 1, f);
487                     bl = 0;
488                     continue;
489                 }
490             }
491 #endif
492             len++;putc('?', f);
493             bl = 0;
494             continue;
495         }
496         if (ch == 0) break;
497 
498         switch (ch) {
499         case '\\':
500             bl++;
501             break;
502         case ' ':
503         case '#':
504             while (bl > 0) {
505                 len++; putc('\\', f);
506                 bl--;
507             }
508             putc('\\', f);
509             break;
510         case '$':
511             len++; putc('$', f);
512             /* fall through */
513         default:
514             bl = 0;
515             break;
516         }
517 
518         i++;
519         if (isprint(ch) == 0) ch = '?';
520         len++; putc(ch, f);
521     }
522     return len;
523 }
524 
/*
 * Print the placeholder for an unprintable character: "<XX>" for values
 * below 256 and "<U+XXXX>" otherwise, in upper case hexadecimal.
 *
 * With f == NULL nothing is output; the text is only formatted into a local
 * buffer to find out how long it would be. Otherwise it is written to f,
 * wrapped in console_reverse() and console_default()/console_defaultbold()
 * when console_use_color is set.
 *
 * Returns what sprintf/fprintf returned for the placeholder text.
 */
static int unknown_print(FILE *f, uchar_t ch) {
    const char *format;
    int ln;

    if (ch < 256) {
        format = "<%02" PRIX32 ">";
    } else {
        format = "<U+%" PRIX32 ">";
    }

    if (f == NULL) {
        char temp[64];
        return sprintf(temp, format, ch);
    }

    if (console_use_color) console_reverse(f);
    ln = fprintf(f, format, ch);
    if (console_use_color) {
        if (console_use_bold) {
            console_defaultbold(f);
        } else {
            console_default(f);
        }
    }
    return ln;
}
539 
printable_print(const uint8_t * line,FILE * f)540 void printable_print(const uint8_t *line, FILE *f) {
541 #ifdef _WIN32
542     size_t i = 0, l = 0;
543     for (;;) {
544         uchar_t ch = line[i];
545         if ((ch >= 0x20 && ch <= 0x7e) || ch == 0x09) {
546             i++;
547             continue;
548         }
549         if (l != i) fwrite(line + l, 1, i - l, f);
550         if (ch == 0) break;
551         if ((ch & 0x80) != 0) {
552             unsigned int ln = utf8in(line + i, &ch);
553             if (iswprint((wint_t)ch) != 0) {
554                 char tmp[64];
555                 memcpy(tmp, line + i, ln);
556                 tmp[ln] = 0;
557                 if (fwprintf(f, L"%S", tmp) > 0) {
558                     i += ln;
559                     l = i;
560                     continue;
561                 }
562             }
563             i += ln;
564         } else i++;
565         l = i;
566         unknown_print(f, ch);
567     }
568 #else
569     size_t i = 0, l = 0;
570     for (;;) {
571         uchar_t ch = line[i];
572         if ((ch >= 0x20 && ch <= 0x7e) || ch == 0x09) {
573             i++;
574             continue;
575         }
576         if (l != i) fwrite(line + l, 1, i - l, f);
577         if (ch == 0) break;
578         if ((ch & 0x80) != 0) {
579             i += utf8in(line + i, &ch);
580             if (iswprint((wint_t)ch) != 0) {
581                 mbstate_t ps;
582                 char temp[64];
583                 size_t ln;
584                 memset(&ps, 0, sizeof ps);
585                 ln = wcrtomb(temp, (wchar_t)ch, &ps);
586                 if (ln != (size_t)-1) {
587                     fwrite(temp, ln, 1, f);
588                     l = i;
589                     continue;
590                 }
591             }
592         } else i++;
593         unknown_print(f, ch);
594         l = i;
595     }
596 #endif
597 }
598 
printable_print2(const uint8_t * line,FILE * f,size_t max)599 size_t printable_print2(const uint8_t *line, FILE *f, size_t max) {
600 #ifdef _WIN32
601     size_t i, l = 0, len = 0;
602     int err;
603     for (i = 0; i < max;) {
604         uchar_t ch = line[i];
605         if ((ch & 0x80) != 0) {
606             unsigned int ln;
607             if (l != i) len += fwrite(line + l, 1, i - l, f);
608             ln = utf8in(line + i, &ch);
609             if (iswprint((wint_t)ch) != 0) {
610                 char tmp[64];
611                 memcpy(tmp, line + i, ln);
612                 tmp[ln] = 0;
613                 if (fwprintf(f, L"%S", tmp) >= 0) {
614                     i += ln;
615                     l = i;
616                     len++;
617                     continue;
618                 }
619             }
620             i += ln;
621             l = i;
622             err = unknown_print(f, ch);
623             if (err > 0) len += (size_t)err;
624             continue;
625         }
626         if ((ch < 0x20 && ch != 0x09) || ch > 0x7e) {
627             if (l != i) len += fwrite(line + l, 1, i - l, f);
628             i++;
629             l = i;
630             err = unknown_print(f, ch);
631             if (err > 0) len += (size_t)err;
632             continue;
633         }
634         i++;
635     }
636     if (i != l) len += fwrite(line + l, 1, i - l, f);
637     return len;
638 #else
639     size_t i, l = 0, len = 0;
640     int err;
641     for (i = 0; i < max;) {
642         uchar_t ch = line[i];
643         if ((ch & 0x80) != 0) {
644             if (l != i) len += fwrite(line + l, 1, i - l, f);
645             i += utf8in(line + i, &ch);
646             l = i;
647             if (iswprint((wint_t)ch) != 0) {
648                 mbstate_t ps;
649                 char temp[64];
650                 size_t ln;
651                 memset(&ps, 0, sizeof ps);
652                 ln = wcrtomb(temp, (wchar_t)ch, &ps);
653                 if (ln != (size_t)-1) {
654                     len += fwrite(temp, ln, 1, f); /* 1 character */
655                     continue;
656                 }
657             }
658             err = unknown_print(f, ch);
659             if (err > 0) len += (size_t)err;
660             continue;
661         }
662         if ((ch < 0x20 && ch != 0x09) || ch > 0x7e) {
663             if (l != i) len += fwrite(line + l, 1, i - l, f);
664             i++;
665             l = i;
666             err = unknown_print(f, ch);
667             if (err > 0) len += (size_t)err;
668             continue;
669         }
670         i++;
671     }
672     if (i != l) len += fwrite(line + l, 1, i - l, f);
673     return len;
674 #endif
675 }
676 
caret_print(const uint8_t * line,FILE * f,size_t max)677 void caret_print(const uint8_t *line, FILE *f, size_t max) {
678     size_t i, l = 0;
679     int err;
680     for (i = 0; i < max;) {
681         uchar_t ch = line[i];
682         if ((ch & 0x80) != 0) {
683 #ifdef _WIN32
684             unsigned int ln = utf8in(line + i, &ch);
685             if (iswprint((wint_t)ch) != 0) {
686                 char tmp[64];
687                 wchar_t tmp2[64];
688                 memcpy(tmp, line + i, ln);
689                 tmp[ln] = 0;
690                 if (swprintf(tmp2, lenof(tmp2), L"%S", tmp) > 0) {
691                     int width = wcwidth_v9(ch);
692                     if (width > 0) l += (unsigned int)width;
693                     i += ln;
694                     continue;
695                 }
696             }
697             i += ln;
698 #else
699             i += utf8in(line + i, &ch);
700             if (iswprint((wint_t)ch) != 0) {
701                 char temp[64];
702                 mbstate_t ps;
703                 memset(&ps, 0, sizeof ps);
704                 if (wcrtomb(temp, (wchar_t)ch, &ps) != (size_t)-1) {
705                     int width = wcwidth_v9(ch);
706                     if (width > 0) l += (unsigned int)width;
707                     continue;
708                 }
709             }
710 #endif
711             err = unknown_print(NULL, ch);
712             if (err > 0) l += (size_t)err;
713             continue;
714         }
715         if (ch == 0) break;
716         if (ch == '\t') {
717             while (l != 0) {
718                 putc(' ', f);
719                 l--;
720             }
721             putc('\t', f);
722             i++;
723             continue;
724         }
725         if (ch < 0x20 || ch > 0x7e) {
726             err = unknown_print(NULL, ch);
727             if (err > 0) l += (size_t)err;
728         } else l++;
729         i++;
730     }
731     while (l != 0) {
732         putc(' ', f);
733         l--;
734     }
735 }
736 
/*
 * Count the characters of line in front of byte offset pos. Each step
 * advances by utf8len() of the byte at the current offset. Counting stops
 * early at a zero byte.
 */
size_t calcpos(const uint8_t *line, size_t pos) {
    size_t chars = 0, off = 0;

    for (; off < pos && line[off] != 0; chars++) {
        off += utf8len(line[off]);
    }
    return chars;
}
747