1 /* $OpenBSD: utf8.c,v 1.65 2024/05/24 12:41:24 nicm Exp $ */
2
3 /*
4 * Copyright (c) 2008 Nicholas Marriott <nicholas.marriott@gmail.com>
5 *
6 * Permission to use, copy, modify, and distribute this software for any
7 * purpose with or without fee is hereby granted, provided that the above
8 * copyright notice and this permission notice appear in all copies.
9 *
10 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
11 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
12 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
13 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
14 * WHATSOEVER RESULTING FROM LOSS OF MIND, USE, DATA OR PROFITS, WHETHER
15 * IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING
16 * OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
17 */
18
19 #include <sys/types.h>
20
21 #include <ctype.h>
22 #include <errno.h>
23 #include <stdlib.h>
24 #include <string.h>
25 #include <vis.h>
26
27 #include "tmux.h"
28
/*
 * Code points that utf8_width() always reports as two columns wide, without
 * consulting wcwidth(). The table is searched with bsearch() by
 * utf8_in_table(), so entries must stay in ascending order.
 */
static const wchar_t utf8_force_wide[] = {
	0x0261D, 0x026F9, 0x0270A, 0x0270B, 0x0270C, 0x0270D,
	0x1F1E6, 0x1F1E7, 0x1F1E8, 0x1F1E9, 0x1F1EA, 0x1F1EB,
	0x1F1EC, 0x1F1ED, 0x1F1EE, 0x1F1EF, 0x1F1F0, 0x1F1F1,
	0x1F1F2, 0x1F1F3, 0x1F1F4, 0x1F1F5, 0x1F1F6, 0x1F1F7,
	0x1F1F8, 0x1F1F9, 0x1F1FA, 0x1F1FB, 0x1F1FC, 0x1F1FD,
	0x1F1FE, 0x1F1FF, 0x1F385, 0x1F3C2, 0x1F3C3, 0x1F3C4,
	0x1F3C7, 0x1F3CA, 0x1F3CB, 0x1F3CC, 0x1F3FB, 0x1F3FC,
	0x1F3FD, 0x1F3FE, 0x1F3FF, 0x1F442, 0x1F443, 0x1F446,
	0x1F447, 0x1F448, 0x1F449, 0x1F44A, 0x1F44B, 0x1F44C,
	0x1F44D, 0x1F44E, 0x1F44F, 0x1F450, 0x1F466, 0x1F467,
	0x1F468, 0x1F469, 0x1F46B, 0x1F46C, 0x1F46D, 0x1F46E,
	0x1F470, 0x1F471, 0x1F472, 0x1F473, 0x1F474, 0x1F475,
	0x1F476, 0x1F477, 0x1F478, 0x1F47C, 0x1F481, 0x1F482,
	0x1F483, 0x1F485, 0x1F486, 0x1F487, 0x1F48F, 0x1F491,
	0x1F4AA, 0x1F574, 0x1F575, 0x1F57A, 0x1F590, 0x1F595,
	0x1F596, 0x1F645, 0x1F646, 0x1F647, 0x1F64B, 0x1F64C,
	0x1F64D, 0x1F64E, 0x1F64F, 0x1F6A3, 0x1F6B4, 0x1F6B5,
	0x1F6B6, 0x1F6C0, 0x1F6CC, 0x1F90C, 0x1F90F, 0x1F918,
	0x1F919, 0x1F91A, 0x1F91B, 0x1F91C, 0x1F91D, 0x1F91E,
	0x1F91F, 0x1F926, 0x1F930, 0x1F931, 0x1F932, 0x1F933,
	0x1F934, 0x1F935, 0x1F936, 0x1F937, 0x1F938, 0x1F939,
	0x1F93D, 0x1F93E, 0x1F977, 0x1F9B5, 0x1F9B6, 0x1F9B8,
	0x1F9B9, 0x1F9BB, 0x1F9CD, 0x1F9CE, 0x1F9CF, 0x1F9D1,
	0x1F9D2, 0x1F9D3, 0x1F9D4, 0x1F9D5, 0x1F9D6, 0x1F9D7,
	0x1F9D8, 0x1F9D9, 0x1F9DA, 0x1F9DB, 0x1F9DC, 0x1F9DD,
	0x1FAC3, 0x1FAC4, 0x1FAC5, 0x1FAF0, 0x1FAF1, 0x1FAF2,
	0x1FAF3, 0x1FAF4, 0x1FAF5, 0x1FAF6, 0x1FAF7, 0x1FAF8
};
193
/*
 * One stored UTF-8 sequence. Every item is linked into two red-black trees at
 * the same time: utf8_index_tree (keyed by index) and utf8_data_tree (keyed by
 * size and bytes), so it can be found from either side.
 */
struct utf8_item {
	/* Linkage and key for utf8_index_tree. */
	RB_ENTRY(utf8_item) index_entry;
	u_int index;

	/* Linkage and key for utf8_data_tree; only size bytes of data are used. */
	RB_ENTRY(utf8_item) data_entry;
	char data[UTF8_SIZE];
	u_char size;
};
202
203 static int
utf8_data_cmp(struct utf8_item * ui1,struct utf8_item * ui2)204 utf8_data_cmp(struct utf8_item *ui1, struct utf8_item *ui2)
205 {
206 if (ui1->size < ui2->size)
207 return (-1);
208 if (ui1->size > ui2->size)
209 return (1);
210 return (memcmp(ui1->data, ui2->data, ui1->size));
211 }
212 RB_HEAD(utf8_data_tree, utf8_item);
213 RB_GENERATE_STATIC(utf8_data_tree, utf8_item, data_entry, utf8_data_cmp);
214 static struct utf8_data_tree utf8_data_tree = RB_INITIALIZER(utf8_data_tree);
215
216 static int
utf8_index_cmp(struct utf8_item * ui1,struct utf8_item * ui2)217 utf8_index_cmp(struct utf8_item *ui1, struct utf8_item *ui2)
218 {
219 if (ui1->index < ui2->index)
220 return (-1);
221 if (ui1->index > ui2->index)
222 return (1);
223 return (0);
224 }
225 RB_HEAD(utf8_index_tree, utf8_item);
226 RB_GENERATE_STATIC(utf8_index_tree, utf8_item, index_entry, utf8_index_cmp);
227 static struct utf8_index_tree utf8_index_tree = RB_INITIALIZER(utf8_index_tree);
228
/*
 * Index given to the next item created by utf8_put_item(). Starts at zero and
 * only ever increases; utf8_put_item() refuses new items once it reaches
 * 0xffffff + 1.
 */
static u_int utf8_next_index;
230
/*
 * Accessors for the size and width fields of a packed utf8_char. The size
 * lives in bits 24-28 (mask 0x1f) and the width, stored as width + 1, in bit
 * 29 and above. The low 24 bits are the payload filled in by
 * utf8_from_data().
 */
#define UTF8_GET_SIZE(uc) (((uc) >> 24) & 0x1f)
#define UTF8_GET_WIDTH(uc) (((uc) >> 29) - 1)

/* Build the size and width fields; OR the results together with the payload. */
#define UTF8_SET_SIZE(size) (((utf8_char)(size)) << 24)
#define UTF8_SET_WIDTH(width) ((((utf8_char)(width)) + 1) << 29)
236
237 /* Get a UTF-8 item from data. */
238 static struct utf8_item *
utf8_item_by_data(const u_char * data,size_t size)239 utf8_item_by_data(const u_char *data, size_t size)
240 {
241 struct utf8_item ui;
242
243 memcpy(ui.data, data, size);
244 ui.size = size;
245
246 return (RB_FIND(utf8_data_tree, &utf8_data_tree, &ui));
247 }
248
249 /* Get a UTF-8 item from data. */
250 static struct utf8_item *
utf8_item_by_index(u_int index)251 utf8_item_by_index(u_int index)
252 {
253 struct utf8_item ui;
254
255 ui.index = index;
256
257 return (RB_FIND(utf8_index_tree, &utf8_index_tree, &ui));
258 }
259
260 /* Add a UTF-8 item. */
261 static int
utf8_put_item(const u_char * data,size_t size,u_int * index)262 utf8_put_item(const u_char *data, size_t size, u_int *index)
263 {
264 struct utf8_item *ui;
265
266 ui = utf8_item_by_data(data, size);
267 if (ui != NULL) {
268 *index = ui->index;
269 log_debug("%s: found %.*s = %u", __func__, (int)size, data,
270 *index);
271 return (0);
272 }
273
274 if (utf8_next_index == 0xffffff + 1)
275 return (-1);
276
277 ui = xcalloc(1, sizeof *ui);
278 ui->index = utf8_next_index++;
279 RB_INSERT(utf8_index_tree, &utf8_index_tree, ui);
280
281 memcpy(ui->data, data, size);
282 ui->size = size;
283 RB_INSERT(utf8_data_tree, &utf8_data_tree, ui);
284
285 *index = ui->index;
286 log_debug("%s: added %.*s = %u", __func__, (int)size, data, *index);
287 return (0);
288 }
289
/* bsearch() comparator for tables of wchar_t; yields -1, 0 or 1. */
static int
utf8_table_cmp(const void *vp1, const void *vp2)
{
	const wchar_t	lhs = *(const wchar_t *)vp1;
	const wchar_t	rhs = *(const wchar_t *)vp2;

	if (lhs == rhs)
		return (0);
	return (lhs < rhs ? -1 : 1);
}
301
302 /* Check if character in table. */
303 int
utf8_in_table(wchar_t find,const wchar_t * table,u_int count)304 utf8_in_table(wchar_t find, const wchar_t *table, u_int count)
305 {
306 wchar_t *found;
307
308 found = bsearch(&find, table, count, sizeof *table, utf8_table_cmp);
309 return (found != NULL);
310 }
311
312 /* Get UTF-8 character from data. */
313 enum utf8_state
utf8_from_data(const struct utf8_data * ud,utf8_char * uc)314 utf8_from_data(const struct utf8_data *ud, utf8_char *uc)
315 {
316 u_int index;
317
318 if (ud->width > 2)
319 fatalx("invalid UTF-8 width: %u", ud->width);
320
321 if (ud->size > UTF8_SIZE)
322 goto fail;
323 if (ud->size <= 3) {
324 index = (((utf8_char)ud->data[2] << 16)|
325 ((utf8_char)ud->data[1] << 8)|
326 ((utf8_char)ud->data[0]));
327 } else if (utf8_put_item(ud->data, ud->size, &index) != 0)
328 goto fail;
329 *uc = UTF8_SET_SIZE(ud->size)|UTF8_SET_WIDTH(ud->width)|index;
330 log_debug("%s: (%d %d %.*s) -> %08x", __func__, ud->width, ud->size,
331 (int)ud->size, ud->data, *uc);
332 return (UTF8_DONE);
333
334 fail:
335 if (ud->width == 0)
336 *uc = UTF8_SET_SIZE(0)|UTF8_SET_WIDTH(0);
337 else if (ud->width == 1)
338 *uc = UTF8_SET_SIZE(1)|UTF8_SET_WIDTH(1)|0x20;
339 else
340 *uc = UTF8_SET_SIZE(1)|UTF8_SET_WIDTH(1)|0x2020;
341 return (UTF8_ERROR);
342 }
343
344 /* Get UTF-8 data from character. */
345 void
utf8_to_data(utf8_char uc,struct utf8_data * ud)346 utf8_to_data(utf8_char uc, struct utf8_data *ud)
347 {
348 struct utf8_item *ui;
349 u_int index;
350
351 memset(ud, 0, sizeof *ud);
352 ud->size = ud->have = UTF8_GET_SIZE(uc);
353 ud->width = UTF8_GET_WIDTH(uc);
354
355 if (ud->size <= 3) {
356 ud->data[2] = (uc >> 16);
357 ud->data[1] = ((uc >> 8) & 0xff);
358 ud->data[0] = (uc & 0xff);
359 } else {
360 index = (uc & 0xffffff);
361 if ((ui = utf8_item_by_index(index)) == NULL)
362 memset(ud->data, ' ', ud->size);
363 else
364 memcpy(ud->data, ui->data, ud->size);
365 }
366
367 log_debug("%s: %08x -> (%d %d %.*s)", __func__, uc, ud->width, ud->size,
368 (int)ud->size, ud->data);
369 }
370
371 /* Get UTF-8 character from a single ASCII character. */
372 u_int
utf8_build_one(u_char ch)373 utf8_build_one(u_char ch)
374 {
375 return (UTF8_SET_SIZE(1)|UTF8_SET_WIDTH(1)|ch);
376 }
377
378 /* Set a single character. */
379 void
utf8_set(struct utf8_data * ud,u_char ch)380 utf8_set(struct utf8_data *ud, u_char ch)
381 {
382 static const struct utf8_data empty = { { 0 }, 1, 1, 1 };
383
384 memcpy(ud, &empty, sizeof *ud);
385 *ud->data = ch;
386 }
387
388 /* Copy UTF-8 character. */
389 void
utf8_copy(struct utf8_data * to,const struct utf8_data * from)390 utf8_copy(struct utf8_data *to, const struct utf8_data *from)
391 {
392 u_int i;
393
394 memcpy(to, from, sizeof *to);
395
396 for (i = to->size; i < sizeof to->data; i++)
397 to->data[i] = '\0';
398 }
399
400 /* Get width of Unicode character. */
401 static enum utf8_state
utf8_width(struct utf8_data * ud,int * width)402 utf8_width(struct utf8_data *ud, int *width)
403 {
404 wchar_t wc;
405
406 if (utf8_towc(ud, &wc) != UTF8_DONE)
407 return (UTF8_ERROR);
408 if (utf8_in_table(wc, utf8_force_wide, nitems(utf8_force_wide))) {
409 *width = 2;
410 return (UTF8_DONE);
411 }
412
413 *width = wcwidth(wc);
414 log_debug("wcwidth(%05X) returned %d", (u_int)wc, *width);
415 if (*width < 0) {
416 /*
417 * C1 control characters are nonprintable, so they are always
418 * zero width.
419 */
420 *width = (wc >= 0x80 && wc <= 0x9f) ? 0 : 1;
421 }
422 if (*width >= 0 && *width <= 0xff)
423 return (UTF8_DONE);
424 return (UTF8_ERROR);
425 }
426
427 /* Convert UTF-8 character to wide character. */
428 enum utf8_state
utf8_towc(const struct utf8_data * ud,wchar_t * wc)429 utf8_towc(const struct utf8_data *ud, wchar_t *wc)
430 {
431 switch (mbtowc(wc, ud->data, ud->size)) {
432 case -1:
433 log_debug("UTF-8 %.*s, mbtowc() %d", (int)ud->size, ud->data,
434 errno);
435 mbtowc(NULL, NULL, MB_CUR_MAX);
436 return (UTF8_ERROR);
437 case 0:
438 return (UTF8_ERROR);
439 }
440 log_debug("UTF-8 %.*s is %05X", (int)ud->size, ud->data, (u_int)*wc);
441 return (UTF8_DONE);
442 }
443
444 /*
445 * Open UTF-8 sequence.
446 *
447 * 11000010-11011111 C2-DF start of 2-byte sequence
448 * 11100000-11101111 E0-EF start of 3-byte sequence
449 * 11110000-11110100 F0-F4 start of 4-byte sequence
450 */
451 enum utf8_state
utf8_open(struct utf8_data * ud,u_char ch)452 utf8_open(struct utf8_data *ud, u_char ch)
453 {
454 memset(ud, 0, sizeof *ud);
455 if (ch >= 0xc2 && ch <= 0xdf)
456 ud->size = 2;
457 else if (ch >= 0xe0 && ch <= 0xef)
458 ud->size = 3;
459 else if (ch >= 0xf0 && ch <= 0xf4)
460 ud->size = 4;
461 else
462 return (UTF8_ERROR);
463 utf8_append(ud, ch);
464 return (UTF8_MORE);
465 }
466
467 /* Append character to UTF-8, closing if finished. */
468 enum utf8_state
utf8_append(struct utf8_data * ud,u_char ch)469 utf8_append(struct utf8_data *ud, u_char ch)
470 {
471 int width;
472
473 if (ud->have >= ud->size)
474 fatalx("UTF-8 character overflow");
475 if (ud->size > sizeof ud->data)
476 fatalx("UTF-8 character size too large");
477
478 if (ud->have != 0 && (ch & 0xc0) != 0x80)
479 ud->width = 0xff;
480
481 ud->data[ud->have++] = ch;
482 if (ud->have != ud->size)
483 return (UTF8_MORE);
484
485 if (ud->width == 0xff)
486 return (UTF8_ERROR);
487 if (utf8_width(ud, &width) != UTF8_DONE)
488 return (UTF8_ERROR);
489 ud->width = width;
490
491 return (UTF8_DONE);
492 }
493
494 /*
495 * Encode len characters from src into dst, which is guaranteed to have four
496 * bytes available for each character from src (for \abc or UTF-8) plus space
497 * for \0.
498 */
499 int
utf8_strvis(char * dst,const char * src,size_t len,int flag)500 utf8_strvis(char *dst, const char *src, size_t len, int flag)
501 {
502 struct utf8_data ud;
503 const char *start = dst, *end = src + len;
504 enum utf8_state more;
505 size_t i;
506
507 while (src < end) {
508 if ((more = utf8_open(&ud, *src)) == UTF8_MORE) {
509 while (++src < end && more == UTF8_MORE)
510 more = utf8_append(&ud, *src);
511 if (more == UTF8_DONE) {
512 /* UTF-8 character finished. */
513 for (i = 0; i < ud.size; i++)
514 *dst++ = ud.data[i];
515 continue;
516 }
517 /* Not a complete, valid UTF-8 character. */
518 src -= ud.have;
519 }
520 if ((flag & VIS_DQ) && src[0] == '$' && src < end - 1) {
521 if (isalpha((u_char)src[1]) ||
522 src[1] == '_' ||
523 src[1] == '{')
524 *dst++ = '\\';
525 *dst++ = '$';
526 } else if (src < end - 1)
527 dst = vis(dst, src[0], flag, src[1]);
528 else if (src < end)
529 dst = vis(dst, src[0], flag, '\0');
530 src++;
531 }
532 *dst = '\0';
533 return (dst - start);
534 }
535
/*
 * Same as utf8_strvis for a \0 terminated string, but the output buffer is
 * allocated here: 4 * (strlen(src) + 1) bytes to start with, then resized to
 * the encoded length plus the \0. The buffer is returned through *dst and the
 * encoded length is the return value.
 */
int
utf8_stravis(char **dst, const char *src, int flag)
{
	size_t	 srclen = strlen(src);
	char	*out;
	int	 outlen;

	out = xreallocarray(NULL, 4, srclen + 1);
	outlen = utf8_strvis(out, src, srclen, flag);

	*dst = xrealloc(out, outlen + 1);
	return (outlen);
}
549
550 /* Same as utf8_strvis but allocate the buffer. */
551 int
utf8_stravisx(char ** dst,const char * src,size_t srclen,int flag)552 utf8_stravisx(char **dst, const char *src, size_t srclen, int flag)
553 {
554 char *buf;
555 int len;
556
557 buf = xreallocarray(NULL, 4, srclen + 1);
558 len = utf8_strvis(buf, src, srclen, flag);
559
560 *dst = xrealloc(buf, len + 1);
561 return (len);
562 }
563
564 /* Does this string contain anything that isn't valid UTF-8? */
565 int
utf8_isvalid(const char * s)566 utf8_isvalid(const char *s)
567 {
568 struct utf8_data ud;
569 const char *end;
570 enum utf8_state more;
571
572 end = s + strlen(s);
573 while (s < end) {
574 if ((more = utf8_open(&ud, *s)) == UTF8_MORE) {
575 while (++s < end && more == UTF8_MORE)
576 more = utf8_append(&ud, *s);
577 if (more == UTF8_DONE)
578 continue;
579 return (0);
580 }
581 if (*s < 0x20 || *s > 0x7e)
582 return (0);
583 s++;
584 }
585 return (1);
586 }
587
588 /*
589 * Sanitize a string, changing any UTF-8 characters to '_'. Caller should free
590 * the returned string. Anything not valid printable ASCII or UTF-8 is
591 * stripped.
592 */
593 char *
utf8_sanitize(const char * src)594 utf8_sanitize(const char *src)
595 {
596 char *dst = NULL;
597 size_t n = 0;
598 enum utf8_state more;
599 struct utf8_data ud;
600 u_int i;
601
602 while (*src != '\0') {
603 dst = xreallocarray(dst, n + 1, sizeof *dst);
604 if ((more = utf8_open(&ud, *src)) == UTF8_MORE) {
605 while (*++src != '\0' && more == UTF8_MORE)
606 more = utf8_append(&ud, *src);
607 if (more == UTF8_DONE) {
608 dst = xreallocarray(dst, n + ud.width,
609 sizeof *dst);
610 for (i = 0; i < ud.width; i++)
611 dst[n++] = '_';
612 continue;
613 }
614 src -= ud.have;
615 }
616 if (*src > 0x1f && *src < 0x7f)
617 dst[n++] = *src;
618 else
619 dst[n++] = '_';
620 src++;
621 }
622 dst = xreallocarray(dst, n + 1, sizeof *dst);
623 dst[n] = '\0';
624 return (dst);
625 }
626
627 /* Get UTF-8 buffer length. */
628 size_t
utf8_strlen(const struct utf8_data * s)629 utf8_strlen(const struct utf8_data *s)
630 {
631 size_t i;
632
633 for (i = 0; s[i].size != 0; i++)
634 /* nothing */;
635 return (i);
636 }
637
638 /* Get UTF-8 string width. */
639 u_int
utf8_strwidth(const struct utf8_data * s,ssize_t n)640 utf8_strwidth(const struct utf8_data *s, ssize_t n)
641 {
642 ssize_t i;
643 u_int width = 0;
644
645 for (i = 0; s[i].size != 0; i++) {
646 if (n != -1 && n == i)
647 break;
648 width += s[i].width;
649 }
650 return (width);
651 }
652
653 /*
654 * Convert a string into a buffer of UTF-8 characters. Terminated by size == 0.
655 * Caller frees.
656 */
657 struct utf8_data *
utf8_fromcstr(const char * src)658 utf8_fromcstr(const char *src)
659 {
660 struct utf8_data *dst = NULL;
661 size_t n = 0;
662 enum utf8_state more;
663
664 while (*src != '\0') {
665 dst = xreallocarray(dst, n + 1, sizeof *dst);
666 if ((more = utf8_open(&dst[n], *src)) == UTF8_MORE) {
667 while (*++src != '\0' && more == UTF8_MORE)
668 more = utf8_append(&dst[n], *src);
669 if (more == UTF8_DONE) {
670 n++;
671 continue;
672 }
673 src -= dst[n].have;
674 }
675 utf8_set(&dst[n], *src);
676 n++;
677 src++;
678 }
679 dst = xreallocarray(dst, n + 1, sizeof *dst);
680 dst[n].size = 0;
681 return (dst);
682 }
683
684 /* Convert from a buffer of UTF-8 characters into a string. Caller frees. */
685 char *
utf8_tocstr(struct utf8_data * src)686 utf8_tocstr(struct utf8_data *src)
687 {
688 char *dst = NULL;
689 size_t n = 0;
690
691 for(; src->size != 0; src++) {
692 dst = xreallocarray(dst, n + src->size, 1);
693 memcpy(dst + n, src->data, src->size);
694 n += src->size;
695 }
696 dst = xreallocarray(dst, n + 1, 1);
697 dst[n] = '\0';
698 return (dst);
699 }
700
701 /* Get width of UTF-8 string. */
702 u_int
utf8_cstrwidth(const char * s)703 utf8_cstrwidth(const char *s)
704 {
705 struct utf8_data tmp;
706 u_int width;
707 enum utf8_state more;
708
709 width = 0;
710 while (*s != '\0') {
711 if ((more = utf8_open(&tmp, *s)) == UTF8_MORE) {
712 while (*++s != '\0' && more == UTF8_MORE)
713 more = utf8_append(&tmp, *s);
714 if (more == UTF8_DONE) {
715 width += tmp.width;
716 continue;
717 }
718 s -= tmp.have;
719 }
720 if (*s > 0x1f && *s != 0x7f)
721 width++;
722 s++;
723 }
724 return (width);
725 }
726
727 /* Pad UTF-8 string to width on the left. Caller frees. */
728 char *
utf8_padcstr(const char * s,u_int width)729 utf8_padcstr(const char *s, u_int width)
730 {
731 size_t slen;
732 char *out;
733 u_int n, i;
734
735 n = utf8_cstrwidth(s);
736 if (n >= width)
737 return (xstrdup(s));
738
739 slen = strlen(s);
740 out = xmalloc(slen + 1 + (width - n));
741 memcpy(out, s, slen);
742 for (i = n; i < width; i++)
743 out[slen++] = ' ';
744 out[slen] = '\0';
745 return (out);
746 }
747
748 /* Pad UTF-8 string to width on the right. Caller frees. */
749 char *
utf8_rpadcstr(const char * s,u_int width)750 utf8_rpadcstr(const char *s, u_int width)
751 {
752 size_t slen;
753 char *out;
754 u_int n, i;
755
756 n = utf8_cstrwidth(s);
757 if (n >= width)
758 return (xstrdup(s));
759
760 slen = strlen(s);
761 out = xmalloc(slen + 1 + (width - n));
762 for (i = 0; i < width - n; i++)
763 out[i] = ' ';
764 memcpy(out + i, s, slen);
765 out[i + slen] = '\0';
766 return (out);
767 }
768
769 int
utf8_cstrhas(const char * s,const struct utf8_data * ud)770 utf8_cstrhas(const char *s, const struct utf8_data *ud)
771 {
772 struct utf8_data *copy, *loop;
773 int found = 0;
774
775 copy = utf8_fromcstr(s);
776 for (loop = copy; loop->size != 0; loop++) {
777 if (loop->size != ud->size)
778 continue;
779 if (memcmp(loop->data, ud->data, loop->size) == 0) {
780 found = 1;
781 break;
782 }
783 }
784 free(copy);
785
786 return (found);
787 }
788