1 /*         ______   ___    ___
2  *        /\  _  \ /\_ \  /\_ \
3  *        \ \ \L\ \\//\ \ \//\ \      __     __   _ __   ___
4  *         \ \  __ \ \ \ \  \ \ \   /'__`\ /'_ `\/\`'__\/ __`\
5  *          \ \ \/\ \ \_\ \_ \_\ \_/\  __//\ \L\ \ \ \//\ \L\ \
6  *           \ \_\ \_\/\____\/\____\ \____\ \____ \ \_\\ \____/
7  *            \/_/\/_/\/____/\/____/\/____/\/___L\ \/_/ \/___/
8  *                                           /\____/
9  *                                           \_/__/
10  *
11  *      UTF-8 string handling functions.
12  *
13  *      By Peter Wang.
14  *
15  *      See LICENSE.txt for copyright information.
16  */
17 
18 
19 #include <stdarg.h>
20 #include "allegro5/allegro.h"
21 #include "allegro5/utf8.h"
22 #include "allegro5/internal/bstrlib.h"
23 #include "allegro5/internal/aintern.h"
24 
25 ALLEGRO_STATIC_ASSERT(utf8,
26    sizeof(ALLEGRO_USTR_INFO) >= sizeof(struct _al_tagbstring));
27 
28 #ifdef ALLEGRO_MSVC
29    #pragma warning (disable: 4066)
30 #endif
31 
32 #ifndef ALLEGRO_HAVE_VA_COPY
33    /* If va_copy() is not defined we assume that a simple assignment suffices.
34     * From a few web searches, this appears to be true for MSVC 7.
35     */
36    #define va_copy(a, b)   ((a) = (b))
37 #endif
38 
39 
40 #define IS_SINGLE_BYTE(c)  (((unsigned)(c) & 0x80) == 0)
41 #define IS_LEAD_BYTE(c)    (((unsigned)(c) - 0xC0) < 0x3E)
42 #define IS_TRAIL_BYTE(c)   (((unsigned)(c) & 0xC0) == 0x80)
43 
44 
static bool all_ascii(const ALLEGRO_USTR *us)
{
   /* Reports whether every byte of `us` is in the range 0..127, i.e. no byte
    * has its high bit set.  A string of length zero yields true.
    */
   const unsigned char *bytes = (const unsigned char *) _al_bdata(us);
   int len = _al_blength(us);
   int i;

   for (i = 0; i < len; i++) {
      if (bytes[i] & 0x80)
         return false;
   }

   return true;
}
58 
59 
60 /* Function: al_ustr_new
61  */
al_ustr_new(const char * s)62 ALLEGRO_USTR *al_ustr_new(const char *s)
63 {
64    return _al_bfromcstr(s);
65 }
66 
67 
68 /* Function: al_ustr_new_from_buffer
69  */
al_ustr_new_from_buffer(const char * s,size_t size)70 ALLEGRO_USTR *al_ustr_new_from_buffer(const char *s, size_t size)
71 {
72    return _al_blk2bstr(s, size);
73 }
74 
75 
76 /* Function: al_ustr_newf
77  */
al_ustr_newf(const char * fmt,...)78 ALLEGRO_USTR *al_ustr_newf(const char *fmt, ...)
79 {
80    ALLEGRO_USTR *us;
81    va_list ap;
82 
83    us = al_ustr_new("");
84    va_start(ap, fmt);
85    al_ustr_vappendf(us, fmt, ap);
86    va_end(ap);
87    return us;
88 }
89 
90 
91 /* Function: al_ustr_free
92  */
void al_ustr_free(ALLEGRO_USTR *us)
{
   /* Releases `us` through bstrlib; the status reported by _al_bdestroy()
    * is deliberately discarded because this function returns nothing.
    */
   (void) _al_bdestroy(us);
}
97 
98 
99 /* Function: al_cstr
100  */
const char *al_cstr(const ALLEGRO_USTR *us)
{
   /* Exposes the string's own storage without copying.
    * Note: the data may or may not be NUL terminated.
    */
   const char *raw = _al_bdata(us);

   return raw;
}
106 
107 
108 /* Function: al_ustr_to_buffer
109  */
void al_ustr_to_buffer(const ALLEGRO_USTR *us, char *buffer, int size)
{
   /* Copies the contents of `us` into `buffer`, which holds `size` bytes.
    * Nothing is written when `size` is zero or negative.  The amount handed
    * to _al_sane_strncpy() is capped at the string length plus one byte for
    * the terminating 0.
    */
   int limit;

   if (size > 0) {
      limit = _al_blength(us) + 1;
      _al_sane_strncpy(buffer, _al_bdata(us), (size > limit) ? limit : size);
   }
}
122 
123 
124 /* Function: al_cstr_dup
125  */
char *al_cstr_dup(const ALLEGRO_USTR *us)
{
   /* Thin wrapper around _al_bstr2cstr(); the second argument ('\0') is
    * passed through as in the bstrlib call, and the result is returned as-is.
    */
   char *copy = _al_bstr2cstr(us, '\0');

   return copy;
}
130 
131 
132 /* Function: al_ustr_dup
133  */
ALLEGRO_USTR *al_ustr_dup(const ALLEGRO_USTR *us)
{
   /* Thin wrapper: returns whatever _al_bstrcpy() produces for `us`. */
   ALLEGRO_USTR *copy = _al_bstrcpy(us);

   return copy;
}
138 
139 
140 /* Function: al_ustr_dup_substr
141  */
ALLEGRO_USTR *al_ustr_dup_substr(const ALLEGRO_USTR *us, int start_pos,
   int end_pos)
{
   /* The public interface takes a byte interval [start_pos, end_pos) while
    * _al_bmidstr() takes a start offset and a length, so convert first.
    */
   int length = end_pos - start_pos;

   return _al_bmidstr(us, start_pos, length);
}
147 
148 
149 /* Function: al_ustr_empty_string
150  */
al_ustr_empty_string(void)151 const ALLEGRO_USTR *al_ustr_empty_string(void)
152 {
153    static struct _al_tagbstring empty = _al_bsStatic("");
154    return &empty;
155 }
156 
157 
158 /* Function: al_ref_cstr
159  */
al_ref_cstr(ALLEGRO_USTR_INFO * info,const char * s)160 const ALLEGRO_USTR *al_ref_cstr(ALLEGRO_USTR_INFO *info, const char *s)
161 {
162    struct _al_tagbstring *tb = (struct _al_tagbstring *) info;
163    ASSERT(info);
164    ASSERT(s);
165 
166    _al_btfromcstr(*tb, s);
167    return tb;
168 }
169 
170 
171 /* Function: al_ref_buffer
172  */
al_ref_buffer(ALLEGRO_USTR_INFO * info,const char * s,size_t size)173 const ALLEGRO_USTR *al_ref_buffer(ALLEGRO_USTR_INFO *info, const char *s, size_t size)
174 {
175    struct _al_tagbstring *tb = (struct _al_tagbstring *) info;
176    ASSERT(s);
177 
178    _al_blk2tbstr(*tb, s, size);
179    return tb;
180 }
181 
182 
183 /* Function: al_ref_ustr
184  */
const ALLEGRO_USTR *al_ref_ustr(ALLEGRO_USTR_INFO *info, const ALLEGRO_USTR *us,
   int start_pos, int end_pos)
{
   /* Uses the caller-provided `info` storage as the string header and fills
    * it in with _al_bmid2tbstr().  The byte interval [start_pos, end_pos) is
    * converted to the offset/length pair that macro takes.
    */
   struct _al_tagbstring *ref = (struct _al_tagbstring *) info;
   int length = end_pos - start_pos;

   _al_bmid2tbstr(*ref, us, start_pos, length);

   return ref;
}
193 
194 
195 /* Function: al_ustr_size
196  */
size_t al_ustr_size(const ALLEGRO_USTR *us)
{
   /* Size in bytes as reported by _al_blength(), not in code points. */
   size_t nbytes = _al_blength(us);

   return nbytes;
}
201 
202 
203 /* Function: al_ustr_length
204  */
size_t al_ustr_length(const ALLEGRO_USTR *us)
{
   /* Counts how many times al_ustr_next() can step forward from byte offset
    * 0 before it reports the end of the string.
    */
   int count;
   int offset = 0;

   for (count = 0; al_ustr_next(us, &offset); count++) {
      /* Nothing else to do: each successful step is one unit. */
   }

   return count;
}
215 
216 
217 /* Function: al_ustr_offset
218  */
int al_ustr_offset(const ALLEGRO_USTR *us, int index)
{
   /* Converts a code point index into a byte offset by stepping forward
    * with al_ustr_next().  A negative `index` is first offset by the string
    * length, so it counts from the end.  If the walk runs out of string the
    * offset reached so far is returned.
    */
   int offset = 0;

   if (index < 0)
      index += al_ustr_length(us);

   for (; index > 0; index--) {
      if (!al_ustr_next(us, &offset))
         break;
   }

   return offset;
}
233 
234 
235 /* Function: al_ustr_next
236  */
bool al_ustr_next(const ALLEGRO_USTR *us, int *pos)
{
   /* Moves *pos forward to the next byte that is either a single-byte
    * character or a lead byte (per IS_SINGLE_BYTE / IS_LEAD_BYTE), or to the
    * string size if no such byte follows.  Returns false, leaving *pos
    * untouched, when *pos is already at or beyond the end.
    */
   const unsigned char *bytes = (const unsigned char *) _al_bdata(us);
   int len = _al_blength(us);
   int i = *pos;

   if (i >= len)
      return false;

   for (i++; i < len; i++) {
      int b = bytes[i];

      if (IS_SINGLE_BYTE(b) || IS_LEAD_BYTE(b))
         break;
   }

   *pos = i;
   return true;
}
255 
256 
257 /* Function: al_ustr_prev
258  */
bool al_ustr_prev(const ALLEGRO_USTR *us, int *pos)
{
   /* Moves *pos backwards to the nearest earlier byte that is either a
    * single-byte character or a lead byte, stopping at offset 0 at the
    * latest.  Returns false, leaving *pos untouched, when the string has no
    * data or *pos is already at (or before) the beginning.
    */
   const unsigned char *bytes = (const unsigned char *) _al_bdata(us);
   int i = *pos;

   if (bytes == NULL || i <= 0)
      return false;

   do {
      int b;

      i--;
      b = bytes[i];
      if (IS_SINGLE_BYTE(b) || IS_LEAD_BYTE(b))
         break;
   } while (i > 0);

   *pos = i;
   return true;
}
279 
280 
281 /* Function: al_ustr_get
282  */
int32_t al_ustr_get(const ALLEGRO_USTR *ub, int pos)
{
   /* Decodes the UTF-8 sequence that starts at byte offset `pos`.
    *
    * Returns the code point on success.  Returns -1 and sets errno to ERANGE
    * when `pos` is outside the string.  Returns -2 and sets errno to EILSEQ
    * when the bytes at `pos` are not a valid sequence: a stray trail byte,
    * an invalid lead byte, a truncated sequence, an overlong form, or a
    * value above U+10FFFF.  Surrogate code points are not rejected.
    */
   int32_t c;
   int remain;
   int32_t minc;
   const unsigned char *data;

   c = _al_bchare(ub, pos, -1);

   if (c < 0) {
      /* Out of bounds. */
      al_set_errno(ERANGE);
      return -1;
   }

   if (c <= 0x7F) {
      /* Plain ASCII. */
      return c;
   }

   if (c <= 0xC1) {
      /* Trailing byte of multi-byte sequence or an overlong encoding for
       * code point <= 127.
       */
      al_set_errno(EILSEQ);
      return -2;
   }

   if (c <= 0xDF) {
      /* 2-byte sequence. */
      c &= 0x1F;
      remain = 1;
      minc = 0x80;
   }
   else if (c <= 0xEF) {
      /* 3-byte sequence. */
      c &= 0x0F;
      remain = 2;
      minc = 0x800;
   }
   else if (c <= 0xF4) {
      /* 4-byte sequence. */
      c &= 0x07;
      remain = 3;
      minc = 0x10000;
   }
   else {
      /* Otherwise invalid. */
      al_set_errno(EILSEQ);
      return -2;
   }

   /* The last trail byte sits at index pos + remain, which has to be a valid
    * index (strictly less than the length).  Strings that merely reference
    * a buffer are not necessarily followed by a terminator, so touching
    * data[length] must be avoided.  `pos` is known to be in range here, so
    * the subtraction cannot overflow.
    */
   if (remain >= _al_blength(ub) - pos) {
      al_set_errno(EILSEQ);
      return -2;
   }

   data = (const unsigned char *) _al_bdata(ub);
   while (remain--) {
      int d = data[++pos];

      if (!IS_TRAIL_BYTE(d)) {
         al_set_errno(EILSEQ);
         return -2;
      }

      c = (c << 6) | (d & 0x3F);
   }

   /* Check for overlong forms, which could be used to bypass security
    * validations.  Values above U+10FFFF (reachable through lead byte 0xF4)
    * are rejected as well: al_utf8_width() reports width 0 for them, so
    * callers that advance by the width would never move past the sequence.
    * Code points in the surrogate ranges are still let through.
    */
   if (c < minc || c > 0x10FFFF) {
      al_set_errno(EILSEQ);
      return -2;
   }

   return c;
}
364 
365 
366 /* Function: al_ustr_get_next
367  */
int32_t al_ustr_get_next(const ALLEGRO_USTR *us, int *pos)
{
   /* Decodes the code point at *pos and advances *pos past it.
    *
    * Returns the value from al_ustr_get(): a code point, -1 when *pos is
    * past the end (in which case *pos is left alone), or -2 for an invalid
    * byte sequence (in which case *pos is moved on with al_ustr_next()).
    */
   int32_t c = al_ustr_get(us, *pos);

   if (c >= 0) {
      size_t width = al_utf8_width(c);

      if (width > 0) {
         (*pos) += width;
         return c;
      }

      /* A decoded value that al_utf8_width() cannot size has width 0.
       * Adding that to *pos would leave the position unchanged and make any
       * loop built on this function spin forever, so step over the sequence
       * the same way as for an invalid one.
       */
      al_ustr_next(us, pos);
      return c;
   }

   if (c == -1) {
      /* Past end. */
      return c;
   }

   /* Some invalid byte sequence. */
   al_ustr_next(us, pos);
   return c;
}
386 
387 
388 /* Function: al_ustr_prev_get
389  */
int32_t al_ustr_prev_get(const ALLEGRO_USTR *us, int *pos)
{
   /* Steps *pos back with al_ustr_prev() and decodes what is found there.
    * When no step back is possible the result is -1 (past the beginning).
    */
   if (!al_ustr_prev(us, pos))
      return -1;

   return al_ustr_get(us, *pos);
}
399 
400 
401 /* Function: al_ustr_insert
402  */
bool al_ustr_insert(ALLEGRO_USTR *us1, int pos, const ALLEGRO_USTR *us2)
{
   /* Inserts `us2` into `us1` at byte offset `pos` through _al_binsert();
    * true means bstrlib reported _AL_BSTR_OK.
    */
   int rc = _al_binsert(us1, pos, us2, '\0');

   return rc == _AL_BSTR_OK;
}
407 
408 
409 /* Function: al_ustr_insert_cstr
410  */
bool al_ustr_insert_cstr(ALLEGRO_USTR *us, int pos, const char *s)
{
   /* Wraps the C string in a temporary reference string living on this
    * stack frame, then defers to al_ustr_insert().
    */
   ALLEGRO_USTR_INFO info;
   const ALLEGRO_USTR *ref = al_ref_cstr(&info, s);

   return al_ustr_insert(us, pos, ref);
}
417 
418 
419 /* Function: al_ustr_insert_chr
420  */
size_t al_ustr_insert_chr(ALLEGRO_USTR *us, int pos, int32_t c)
{
   /* Inserts the UTF-8 encoding of code point `c` at byte offset `pos`.
    * Returns the number of bytes inserted, or 0 on failure (including when
    * `c` is not a code point al_utf8_width() can size).
    */
   uint32_t uc = c;
   size_t sz;
   char *data;

   if (uc < 128) {
      return (_al_binsertch(us, pos, 1, uc) == _AL_BSTR_OK) ? 1 : 0;
   }

   sz = al_utf8_width(c);
   if (sz == 0) {
      /* Not encodable.  Bail out before touching the string: asking
       * _al_binsertch() for zero bytes could still modify the string while
       * we go on to report failure.
       */
      return 0;
   }

   /* Reserve `sz` placeholder bytes, then encode over them in place. */
   if (_al_binsertch(us, pos, sz, '\0') != _AL_BSTR_OK)
      return 0;

   data = _al_bdataofs(us, pos);
   if (!data)
      return 0;

   return al_utf8_encode(data, c);
}
441 
442 
443 /* Function: al_ustr_append
444  */
bool al_ustr_append(ALLEGRO_USTR *us1, const ALLEGRO_USTR *us2)
{
   /* Appends `us2` to `us1` via _al_bconcat(); true means _AL_BSTR_OK. */
   int rc = _al_bconcat(us1, us2);

   return rc == _AL_BSTR_OK;
}
449 
450 
451 /* Function: al_ustr_append_cstr
452  */
bool al_ustr_append_cstr(ALLEGRO_USTR *us, const char *s)
{
   /* Appends the C string `s` via _al_bcatcstr(); true means _AL_BSTR_OK. */
   int rc = _al_bcatcstr(us, s);

   return rc == _AL_BSTR_OK;
}
457 
458 
459 /* Function: al_ustr_append_chr
460  */
size_t al_ustr_append_chr(ALLEGRO_USTR *us, int32_t c)
{
   /* Appends code point `c` and returns the number of bytes added (0 on
    * failure).  Values below 128 go through the single-character bstrlib
    * call; everything else is treated as an insertion at the current end.
    */
   uint32_t uc = c;

   if (uc >= 128)
      return al_ustr_insert_chr(us, al_ustr_size(us), c);

   if (_al_bconchar(us, uc) == _AL_BSTR_OK)
      return 1;

   return 0;
}
471 
472 
473 /* Function: al_ustr_appendf
474  */
bool al_ustr_appendf(ALLEGRO_USTR *us, const char *fmt, ...)
{
   /* Variadic front end: packages the arguments into a va_list and returns
    * whatever al_ustr_vappendf() reports.
    */
   bool ok;
   va_list args;

   va_start(args, fmt);
   ok = al_ustr_vappendf(us, fmt, args);
   va_end(args);

   return ok;
}
485 
486 
487 /* Function: al_ustr_vappendf
488  */
bool al_ustr_vappendf(ALLEGRO_USTR *us, const char *fmt, va_list ap)
{
   /* Appends printf-style formatted output to `us`.
    *
    * _al_bvcformata() is retried with a new size for as long as it answers
    * with a negative value other than _AL_BSTR_ERR; the negated return value
    * becomes the size for the next attempt.  Returns true once a call
    * returns a non-negative value, false on _AL_BSTR_ERR.
    */
   int size_hint;

#ifdef DEBUGMODE
   /* Exercise resizing logic more often. */
   size_hint = 1;
#else
   size_hint = 128;
#endif

   while (true) {
      va_list args;
      int rc;

      /* Work on a copy: vsnprintf() may clobber the argument list, and we
       * might need it again for another round.
       */
      va_copy(args, ap);
      rc = _al_bvcformata(us, size_hint, fmt, args);
      va_end(args);

      if (rc < 0) {
         if (rc == _AL_BSTR_ERR) {
            /* A real error? */
            return false;
         }

         /* Increase size and try again. */
         size_hint = -rc;
         continue;
      }

      return true;
   }
}
521 
522 
523 /* Function: al_ustr_remove_chr
524  */
bool al_ustr_remove_chr(ALLEGRO_USTR *us, int pos)
{
   /* Removes the code point that starts at byte offset `pos`.  Fails (false)
    * when al_ustr_get() cannot decode a code point there; otherwise deletes
    * as many bytes as that code point's UTF-8 width.
    */
   int32_t c = al_ustr_get(us, pos);

   if (c < 0)
      return false;

   return _al_bdelete(us, pos, al_utf8_width(c)) == _AL_BSTR_OK;
}
537 
538 
539 /* Function: al_ustr_remove_range
540  */
bool al_ustr_remove_range(ALLEGRO_USTR *us, int start_pos, int end_pos)
{
   /* Converts the byte interval [start_pos, end_pos) into the offset/length
    * pair _al_bdelete() takes; true means _AL_BSTR_OK.
    */
   int length = end_pos - start_pos;
   int rc = _al_bdelete(us, start_pos, length);

   return rc == _AL_BSTR_OK;
}
545 
546 
547 /* Function: al_ustr_truncate
548  */
bool al_ustr_truncate(ALLEGRO_USTR *us, int start_pos)
{
   /* Thin wrapper over _al_btrunc(); true means _AL_BSTR_OK. */
   int rc = _al_btrunc(us, start_pos);

   return rc == _AL_BSTR_OK;
}
553 
554 
555 /* Function: al_ustr_ltrim_ws
556  */
bool al_ustr_ltrim_ws(ALLEGRO_USTR *us)
{
   /* Thin wrapper over _al_bltrimws(); true means _AL_BSTR_OK. */
   int rc = _al_bltrimws(us);

   return rc == _AL_BSTR_OK;
}
561 
562 
563 /* Function: al_ustr_rtrim_ws
564  */
bool al_ustr_rtrim_ws(ALLEGRO_USTR *us)
{
   /* Thin wrapper over _al_brtrimws(); true means _AL_BSTR_OK. */
   int rc = _al_brtrimws(us);

   return rc == _AL_BSTR_OK;
}
569 
570 
571 /* Function: al_ustr_trim_ws
572  */
bool al_ustr_trim_ws(ALLEGRO_USTR *us)
{
   /* Thin wrapper over _al_btrimws(); true means _AL_BSTR_OK. */
   int rc = _al_btrimws(us);

   return rc == _AL_BSTR_OK;
}
577 
578 
579 /* Function: al_ustr_assign
580  */
bool al_ustr_assign(ALLEGRO_USTR *us1, const ALLEGRO_USTR *us2)
{
   /* Thin wrapper over _al_bassign(); true means _AL_BSTR_OK. */
   int rc = _al_bassign(us1, us2);

   return rc == _AL_BSTR_OK;
}
585 
586 
587 /* Function: al_ustr_assign_substr
588  */
bool al_ustr_assign_substr(ALLEGRO_USTR *us1, const ALLEGRO_USTR *us2,
   int start_pos, int end_pos)
{
   /* Converts the byte interval [start_pos, end_pos) of `us2` into the
    * offset/length pair _al_bassignmidstr() takes; true means _AL_BSTR_OK.
    */
   int length = end_pos - start_pos;

   return _al_bassignmidstr(us1, us2, start_pos, length) == _AL_BSTR_OK;
}
595 
596 
597 /* Function: al_ustr_assign_cstr
598  */
bool al_ustr_assign_cstr(ALLEGRO_USTR *us1, const char *s)
{
   /* Thin wrapper over _al_bassigncstr(); true means _AL_BSTR_OK. */
   int rc = _al_bassigncstr(us1, s);

   return rc == _AL_BSTR_OK;
}
603 
604 
605 /* Function: al_ustr_set_chr
606  */
size_t al_ustr_set_chr(ALLEGRO_USTR *us, int start_pos, int32_t c)
{
   /* Overwrites the code point at byte offset `start_pos` with `c` and
    * returns the number of bytes written, or 0 on failure.
    *
    * The string is first grown or shrunk by the difference between the old
    * and the new encoded widths so that exactly the new width is available
    * at `start_pos`; the new encoding is then written in place.  An invalid
    * sequence at `start_pos` (al_ustr_get() == -2) or a `c` with width 0 is
    * rejected before anything is modified.
    */
   int32_t prev;
   size_t prev_width;
   size_t width;
   int status = _AL_BSTR_OK;
   char *dest;

   prev = al_ustr_get(us, start_pos);
   if (prev == -2)
      return 0;

   prev_width = al_utf8_width(prev);
   width = al_utf8_width(c);
   if (width == 0)
      return 0;

   if (prev_width > width) {
      /* New encoding is shorter: drop the surplus bytes. */
      status = _al_bdelete(us, start_pos, prev_width - width);
   }
   else if (width > prev_width) {
      /* New encoding is longer: open up room with placeholder bytes. */
      status = _al_binsertch(us, start_pos, width - prev_width, '\0');
   }

   if (status != _AL_BSTR_OK)
      return 0;

   dest = _al_bdataofs(us, start_pos);
   return dest ? al_utf8_encode(dest, c) : 0;
}
643 
644 
645 /* Function: al_ustr_replace_range
646  */
bool al_ustr_replace_range(ALLEGRO_USTR *us1, int start_pos1, int end_pos1,
   const ALLEGRO_USTR *us2)
{
   /* Converts the byte interval [start_pos1, end_pos1) into the
    * offset/length pair _al_breplace() takes; true means _AL_BSTR_OK.
    */
   int length = end_pos1 - start_pos1;
   int rc = _al_breplace(us1, start_pos1, length, us2, '\0');

   return rc == _AL_BSTR_OK;
}
653 
654 
655 /* Function: al_ustr_find_chr
656  */
al_ustr_find_chr(const ALLEGRO_USTR * us,int start_pos,int32_t c)657 int al_ustr_find_chr(const ALLEGRO_USTR *us, int start_pos, int32_t c)
658 {
659    char encc[4];
660    size_t sizec;
661    struct _al_tagbstring enctb;
662    int rc;
663 
664    /* Fast path for ASCII characters. */
665    if (c < 128) {
666       rc = _al_bstrchrp(us, c, start_pos);
667       return (rc == _AL_BSTR_ERR) ? -1 : rc;
668    }
669 
670    /* Non-ASCII.  We can simply encode the character into a string and search
671     * for that.
672     */
673 
674    sizec = al_utf8_encode(encc, c);
675    if (!sizec) {
676       al_set_errno(EINVAL);
677       return -1; /* error */
678    }
679 
680    _al_blk2tbstr(enctb, encc, sizec);
681    rc = _al_binstr(us, start_pos, &enctb);
682    return (rc == _AL_BSTR_ERR) ? -1 : rc;
683 }
684 
685 
686 /* Function: al_ustr_rfind_chr
687  */
al_ustr_rfind_chr(const ALLEGRO_USTR * us,int end_pos,int32_t c)688 int al_ustr_rfind_chr(const ALLEGRO_USTR *us, int end_pos, int32_t c)
689 {
690    char encc[4];
691    size_t sizec;
692    struct _al_tagbstring enctb;
693    int rc;
694 
695    /* Fast path for ASCII characters. */
696    if (c < 128) {
697       rc = _al_bstrrchrp(us, c, end_pos - 1);
698       return (rc == _AL_BSTR_ERR) ? -1 : rc;
699    }
700 
701    /* Non-ASCII.  We can simply encode the character into a string and search
702     * for that.
703     */
704 
705    sizec = al_utf8_encode(encc, c);
706    if (!sizec) {
707       al_set_errno(EINVAL);
708       return -1; /* error */
709    }
710 
711    _al_blk2tbstr(enctb, encc, sizec);
712    rc = _al_binstrr(us, end_pos - sizec, &enctb);
713    return (rc == _AL_BSTR_ERR) ? -1 : rc;
714 }
715 
716 
717 /* Function: al_ustr_find_set
718  */
al_ustr_find_set(const ALLEGRO_USTR * us,int start_pos,const ALLEGRO_USTR * accept)719 int al_ustr_find_set(const ALLEGRO_USTR *us, int start_pos,
720    const ALLEGRO_USTR *accept)
721 {
722    int rc;
723    int32_t c, d;
724    int pos;
725    int set_pos;
726 
727    /* Fast path for ASCII characters. */
728    if (all_ascii(accept)) {
729       rc = _al_binchr(us, start_pos, accept);
730       return (rc == _AL_BSTR_ERR) ? -1 : rc;
731    }
732 
733    /* Non-ASCII. */
734    pos = 0;
735    while ((c = al_ustr_get(us, pos)) != -1) {
736       if (c == -2) {
737          /* Invalid byte sequence. */
738          pos++;
739          continue;
740       }
741 
742       set_pos = 0;
743       while ((d = al_ustr_get_next(accept, &set_pos)) != -1) {
744          if (c == d)
745             return pos;
746       }
747 
748       pos += al_utf8_width(c);
749    }
750 
751    return -1;
752 }
753 
754 
755 /* Function: al_ustr_find_set_cstr
756  */
al_ustr_find_set_cstr(const ALLEGRO_USTR * us,int start_pos,const char * accept)757 int al_ustr_find_set_cstr(const ALLEGRO_USTR *us, int start_pos,
758    const char *accept)
759 {
760    ALLEGRO_USTR_INFO info;
761    const ALLEGRO_USTR *accept_us = al_ref_cstr(&info, accept);
762 
763    return al_ustr_find_set(us, start_pos, accept_us);
764 }
765 
766 
767 /* Function: al_ustr_find_cset
768  */
al_ustr_find_cset(const ALLEGRO_USTR * us,int start_pos,const ALLEGRO_USTR * reject)769 int al_ustr_find_cset(const ALLEGRO_USTR *us, int start_pos,
770    const ALLEGRO_USTR *reject)
771 {
772    int rc;
773    int32_t c, d;
774    int pos;
775    int set_pos;
776 
777    /* Fast path for ASCII characters. */
778    if (all_ascii(reject)) {
779       rc = _al_bninchr(us, start_pos, reject);
780       return (rc == _AL_BSTR_ERR) ? -1 : rc;
781    }
782 
783    /* Non-ASCII. */
784    pos = 0;
785    while ((c = al_ustr_get(us, pos)) != -1) {
786       if (c == -2) {
787          /* Invalid byte sequence. */
788          pos++;
789          continue;
790       }
791 
792       set_pos = 0;
793       while ((d = al_ustr_get_next(reject, &set_pos)) != -1) {
794          if (c == d)
795             break;
796       }
797 
798       if (d == -1) {
799          return pos;
800       }
801 
802       pos += al_utf8_width(c);
803    }
804 
805    return -1;
806 }
807 
808 
809 /* Function: al_ustr_find_cset_cstr
810  */
al_ustr_find_cset_cstr(const ALLEGRO_USTR * us,int start_pos,const char * reject)811 int al_ustr_find_cset_cstr(const ALLEGRO_USTR *us, int start_pos,
812    const char *reject)
813 {
814    ALLEGRO_USTR_INFO info;
815    const ALLEGRO_USTR *reject_us = al_ref_cstr(&info, reject);
816 
817    return al_ustr_find_cset(us, start_pos, reject_us);
818 }
819 
820 
821 /* Function: al_ustr_find_str
822  */
al_ustr_find_str(const ALLEGRO_USTR * haystack,int start_pos,const ALLEGRO_USTR * needle)823 int al_ustr_find_str(const ALLEGRO_USTR *haystack, int start_pos,
824    const ALLEGRO_USTR *needle)
825 {
826    int rc = _al_binstr(haystack, start_pos, needle);
827    return (rc == _AL_BSTR_ERR) ? -1 : rc;
828 }
829 
830 
831 /* Function: al_ustr_find_cstr
832  */
al_ustr_find_cstr(const ALLEGRO_USTR * haystack,int start_pos,const char * needle)833 int al_ustr_find_cstr(const ALLEGRO_USTR *haystack, int start_pos,
834    const char *needle)
835 {
836    ALLEGRO_USTR_INFO info;
837    const ALLEGRO_USTR *needle_us = al_ref_cstr(&info, needle);
838 
839    return al_ustr_find_str(haystack, start_pos, needle_us);
840 }
841 
842 
843 /* Function: al_ustr_rfind_str
844  */
al_ustr_rfind_str(const ALLEGRO_USTR * haystack,int end_pos,const ALLEGRO_USTR * needle)845 int al_ustr_rfind_str(const ALLEGRO_USTR *haystack, int end_pos,
846    const ALLEGRO_USTR *needle)
847 {
848    int rc = _al_binstrr(haystack, end_pos - _al_blength(needle), needle);
849    return (rc == _AL_BSTR_ERR) ? -1 : rc;
850 }
851 
852 
853 /* Function: al_ustr_rfind_cstr
854  */
al_ustr_rfind_cstr(const ALLEGRO_USTR * haystack,int end_pos,const char * needle)855 int al_ustr_rfind_cstr(const ALLEGRO_USTR *haystack, int end_pos,
856    const char *needle)
857 {
858    ALLEGRO_USTR_INFO info;
859    const ALLEGRO_USTR *needle_us = al_ref_cstr(&info, needle);
860 
861    return al_ustr_rfind_str(haystack, end_pos, needle_us);
862 }
863 
864 
865 /* Function: al_ustr_find_replace
866  */
bool al_ustr_find_replace(ALLEGRO_USTR *us, int start_pos,
   const ALLEGRO_USTR *find, const ALLEGRO_USTR *replace)
{
   /* Thin wrapper over _al_bfindreplace(); note that bstrlib takes the
    * start position as the last argument.  True means _AL_BSTR_OK.
    */
   int rc = _al_bfindreplace(us, find, replace, start_pos);

   return rc == _AL_BSTR_OK;
}
872 
873 
874 /* Function: al_ustr_find_replace_cstr
875  */
bool al_ustr_find_replace_cstr(ALLEGRO_USTR *us, int start_pos,
   const char *find, const char *replace)
{
   /* C string convenience form: both C strings are wrapped in temporary
    * reference strings (each needs its own ALLEGRO_USTR_INFO storage) before
    * deferring to al_ustr_find_replace().
    */
   ALLEGRO_USTR_INFO pattern_info;
   ALLEGRO_USTR_INFO substitute_info;
   const ALLEGRO_USTR *pattern;
   const ALLEGRO_USTR *substitute;

   pattern = al_ref_cstr(&pattern_info, find);
   substitute = al_ref_cstr(&substitute_info, replace);

   return al_ustr_find_replace(us, start_pos, pattern, substitute);
}
886 
887 
888 /* Function: al_ustr_equal
889  */
bool al_ustr_equal(const ALLEGRO_USTR *us1, const ALLEGRO_USTR *us2)
{
   /* Only the value 1 from _al_biseq() counts as "equal"; any other value
    * yields false.
    */
   int verdict = _al_biseq(us1, us2);

   return verdict == 1;
}
894 
895 
896 /* Function: al_ustr_compare
897  */
int al_ustr_compare(const ALLEGRO_USTR *us1, const ALLEGRO_USTR *us2)
{
   /* Compares the two strings code point by code point.  The result is the
    * difference between the first pair of values that differ, or 0 when
    * both strings run out at the same time.
    */
   int offset1 = 0;
   int offset2 = 0;
   int32_t a;
   int32_t b;

   /* Keep reading in lockstep while the values agree and the end (-1) has
    * not been reached.
    */
   do {
      a = al_ustr_get_next(us1, &offset1);
      b = al_ustr_get_next(us2, &offset2);
   } while (a == b && a != -1);

   /* Zero when both ended together; otherwise the difference, which happens
    * to work even when one of the two values is -1.
    */
   return a - b;
}
916 
917 
918 /* Function: al_ustr_ncompare
919  */
int al_ustr_ncompare(const ALLEGRO_USTR *us1, const ALLEGRO_USTR *us2, int n)
{
   /* Like al_ustr_compare(), but looks at no more than `n` code points.
    * A non-positive `n` compares nothing and yields 0.
    */
   int offset1 = 0;
   int offset2 = 0;
   int remaining;

   for (remaining = n; remaining > 0; remaining--) {
      int32_t a = al_ustr_get_next(us1, &offset1);
      int32_t b = al_ustr_get_next(us2, &offset2);

      if (a != b) {
         /* This happens to work even when one of the two values is -1. */
         return a - b;
      }

      if (a == -1) {
         /* Both strings ended together. */
         return 0;
      }
   }

   return 0;
}
941 
942 
943 /* Function: al_ustr_has_prefix
944  */
bool al_ustr_has_prefix(const ALLEGRO_USTR *us1, const ALLEGRO_USTR *us2)
{
   /* `us1` starts with `us2` when comparing just the first
    * _al_blength(us2) bytes with _al_bstrncmp() gives 0.
    */
   int prefix_len = _al_blength(us2);

   return _al_bstrncmp(us1, us2, prefix_len) == 0;
}
949 
950 
951 /* Function: al_ustr_has_prefix_cstr
952  */
al_ustr_has_prefix_cstr(const ALLEGRO_USTR * us1,const char * s2)953 bool al_ustr_has_prefix_cstr(const ALLEGRO_USTR *us1, const char *s2)
954 {
955    ALLEGRO_USTR_INFO info;
956    const ALLEGRO_USTR *us2 = al_ref_cstr(&info, s2);
957 
958    return al_ustr_has_prefix(us1, us2);
959 }
960 
961 
962 /* Function: al_ustr_has_suffix
963  */
al_ustr_has_suffix(const ALLEGRO_USTR * us1,const ALLEGRO_USTR * us2)964 bool al_ustr_has_suffix(const ALLEGRO_USTR *us1, const ALLEGRO_USTR *us2)
965 {
966    struct _al_tagbstring tb1;
967    int pos;
968 
969    pos = _al_blength(us1) - _al_blength(us2);
970    _al_bmid2tbstr(tb1, us1, pos, INT_MAX);
971    return _al_biseq(&tb1, us2);
972 }
973 
974 
975 /* Function: al_ustr_has_suffix_cstr
976  */
al_ustr_has_suffix_cstr(const ALLEGRO_USTR * us1,const char * s2)977 bool al_ustr_has_suffix_cstr(const ALLEGRO_USTR *us1, const char *s2)
978 {
979    ALLEGRO_USTR_INFO info;
980    const ALLEGRO_USTR *us2 = al_ref_cstr(&info, s2);
981 
982    return al_ustr_has_suffix(us1, us2);
983 }
984 
985 
986 /* Function: al_utf8_width
987  */
size_t al_utf8_width(int32_t c)
{
   /* Number of bytes needed to encode code point `c` in UTF-8, or 0 if `c`
    * is not encodable.  Working on the unsigned reinterpretation means
    * negative inputs become huge values and fall into the "illegal" case
    * without a separate check.
    */
   const uint32_t uc = (uint32_t) c;

   if (uc > 0x10ffff) {
      /* Beyond the last code point (also covers negative inputs). */
      return 0;
   }
   if (uc > 0xffff)
      return 4;
   if (uc > 0x7ff)
      return 3;
   if (uc > 0x7f)
      return 2;

   return 1;
}
1006 
1007 
1008 /* Function: al_utf8_encode
1009  */
size_t al_utf8_encode(char s[], int32_t c)
{
   /* Writes the UTF-8 encoding of code point `c` into `s` and returns the
    * number of bytes written (1 to 4).  Returns 0, writing nothing, when `c`
    * is outside 0..0x10FFFF (negative inputs included, via the unsigned
    * reinterpretation).  No terminator is written.
    */
   const uint32_t uc = (uint32_t) c;
   size_t width;

   /* Work out the encoded length first. */
   if (uc <= 0x7f)
      width = 1;
   else if (uc <= 0x7ff)
      width = 2;
   else if (uc <= 0xffff)
      width = 3;
   else if (uc <= 0x10ffff)
      width = 4;
   else
      width = 0; /* illegal */

   /* Then emit the lead byte followed by 6-bit continuation bytes. */
   switch (width) {
      case 1:
         s[0] = uc;
         break;

      case 2:
         s[0] = 0xC0 | ((uc >> 6) & 0x1F);
         s[1] = 0x80 |  (uc       & 0x3F);
         break;

      case 3:
         s[0] = 0xE0 | ((uc >> 12) & 0x0F);
         s[1] = 0x80 | ((uc >>  6) & 0x3F);
         s[2] = 0x80 |  (uc        & 0x3F);
         break;

      case 4:
         s[0] = 0xF0 | ((uc >> 18) & 0x07);
         s[1] = 0x80 | ((uc >> 12) & 0x3F);
         s[2] = 0x80 | ((uc >>  6) & 0x3F);
         s[3] = 0x80 |  (uc        & 0x3F);
         break;

      default:
         break;
   }

   return width;
}
1043 
1044 
1045 /* Function: al_utf16_width
1046  */
size_t al_utf16_width(int c)
{
   /* Number of bytes (2 or 4) needed to encode `c` in UTF-16, or 0 if `c`
    * is beyond U+10FFFF.  Negative inputs become huge values through the
    * unsigned reinterpretation and so also give 0.  Invalid code points
    * inside the range are not checked for.
    */
   const uint32_t uc = (uint32_t) c;

   if (uc > 0x10ffff) {
      /* Illegal. */
      return 0;
   }

   return (uc > 0xffff) ? 4 : 2;
}
1063 
1064 
1065 /* Function: al_utf16_encode
1066  */
size_t al_utf16_encode(uint16_t s[], int32_t c)
{
   /* Writes the UTF-16 encoding of `c` into `s` using the native
    * endianness and returns the number of BYTES used: 2 for a single unit,
    * 4 for a surrogate pair, 0 (nothing written) if `c` is beyond U+10FFFF
    * or negative.
    */
   const uint32_t uc = (uint32_t) c;
   uint32_t offset;

   if (uc > 0x10ffff) {
      /* Illegal. */
      return 0;
   }

   if (uc <= 0xffff) {
      /* Fits into one unit as-is. */
      s[0] = uc;
      return 2;
   }

   /* Split the 20-bit offset above 0x10000 over a high and a low
    * surrogate, 10 bits each.
    */
   offset = uc - 0x10000;
   s[0] = 0xd800 | (offset >> 10);
   s[1] = 0xdc00 | (offset & 0x3ff);
   return 4;
}
1088 
1089 
/* Decodes one code point from the UTF-16 units at `s`, of which `n` are
 * available, and stores it in *c.  Returns the number of units consumed
 * (1 or 2), or 0 without touching *c when a surrogate is found but fewer
 * than two units are available.
 *
 * A surrogate that is not part of a well-formed high/low pair is passed
 * through unchanged as a single unit, so a malformed string cannot make the
 * decoder swallow the unit that follows it (possibly the terminator).
 */
static size_t _al_utf16_get(uint16_t const *s, int n, int *c)
{
   uint16_t hi = s[0];
   uint16_t lo;

   if (hi < 0xd800 || hi > 0xdfff) {
      *c = hi;
      return 1;
   }
   if (n < 2)
      return 0;

   lo = s[1];
   if (hi > 0xdbff || lo < 0xdc00 || lo > 0xdfff) {
      /* Unpaired surrogate. */
      *c = hi;
      return 1;
   }

   /* The 20 payload bits are an offset that has to be ADDED to 0x10000;
    * or-ing would lose the carry whenever bit 16 of the offset is set
    * (e.g. U+20000 would come out as U+10000).
    */
   *c = 0x10000 + ((hi & 0x3ff) << 10) + (lo & 0x3ff);
   return 2;
}
1101 
1102 
1103 /* Function: al_ustr_new_from_utf16
1104  */
al_ustr_new_from_utf16(uint16_t const * s)1105 ALLEGRO_USTR *al_ustr_new_from_utf16(uint16_t const *s)
1106 {
1107    unsigned int i = 0;
1108    ALLEGRO_USTR *ustr = al_ustr_new("");
1109    while (1) {
1110       int c;
1111       /* We expect the passed string to be 0 terminated, so there are
1112        * always 2 words available.
1113        */
1114       size_t n = _al_utf16_get(s + i, 2, &c);
1115       /* Note: The string already is 0 terminated. */
1116       if (c == 0)
1117          break;
1118       al_ustr_append_chr(ustr, c);
1119       i += n;
1120    }
1121    return ustr;
1122 }
1123 
1124 
1125 /* Function: al_ustr_size_utf16
1126  */
size_t al_ustr_size_utf16(const ALLEGRO_USTR *us)
{
   /* Number of bytes needed to hold `us` encoded as UTF-16, including the
    * 2-byte terminating 0.  Counting stops at the first negative result
    * from al_ustr_get_next() (end of string or invalid sequence).
    */
   int32_t c;
   int offset = 0;
   /* Start with the terminating 0 character, which al_ustr_get_next() will
    * never hand out.
    */
   size_t total = 2;

   while ((c = al_ustr_get_next(us, &offset)) >= 0) {
      total += al_utf16_width(c);
   }

   return total;
}
1143 
1144 
1145 /* Function: al_ustr_encode_utf16
1146  */
size_t al_ustr_encode_utf16(const ALLEGRO_USTR *us, uint16_t *s,
   size_t n)
{
   /* Encodes `us` as UTF-16 (native endianness) into `s`, which has room
    * for `n` BYTES.  Output stops at the end of the string, at the first
    * sequence al_ustr_get_next() cannot decode, or as soon as the next
    * character plus the terminator would no longer fit.  A terminating 0 is
    * appended whenever there is room for it.  Returns the number of bytes
    * written, terminator included.
    */
   int pos = 0;
   size_t i = 0;
   while (1) {
      /* Used to hold one encoded UTF-16 character. */
      uint16_t encoded[2] = {0, 0};
      size_t sz;
      int32_t c = al_ustr_get_next(us, &pos);
      if (c < 0)
         break;
      sz = al_utf16_encode(encoded, c);
      /* Need two bytes for terminating 0.  The check is written as an
       * addition on the left-hand side: `n - 2` would wrap around to a huge
       * value for n < 2 (size_t is unsigned), which would switch the check
       * off and let the writes below run past the buffer.
       */
      if (i * 2 + sz + 2 > n)
         break;
      s[i++] = encoded[0];
      if (sz == 4)
         s[i++] = encoded[1];
   }
   /* Append terminating 0 - al_ustr_get_next withheld it. */
   if (i * 2 + 1 < n)
      s[i++] = 0;

   return i * 2;
}
1173 
1174 
1175 /* vim: set sts=3 sw=3 et: */
1176