1 /* ______ ___ ___
2 * /\ _ \ /\_ \ /\_ \
3 * \ \ \L\ \\//\ \ \//\ \ __ __ _ __ ___
4 * \ \ __ \ \ \ \ \ \ \ /'__`\ /'_ `\/\`'__\/ __`\
5 * \ \ \/\ \ \_\ \_ \_\ \_/\ __//\ \L\ \ \ \//\ \L\ \
6 * \ \_\ \_\/\____\/\____\ \____\ \____ \ \_\\ \____/
7 * \/_/\/_/\/____/\/____/\/____/\/___L\ \/_/ \/___/
8 * /\____/
9 * \_/__/
10 *
11 * UTF-8 string handling functions.
12 *
13 * By Peter Wang.
14 *
15 * See LICENSE.txt for copyright information.
16 */
17
18
19 #include <stdarg.h>
20 #include "allegro5/allegro.h"
21 #include "allegro5/utf8.h"
22 #include "allegro5/internal/bstrlib.h"
23 #include "allegro5/internal/aintern.h"
24
25 ALLEGRO_STATIC_ASSERT(utf8,
26 sizeof(ALLEGRO_USTR_INFO) >= sizeof(struct _al_tagbstring));
27
28 #ifdef ALLEGRO_MSVC
29 #pragma warning (disable: 4066)
30 #endif
31
32 #ifndef ALLEGRO_HAVE_VA_COPY
33 /* If va_copy() is not defined we assume that a simple assignment suffices.
34 * From a few web searches, this appears to be true for MSVC 7.
35 */
36 #define va_copy(a, b) ((a) = (b))
37 #endif
38
39
/* Byte classification helpers for UTF-8 data.  The argument is expected to
 * be a single byte value.
 *
 *  IS_SINGLE_BYTE - high bit clear, i.e. an ASCII byte.
 *  IS_LEAD_BYTE   - value in 0xC0..0xFD.  The unsigned subtraction wraps for
 *                   values below 0xC0, so one comparison checks both bounds.
 *  IS_TRAIL_BYTE  - top two bits are 10, i.e. a continuation byte.
 */
#define IS_SINGLE_BYTE(c)  (((unsigned)(c) & 0x80u) == 0u)
#define IS_LEAD_BYTE(c)    (((unsigned)(c) - 0xC0u) <= 0x3Du)
#define IS_TRAIL_BYTE(c)   (((unsigned)(c) & 0xC0u) == 0x80u)
43
44
/* Returns true if no byte of `us` has its high bit set, i.e. the string
 * consists solely of ASCII bytes.  A string of length zero yields true.
 */
static bool all_ascii(const ALLEGRO_USTR *us)
{
   const unsigned char *p = (const unsigned char *) _al_bdata(us);
   int remaining;

   for (remaining = _al_blength(us); remaining > 0; remaining--, p++) {
      if (*p & 0x80)
         return false;
   }

   return true;
}
58
59
60 /* Function: al_ustr_new
61 */
al_ustr_new(const char * s)62 ALLEGRO_USTR *al_ustr_new(const char *s)
63 {
64 return _al_bfromcstr(s);
65 }
66
67
68 /* Function: al_ustr_new_from_buffer
69 */
al_ustr_new_from_buffer(const char * s,size_t size)70 ALLEGRO_USTR *al_ustr_new_from_buffer(const char *s, size_t size)
71 {
72 return _al_blk2bstr(s, size);
73 }
74
75
76 /* Function: al_ustr_newf
77 */
al_ustr_newf(const char * fmt,...)78 ALLEGRO_USTR *al_ustr_newf(const char *fmt, ...)
79 {
80 ALLEGRO_USTR *us;
81 va_list ap;
82
83 us = al_ustr_new("");
84 va_start(ap, fmt);
85 al_ustr_vappendf(us, fmt, ap);
86 va_end(ap);
87 return us;
88 }
89
90
91 /* Function: al_ustr_free
92 */
al_ustr_free(ALLEGRO_USTR * us)93 void al_ustr_free(ALLEGRO_USTR *us)
94 {
95 _al_bdestroy(us);
96 }
97
98
99 /* Function: al_cstr
100 */
al_cstr(const ALLEGRO_USTR * us)101 const char *al_cstr(const ALLEGRO_USTR *us)
102 {
103 /* May or may not be NUL terminated. */
104 return _al_bdata(us);
105 }
106
107
108 /* Function: al_ustr_to_buffer
109 */
al_ustr_to_buffer(const ALLEGRO_USTR * us,char * buffer,int size)110 void al_ustr_to_buffer(const ALLEGRO_USTR *us, char *buffer, int size)
111 {
112 int need;
113
114 if (size <= 0)
115 return;
116 /* add 1 for terminating 0 byte */
117 need = _al_blength(us) + 1;
118 if (size > need)
119 size = need;
120 _al_sane_strncpy(buffer, _al_bdata(us), size);
121 }
122
123
124 /* Function: al_cstr_dup
125 */
al_cstr_dup(const ALLEGRO_USTR * us)126 char *al_cstr_dup(const ALLEGRO_USTR *us)
127 {
128 return _al_bstr2cstr(us, '\0');
129 }
130
131
132 /* Function: al_ustr_dup
133 */
al_ustr_dup(const ALLEGRO_USTR * us)134 ALLEGRO_USTR *al_ustr_dup(const ALLEGRO_USTR *us)
135 {
136 return _al_bstrcpy(us);
137 }
138
139
140 /* Function: al_ustr_dup_substr
141 */
al_ustr_dup_substr(const ALLEGRO_USTR * us,int start_pos,int end_pos)142 ALLEGRO_USTR *al_ustr_dup_substr(const ALLEGRO_USTR *us, int start_pos,
143 int end_pos)
144 {
145 return _al_bmidstr(us, start_pos, end_pos - start_pos);
146 }
147
148
149 /* Function: al_ustr_empty_string
150 */
al_ustr_empty_string(void)151 const ALLEGRO_USTR *al_ustr_empty_string(void)
152 {
153 static struct _al_tagbstring empty = _al_bsStatic("");
154 return ∅
155 }
156
157
158 /* Function: al_ref_cstr
159 */
al_ref_cstr(ALLEGRO_USTR_INFO * info,const char * s)160 const ALLEGRO_USTR *al_ref_cstr(ALLEGRO_USTR_INFO *info, const char *s)
161 {
162 struct _al_tagbstring *tb = (struct _al_tagbstring *) info;
163 ASSERT(info);
164 ASSERT(s);
165
166 _al_btfromcstr(*tb, s);
167 return tb;
168 }
169
170
171 /* Function: al_ref_buffer
172 */
al_ref_buffer(ALLEGRO_USTR_INFO * info,const char * s,size_t size)173 const ALLEGRO_USTR *al_ref_buffer(ALLEGRO_USTR_INFO *info, const char *s, size_t size)
174 {
175 struct _al_tagbstring *tb = (struct _al_tagbstring *) info;
176 ASSERT(s);
177
178 _al_blk2tbstr(*tb, s, size);
179 return tb;
180 }
181
182
183 /* Function: al_ref_ustr
184 */
al_ref_ustr(ALLEGRO_USTR_INFO * info,const ALLEGRO_USTR * us,int start_pos,int end_pos)185 const ALLEGRO_USTR *al_ref_ustr(ALLEGRO_USTR_INFO *info, const ALLEGRO_USTR *us,
186 int start_pos, int end_pos)
187 {
188 struct _al_tagbstring *tb = (struct _al_tagbstring *) info;
189
190 _al_bmid2tbstr(*tb, us, start_pos, end_pos - start_pos);
191 return tb;
192 }
193
194
195 /* Function: al_ustr_size
196 */
al_ustr_size(const ALLEGRO_USTR * us)197 size_t al_ustr_size(const ALLEGRO_USTR *us)
198 {
199 return _al_blength(us);
200 }
201
202
203 /* Function: al_ustr_length
204 */
al_ustr_length(const ALLEGRO_USTR * us)205 size_t al_ustr_length(const ALLEGRO_USTR *us)
206 {
207 int pos = 0;
208 int c = 0;
209
210 while (al_ustr_next(us, &pos))
211 c++;
212
213 return c;
214 }
215
216
217 /* Function: al_ustr_offset
218 */
al_ustr_offset(const ALLEGRO_USTR * us,int index)219 int al_ustr_offset(const ALLEGRO_USTR *us, int index)
220 {
221 int pos = 0;
222
223 if (index < 0)
224 index += al_ustr_length(us);
225
226 while (index-- > 0) {
227 if (!al_ustr_next(us, &pos))
228 return pos;
229 }
230
231 return pos;
232 }
233
234
235 /* Function: al_ustr_next
236 */
al_ustr_next(const ALLEGRO_USTR * us,int * pos)237 bool al_ustr_next(const ALLEGRO_USTR *us, int *pos)
238 {
239 const unsigned char *data = (const unsigned char *) _al_bdata(us);
240 int size = _al_blength(us);
241 int c;
242
243 if (*pos >= size) {
244 return false;
245 }
246
247 while (++(*pos) < size) {
248 c = data[*pos];
249 if (IS_SINGLE_BYTE(c) || IS_LEAD_BYTE(c))
250 break;
251 }
252
253 return true;
254 }
255
256
257 /* Function: al_ustr_prev
258 */
al_ustr_prev(const ALLEGRO_USTR * us,int * pos)259 bool al_ustr_prev(const ALLEGRO_USTR *us, int *pos)
260 {
261 const unsigned char *data = (const unsigned char *) _al_bdata(us);
262 int c;
263
264 if (!data)
265 return false;
266
267 if (*pos <= 0)
268 return false;
269
270 while (*pos > 0) {
271 (*pos)--;
272 c = data[*pos];
273 if (IS_SINGLE_BYTE(c) || IS_LEAD_BYTE(c))
274 break;
275 }
276
277 return true;
278 }
279
280
281 /* Function: al_ustr_get
282 */
al_ustr_get(const ALLEGRO_USTR * ub,int pos)283 int32_t al_ustr_get(const ALLEGRO_USTR *ub, int pos)
284 {
285 int32_t c;
286 int remain;
287 int32_t minc;
288 const unsigned char *data;
289
290 c = _al_bchare(ub, pos, -1);
291
292 if (c < 0) {
293 /* Out of bounds. */
294 al_set_errno(ERANGE);
295 return -1;
296 }
297
298 if (c <= 0x7F) {
299 /* Plain ASCII. */
300 return c;
301 }
302
303 if (c <= 0xC1) {
304 /* Trailing byte of multi-byte sequence or an overlong encoding for
305 * code point <= 127.
306 */
307 al_set_errno(EILSEQ);
308 return -2;
309 }
310
311 if (c <= 0xDF) {
312 /* 2-byte sequence. */
313 c &= 0x1F;
314 remain = 1;
315 minc = 0x80;
316 }
317 else if (c <= 0xEF) {
318 /* 3-byte sequence. */
319 c &= 0x0F;
320 remain = 2;
321 minc = 0x800;
322 }
323 else if (c <= 0xF4) {
324 /* 4-byte sequence. */
325 c &= 0x07;
326 remain = 3;
327 minc = 0x10000;
328 }
329 else {
330 /* Otherwise invalid. */
331 al_set_errno(EILSEQ);
332 return -2;
333 }
334
335 if (pos + remain > _al_blength(ub)) {
336 al_set_errno(EILSEQ);
337 return -2;
338 }
339
340 data = (const unsigned char *) _al_bdata(ub);
341 while (remain--) {
342 int d = data[++pos];
343
344 if (!IS_TRAIL_BYTE(d)) {
345 al_set_errno(EILSEQ);
346 return -2;
347 }
348
349 c = (c << 6) | (d & 0x3F);
350 }
351
352 /* Check for overlong forms, which could be used to bypass security
353 * validations. We could also check code points aren't above U+10FFFF or in
354 * the surrogate ranges, but we don't.
355 */
356
357 if (c < minc) {
358 al_set_errno(EILSEQ);
359 return -2;
360 }
361
362 return c;
363 }
364
365
366 /* Function: al_ustr_get_next
367 */
al_ustr_get_next(const ALLEGRO_USTR * us,int * pos)368 int32_t al_ustr_get_next(const ALLEGRO_USTR *us, int *pos)
369 {
370 int32_t c = al_ustr_get(us, *pos);
371
372 if (c >= 0) {
373 (*pos) += al_utf8_width(c);
374 return c;
375 }
376
377 if (c == -1) {
378 /* Past end. */
379 return c;
380 }
381
382 /* Some invalid byte sequence. */
383 al_ustr_next(us, pos);
384 return c;
385 }
386
387
388 /* Function: al_ustr_prev_get
389 */
al_ustr_prev_get(const ALLEGRO_USTR * us,int * pos)390 int32_t al_ustr_prev_get(const ALLEGRO_USTR *us, int *pos)
391 {
392 if (al_ustr_prev(us, pos)) {
393 return al_ustr_get(us, *pos);
394 }
395
396 /* Past beginning. */
397 return -1;
398 }
399
400
401 /* Function: al_ustr_insert
402 */
al_ustr_insert(ALLEGRO_USTR * us1,int pos,const ALLEGRO_USTR * us2)403 bool al_ustr_insert(ALLEGRO_USTR *us1, int pos, const ALLEGRO_USTR *us2)
404 {
405 return _al_binsert(us1, pos, us2, '\0') == _AL_BSTR_OK;
406 }
407
408
409 /* Function: al_ustr_insert_cstr
410 */
al_ustr_insert_cstr(ALLEGRO_USTR * us,int pos,const char * s)411 bool al_ustr_insert_cstr(ALLEGRO_USTR *us, int pos, const char *s)
412 {
413 ALLEGRO_USTR_INFO info;
414
415 return al_ustr_insert(us, pos, al_ref_cstr(&info, s));
416 }
417
418
419 /* Function: al_ustr_insert_chr
420 */
al_ustr_insert_chr(ALLEGRO_USTR * us,int pos,int32_t c)421 size_t al_ustr_insert_chr(ALLEGRO_USTR *us, int pos, int32_t c)
422 {
423 uint32_t uc = c;
424 size_t sz;
425
426 if (uc < 128) {
427 return (_al_binsertch(us, pos, 1, uc) == _AL_BSTR_OK) ? 1 : 0;
428 }
429
430 sz = al_utf8_width(c);
431 if (_al_binsertch(us, pos, sz, '\0') == _AL_BSTR_OK) {
432 char* data = _al_bdataofs(us, pos);
433 if (data)
434 return al_utf8_encode(data, c);
435 else
436 return 0;
437 }
438
439 return 0;
440 }
441
442
443 /* Function: al_ustr_append
444 */
al_ustr_append(ALLEGRO_USTR * us1,const ALLEGRO_USTR * us2)445 bool al_ustr_append(ALLEGRO_USTR *us1, const ALLEGRO_USTR *us2)
446 {
447 return _al_bconcat(us1, us2) == _AL_BSTR_OK;
448 }
449
450
451 /* Function: al_ustr_append_cstr
452 */
al_ustr_append_cstr(ALLEGRO_USTR * us,const char * s)453 bool al_ustr_append_cstr(ALLEGRO_USTR *us, const char *s)
454 {
455 return _al_bcatcstr(us, s) == _AL_BSTR_OK;
456 }
457
458
459 /* Function: al_ustr_append_chr
460 */
al_ustr_append_chr(ALLEGRO_USTR * us,int32_t c)461 size_t al_ustr_append_chr(ALLEGRO_USTR *us, int32_t c)
462 {
463 uint32_t uc = c;
464
465 if (uc < 128) {
466 return (_al_bconchar(us, uc) == _AL_BSTR_OK) ? 1 : 0;
467 }
468
469 return al_ustr_insert_chr(us, al_ustr_size(us), c);
470 }
471
472
473 /* Function: al_ustr_appendf
474 */
al_ustr_appendf(ALLEGRO_USTR * us,const char * fmt,...)475 bool al_ustr_appendf(ALLEGRO_USTR *us, const char *fmt, ...)
476 {
477 va_list ap;
478 bool rc;
479
480 va_start(ap, fmt);
481 rc = al_ustr_vappendf(us, fmt, ap);
482 va_end(ap);
483 return rc;
484 }
485
486
487 /* Function: al_ustr_vappendf
488 */
al_ustr_vappendf(ALLEGRO_USTR * us,const char * fmt,va_list ap)489 bool al_ustr_vappendf(ALLEGRO_USTR *us, const char *fmt, va_list ap)
490 {
491 va_list arglist;
492 int sz;
493 int rc;
494
495 #ifdef DEBUGMODE
496 /* Exercise resizing logic more often. */
497 sz = 1;
498 #else
499 sz = 128;
500 #endif
501
502 for (;;) {
503 /* Make a copy of the argument list as vsnprintf() may clobber it. */
504 va_copy(arglist, ap);
505 rc = _al_bvcformata(us, sz, fmt, arglist);
506 va_end(arglist);
507
508 if (rc >= 0) {
509 return true;
510 }
511
512 if (rc == _AL_BSTR_ERR) {
513 /* A real error? */
514 return false;
515 }
516
517 /* Increase size */
518 sz = -rc;
519 }
520 }
521
522
523 /* Function: al_ustr_remove_chr
524 */
al_ustr_remove_chr(ALLEGRO_USTR * us,int pos)525 bool al_ustr_remove_chr(ALLEGRO_USTR *us, int pos)
526 {
527 int32_t c;
528 size_t w;
529
530 c = al_ustr_get(us, pos);
531 if (c < 0)
532 return false;
533
534 w = al_utf8_width(c);
535 return _al_bdelete(us, pos, w) == _AL_BSTR_OK;
536 }
537
538
539 /* Function: al_ustr_remove_range
540 */
al_ustr_remove_range(ALLEGRO_USTR * us,int start_pos,int end_pos)541 bool al_ustr_remove_range(ALLEGRO_USTR *us, int start_pos, int end_pos)
542 {
543 return _al_bdelete(us, start_pos, end_pos - start_pos) == _AL_BSTR_OK;
544 }
545
546
547 /* Function: al_ustr_truncate
548 */
al_ustr_truncate(ALLEGRO_USTR * us,int start_pos)549 bool al_ustr_truncate(ALLEGRO_USTR *us, int start_pos)
550 {
551 return _al_btrunc(us, start_pos) == _AL_BSTR_OK;
552 }
553
554
555 /* Function: al_ustr_ltrim_ws
556 */
al_ustr_ltrim_ws(ALLEGRO_USTR * us)557 bool al_ustr_ltrim_ws(ALLEGRO_USTR *us)
558 {
559 return _al_bltrimws(us) == _AL_BSTR_OK;
560 }
561
562
563 /* Function: al_ustr_rtrim_ws
564 */
al_ustr_rtrim_ws(ALLEGRO_USTR * us)565 bool al_ustr_rtrim_ws(ALLEGRO_USTR *us)
566 {
567 return _al_brtrimws(us) == _AL_BSTR_OK;
568 }
569
570
571 /* Function: al_ustr_trim_ws
572 */
al_ustr_trim_ws(ALLEGRO_USTR * us)573 bool al_ustr_trim_ws(ALLEGRO_USTR *us)
574 {
575 return _al_btrimws(us) == _AL_BSTR_OK;
576 }
577
578
579 /* Function: al_ustr_assign
580 */
al_ustr_assign(ALLEGRO_USTR * us1,const ALLEGRO_USTR * us2)581 bool al_ustr_assign(ALLEGRO_USTR *us1, const ALLEGRO_USTR *us2)
582 {
583 return _al_bassign(us1, us2) == _AL_BSTR_OK;
584 }
585
586
587 /* Function: al_ustr_assign_substr
588 */
al_ustr_assign_substr(ALLEGRO_USTR * us1,const ALLEGRO_USTR * us2,int start_pos,int end_pos)589 bool al_ustr_assign_substr(ALLEGRO_USTR *us1, const ALLEGRO_USTR *us2,
590 int start_pos, int end_pos)
591 {
592 int rc = _al_bassignmidstr(us1, us2, start_pos, end_pos - start_pos);
593 return rc == _AL_BSTR_OK;
594 }
595
596
597 /* Function: al_ustr_assign_cstr
598 */
al_ustr_assign_cstr(ALLEGRO_USTR * us1,const char * s)599 bool al_ustr_assign_cstr(ALLEGRO_USTR *us1, const char *s)
600 {
601 return _al_bassigncstr(us1, s) == _AL_BSTR_OK;
602 }
603
604
605 /* Function: al_ustr_set_chr
606 */
al_ustr_set_chr(ALLEGRO_USTR * us,int start_pos,int32_t c)607 size_t al_ustr_set_chr(ALLEGRO_USTR *us, int start_pos, int32_t c)
608 {
609 int32_t oldc;
610 size_t oldw;
611 size_t neww;
612 int rc;
613
614 oldc = al_ustr_get(us, start_pos);
615 if (oldc == -2)
616 return 0;
617
618 oldw = al_utf8_width(oldc);
619 neww = al_utf8_width(c);
620 if (neww == 0)
621 return 0;
622
623 if (oldw > neww)
624 rc = _al_bdelete(us, start_pos, oldw - neww);
625 else if (neww > oldw)
626 rc = _al_binsertch(us, start_pos, neww - oldw, '\0');
627 else
628 rc = _AL_BSTR_OK;
629
630 if (rc == _AL_BSTR_OK) {
631 char* data = _al_bdataofs(us, start_pos);
632 if (data) {
633 return al_utf8_encode(data, c);
634 }
635 else {
636 return 0;
637 }
638 }
639 else {
640 return 0;
641 }
642 }
643
644
645 /* Function: al_ustr_replace_range
646 */
al_ustr_replace_range(ALLEGRO_USTR * us1,int start_pos1,int end_pos1,const ALLEGRO_USTR * us2)647 bool al_ustr_replace_range(ALLEGRO_USTR *us1, int start_pos1, int end_pos1,
648 const ALLEGRO_USTR *us2)
649 {
650 return _al_breplace(us1, start_pos1, end_pos1 - start_pos1, us2, '\0')
651 == _AL_BSTR_OK;
652 }
653
654
655 /* Function: al_ustr_find_chr
656 */
al_ustr_find_chr(const ALLEGRO_USTR * us,int start_pos,int32_t c)657 int al_ustr_find_chr(const ALLEGRO_USTR *us, int start_pos, int32_t c)
658 {
659 char encc[4];
660 size_t sizec;
661 struct _al_tagbstring enctb;
662 int rc;
663
664 /* Fast path for ASCII characters. */
665 if (c < 128) {
666 rc = _al_bstrchrp(us, c, start_pos);
667 return (rc == _AL_BSTR_ERR) ? -1 : rc;
668 }
669
670 /* Non-ASCII. We can simply encode the character into a string and search
671 * for that.
672 */
673
674 sizec = al_utf8_encode(encc, c);
675 if (!sizec) {
676 al_set_errno(EINVAL);
677 return -1; /* error */
678 }
679
680 _al_blk2tbstr(enctb, encc, sizec);
681 rc = _al_binstr(us, start_pos, &enctb);
682 return (rc == _AL_BSTR_ERR) ? -1 : rc;
683 }
684
685
686 /* Function: al_ustr_rfind_chr
687 */
al_ustr_rfind_chr(const ALLEGRO_USTR * us,int end_pos,int32_t c)688 int al_ustr_rfind_chr(const ALLEGRO_USTR *us, int end_pos, int32_t c)
689 {
690 char encc[4];
691 size_t sizec;
692 struct _al_tagbstring enctb;
693 int rc;
694
695 /* Fast path for ASCII characters. */
696 if (c < 128) {
697 rc = _al_bstrrchrp(us, c, end_pos - 1);
698 return (rc == _AL_BSTR_ERR) ? -1 : rc;
699 }
700
701 /* Non-ASCII. We can simply encode the character into a string and search
702 * for that.
703 */
704
705 sizec = al_utf8_encode(encc, c);
706 if (!sizec) {
707 al_set_errno(EINVAL);
708 return -1; /* error */
709 }
710
711 _al_blk2tbstr(enctb, encc, sizec);
712 rc = _al_binstrr(us, end_pos - sizec, &enctb);
713 return (rc == _AL_BSTR_ERR) ? -1 : rc;
714 }
715
716
717 /* Function: al_ustr_find_set
718 */
al_ustr_find_set(const ALLEGRO_USTR * us,int start_pos,const ALLEGRO_USTR * accept)719 int al_ustr_find_set(const ALLEGRO_USTR *us, int start_pos,
720 const ALLEGRO_USTR *accept)
721 {
722 int rc;
723 int32_t c, d;
724 int pos;
725 int set_pos;
726
727 /* Fast path for ASCII characters. */
728 if (all_ascii(accept)) {
729 rc = _al_binchr(us, start_pos, accept);
730 return (rc == _AL_BSTR_ERR) ? -1 : rc;
731 }
732
733 /* Non-ASCII. */
734 pos = 0;
735 while ((c = al_ustr_get(us, pos)) != -1) {
736 if (c == -2) {
737 /* Invalid byte sequence. */
738 pos++;
739 continue;
740 }
741
742 set_pos = 0;
743 while ((d = al_ustr_get_next(accept, &set_pos)) != -1) {
744 if (c == d)
745 return pos;
746 }
747
748 pos += al_utf8_width(c);
749 }
750
751 return -1;
752 }
753
754
755 /* Function: al_ustr_find_set_cstr
756 */
al_ustr_find_set_cstr(const ALLEGRO_USTR * us,int start_pos,const char * accept)757 int al_ustr_find_set_cstr(const ALLEGRO_USTR *us, int start_pos,
758 const char *accept)
759 {
760 ALLEGRO_USTR_INFO info;
761 const ALLEGRO_USTR *accept_us = al_ref_cstr(&info, accept);
762
763 return al_ustr_find_set(us, start_pos, accept_us);
764 }
765
766
767 /* Function: al_ustr_find_cset
768 */
al_ustr_find_cset(const ALLEGRO_USTR * us,int start_pos,const ALLEGRO_USTR * reject)769 int al_ustr_find_cset(const ALLEGRO_USTR *us, int start_pos,
770 const ALLEGRO_USTR *reject)
771 {
772 int rc;
773 int32_t c, d;
774 int pos;
775 int set_pos;
776
777 /* Fast path for ASCII characters. */
778 if (all_ascii(reject)) {
779 rc = _al_bninchr(us, start_pos, reject);
780 return (rc == _AL_BSTR_ERR) ? -1 : rc;
781 }
782
783 /* Non-ASCII. */
784 pos = 0;
785 while ((c = al_ustr_get(us, pos)) != -1) {
786 if (c == -2) {
787 /* Invalid byte sequence. */
788 pos++;
789 continue;
790 }
791
792 set_pos = 0;
793 while ((d = al_ustr_get_next(reject, &set_pos)) != -1) {
794 if (c == d)
795 break;
796 }
797
798 if (d == -1) {
799 return pos;
800 }
801
802 pos += al_utf8_width(c);
803 }
804
805 return -1;
806 }
807
808
809 /* Function: al_ustr_find_cset_cstr
810 */
al_ustr_find_cset_cstr(const ALLEGRO_USTR * us,int start_pos,const char * reject)811 int al_ustr_find_cset_cstr(const ALLEGRO_USTR *us, int start_pos,
812 const char *reject)
813 {
814 ALLEGRO_USTR_INFO info;
815 const ALLEGRO_USTR *reject_us = al_ref_cstr(&info, reject);
816
817 return al_ustr_find_cset(us, start_pos, reject_us);
818 }
819
820
821 /* Function: al_ustr_find_str
822 */
al_ustr_find_str(const ALLEGRO_USTR * haystack,int start_pos,const ALLEGRO_USTR * needle)823 int al_ustr_find_str(const ALLEGRO_USTR *haystack, int start_pos,
824 const ALLEGRO_USTR *needle)
825 {
826 int rc = _al_binstr(haystack, start_pos, needle);
827 return (rc == _AL_BSTR_ERR) ? -1 : rc;
828 }
829
830
831 /* Function: al_ustr_find_cstr
832 */
al_ustr_find_cstr(const ALLEGRO_USTR * haystack,int start_pos,const char * needle)833 int al_ustr_find_cstr(const ALLEGRO_USTR *haystack, int start_pos,
834 const char *needle)
835 {
836 ALLEGRO_USTR_INFO info;
837 const ALLEGRO_USTR *needle_us = al_ref_cstr(&info, needle);
838
839 return al_ustr_find_str(haystack, start_pos, needle_us);
840 }
841
842
843 /* Function: al_ustr_rfind_str
844 */
al_ustr_rfind_str(const ALLEGRO_USTR * haystack,int end_pos,const ALLEGRO_USTR * needle)845 int al_ustr_rfind_str(const ALLEGRO_USTR *haystack, int end_pos,
846 const ALLEGRO_USTR *needle)
847 {
848 int rc = _al_binstrr(haystack, end_pos - _al_blength(needle), needle);
849 return (rc == _AL_BSTR_ERR) ? -1 : rc;
850 }
851
852
853 /* Function: al_ustr_rfind_cstr
854 */
al_ustr_rfind_cstr(const ALLEGRO_USTR * haystack,int end_pos,const char * needle)855 int al_ustr_rfind_cstr(const ALLEGRO_USTR *haystack, int end_pos,
856 const char *needle)
857 {
858 ALLEGRO_USTR_INFO info;
859 const ALLEGRO_USTR *needle_us = al_ref_cstr(&info, needle);
860
861 return al_ustr_rfind_str(haystack, end_pos, needle_us);
862 }
863
864
865 /* Function: al_ustr_find_replace
866 */
al_ustr_find_replace(ALLEGRO_USTR * us,int start_pos,const ALLEGRO_USTR * find,const ALLEGRO_USTR * replace)867 bool al_ustr_find_replace(ALLEGRO_USTR *us, int start_pos,
868 const ALLEGRO_USTR *find, const ALLEGRO_USTR *replace)
869 {
870 return _al_bfindreplace(us, find, replace, start_pos) == _AL_BSTR_OK;
871 }
872
873
874 /* Function: al_ustr_find_replace_cstr
875 */
al_ustr_find_replace_cstr(ALLEGRO_USTR * us,int start_pos,const char * find,const char * replace)876 bool al_ustr_find_replace_cstr(ALLEGRO_USTR *us, int start_pos,
877 const char *find, const char *replace)
878 {
879 ALLEGRO_USTR_INFO find_info;
880 ALLEGRO_USTR_INFO repl_info;
881 const ALLEGRO_USTR *find_us = al_ref_cstr(&find_info, find);
882 const ALLEGRO_USTR *repl_us = al_ref_cstr(&repl_info, replace);
883
884 return al_ustr_find_replace(us, start_pos, find_us, repl_us);
885 }
886
887
888 /* Function: al_ustr_equal
889 */
al_ustr_equal(const ALLEGRO_USTR * us1,const ALLEGRO_USTR * us2)890 bool al_ustr_equal(const ALLEGRO_USTR *us1, const ALLEGRO_USTR *us2)
891 {
892 return _al_biseq(us1, us2) == 1;
893 }
894
895
896 /* Function: al_ustr_compare
897 */
al_ustr_compare(const ALLEGRO_USTR * us1,const ALLEGRO_USTR * us2)898 int al_ustr_compare(const ALLEGRO_USTR *us1, const ALLEGRO_USTR *us2)
899 {
900 int pos1 = 0;
901 int pos2 = 0;
902
903 for (;;) {
904 int32_t c1 = al_ustr_get_next(us1, &pos1);
905 int32_t c2 = al_ustr_get_next(us2, &pos2);
906
907 if (c1 != c2) {
908 /* This happens to work even when one of c1 or c2 is -1. */
909 return c1 - c2;
910 }
911
912 if (c1 == -1) /* == c2 */
913 return 0;
914 }
915 }
916
917
918 /* Function: al_ustr_ncompare
919 */
al_ustr_ncompare(const ALLEGRO_USTR * us1,const ALLEGRO_USTR * us2,int n)920 int al_ustr_ncompare(const ALLEGRO_USTR *us1, const ALLEGRO_USTR *us2, int n)
921 {
922 int pos1 = 0;
923 int pos2 = 0;
924
925 if (n <= 0)
926 return 0;
927
928 for (;;) {
929 int32_t c1 = al_ustr_get_next(us1, &pos1);
930 int32_t c2 = al_ustr_get_next(us2, &pos2);
931
932 if (c1 != c2) {
933 /* This happens to work even when one of c1 or c2 is -1. */
934 return c1 - c2;
935 }
936
937 if ((c1 == -1) || (--n <= 0))
938 return 0;
939 }
940 }
941
942
943 /* Function: al_ustr_has_prefix
944 */
al_ustr_has_prefix(const ALLEGRO_USTR * us1,const ALLEGRO_USTR * us2)945 bool al_ustr_has_prefix(const ALLEGRO_USTR *us1, const ALLEGRO_USTR *us2)
946 {
947 return 0 == _al_bstrncmp(us1, us2, _al_blength(us2));
948 }
949
950
951 /* Function: al_ustr_has_prefix_cstr
952 */
al_ustr_has_prefix_cstr(const ALLEGRO_USTR * us1,const char * s2)953 bool al_ustr_has_prefix_cstr(const ALLEGRO_USTR *us1, const char *s2)
954 {
955 ALLEGRO_USTR_INFO info;
956 const ALLEGRO_USTR *us2 = al_ref_cstr(&info, s2);
957
958 return al_ustr_has_prefix(us1, us2);
959 }
960
961
962 /* Function: al_ustr_has_suffix
963 */
al_ustr_has_suffix(const ALLEGRO_USTR * us1,const ALLEGRO_USTR * us2)964 bool al_ustr_has_suffix(const ALLEGRO_USTR *us1, const ALLEGRO_USTR *us2)
965 {
966 struct _al_tagbstring tb1;
967 int pos;
968
969 pos = _al_blength(us1) - _al_blength(us2);
970 _al_bmid2tbstr(tb1, us1, pos, INT_MAX);
971 return _al_biseq(&tb1, us2);
972 }
973
974
975 /* Function: al_ustr_has_suffix_cstr
976 */
al_ustr_has_suffix_cstr(const ALLEGRO_USTR * us1,const char * s2)977 bool al_ustr_has_suffix_cstr(const ALLEGRO_USTR *us1, const char *s2)
978 {
979 ALLEGRO_USTR_INFO info;
980 const ALLEGRO_USTR *us2 = al_ref_cstr(&info, s2);
981
982 return al_ustr_has_suffix(us1, us2);
983 }
984
985
/* Function: al_utf8_width
 *
 * Returns the number of bytes needed to encode the code point `c` in UTF-8:
 * 1 up to 0x7f, 2 up to 0x7ff, 3 up to 0xffff, 4 up to 0x10ffff.  Returns 0
 * for anything else, which includes negative values.
 */
size_t al_utf8_width(int32_t c)
{
   /* Treating the value as unsigned turns negative input into very large
    * numbers, so a single upper-bound check rejects it.
    */
   const uint32_t uc = (uint32_t) c;

   if (uc > 0x10ffff)
      return 0;   /* illegal */
   if (uc > 0xffff)
      return 4;
   if (uc > 0x7ff)
      return 3;
   if (uc > 0x7f)
      return 2;
   return 1;
}
1006
1007
/* Function: al_utf8_encode
 *
 * Encodes the code point `c` as UTF-8 into `s` and returns the number of
 * bytes written (1 to 4).  Returns 0 and writes nothing if `c` is above
 * 0x10ffff or negative.  `s` must have room for the bytes written; no
 * terminator is added.
 */
size_t al_utf8_encode(char s[], int32_t c)
{
   const uint32_t uc = (uint32_t) c;
   uint32_t rest = uc;
   uint32_t lead;
   size_t len;
   size_t i;

   /* Illegal values. */
   if (uc > 0x10ffff)
      return 0;

   if (uc <= 0x7f) {
      s[0] = uc;
      return 1;
   }

   /* Pick the sequence length and the marker bits of the lead byte. */
   if (uc <= 0x7ff) {
      len = 2;
      lead = 0xC0;
   }
   else if (uc <= 0xffff) {
      len = 3;
      lead = 0xE0;
   }
   else {
      len = 4;
      lead = 0xF0;
   }

   /* Fill the trail bytes from the end, six payload bits each. */
   for (i = len - 1; i > 0; i--) {
      s[i] = 0x80 | (rest & 0x3F);
      rest >>= 6;
   }

   /* Whatever is left fits the payload bits of the lead byte. */
   s[0] = lead | rest;
   return len;
}
1043
1044
/* Function: al_utf16_width
 *
 * Returns the number of bytes needed to encode the code point `c` in UTF-16:
 * 2 up to 0xffff, 4 up to 0x10ffff.  Returns 0 for anything else, which
 * includes negative values.  Invalid code points inside those ranges are
 * not checked for.
 */
size_t al_utf16_width(int c)
{
   /* Treating the value as unsigned turns negative input into very large
    * numbers, so a single upper-bound check rejects it.
    */
   const uint32_t uc = (uint32_t) c;

   if (uc > 0x10ffff)
      return 0;   /* illegal */

   return (uc > 0xffff) ? 4 : 2;
}
1063
1064
/* Function: al_utf16_encode
 *
 * Encodes the code point `c` as UTF-16 into `s` and returns the number of
 * bytes written: 2 (one unit) up to 0xffff, 4 (a surrogate pair) up to
 * 0x10ffff.  Returns 0 and writes nothing for anything else, which includes
 * negative values.  Units are stored in native endianness.
 */
size_t al_utf16_encode(uint16_t s[], int32_t c)
{
   const uint32_t uc = (uint32_t) c;

   /* Illegal values. */
   if (uc > 0x10ffff)
      return 0;

   if (uc > 0xffff) {
      /* Split the 20-bit offset into a high and a low surrogate. */
      const uint32_t offset = uc - 0x10000;

      s[0] = 0xd800 | (offset >> 10);
      s[1] = 0xdc00 | (offset & 0x3ff);
      return 4;
   }

   s[0] = uc;
   return 2;
}
1088
1089
/* Decodes one code point from the UTF-16 units at `s`, of which `n` are
 * available, and stores it in `*c`.
 *
 * Returns the number of units consumed: 1 for a unit outside the surrogate
 * range, 2 for a high surrogate followed by a low surrogate.  Returns 0,
 * leaving `*c` untouched, if `s[0]` is a surrogate and fewer than 2 units
 * are available.
 *
 * A surrogate that is not part of a well-formed pair is passed through as a
 * single unit.  This way a following unit, such as a terminating 0, is never
 * swallowed as the second half of a pair.
 */
static size_t _al_utf16_get(uint16_t const *s, int n, int *c)
{
   const uint16_t hi = s[0];
   uint16_t lo;

   if (hi < 0xd800 || hi > 0xdfff) {
      *c = hi;
      return 1;
   }

   if (n < 2)
      return 0;

   lo = s[1];
   if (hi > 0xdbff || lo < 0xdc00 || lo > 0xdfff) {
      /* Unpaired surrogate: consume just this unit. */
      *c = hi;
      return 1;
   }

   /* The pair carries a 20-bit offset from 0x10000.  It must be added, not
    * OR-ed in: the offset itself can have bit 16 set.
    */
   *c = 0x10000 + ((hi & 0x3ff) << 10) + (lo & 0x3ff);
   return 2;
}
1101
1102
1103 /* Function: al_ustr_new_from_utf16
1104 */
al_ustr_new_from_utf16(uint16_t const * s)1105 ALLEGRO_USTR *al_ustr_new_from_utf16(uint16_t const *s)
1106 {
1107 unsigned int i = 0;
1108 ALLEGRO_USTR *ustr = al_ustr_new("");
1109 while (1) {
1110 int c;
1111 /* We expect the passed string to be 0 terminated, so there are
1112 * always 2 words available.
1113 */
1114 size_t n = _al_utf16_get(s + i, 2, &c);
1115 /* Note: The string already is 0 terminated. */
1116 if (c == 0)
1117 break;
1118 al_ustr_append_chr(ustr, c);
1119 i += n;
1120 }
1121 return ustr;
1122 }
1123
1124
1125 /* Function: al_ustr_size_utf16
1126 */
al_ustr_size_utf16(const ALLEGRO_USTR * us)1127 size_t al_ustr_size_utf16(const ALLEGRO_USTR *us)
1128 {
1129 int pos = 0;
1130 size_t sz = 0;
1131 while (1) {
1132 int32_t c = al_ustr_get_next(us, &pos);
1133 if (c < 0)
1134 break;
1135 sz += al_utf16_width(c);
1136 }
1137 /* Size of terminating 0 character - al_ustr_get_next will not
1138 * return it.
1139 */
1140 sz += 2;
1141 return sz;
1142 }
1143
1144
1145 /* Function: al_ustr_encode_utf16
1146 */
al_ustr_encode_utf16(const ALLEGRO_USTR * us,uint16_t * s,size_t n)1147 size_t al_ustr_encode_utf16(const ALLEGRO_USTR *us, uint16_t *s,
1148 size_t n)
1149 {
1150 int pos = 0;
1151 size_t i = 0;
1152 while (1) {
1153 /* Used to hold one encoded UTF-16 character. */
1154 uint16_t encoded[2] = {0, 0};
1155 size_t sz;
1156 int32_t c = al_ustr_get_next(us, &pos);
1157 if (c < 0)
1158 break;
1159 sz = al_utf16_encode(encoded, c);
1160 /* Need two bytes for terminating 0. */
1161 if (i * 2 + sz > n - 2)
1162 break;
1163 s[i++] = encoded[0];
1164 if (sz == 4)
1165 s[i++] = encoded[1];
1166 }
1167 /* Append terminating 0 - al_ustr_get_next withheld it. */
1168 if (i * 2 + 1 < n)
1169 s[i++] = 0;
1170
1171 return i * 2;
1172 }
1173
1174
1175 /* vim: set sts=3 sw=3 et: */
1176