1 /* Elementary Unicode string functions.
2    Copyright (C) 2001-2002, 2005-2020 Free Software Foundation, Inc.
3 
4    This program is free software: you can redistribute it and/or modify it
5    under the terms of the GNU General Public License as published
6    by the Free Software Foundation; either version 3 of the License, or
7    (at your option) any later version.
8 
9    This program is distributed in the hope that it will be useful,
10    but WITHOUT ANY WARRANTY; without even the implied warranty of
11    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
12    General Public License for more details.
13 
14    You should have received a copy of the GNU General Public License
15    along with this program.  If not, see <https://www.gnu.org/licenses/>.  */
16 
17 #ifndef _UNISTR_H
18 #define _UNISTR_H
19 
20 #include "unitypes.h"
21 
22 /* Get common macros for C.  */
23 #include "unused-parameter.h"
24 
25 /* Get bool.  */
26 #include <stdbool.h>
27 
28 /* Get size_t.  */
29 #include <stddef.h>
30 
31 #ifdef __cplusplus
32 extern "C" {
33 #endif
34 
35 
/* Conventions:

   All functions prefixed with u8_ operate on UTF-8 encoded strings.
   Their unit is a uint8_t (1 byte).

   All functions prefixed with u16_ operate on UTF-16 encoded strings.
   Their unit is a uint16_t (a 2-byte word).

   All functions prefixed with u32_ operate on UCS-4 encoded strings.
   Their unit is a uint32_t (a 4-byte word).

   All argument pairs (s, n) denote a Unicode string s[0..n-1] with exactly
   n units.

   All arguments starting with "str" and the arguments of functions starting
   with u8_str/u16_str/u32_str denote a NUL terminated string, i.e. a string
   which terminates at the first NUL unit.  This termination unit is
   considered part of the string for all memory allocation purposes, but
   is not considered part of the string for all other logical purposes.

   Functions returning a string result take a (resultbuf, lengthp) argument
   pair.  If resultbuf is not NULL and the result fits into *lengthp units,
   it is put in resultbuf, and resultbuf is returned.  Otherwise, a freshly
   allocated string is returned.  In both cases, *lengthp is set to the
   length (number of units) of the returned string.  In case of error,
   NULL is returned and errno is set.  */
62 
63 
64 /* Elementary string checks.  */
65 
66 /* Check whether an UTF-8 string is well-formed.
67    Return NULL if valid, or a pointer to the first invalid unit otherwise.  */
68 extern const uint8_t *
69        u8_check (const uint8_t *s, size_t n)
70        _UC_ATTRIBUTE_PURE;
71 
72 /* Check whether an UTF-16 string is well-formed.
73    Return NULL if valid, or a pointer to the first invalid unit otherwise.  */
74 extern const uint16_t *
75        u16_check (const uint16_t *s, size_t n)
76        _UC_ATTRIBUTE_PURE;
77 
78 /* Check whether an UCS-4 string is well-formed.
79    Return NULL if valid, or a pointer to the first invalid unit otherwise.  */
80 extern const uint32_t *
81        u32_check (const uint32_t *s, size_t n)
82        _UC_ATTRIBUTE_PURE;
83 
84 
85 /* Elementary string conversions.  */
86 
/* All converters below take the input string S[0..N-1] and a
   (RESULTBUF, LENGTHP) pair.  If RESULTBUF is not NULL and the result fits
   into *LENGTHP units, the result is put in RESULTBUF and RESULTBUF is
   returned; otherwise a freshly allocated string is returned.  In both
   cases *LENGTHP is set to the number of units of the result.  In case of
   error, NULL is returned and errno is set.  */

/* Convert a UTF-8 string to a UTF-16 string.  */
extern uint16_t *
       u8_to_u16 (const uint8_t *s, size_t n, uint16_t *resultbuf,
                  size_t *lengthp);

/* Convert a UTF-8 string to a UCS-4 string.  */
extern uint32_t *
       u8_to_u32 (const uint8_t *s, size_t n, uint32_t *resultbuf,
                  size_t *lengthp);

/* Convert a UTF-16 string to a UTF-8 string.  */
extern uint8_t *
       u16_to_u8 (const uint16_t *s, size_t n, uint8_t *resultbuf,
                  size_t *lengthp);

/* Convert a UTF-16 string to a UCS-4 string.  */
extern uint32_t *
       u16_to_u32 (const uint16_t *s, size_t n, uint32_t *resultbuf,
                   size_t *lengthp);

/* Convert a UCS-4 string to a UTF-8 string.  */
extern uint8_t *
       u32_to_u8 (const uint32_t *s, size_t n, uint8_t *resultbuf,
                  size_t *lengthp);

/* Convert a UCS-4 string to a UTF-16 string.  */
extern uint16_t *
       u32_to_u16 (const uint32_t *s, size_t n, uint16_t *resultbuf,
                   size_t *lengthp);
116 
117 
118 /* Elementary string functions.  */
119 
120 /* Return the length (number of units) of the first character in S, which is
121    no longer than N.  Return 0 if it is the NUL character.  Return -1 upon
122    failure.  */
123 /* Similar to mblen(), except that s must not be NULL.  */
124 extern int
125        u8_mblen (const uint8_t *s, size_t n)
126        _UC_ATTRIBUTE_PURE;
127 extern int
128        u16_mblen (const uint16_t *s, size_t n)
129        _UC_ATTRIBUTE_PURE;
130 extern int
131        u32_mblen (const uint32_t *s, size_t n)
132        _UC_ATTRIBUTE_PURE;
133 
/* Return the length (number of units) of the first character in S, putting
   its 'ucs4_t' representation in *PUC.  Upon failure, *PUC is set to 0xfffd,
   and an appropriate number of units is returned.
   The number of available units, N, must be > 0.  */
/* Similar to mbtowc(), except that puc and s must not be NULL, n must be > 0,
   and the NUL character is not treated specially.  */
/* The variants with _unsafe suffix are for backward compatibility with
   libunistring versions < 0.9.7.  */
142 
#if GNULIB_UNISTR_U8_MBTOUC_UNSAFE || HAVE_LIBUNISTRING
# if !HAVE_INLINE
extern int
       u8_mbtouc_unsafe (ucs4_t *puc, const uint8_t *s, size_t n);
# else
/* Out-of-line helper that the inline u8_mbtouc_unsafe delegates to when
   the first unit is >= 0x80.  */
extern int
       u8_mbtouc_unsafe_aux (ucs4_t *puc, const uint8_t *s, size_t n);

/* Decode the first character of the UTF-8 string S[0..N-1] into *PUC and
   return the number of units it occupies.
   A first unit below 0x80 is stored in *PUC as is and 1 is returned; every
   other case is handled by u8_mbtouc_unsafe_aux.
   PUC and S must not be NULL and N must be > 0: S[0] is read
   unconditionally.  */
static inline int
u8_mbtouc_unsafe (ucs4_t *puc, const uint8_t *s, size_t n)
{
  uint8_t first = *s;

  if (first >= 0x80)
    return u8_mbtouc_unsafe_aux (puc, s, n);

  /* Single-unit fast path: the unit value is the character.  */
  *puc = first;
  return 1;
}
# endif
#endif
165 
#if GNULIB_UNISTR_U16_MBTOUC_UNSAFE || HAVE_LIBUNISTRING
# if !HAVE_INLINE
extern int
       u16_mbtouc_unsafe (ucs4_t *puc, const uint16_t *s, size_t n);
# else
/* Out-of-line helper that the inline u16_mbtouc_unsafe delegates to when
   the first unit lies in 0xd800..0xdfff.  */
extern int
       u16_mbtouc_unsafe_aux (ucs4_t *puc, const uint16_t *s, size_t n);

/* Decode the first character of the UTF-16 string S[0..N-1] into *PUC and
   return the number of units it occupies.
   A first unit outside 0xd800..0xdfff is stored in *PUC as is and 1 is
   returned; every other case is handled by u16_mbtouc_unsafe_aux.
   PUC and S must not be NULL and N must be > 0: S[0] is read
   unconditionally.  */
static inline int
u16_mbtouc_unsafe (ucs4_t *puc, const uint16_t *s, size_t n)
{
  uint16_t first = *s;

  if (first >= 0xd800 && first < 0xe000)
    return u16_mbtouc_unsafe_aux (puc, s, n);

  /* Single-unit fast path: the unit value is the character.  */
  *puc = first;
  return 1;
}
# endif
#endif
188 
#if GNULIB_UNISTR_U32_MBTOUC_UNSAFE || HAVE_LIBUNISTRING
# if !HAVE_INLINE
extern int
       u32_mbtouc_unsafe (ucs4_t *puc, const uint32_t *s, size_t n);
# else
/* Decode the first character of the UCS-4 string S into *PUC and return
   its length in units, which is always 1.
   A unit in 0xd800..0xdfff or >= 0x110000 is treated as invalid and
   reported as 0xfffd in *PUC.  N is not consulted.
   PUC and S must not be NULL: S[0] is read unconditionally.  */
static inline int
u32_mbtouc_unsafe (ucs4_t *puc,
                   const uint32_t *s, size_t n _GL_UNUSED_PARAMETER)
{
  uint32_t unit = *s;
  bool valid = unit < 0xd800 || (unit >= 0xe000 && unit < 0x110000);

  /* Invalid units are replaced by 0xfffd.  */
  *puc = valid ? unit : 0xfffd;
  return 1;
}
# endif
#endif
209 
#if GNULIB_UNISTR_U8_MBTOUC || HAVE_LIBUNISTRING
# if !HAVE_INLINE
extern int
       u8_mbtouc (ucs4_t *puc, const uint8_t *s, size_t n);
# else
/* Out-of-line helper that the inline u8_mbtouc delegates to when the
   first unit is >= 0x80.  */
extern int
       u8_mbtouc_aux (ucs4_t *puc, const uint8_t *s, size_t n);

/* Decode the first character of the UTF-8 string S[0..N-1] into *PUC and
   return the number of units it occupies.
   A first unit below 0x80 is stored in *PUC as is and 1 is returned; every
   other case is handled by u8_mbtouc_aux.
   PUC and S must not be NULL and N must be > 0: S[0] is read
   unconditionally.  */
static inline int
u8_mbtouc (ucs4_t *puc, const uint8_t *s, size_t n)
{
  uint8_t first = *s;

  if (first >= 0x80)
    return u8_mbtouc_aux (puc, s, n);

  /* Single-unit fast path: the unit value is the character.  */
  *puc = first;
  return 1;
}
# endif
#endif
232 
#if GNULIB_UNISTR_U16_MBTOUC || HAVE_LIBUNISTRING
# if !HAVE_INLINE
extern int
       u16_mbtouc (ucs4_t *puc, const uint16_t *s, size_t n);
# else
/* Out-of-line helper that the inline u16_mbtouc delegates to when the
   first unit lies in 0xd800..0xdfff.  */
extern int
       u16_mbtouc_aux (ucs4_t *puc, const uint16_t *s, size_t n);

/* Decode the first character of the UTF-16 string S[0..N-1] into *PUC and
   return the number of units it occupies.
   A first unit outside 0xd800..0xdfff is stored in *PUC as is and 1 is
   returned; every other case is handled by u16_mbtouc_aux.
   PUC and S must not be NULL and N must be > 0: S[0] is read
   unconditionally.  */
static inline int
u16_mbtouc (ucs4_t *puc, const uint16_t *s, size_t n)
{
  uint16_t first = *s;

  if (first >= 0xd800 && first < 0xe000)
    return u16_mbtouc_aux (puc, s, n);

  /* Single-unit fast path: the unit value is the character.  */
  *puc = first;
  return 1;
}
# endif
#endif
255 
#if GNULIB_UNISTR_U32_MBTOUC || HAVE_LIBUNISTRING
# if !HAVE_INLINE
extern int
       u32_mbtouc (ucs4_t *puc, const uint32_t *s, size_t n);
# else
/* Decode the first character of the UCS-4 string S into *PUC and return
   its length in units, which is always 1.
   A unit in 0xd800..0xdfff or >= 0x110000 is treated as invalid and
   reported as 0xfffd in *PUC.  N is not consulted.
   PUC and S must not be NULL: S[0] is read unconditionally.  */
static inline int
u32_mbtouc (ucs4_t *puc, const uint32_t *s, size_t n _GL_UNUSED_PARAMETER)
{
  uint32_t unit = *s;
  bool valid = unit < 0xd800 || (unit >= 0xe000 && unit < 0x110000);

  /* Invalid units are replaced by 0xfffd.  */
  *puc = valid ? unit : 0xfffd;
  return 1;
}
# endif
#endif
275 
/* Return the length (number of units) of the first character in S, putting
   its 'ucs4_t' representation in *PUC.  Upon failure, *PUC is set to 0xfffd,
   and -1 is returned for an invalid sequence of units, -2 is returned for an
   incomplete sequence of units.
   The number of available units, N, must be > 0.
   Similar to u*_mbtouc(), except that the return value gives more details
   about the failure, similar to mbrtowc().  */

#if GNULIB_UNISTR_U8_MBTOUCR || HAVE_LIBUNISTRING
extern int
       u8_mbtoucr (ucs4_t *puc, const uint8_t *s, size_t n);
#endif

#if GNULIB_UNISTR_U16_MBTOUCR || HAVE_LIBUNISTRING
extern int
       u16_mbtoucr (ucs4_t *puc, const uint16_t *s, size_t n);
#endif

#if GNULIB_UNISTR_U32_MBTOUCR || HAVE_LIBUNISTRING
extern int
       u32_mbtoucr (ucs4_t *puc, const uint32_t *s, size_t n);
#endif
298 
/* Put the multibyte character represented by UC in S, returning its
   length.  Return -1 upon failure, -2 if the number of available units, N,
   is too small.  The latter case cannot occur if N >= 6/2/1, respectively.  */
/* Similar to wctomb(), except that s must not be NULL, and the argument n
   must be specified.  */
304 
#if GNULIB_UNISTR_U8_UCTOMB || HAVE_LIBUNISTRING
/* Auxiliary function, also used by u8_chr, u8_strchr, u8_strrchr.  */
extern int
       u8_uctomb_aux (uint8_t *s, ucs4_t uc, int n);
# if !HAVE_INLINE
extern int
       u8_uctomb (uint8_t *s, ucs4_t uc, int n);
# else
/* Store the character UC as UTF-8 in S, which has room for N units.
   When UC is below 0x80 and N > 0, UC is written to S[0] and 1 is returned.
   In every other case the result is whatever u8_uctomb_aux returns.
   S must not be NULL.  */
static inline int
u8_uctomb (uint8_t *s, ucs4_t uc, int n)
{
  if (uc >= 0x80 || n <= 0)
    return u8_uctomb_aux (s, uc, n);

  /* Single-unit fast path; UC < 0x80, so the narrowing is lossless.  */
  s[0] = (uint8_t) uc;
  return 1;
}
# endif
#endif
326 
#if GNULIB_UNISTR_U16_UCTOMB || HAVE_LIBUNISTRING
/* Auxiliary function, also used by u16_chr, u16_strchr, u16_strrchr.  */
extern int
       u16_uctomb_aux (uint16_t *s, ucs4_t uc, int n);
# if !HAVE_INLINE
extern int
       u16_uctomb (uint16_t *s, ucs4_t uc, int n);
# else
/* Store the character UC as UTF-16 in S, which has room for N units.
   When UC is below 0xd800 and N > 0, UC is written to S[0] and 1 is
   returned.  In every other case the result is whatever u16_uctomb_aux
   returns.
   S must not be NULL.  */
static inline int
u16_uctomb (uint16_t *s, ucs4_t uc, int n)
{
  if (uc >= 0xd800 || n <= 0)
    return u16_uctomb_aux (s, uc, n);

  /* Single-unit fast path; UC < 0xd800, so the narrowing is lossless.  */
  s[0] = (uint16_t) uc;
  return 1;
}
# endif
#endif
348 
#if GNULIB_UNISTR_U32_UCTOMB || HAVE_LIBUNISTRING
# if !HAVE_INLINE
extern int
       u32_uctomb (uint32_t *s, ucs4_t uc, int n);
# else
/* Store the character UC as UCS-4 in S, which has room for N units.
   Return -1 if UC lies in 0xd800..0xdfff or is >= 0x110000 (checked before
   N is looked at), -2 if UC is acceptable but N <= 0, and otherwise write
   UC to S[0] and return 1.
   S must not be NULL when a unit is written.  */
static inline int
u32_uctomb (uint32_t *s, ucs4_t uc, int n)
{
  bool valid = uc < 0xd800 || (uc >= 0xe000 && uc < 0x110000);

  if (!valid)
    return -1;
  if (n <= 0)
    return -2;

  *s = uc;
  return 1;
}
# endif
#endif
372 
373 /* Copy N units from SRC to DEST.  */
374 /* Similar to memcpy().  */
375 extern uint8_t *
376        u8_cpy (uint8_t *_UC_RESTRICT dest, const uint8_t *src, size_t n);
377 extern uint16_t *
378        u16_cpy (uint16_t *_UC_RESTRICT dest, const uint16_t *src, size_t n);
379 extern uint32_t *
380        u32_cpy (uint32_t *_UC_RESTRICT dest, const uint32_t *src, size_t n);
381 
/* Copy N units from SRC to DEST, guaranteeing correct behavior for
   overlapping memory areas.
   Similar to memmove().  */
extern uint8_t *
       u8_move (uint8_t *dest, const uint8_t *src, size_t n);
extern uint16_t *
       u16_move (uint16_t *dest, const uint16_t *src, size_t n);
extern uint32_t *
       u32_move (uint32_t *dest, const uint32_t *src, size_t n);
391 
392 /* Set the first N characters of S to UC.  UC should be a character that
393    occupies only 1 unit.  */
394 /* Similar to memset().  */
395 extern uint8_t *
396        u8_set (uint8_t *s, ucs4_t uc, size_t n);
397 extern uint16_t *
398        u16_set (uint16_t *s, ucs4_t uc, size_t n);
399 extern uint32_t *
400        u32_set (uint32_t *s, ucs4_t uc, size_t n);
401 
402 /* Compare S1 and S2, each of length N.  */
403 /* Similar to memcmp().  */
404 extern int
405        u8_cmp (const uint8_t *s1, const uint8_t *s2, size_t n)
406        _UC_ATTRIBUTE_PURE;
407 extern int
408        u16_cmp (const uint16_t *s1, const uint16_t *s2, size_t n)
409        _UC_ATTRIBUTE_PURE;
410 extern int
411        u32_cmp (const uint32_t *s1, const uint32_t *s2, size_t n)
412        _UC_ATTRIBUTE_PURE;
413 
414 /* Compare S1 and S2.  */
415 /* Similar to the gnulib function memcmp2().  */
416 extern int
417        u8_cmp2 (const uint8_t *s1, size_t n1, const uint8_t *s2, size_t n2)
418        _UC_ATTRIBUTE_PURE;
419 extern int
420        u16_cmp2 (const uint16_t *s1, size_t n1, const uint16_t *s2, size_t n2)
421        _UC_ATTRIBUTE_PURE;
422 extern int
423        u32_cmp2 (const uint32_t *s1, size_t n1, const uint32_t *s2, size_t n2)
424        _UC_ATTRIBUTE_PURE;
425 
426 /* Search the string at S for UC.  */
427 /* Similar to memchr().  */
428 extern uint8_t *
429        u8_chr (const uint8_t *s, size_t n, ucs4_t uc)
430        _UC_ATTRIBUTE_PURE;
431 extern uint16_t *
432        u16_chr (const uint16_t *s, size_t n, ucs4_t uc)
433        _UC_ATTRIBUTE_PURE;
434 extern uint32_t *
435        u32_chr (const uint32_t *s, size_t n, ucs4_t uc)
436        _UC_ATTRIBUTE_PURE;
437 
438 /* Count the number of Unicode characters in the N units from S.  */
439 /* Similar to mbsnlen().  */
440 extern size_t
441        u8_mbsnlen (const uint8_t *s, size_t n)
442        _UC_ATTRIBUTE_PURE;
443 extern size_t
444        u16_mbsnlen (const uint16_t *s, size_t n)
445        _UC_ATTRIBUTE_PURE;
446 extern size_t
447        u32_mbsnlen (const uint32_t *s, size_t n)
448        _UC_ATTRIBUTE_PURE;
449 
450 /* Elementary string functions with memory allocation.  */
451 
/* Make a freshly allocated copy of the string S[0..N-1], of length N
   units.  */
extern uint8_t *
       u8_cpy_alloc (const uint8_t *s, size_t n);
extern uint16_t *
       u16_cpy_alloc (const uint16_t *s, size_t n);
extern uint32_t *
       u32_cpy_alloc (const uint32_t *s, size_t n);
459 
460 /* Elementary string functions on NUL terminated strings.  */
461 
462 /* Return the length (number of units) of the first character in S.
463    Return 0 if it is the NUL character.  Return -1 upon failure.  */
464 extern int
465        u8_strmblen (const uint8_t *s)
466        _UC_ATTRIBUTE_PURE;
467 extern int
468        u16_strmblen (const uint16_t *s)
469        _UC_ATTRIBUTE_PURE;
470 extern int
471        u32_strmblen (const uint32_t *s)
472        _UC_ATTRIBUTE_PURE;
473 
474 /* Return the length (number of units) of the first character in S, putting
475    its 'ucs4_t' representation in *PUC.  Return 0 if it is the NUL
476    character.  Return -1 upon failure.  */
477 extern int
478        u8_strmbtouc (ucs4_t *puc, const uint8_t *s);
479 extern int
480        u16_strmbtouc (ucs4_t *puc, const uint16_t *s);
481 extern int
482        u32_strmbtouc (ucs4_t *puc, const uint32_t *s);
483 
484 /* Forward iteration step.  Advances the pointer past the next character,
485    or returns NULL if the end of the string has been reached.  Puts the
486    character's 'ucs4_t' representation in *PUC.  */
487 extern const uint8_t *
488        u8_next (ucs4_t *puc, const uint8_t *s);
489 extern const uint16_t *
490        u16_next (ucs4_t *puc, const uint16_t *s);
491 extern const uint32_t *
492        u32_next (ucs4_t *puc, const uint32_t *s);
493 
494 /* Backward iteration step.  Advances the pointer to point to the previous
495    character, or returns NULL if the beginning of the string had been reached.
496    Puts the character's 'ucs4_t' representation in *PUC.  */
497 extern const uint8_t *
498        u8_prev (ucs4_t *puc, const uint8_t *s, const uint8_t *start);
499 extern const uint16_t *
500        u16_prev (ucs4_t *puc, const uint16_t *s, const uint16_t *start);
501 extern const uint32_t *
502        u32_prev (ucs4_t *puc, const uint32_t *s, const uint32_t *start);
503 
504 /* Return the number of units in S.  */
505 /* Similar to strlen(), wcslen().  */
506 extern size_t
507        u8_strlen (const uint8_t *s)
508        _UC_ATTRIBUTE_PURE;
509 extern size_t
510        u16_strlen (const uint16_t *s)
511        _UC_ATTRIBUTE_PURE;
512 extern size_t
513        u32_strlen (const uint32_t *s)
514        _UC_ATTRIBUTE_PURE;
515 
516 /* Return the number of units in S, but at most MAXLEN.  */
517 /* Similar to strnlen(), wcsnlen().  */
518 extern size_t
519        u8_strnlen (const uint8_t *s, size_t maxlen)
520        _UC_ATTRIBUTE_PURE;
521 extern size_t
522        u16_strnlen (const uint16_t *s, size_t maxlen)
523        _UC_ATTRIBUTE_PURE;
524 extern size_t
525        u32_strnlen (const uint32_t *s, size_t maxlen)
526        _UC_ATTRIBUTE_PURE;
527 
528 /* Copy SRC to DEST.  */
529 /* Similar to strcpy(), wcscpy().  */
530 extern uint8_t *
531        u8_strcpy (uint8_t *_UC_RESTRICT dest, const uint8_t *src);
532 extern uint16_t *
533        u16_strcpy (uint16_t *_UC_RESTRICT dest, const uint16_t *src);
534 extern uint32_t *
535        u32_strcpy (uint32_t *_UC_RESTRICT dest, const uint32_t *src);
536 
537 /* Copy SRC to DEST, returning the address of the terminating NUL in DEST.  */
538 /* Similar to stpcpy().  */
539 extern uint8_t *
540        u8_stpcpy (uint8_t *_UC_RESTRICT dest, const uint8_t *src);
541 extern uint16_t *
542        u16_stpcpy (uint16_t *_UC_RESTRICT dest, const uint16_t *src);
543 extern uint32_t *
544        u32_stpcpy (uint32_t *_UC_RESTRICT dest, const uint32_t *src);
545 
546 /* Copy no more than N units of SRC to DEST.  */
547 /* Similar to strncpy(), wcsncpy().  */
548 extern uint8_t *
549        u8_strncpy (uint8_t *_UC_RESTRICT dest, const uint8_t *src, size_t n);
550 extern uint16_t *
551        u16_strncpy (uint16_t *_UC_RESTRICT dest, const uint16_t *src, size_t n);
552 extern uint32_t *
553        u32_strncpy (uint32_t *_UC_RESTRICT dest, const uint32_t *src, size_t n);
554 
555 /* Copy no more than N units of SRC to DEST.  Return a pointer past the last
556    non-NUL unit written into DEST.  */
557 /* Similar to stpncpy().  */
558 extern uint8_t *
559        u8_stpncpy (uint8_t *_UC_RESTRICT dest, const uint8_t *src, size_t n);
560 extern uint16_t *
561        u16_stpncpy (uint16_t *_UC_RESTRICT dest, const uint16_t *src, size_t n);
562 extern uint32_t *
563        u32_stpncpy (uint32_t *_UC_RESTRICT dest, const uint32_t *src, size_t n);
564 
565 /* Append SRC onto DEST.  */
566 /* Similar to strcat(), wcscat().  */
567 extern uint8_t *
568        u8_strcat (uint8_t *_UC_RESTRICT dest, const uint8_t *src);
569 extern uint16_t *
570        u16_strcat (uint16_t *_UC_RESTRICT dest, const uint16_t *src);
571 extern uint32_t *
572        u32_strcat (uint32_t *_UC_RESTRICT dest, const uint32_t *src);
573 
574 /* Append no more than N units of SRC onto DEST.  */
575 /* Similar to strncat(), wcsncat().  */
576 extern uint8_t *
577        u8_strncat (uint8_t *_UC_RESTRICT dest, const uint8_t *src, size_t n);
578 extern uint16_t *
579        u16_strncat (uint16_t *_UC_RESTRICT dest, const uint16_t *src, size_t n);
580 extern uint32_t *
581        u32_strncat (uint32_t *_UC_RESTRICT dest, const uint32_t *src, size_t n);
582 
583 /* Compare S1 and S2.  */
584 /* Similar to strcmp(), wcscmp().  */
585 #ifdef __sun
586 /* Avoid a collision with the u8_strcmp() function in Solaris 11 libc.  */
587 extern int
588        u8_strcmp_gnu (const uint8_t *s1, const uint8_t *s2)
589        _UC_ATTRIBUTE_PURE;
590 # define u8_strcmp u8_strcmp_gnu
591 #else
592 extern int
593        u8_strcmp (const uint8_t *s1, const uint8_t *s2)
594        _UC_ATTRIBUTE_PURE;
595 #endif
596 extern int
597        u16_strcmp (const uint16_t *s1, const uint16_t *s2)
598        _UC_ATTRIBUTE_PURE;
599 extern int
600        u32_strcmp (const uint32_t *s1, const uint32_t *s2)
601        _UC_ATTRIBUTE_PURE;
602 
/* Compare the NUL terminated strings S1 and S2 using the collation rules
   of the current locale.
   Return -1 if S1 < S2, 0 if S1 = S2, 1 if S1 > S2.
   Upon failure, set errno and return any value; a caller that needs to
   detect failure therefore has to inspect errno rather than the result.
   Similar to strcoll(), wcscoll().  */
extern int
       u8_strcoll (const uint8_t *s1, const uint8_t *s2);
extern int
       u16_strcoll (const uint16_t *s1, const uint16_t *s2);
extern int
       u32_strcoll (const uint32_t *s1, const uint32_t *s2);
613 
614 /* Compare no more than N units of S1 and S2.  */
615 /* Similar to strncmp(), wcsncmp().  */
616 extern int
617        u8_strncmp (const uint8_t *s1, const uint8_t *s2, size_t n)
618        _UC_ATTRIBUTE_PURE;
619 extern int
620        u16_strncmp (const uint16_t *s1, const uint16_t *s2, size_t n)
621        _UC_ATTRIBUTE_PURE;
622 extern int
623        u32_strncmp (const uint32_t *s1, const uint32_t *s2, size_t n)
624        _UC_ATTRIBUTE_PURE;
625 
/* Duplicate the NUL terminated string S, returning an identical malloc'd
   string.
   Similar to strdup(), wcsdup().  */
extern uint8_t *
       u8_strdup (const uint8_t *s);
extern uint16_t *
       u16_strdup (const uint16_t *s);
extern uint32_t *
       u32_strdup (const uint32_t *s);
634 
635 /* Find the first occurrence of UC in STR.  */
636 /* Similar to strchr(), wcschr().  */
637 extern uint8_t *
638        u8_strchr (const uint8_t *str, ucs4_t uc)
639        _UC_ATTRIBUTE_PURE;
640 extern uint16_t *
641        u16_strchr (const uint16_t *str, ucs4_t uc)
642        _UC_ATTRIBUTE_PURE;
643 extern uint32_t *
644        u32_strchr (const uint32_t *str, ucs4_t uc)
645        _UC_ATTRIBUTE_PURE;
646 
647 /* Find the last occurrence of UC in STR.  */
648 /* Similar to strrchr(), wcsrchr().  */
649 extern uint8_t *
650        u8_strrchr (const uint8_t *str, ucs4_t uc)
651        _UC_ATTRIBUTE_PURE;
652 extern uint16_t *
653        u16_strrchr (const uint16_t *str, ucs4_t uc)
654        _UC_ATTRIBUTE_PURE;
655 extern uint32_t *
656        u32_strrchr (const uint32_t *str, ucs4_t uc)
657        _UC_ATTRIBUTE_PURE;
658 
659 /* Return the length of the initial segment of STR which consists entirely
660    of Unicode characters not in REJECT.  */
661 /* Similar to strcspn(), wcscspn().  */
662 extern size_t
663        u8_strcspn (const uint8_t *str, const uint8_t *reject)
664        _UC_ATTRIBUTE_PURE;
665 extern size_t
666        u16_strcspn (const uint16_t *str, const uint16_t *reject)
667        _UC_ATTRIBUTE_PURE;
668 extern size_t
669        u32_strcspn (const uint32_t *str, const uint32_t *reject)
670        _UC_ATTRIBUTE_PURE;
671 
672 /* Return the length of the initial segment of STR which consists entirely
673    of Unicode characters in ACCEPT.  */
674 /* Similar to strspn(), wcsspn().  */
675 extern size_t
676        u8_strspn (const uint8_t *str, const uint8_t *accept)
677        _UC_ATTRIBUTE_PURE;
678 extern size_t
679        u16_strspn (const uint16_t *str, const uint16_t *accept)
680        _UC_ATTRIBUTE_PURE;
681 extern size_t
682        u32_strspn (const uint32_t *str, const uint32_t *accept)
683        _UC_ATTRIBUTE_PURE;
684 
685 /* Find the first occurrence in STR of any character in ACCEPT.  */
686 /* Similar to strpbrk(), wcspbrk().  */
687 extern uint8_t *
688        u8_strpbrk (const uint8_t *str, const uint8_t *accept)
689        _UC_ATTRIBUTE_PURE;
690 extern uint16_t *
691        u16_strpbrk (const uint16_t *str, const uint16_t *accept)
692        _UC_ATTRIBUTE_PURE;
693 extern uint32_t *
694        u32_strpbrk (const uint32_t *str, const uint32_t *accept)
695        _UC_ATTRIBUTE_PURE;
696 
697 /* Find the first occurrence of NEEDLE in HAYSTACK.  */
698 /* Similar to strstr(), wcsstr().  */
699 extern uint8_t *
700        u8_strstr (const uint8_t *haystack, const uint8_t *needle)
701        _UC_ATTRIBUTE_PURE;
702 extern uint16_t *
703        u16_strstr (const uint16_t *haystack, const uint16_t *needle)
704        _UC_ATTRIBUTE_PURE;
705 extern uint32_t *
706        u32_strstr (const uint32_t *haystack, const uint32_t *needle)
707        _UC_ATTRIBUTE_PURE;
708 
709 /* Test whether STR starts with PREFIX.  */
710 extern bool
711        u8_startswith (const uint8_t *str, const uint8_t *prefix)
712        _UC_ATTRIBUTE_PURE;
713 extern bool
714        u16_startswith (const uint16_t *str, const uint16_t *prefix)
715        _UC_ATTRIBUTE_PURE;
716 extern bool
717        u32_startswith (const uint32_t *str, const uint32_t *prefix)
718        _UC_ATTRIBUTE_PURE;
719 
720 /* Test whether STR ends with SUFFIX.  */
721 extern bool
722        u8_endswith (const uint8_t *str, const uint8_t *suffix)
723        _UC_ATTRIBUTE_PURE;
724 extern bool
725        u16_endswith (const uint16_t *str, const uint16_t *suffix)
726        _UC_ATTRIBUTE_PURE;
727 extern bool
728        u32_endswith (const uint32_t *str, const uint32_t *suffix)
729        _UC_ATTRIBUTE_PURE;
730 
731 /* Divide STR into tokens separated by characters in DELIM.
732    This interface is actually more similar to wcstok than to strtok.  */
733 /* Similar to strtok_r(), wcstok().  */
734 extern uint8_t *
735        u8_strtok (uint8_t *_UC_RESTRICT str, const uint8_t *delim,
736                   uint8_t **ptr);
737 extern uint16_t *
738        u16_strtok (uint16_t *_UC_RESTRICT str, const uint16_t *delim,
739                    uint16_t **ptr);
740 extern uint32_t *
741        u32_strtok (uint32_t *_UC_RESTRICT str, const uint32_t *delim,
742                    uint32_t **ptr);
743 
744 
745 #ifdef __cplusplus
746 }
747 #endif
748 
749 #endif /* _UNISTR_H */
750