1@node unistr.h
2@chapter Elementary Unicode string functions @code{<unistr.h>}
3
4This include file declares elementary functions for Unicode strings.  It is
5essentially the equivalent of what @code{<string.h>} is for C strings.
6
7@menu
8* Elementary string checks::
9* Elementary string conversions::
10* Elementary string functions::
11* Elementary string functions with memory allocation::
12* Elementary string functions on NUL terminated strings::
13@end menu
14
15@node Elementary string checks
16@section Elementary string checks
17
18@cindex validity
19@cindex verification
20@cindex well-formed
21The following function is available to verify the integrity of a Unicode string.
22
23@deftypefun {const uint8_t *} u8_check (const uint8_t *@var{s}, size_t @var{n})
24@deftypefunx {const uint16_t *} u16_check (const uint16_t *@var{s}, size_t @var{n})
25@deftypefunx {const uint32_t *} u32_check (const uint32_t *@var{s}, size_t @var{n})
26This function checks whether a Unicode string is well-formed.
27It returns NULL if valid, or a pointer to the first invalid unit otherwise.
28@end deftypefun
29
30@node Elementary string conversions
31@section Elementary string conversions
32
33@cindex converting
34The following functions perform conversions between the different forms of Unicode strings.
35
36@deftypefun {uint16_t *} u8_to_u16 (const uint8_t *@var{s}, size_t @var{n}, uint16_t *@var{resultbuf}, size_t *@var{lengthp})
37Converts a UTF-8 string to a UTF-16 string.
38
39The @var{resultbuf} and @var{lengthp} arguments are as described in
40chapter @ref{Conventions}.
41@end deftypefun
42
43@deftypefun {uint32_t *} u8_to_u32 (const uint8_t *@var{s}, size_t @var{n}, uint32_t *@var{resultbuf}, size_t *@var{lengthp})
44Converts a UTF-8 string to a UTF-32 string.
45
46The @var{resultbuf} and @var{lengthp} arguments are as described in
47chapter @ref{Conventions}.
48@end deftypefun
49
50@deftypefun {uint8_t *} u16_to_u8 (const uint16_t *@var{s}, size_t @var{n}, uint8_t *@var{resultbuf}, size_t *@var{lengthp})
51Converts a UTF-16 string to a UTF-8 string.
52
53The @var{resultbuf} and @var{lengthp} arguments are as described in
54chapter @ref{Conventions}.
55@end deftypefun
56
57@deftypefun {uint32_t *} u16_to_u32 (const uint16_t *@var{s}, size_t @var{n}, uint32_t *@var{resultbuf}, size_t *@var{lengthp})
58Converts a UTF-16 string to a UTF-32 string.
59
60The @var{resultbuf} and @var{lengthp} arguments are as described in
61chapter @ref{Conventions}.
62@end deftypefun
63
64@deftypefun {uint8_t *} u32_to_u8 (const uint32_t *@var{s}, size_t @var{n}, uint8_t *@var{resultbuf}, size_t *@var{lengthp})
65Converts a UTF-32 string to a UTF-8 string.
66
67The @var{resultbuf} and @var{lengthp} arguments are as described in
68chapter @ref{Conventions}.
69@end deftypefun
70
71@deftypefun {uint16_t *} u32_to_u16 (const uint32_t *@var{s}, size_t @var{n}, uint16_t *@var{resultbuf}, size_t *@var{lengthp})
72Converts a UTF-32 string to a UTF-16 string.
73
74The @var{resultbuf} and @var{lengthp} arguments are as described in
75chapter @ref{Conventions}.
76@end deftypefun
77
78@node Elementary string functions
79@section Elementary string functions
80
81@menu
82* Iterating::
83* Creating Unicode strings::
84* Copying Unicode strings::
85* Comparing Unicode strings::
86* Searching for a character::
87* Counting characters::
88@end menu
89
90@node Iterating
91@subsection Iterating over a Unicode string
92
93@cindex iterating
94The following functions inspect and return details about the first character
95in a Unicode string.
96
97@deftypefun int u8_mblen (const uint8_t *@var{s}, size_t @var{n})
98@deftypefunx int u16_mblen (const uint16_t *@var{s}, size_t @var{n})
99@deftypefunx int u32_mblen (const uint32_t *@var{s}, size_t @var{n})
100Returns the length (number of units) of the first character in @var{s},
101examining at most @var{n} units.  Returns 0 if it is the NUL character.  Returns -1
102upon failure.
103
104This function is similar to @posixfunc{mblen}, except that it operates on a
105Unicode string and that @var{s} must not be NULL.
106@end deftypefun
107
108@deftypefun int u8_mbtouc (ucs4_t *@var{puc}, const uint8_t *@var{s}, size_t @var{n})
109@deftypefunx int u16_mbtouc (ucs4_t *@var{puc}, const uint16_t *@var{s}, size_t @var{n})
110@deftypefunx int u32_mbtouc (ucs4_t *@var{puc}, const uint32_t *@var{s}, size_t @var{n})
111Returns the length (number of units) of the first character in @var{s},
112putting its @code{ucs4_t} representation in @code{*@var{puc}}.  Upon failure,
113@code{*@var{puc}} is set to @code{0xfffd}, and an appropriate number of units
114is returned.
115
116The number of available units, @var{n}, must be > 0.
117
118This function fails if an invalid sequence of units is encountered at the
119beginning of @var{s}, or if additional units (after the @var{n} provided units)
120would be needed to form a character.
121
122This function is similar to @posixfunc{mbtowc}, except that it operates on a
123Unicode string, @var{puc} and @var{s} must not be NULL, @var{n} must be > 0,
124and the NUL character is not treated specially.
125@end deftypefun
126
127@deftypefun int u8_mbtouc_unsafe (ucs4_t *@var{puc}, const uint8_t *@var{s}, size_t @var{n})
128@deftypefunx int u16_mbtouc_unsafe (ucs4_t *@var{puc}, const uint16_t *@var{s}, size_t @var{n})
129@deftypefunx int u32_mbtouc_unsafe (ucs4_t *@var{puc}, const uint32_t *@var{s}, size_t @var{n})
130This function is identical to @code{u8_mbtouc}/@code{u16_mbtouc}/@code{u32_mbtouc}.
131Earlier versions of this function performed fewer range-checks on the sequence
132of units.
133@end deftypefun
134
135@deftypefun int u8_mbtoucr (ucs4_t *@var{puc}, const uint8_t *@var{s}, size_t @var{n})
136@deftypefunx int u16_mbtoucr (ucs4_t *@var{puc}, const uint16_t *@var{s}, size_t @var{n})
137@deftypefunx int u32_mbtoucr (ucs4_t *@var{puc}, const uint32_t *@var{s}, size_t @var{n})
138Returns the length (number of units) of the first character in @var{s},
139putting its @code{ucs4_t} representation in @code{*@var{puc}}.  Upon failure,
140@code{*@var{puc}} is set to @code{0xfffd}, and -1 is returned for an invalid
141sequence of units, -2 is returned for an incomplete sequence of units.
142
143The number of available units, @var{n}, must be > 0.
144
145This function is similar to @code{u8_mbtouc}, except that the return value
146gives more details about the failure, similar to @posixfunc{mbrtowc}.
147@end deftypefun
148
149@node Creating Unicode strings
150@subsection Creating Unicode strings one character at a time
151
152The following function stores a Unicode character as a Unicode string in
153memory.
154
155@deftypefun int u8_uctomb (uint8_t *@var{s}, ucs4_t @var{uc}, int @var{n})
156@deftypefunx int u16_uctomb (uint16_t *@var{s}, ucs4_t @var{uc}, int @var{n})
157@deftypefunx int u32_uctomb (uint32_t *@var{s}, ucs4_t @var{uc}, int @var{n})
158Puts the multibyte character represented by @var{uc} in @var{s}, returning its
159length.  Returns -1 upon failure, -2 if the number of available units, @var{n},
160is too small.  The latter case cannot occur if @var{n} >= 6/2/1, respectively.
161
162This function is similar to @posixfunc{wctomb}, except that it operates on a
163Unicode string, @var{s} must not be NULL, and the argument @var{n} must be
164specified.
165@end deftypefun
166
167@node Copying Unicode strings
168@subsection Copying Unicode strings
169
170@cindex copying
171The following functions copy Unicode strings in memory.
172
173@deftypefun {uint8_t *} u8_cpy (uint8_t *@var{dest}, const uint8_t *@var{src}, size_t @var{n})
174@deftypefunx {uint16_t *} u16_cpy (uint16_t *@var{dest}, const uint16_t *@var{src}, size_t @var{n})
175@deftypefunx {uint32_t *} u32_cpy (uint32_t *@var{dest}, const uint32_t *@var{src}, size_t @var{n})
176Copies @var{n} units from @var{src} to @var{dest}.
177
178This function is similar to @posixfunc{memcpy}, except that it operates on
179Unicode strings.
180@end deftypefun
181
182@deftypefun {uint8_t *} u8_move (uint8_t *@var{dest}, const uint8_t *@var{src}, size_t @var{n})
183@deftypefunx {uint16_t *} u16_move (uint16_t *@var{dest}, const uint16_t *@var{src}, size_t @var{n})
184@deftypefunx {uint32_t *} u32_move (uint32_t *@var{dest}, const uint32_t *@var{src}, size_t @var{n})
185Copies @var{n} units from @var{src} to @var{dest}, guaranteeing correct
186behavior for overlapping memory areas.
187
188This function is similar to @posixfunc{memmove}, except that it operates on
189Unicode strings.
190@end deftypefun
191
192The following function fills a Unicode string.
193
194@deftypefun {uint8_t *} u8_set (uint8_t *@var{s}, ucs4_t @var{uc}, size_t @var{n})
195@deftypefunx {uint16_t *} u16_set (uint16_t *@var{s}, ucs4_t @var{uc}, size_t @var{n})
196@deftypefunx {uint32_t *} u32_set (uint32_t *@var{s}, ucs4_t @var{uc}, size_t @var{n})
197Sets the first @var{n} characters of @var{s} to @var{uc}.  @var{uc} should be
198a character that occupies only 1 unit.
199
200This function is similar to @posixfunc{memset}, except that it operates on
201Unicode strings.
202@end deftypefun
203
204@node Comparing Unicode strings
205@subsection Comparing Unicode strings
206
207@cindex comparing
208The following function compares two Unicode strings of the same length.
209
210@deftypefun int u8_cmp (const uint8_t *@var{s1}, const uint8_t *@var{s2}, size_t @var{n})
211@deftypefunx int u16_cmp (const uint16_t *@var{s1}, const uint16_t *@var{s2}, size_t @var{n})
212@deftypefunx int u32_cmp (const uint32_t *@var{s1}, const uint32_t *@var{s2}, size_t @var{n})
213Compares @var{s1} and @var{s2}, each of length @var{n}, lexicographically.
214Returns a negative value if @var{s1} compares smaller than @var{s2},
215a positive value if @var{s1} compares larger than @var{s2}, or 0 if
216they compare equal.
217
218This function is similar to @posixfunc{memcmp}, except that it operates on
219Unicode strings.
220@end deftypefun
221
222The following function compares two Unicode strings of possibly different
223lengths.
224
225@deftypefun int u8_cmp2 (const uint8_t *@var{s1}, size_t @var{n1}, const uint8_t *@var{s2}, size_t @var{n2})
226@deftypefunx int u16_cmp2 (const uint16_t *@var{s1}, size_t @var{n1}, const uint16_t *@var{s2}, size_t @var{n2})
227@deftypefunx int u32_cmp2 (const uint32_t *@var{s1}, size_t @var{n1}, const uint32_t *@var{s2}, size_t @var{n2})
228Compares @var{s1} and @var{s2}, lexicographically.
229Returns a negative value if @var{s1} compares smaller than @var{s2},
230a positive value if @var{s1} compares larger than @var{s2}, or 0 if
231they compare equal.
232
233This function is similar to the gnulib function @func{memcmp2}, except that it
234operates on Unicode strings.
235@end deftypefun
236
237@node Searching for a character
238@subsection Searching for a character in a Unicode string
239
240@cindex searching, for a character
241The following function searches for a given Unicode character.
242
243@deftypefun {uint8_t *} u8_chr (const uint8_t *@var{s}, size_t @var{n}, ucs4_t @var{uc})
244@deftypefunx {uint16_t *} u16_chr (const uint16_t *@var{s}, size_t @var{n}, ucs4_t @var{uc})
245@deftypefunx {uint32_t *} u32_chr (const uint32_t *@var{s}, size_t @var{n}, ucs4_t @var{uc})
246Searches the string at @var{s} for @var{uc}.  Returns a pointer to the first
247occurrence of @var{uc} in @var{s}, or NULL if @var{uc} does not occur in
248@var{s}.
249
250This function is similar to @posixfunc{memchr}, except that it operates on
251Unicode strings.
252@end deftypefun
253
254@node Counting characters
255@subsection Counting the characters in a Unicode string
256
257@cindex counting
258The following function counts the number of Unicode characters.
259
260@deftypefun size_t u8_mbsnlen (const uint8_t *@var{s}, size_t @var{n})
261@deftypefunx size_t u16_mbsnlen (const uint16_t *@var{s}, size_t @var{n})
262@deftypefunx size_t u32_mbsnlen (const uint32_t *@var{s}, size_t @var{n})
263Counts and returns the number of Unicode characters in the @var{n} units
264from @var{s}.
265
266This function is similar to the gnulib function @func{mbsnlen}, except that
267it operates on Unicode strings.
268@end deftypefun
269
270@node Elementary string functions with memory allocation
271@section Elementary string functions with memory allocation
272
273@cindex duplicating
274The following function copies a Unicode string.
275
276@deftypefun {uint8_t *} u8_cpy_alloc (const uint8_t *@var{s}, size_t @var{n})
277@deftypefunx {uint16_t *} u16_cpy_alloc (const uint16_t *@var{s}, size_t @var{n})
278@deftypefunx {uint32_t *} u32_cpy_alloc (const uint32_t *@var{s}, size_t @var{n})
279Makes a freshly allocated copy of @var{s}, of length @var{n}.
280@end deftypefun
281
282@node Elementary string functions on NUL terminated strings
283@section Elementary string functions on NUL terminated strings
284
285@menu
286* Iterating over a NUL terminated Unicode string::
287* Length::
288* Copying a NUL terminated Unicode string::
289* Comparing NUL terminated Unicode strings::
290* Duplicating a NUL terminated Unicode string::
291* Searching for a character in a NUL terminated Unicode string::
292* Searching for a substring::
293* Tokenizing::
294@end menu
295
296@node Iterating over a NUL terminated Unicode string
297@subsection Iterating over a NUL terminated Unicode string
298
299The following functions inspect and return details about the first character
300in a Unicode string.
301
302@deftypefun int u8_strmblen (const uint8_t *@var{s})
303@deftypefunx int u16_strmblen (const uint16_t *@var{s})
304@deftypefunx int u32_strmblen (const uint32_t *@var{s})
305Returns the length (number of units) of the first character in @var{s}.
306Returns 0 if it is the NUL character.  Returns -1 upon failure.
307@end deftypefun
308
309@cindex iterating
310@deftypefun int u8_strmbtouc (ucs4_t *@var{puc}, const uint8_t *@var{s})
311@deftypefunx int u16_strmbtouc (ucs4_t *@var{puc}, const uint16_t *@var{s})
312@deftypefunx int u32_strmbtouc (ucs4_t *@var{puc}, const uint32_t *@var{s})
313Returns the length (number of units) of the first character in @var{s},
314putting its @code{ucs4_t} representation in @code{*@var{puc}}.  Returns 0
315if it is the NUL character.  Returns -1 upon failure.
316@end deftypefun
317
318@deftypefun {const uint8_t *} u8_next (ucs4_t *@var{puc}, const uint8_t *@var{s})
319@deftypefunx {const uint16_t *} u16_next (ucs4_t *@var{puc}, const uint16_t *@var{s})
320@deftypefunx {const uint32_t *} u32_next (ucs4_t *@var{puc}, const uint32_t *@var{s})
321Forward iteration step.  Advances the pointer past the next character,
322or returns NULL if the end of the string has been reached.  Puts the
323character's @code{ucs4_t} representation in @code{*@var{puc}}.
324@end deftypefun
325
326The following function inspects and returns details about the previous
327character in a Unicode string.
328
329@deftypefun {const uint8_t *} u8_prev (ucs4_t *@var{puc}, const uint8_t *@var{s}, const uint8_t *@var{start})
330@deftypefunx {const uint16_t *} u16_prev (ucs4_t *@var{puc}, const uint16_t *@var{s}, const uint16_t *@var{start})
331@deftypefunx {const uint32_t *} u32_prev (ucs4_t *@var{puc}, const uint32_t *@var{s}, const uint32_t *@var{start})
332Backward iteration step.  Advances the pointer to point to the previous
333character (the one that ends at @code{@var{s}}), or returns NULL if the
334beginning of the string (specified by @code{@var{start}}) has been reached.
335Puts the character's @code{ucs4_t} representation in @code{*@var{puc}}.
336Note that this function works only on well-formed Unicode strings.
337@end deftypefun
338
339@node Length
340@subsection Length of a NUL terminated Unicode string
341
342The following functions determine the length of a Unicode string.
343
344@deftypefun size_t u8_strlen (const uint8_t *@var{s})
345@deftypefunx size_t u16_strlen (const uint16_t *@var{s})
346@deftypefunx size_t u32_strlen (const uint32_t *@var{s})
347Returns the number of units in @var{s}.
348
349This function is similar to @posixfunc{strlen} and @posixfunc{wcslen}, except
350that it operates on Unicode strings.
351@end deftypefun
352
353@deftypefun size_t u8_strnlen (const uint8_t *@var{s}, size_t @var{maxlen})
354@deftypefunx size_t u16_strnlen (const uint16_t *@var{s}, size_t @var{maxlen})
355@deftypefunx size_t u32_strnlen (const uint32_t *@var{s}, size_t @var{maxlen})
356Returns the number of units in @var{s}, but at most @var{maxlen}.
357
358This function is similar to @posixfunc{strnlen} and @posixfunc{wcsnlen}, except
359that it operates on Unicode strings.
360@end deftypefun
361
362@node Copying a NUL terminated Unicode string
363@subsection Copying a NUL terminated Unicode string
364
365@cindex copying
366The following functions copy portions of Unicode strings in memory.
367
368@deftypefun {uint8_t *} u8_strcpy (uint8_t *@var{dest}, const uint8_t *@var{src})
369@deftypefunx {uint16_t *} u16_strcpy (uint16_t *@var{dest}, const uint16_t *@var{src})
370@deftypefunx {uint32_t *} u32_strcpy (uint32_t *@var{dest}, const uint32_t *@var{src})
371Copies @var{src} to @var{dest}.
372
373This function is similar to @posixfunc{strcpy} and @posixfunc{wcscpy}, except
374that it operates on Unicode strings.
375@end deftypefun
376
377@deftypefun {uint8_t *} u8_stpcpy (uint8_t *@var{dest}, const uint8_t *@var{src})
378@deftypefunx {uint16_t *} u16_stpcpy (uint16_t *@var{dest}, const uint16_t *@var{src})
379@deftypefunx {uint32_t *} u32_stpcpy (uint32_t *@var{dest}, const uint32_t *@var{src})
380Copies @var{src} to @var{dest}, returning the address of the terminating NUL
381in @var{dest}.
382
383This function is similar to @posixfunc{stpcpy}, except that it operates on
384Unicode strings.
385@end deftypefun
386
387@deftypefun {uint8_t *} u8_strncpy (uint8_t *@var{dest}, const uint8_t *@var{src}, size_t @var{n})
388@deftypefunx {uint16_t *} u16_strncpy (uint16_t *@var{dest}, const uint16_t *@var{src}, size_t @var{n})
389@deftypefunx {uint32_t *} u32_strncpy (uint32_t *@var{dest}, const uint32_t *@var{src}, size_t @var{n})
390Copies no more than @var{n} units of @var{src} to @var{dest}.
391
392This function is similar to @posixfunc{strncpy} and @posixfunc{wcsncpy}, except
393that it operates on Unicode strings.
394@end deftypefun
395
396@deftypefun {uint8_t *} u8_stpncpy (uint8_t *@var{dest}, const uint8_t *@var{src}, size_t @var{n})
397@deftypefunx {uint16_t *} u16_stpncpy (uint16_t *@var{dest}, const uint16_t *@var{src}, size_t @var{n})
398@deftypefunx {uint32_t *} u32_stpncpy (uint32_t *@var{dest}, const uint32_t *@var{src}, size_t @var{n})
399Copies no more than @var{n} units of @var{src} to @var{dest}.  Returns a
400pointer past the last non-NUL unit written into @var{dest}.  In other words,
401if the units written into @var{dest} include a NUL, the return value is the
402address of the first such NUL unit, otherwise it is
403@code{@var{dest} + @var{n}}.
404
405This function is similar to @posixfunc{stpncpy}, except that it operates on
406Unicode strings.
407@end deftypefun
408
409@deftypefun {uint8_t *} u8_strcat (uint8_t *@var{dest}, const uint8_t *@var{src})
410@deftypefunx {uint16_t *} u16_strcat (uint16_t *@var{dest}, const uint16_t *@var{src})
411@deftypefunx {uint32_t *} u32_strcat (uint32_t *@var{dest}, const uint32_t *@var{src})
412Appends @var{src} onto @var{dest}.
413
414This function is similar to @posixfunc{strcat} and @posixfunc{wcscat}, except
415that it operates on Unicode strings.
416@end deftypefun
417
418@deftypefun {uint8_t *} u8_strncat (uint8_t *@var{dest}, const uint8_t *@var{src}, size_t @var{n})
419@deftypefunx {uint16_t *} u16_strncat (uint16_t *@var{dest}, const uint16_t *@var{src}, size_t @var{n})
420@deftypefunx {uint32_t *} u32_strncat (uint32_t *@var{dest}, const uint32_t *@var{src}, size_t @var{n})
421Appends no more than @var{n} units of @var{src} onto @var{dest}.
422
423This function is similar to @posixfunc{strncat} and @posixfunc{wcsncat}, except
424that it operates on Unicode strings.
425@end deftypefun
426
427@node Comparing NUL terminated Unicode strings
428@subsection Comparing NUL terminated Unicode strings
429
430@cindex comparing
431The following functions compare two Unicode strings.
432
433@deftypefun int u8_strcmp (const uint8_t *@var{s1}, const uint8_t *@var{s2})
434@deftypefunx int u16_strcmp (const uint16_t *@var{s1}, const uint16_t *@var{s2})
435@deftypefunx int u32_strcmp (const uint32_t *@var{s1}, const uint32_t *@var{s2})
436Compares @var{s1} and @var{s2}, lexicographically.
437Returns a negative value if @var{s1} compares smaller than @var{s2},
438a positive value if @var{s1} compares larger than @var{s2}, or 0 if
439they compare equal.
440
441This function is similar to @posixfunc{strcmp} and @posixfunc{wcscmp}, except
442that it operates on Unicode strings.
443@end deftypefun
444
445@cindex comparing, with collation rules
446@deftypefun int u8_strcoll (const uint8_t *@var{s1}, const uint8_t *@var{s2})
447@deftypefunx int u16_strcoll (const uint16_t *@var{s1}, const uint16_t *@var{s2})
448@deftypefunx int u32_strcoll (const uint32_t *@var{s1}, const uint32_t *@var{s2})
449Compares @var{s1} and @var{s2} using the collation rules of the current
450locale.
451Returns -1 if @var{s1} < @var{s2}, 0 if @var{s1} = @var{s2}, 1 if
452@var{s1} > @var{s2}.  Upon failure, sets @code{errno} and returns any value.
453
454This function is similar to @posixfunc{strcoll} and @posixfunc{wcscoll}, except
455that it operates on Unicode strings.
456
457Note that this function may consider different canonical normalizations
458of the same string as having a large distance.  It is therefore better to
459use the function @code{u8_normcoll} instead of this one; see @ref{uninorm.h}.
460@end deftypefun
461
462@deftypefun int u8_strncmp (const uint8_t *@var{s1}, const uint8_t *@var{s2}, size_t @var{n})
463@deftypefunx int u16_strncmp (const uint16_t *@var{s1}, const uint16_t *@var{s2}, size_t @var{n})
464@deftypefunx int u32_strncmp (const uint32_t *@var{s1}, const uint32_t *@var{s2}, size_t @var{n})
465Compares no more than @var{n} units of @var{s1} and @var{s2}.
466
467This function is similar to @posixfunc{strncmp} and @posixfunc{wcsncmp}, except
468that it operates on Unicode strings.
469@end deftypefun
470
471@node Duplicating a NUL terminated Unicode string
472@subsection Duplicating a NUL terminated Unicode string
473
474@cindex duplicating
475The following function allocates a duplicate of a Unicode string.
476
477@deftypefun {uint8_t *} u8_strdup (const uint8_t *@var{s})
478@deftypefunx {uint16_t *} u16_strdup (const uint16_t *@var{s})
479@deftypefunx {uint32_t *} u32_strdup (const uint32_t *@var{s})
480Duplicates @var{s}, returning an identical malloc'd string.
481
482This function is similar to @posixfunc{strdup} and @posixfunc{wcsdup}, except
483that it operates on Unicode strings.
484@end deftypefun
485
486@node Searching for a character in a NUL terminated Unicode string
487@subsection Searching for a character in a NUL terminated Unicode string
488
489@cindex searching, for a character
490The following functions search for a given Unicode character.
491
492@deftypefun {uint8_t *} u8_strchr (const uint8_t *@var{str}, ucs4_t @var{uc})
493@deftypefunx {uint16_t *} u16_strchr (const uint16_t *@var{str}, ucs4_t @var{uc})
494@deftypefunx {uint32_t *} u32_strchr (const uint32_t *@var{str}, ucs4_t @var{uc})
495Finds the first occurrence of @var{uc} in @var{str}.
496
497This function is similar to @posixfunc{strchr} and @posixfunc{wcschr}, except
498that it operates on Unicode strings.
499@end deftypefun
500
501@deftypefun {uint8_t *} u8_strrchr (const uint8_t *@var{str}, ucs4_t @var{uc})
502@deftypefunx {uint16_t *} u16_strrchr (const uint16_t *@var{str}, ucs4_t @var{uc})
503@deftypefunx {uint32_t *} u32_strrchr (const uint32_t *@var{str}, ucs4_t @var{uc})
504Finds the last occurrence of @var{uc} in @var{str}.
505
506This function is similar to @posixfunc{strrchr} and @posixfunc{wcsrchr}, except
507that it operates on Unicode strings.
508@end deftypefun
509
510The following functions search for the first occurrence of some Unicode
511character in or outside a given set of Unicode characters.
512
513@deftypefun size_t u8_strcspn (const uint8_t *@var{str}, const uint8_t *@var{reject})
514@deftypefunx size_t u16_strcspn (const uint16_t *@var{str}, const uint16_t *@var{reject})
515@deftypefunx size_t u32_strcspn (const uint32_t *@var{str}, const uint32_t *@var{reject})
516Returns the length of the initial segment of @var{str} which consists entirely
517of Unicode characters not in @var{reject}.
518
519This function is similar to @posixfunc{strcspn} and @posixfunc{wcscspn}, except
520that it operates on Unicode strings.
521@end deftypefun
522
523@deftypefun size_t u8_strspn (const uint8_t *@var{str}, const uint8_t *@var{accept})
524@deftypefunx size_t u16_strspn (const uint16_t *@var{str}, const uint16_t *@var{accept})
525@deftypefunx size_t u32_strspn (const uint32_t *@var{str}, const uint32_t *@var{accept})
526Returns the length of the initial segment of @var{str} which consists entirely
527of Unicode characters in @var{accept}.
528
529This function is similar to @posixfunc{strspn} and @posixfunc{wcsspn}, except
530that it operates on Unicode strings.
531@end deftypefun
532
533@deftypefun {uint8_t *} u8_strpbrk (const uint8_t *@var{str}, const uint8_t *@var{accept})
534@deftypefunx {uint16_t *} u16_strpbrk (const uint16_t *@var{str}, const uint16_t *@var{accept})
535@deftypefunx {uint32_t *} u32_strpbrk (const uint32_t *@var{str}, const uint32_t *@var{accept})
536Finds the first occurrence in @var{str} of any character in @var{accept}.
537
538This function is similar to @posixfunc{strpbrk} and @posixfunc{wcspbrk}, except
539that it operates on Unicode strings.
540@end deftypefun
541
542@node Searching for a substring
543@subsection Searching for a substring in a NUL terminated Unicode string
544
545@cindex searching, for a substring
546The following functions determine whether a given Unicode string is a substring
547of another Unicode string.
548
549@deftypefun {uint8_t *} u8_strstr (const uint8_t *@var{haystack}, const uint8_t *@var{needle})
550@deftypefunx {uint16_t *} u16_strstr (const uint16_t *@var{haystack}, const uint16_t *@var{needle})
551@deftypefunx {uint32_t *} u32_strstr (const uint32_t *@var{haystack}, const uint32_t *@var{needle})
552Finds the first occurrence of @var{needle} in @var{haystack}.
553
554This function is similar to @posixfunc{strstr} and @posixfunc{wcsstr}, except
555that it operates on Unicode strings.
556@end deftypefun
557
558@deftypefun bool u8_startswith (const uint8_t *@var{str}, const uint8_t *@var{prefix})
559@deftypefunx bool u16_startswith (const uint16_t *@var{str}, const uint16_t *@var{prefix})
560@deftypefunx bool u32_startswith (const uint32_t *@var{str}, const uint32_t *@var{prefix})
561Tests whether @var{str} starts with @var{prefix}.
562@end deftypefun
563
564@deftypefun bool u8_endswith (const uint8_t *@var{str}, const uint8_t *@var{suffix})
565@deftypefunx bool u16_endswith (const uint16_t *@var{str}, const uint16_t *@var{suffix})
566@deftypefunx bool u32_endswith (const uint32_t *@var{str}, const uint32_t *@var{suffix})
567Tests whether @var{str} ends with @var{suffix}.
568@end deftypefun
569
570@node Tokenizing
571@subsection Tokenizing a NUL terminated Unicode string
572
573The following function does one step in tokenizing a Unicode string.
574
575@deftypefun {uint8_t *} u8_strtok (uint8_t *@var{str}, const uint8_t *@var{delim}, uint8_t **@var{ptr})
576@deftypefunx {uint16_t *} u16_strtok (uint16_t *@var{str}, const uint16_t *@var{delim}, uint16_t **@var{ptr})
577@deftypefunx {uint32_t *} u32_strtok (uint32_t *@var{str}, const uint32_t *@var{delim}, uint32_t **@var{ptr})
578Divides @var{str} into tokens separated by characters in @var{delim}.
579
580This function is similar to @posixfunc{strtok_r} and @posixfunc{wcstok}, except
581that it operates on Unicode strings.  Its interface is actually more similar to
582@code{wcstok} than to @code{strtok}.
583@end deftypefun
584