1@node unistr.h 2@chapter Elementary Unicode string functions @code{<unistr.h>} 3 4This include file declares elementary functions for Unicode strings. It is 5essentially the equivalent of what @code{<string.h>} is for C strings. 6 7@menu 8* Elementary string checks:: 9* Elementary string conversions:: 10* Elementary string functions:: 11* Elementary string functions with memory allocation:: 12* Elementary string functions on NUL terminated strings:: 13@end menu 14 15@node Elementary string checks 16@section Elementary string checks 17 18@cindex validity 19@cindex verification 20@cindex well-formed 21The following function is available to verify the integrity of a Unicode string. 22 23@deftypefun {const uint8_t *} u8_check (const uint8_t *@var{s}, size_t @var{n}) 24@deftypefunx {const uint16_t *} u16_check (const uint16_t *@var{s}, size_t @var{n}) 25@deftypefunx {const uint32_t *} u32_check (const uint32_t *@var{s}, size_t @var{n}) 26This function checks whether a Unicode string is well-formed. 27It returns NULL if valid, or a pointer to the first invalid unit otherwise. 28@end deftypefun 29 30@node Elementary string conversions 31@section Elementary string conversions 32 33@cindex converting 34The following functions perform conversions between the different forms of Unicode strings. 35 36@deftypefun {uint16_t *} u8_to_u16 (const uint8_t *@var{s}, size_t @var{n}, uint16_t *@var{resultbuf}, size_t *@var{lengthp}) 37Converts an UTF-8 string to an UTF-16 string. 38 39The @var{resultbuf} and @var{lengthp} arguments are as described in 40chapter @ref{Conventions}. 41@end deftypefun 42 43@deftypefun {uint32_t *} u8_to_u32 (const uint8_t *@var{s}, size_t @var{n}, uint32_t *@var{resultbuf}, size_t *@var{lengthp}) 44Converts an UTF-8 string to an UTF-32 string. 45 46The @var{resultbuf} and @var{lengthp} arguments are as described in 47chapter @ref{Conventions}. 
48@end deftypefun 49 50@deftypefun {uint8_t *} u16_to_u8 (const uint16_t *@var{s}, size_t @var{n}, uint8_t *@var{resultbuf}, size_t *@var{lengthp}) 51Converts an UTF-16 string to an UTF-8 string. 52 53The @var{resultbuf} and @var{lengthp} arguments are as described in 54chapter @ref{Conventions}. 55@end deftypefun 56 57@deftypefun {uint32_t *} u16_to_u32 (const uint16_t *@var{s}, size_t @var{n}, uint32_t *@var{resultbuf}, size_t *@var{lengthp}) 58Converts an UTF-16 string to an UTF-32 string. 59 60The @var{resultbuf} and @var{lengthp} arguments are as described in 61chapter @ref{Conventions}. 62@end deftypefun 63 64@deftypefun {uint8_t *} u32_to_u8 (const uint32_t *@var{s}, size_t @var{n}, uint8_t *@var{resultbuf}, size_t *@var{lengthp}) 65Converts an UTF-32 string to an UTF-8 string. 66 67The @var{resultbuf} and @var{lengthp} arguments are as described in 68chapter @ref{Conventions}. 69@end deftypefun 70 71@deftypefun {uint16_t *} u32_to_u16 (const uint32_t *@var{s}, size_t @var{n}, uint16_t *@var{resultbuf}, size_t *@var{lengthp}) 72Converts an UTF-32 string to an UTF-16 string. 73 74The @var{resultbuf} and @var{lengthp} arguments are as described in 75chapter @ref{Conventions}. 76@end deftypefun 77 78@node Elementary string functions 79@section Elementary string functions 80 81@menu 82* Iterating:: 83* Creating Unicode strings:: 84* Copying Unicode strings:: 85* Comparing Unicode strings:: 86* Searching for a character:: 87* Counting characters:: 88@end menu 89 90@node Iterating 91@subsection Iterating over a Unicode string 92 93@cindex iterating 94The following functions inspect and return details about the first character 95in a Unicode string. 
96 97@deftypefun int u8_mblen (const uint8_t *@var{s}, size_t @var{n}) 98@deftypefunx int u16_mblen (const uint16_t *@var{s}, size_t @var{n}) 99@deftypefunx int u32_mblen (const uint32_t *@var{s}, size_t @var{n}) 100Returns the length (number of units) of the first character in @var{s}, which 101is no longer than @var{n}. Returns 0 if it is the NUL character. Returns -1 102upon failure. 103 104This function is similar to @posixfunc{mblen}, except that it operates on a 105Unicode string and that @var{s} must not be NULL. 106@end deftypefun 107 108@deftypefun int u8_mbtouc (ucs4_t *@var{puc}, const uint8_t *@var{s}, size_t @var{n}) 109@deftypefunx int u16_mbtouc (ucs4_t *@var{puc}, const uint16_t *@var{s}, size_t @var{n}) 110@deftypefunx int u32_mbtouc (ucs4_t *@var{puc}, const uint32_t *@var{s}, size_t @var{n}) 111Returns the length (number of units) of the first character in @var{s}, 112putting its @code{ucs4_t} representation in @code{*@var{puc}}. Upon failure, 113@code{*@var{puc}} is set to @code{0xfffd}, and an appropriate number of units 114is returned. 115 116The number of available units, @var{n}, must be > 0. 117 118This function fails if an invalid sequence of units is encountered at the 119beginning of @var{s}, or if additional units (after the @var{n} provided units) 120would be needed to form a character. 121 122This function is similar to @posixfunc{mbtowc}, except that it operates on a 123Unicode string, @var{puc} and @var{s} must not be NULL, @var{n} must be > 0, 124and the NUL character is not treated specially. 125@end deftypefun 126 127@deftypefun int u8_mbtouc_unsafe (ucs4_t *@var{puc}, const uint8_t *@var{s}, size_t @var{n}) 128@deftypefunx int u16_mbtouc_unsafe (ucs4_t *@var{puc}, const uint16_t *@var{s}, size_t @var{n}) 129@deftypefunx int u32_mbtouc_unsafe (ucs4_t *@var{puc}, const uint32_t *@var{s}, size_t @var{n}) 130This function is identical to @code{u8_mbtouc}/@code{u16_mbtouc}/@code{u32_mbtouc}. 
 131Earlier versions of this function performed fewer range-checks on the sequence 132of units. 133@end deftypefun 134 135@deftypefun int u8_mbtoucr (ucs4_t *@var{puc}, const uint8_t *@var{s}, size_t @var{n}) 136@deftypefunx int u16_mbtoucr (ucs4_t *@var{puc}, const uint16_t *@var{s}, size_t @var{n}) 137@deftypefunx int u32_mbtoucr (ucs4_t *@var{puc}, const uint32_t *@var{s}, size_t @var{n}) 138Returns the length (number of units) of the first character in @var{s}, 139putting its @code{ucs4_t} representation in @code{*@var{puc}}. Upon failure, 140@code{*@var{puc}} is set to @code{0xfffd}, and -1 is returned for an invalid 141sequence of units, -2 is returned for an incomplete sequence of units. 142 143The number of available units, @var{n}, must be > 0. 144 145This function is similar to @code{u8_mbtouc}, except that the return value 146gives more details about the failure, similar to @posixfunc{mbrtowc}. 147@end deftypefun 148 149@node Creating Unicode strings 150@subsection Creating Unicode strings one character at a time 151 152The following function stores a Unicode character as a Unicode string in 153memory. 154 155@deftypefun int u8_uctomb (uint8_t *@var{s}, ucs4_t @var{uc}, int @var{n}) 156@deftypefunx int u16_uctomb (uint16_t *@var{s}, ucs4_t @var{uc}, int @var{n}) 157@deftypefunx int u32_uctomb (uint32_t *@var{s}, ucs4_t @var{uc}, int @var{n}) 158Puts the multibyte character represented by @var{uc} in @var{s}, returning its 159length. Returns -1 upon failure, -2 if the number of available units, @var{n}, 160is too small. The latter case cannot occur if @var{n} >= 6/2/1, respectively. 161 162This function is similar to @posixfunc{wctomb}, except that it operates on 163Unicode strings, @var{s} must not be NULL, and the argument @var{n} must be 164specified. 165@end deftypefun 166 167@node Copying Unicode strings 168@subsection Copying Unicode strings 169 170@cindex copying 171The following functions copy Unicode strings in memory. 
172 173@deftypefun {uint8_t *} u8_cpy (uint8_t *@var{dest}, const uint8_t *@var{src}, size_t @var{n}) 174@deftypefunx {uint16_t *} u16_cpy (uint16_t *@var{dest}, const uint16_t *@var{src}, size_t @var{n}) 175@deftypefunx {uint32_t *} u32_cpy (uint32_t *@var{dest}, const uint32_t *@var{src}, size_t @var{n}) 176Copies @var{n} units from @var{src} to @var{dest}. 177 178This function is similar to @posixfunc{memcpy}, except that it operates on 179Unicode strings. 180@end deftypefun 181 182@deftypefun {uint8_t *} u8_move (uint8_t *@var{dest}, const uint8_t *@var{src}, size_t @var{n}) 183@deftypefunx {uint16_t *} u16_move (uint16_t *@var{dest}, const uint16_t *@var{src}, size_t @var{n}) 184@deftypefunx {uint32_t *} u32_move (uint32_t *@var{dest}, const uint32_t *@var{src}, size_t @var{n}) 185Copies @var{n} units from @var{src} to @var{dest}, guaranteeing correct 186behavior for overlapping memory areas. 187 188This function is similar to @posixfunc{memmove}, except that it operates on 189Unicode strings. 190@end deftypefun 191 192The following function fills a Unicode string. 193 194@deftypefun {uint8_t *} u8_set (uint8_t *@var{s}, ucs4_t @var{uc}, size_t @var{n}) 195@deftypefunx {uint16_t *} u16_set (uint16_t *@var{s}, ucs4_t @var{uc}, size_t @var{n}) 196@deftypefunx {uint32_t *} u32_set (uint32_t *@var{s}, ucs4_t @var{uc}, size_t @var{n}) 197Sets the first @var{n} characters of @var{s} to @var{uc}. @var{uc} should be 198a character that occupies only 1 unit. 199 200This function is similar to @posixfunc{memset}, except that it operates on 201Unicode strings. 202@end deftypefun 203 204@node Comparing Unicode strings 205@subsection Comparing Unicode strings 206 207@cindex comparing 208The following function compares two Unicode strings of the same length. 
209 210@deftypefun int u8_cmp (const uint8_t *@var{s1}, const uint8_t *@var{s2}, size_t @var{n}) 211@deftypefunx int u16_cmp (const uint16_t *@var{s1}, const uint16_t *@var{s2}, size_t @var{n}) 212@deftypefunx int u32_cmp (const uint32_t *@var{s1}, const uint32_t *@var{s2}, size_t @var{n}) 213Compares @var{s1} and @var{s2}, each of length @var{n}, lexicographically. 214Returns a negative value if @var{s1} compares smaller than @var{s2}, 215a positive value if @var{s1} compares larger than @var{s2}, or 0 if 216they compare equal. 217 218This function is similar to @posixfunc{memcmp}, except that it operates on 219Unicode strings. 220@end deftypefun 221 222The following function compares two Unicode strings of possibly different 223lengths. 224 225@deftypefun int u8_cmp2 (const uint8_t *@var{s1}, size_t @var{n1}, const uint8_t *@var{s2}, size_t @var{n2}) 226@deftypefunx int u16_cmp2 (const uint16_t *@var{s1}, size_t @var{n1}, const uint16_t *@var{s2}, size_t @var{n2}) 227@deftypefunx int u32_cmp2 (const uint32_t *@var{s1}, size_t @var{n1}, const uint32_t *@var{s2}, size_t @var{n2}) 228Compares @var{s1} and @var{s2}, lexicographically. 229Returns a negative value if @var{s1} compares smaller than @var{s2}, 230a positive value if @var{s1} compares larger than @var{s2}, or 0 if 231they compare equal. 232 233This function is similar to the gnulib function @func{memcmp2}, except that it 234operates on Unicode strings. 235@end deftypefun 236 237@node Searching for a character 238@subsection Searching for a character in a Unicode string 239 240@cindex searching, for a character 241The following function searches for a given Unicode character. 242 243@deftypefun {uint8_t *} u8_chr (const uint8_t *@var{s}, size_t @var{n}, ucs4_t @var{uc}) 244@deftypefunx {uint16_t *} u16_chr (const uint16_t *@var{s}, size_t @var{n}, ucs4_t @var{uc}) 245@deftypefunx {uint32_t *} u32_chr (const uint32_t *@var{s}, size_t @var{n}, ucs4_t @var{uc}) 246Searches the string at @var{s} for @var{uc}. 
Returns a pointer to the first 247occurrence of @var{uc} in @var{s}, or NULL if @var{uc} does not occur in 248@var{s}. 249 250This function is similar to @posixfunc{memchr}, except that it operates on 251Unicode strings. 252@end deftypefun 253 254@node Counting characters 255@subsection Counting the characters in a Unicode string 256 257@cindex counting 258The following function counts the number of Unicode characters. 259 260@deftypefun size_t u8_mbsnlen (const uint8_t *@var{s}, size_t @var{n}) 261@deftypefunx size_t u16_mbsnlen (const uint16_t *@var{s}, size_t @var{n}) 262@deftypefunx size_t u32_mbsnlen (const uint32_t *@var{s}, size_t @var{n}) 263Counts and returns the number of Unicode characters in the @var{n} units 264from @var{s}. 265 266This function is similar to the gnulib function @func{mbsnlen}, except that 267it operates on Unicode strings. 268@end deftypefun 269 270@node Elementary string functions with memory allocation 271@section Elementary string functions with memory allocation 272 273@cindex duplicating 274The following function copies a Unicode string. 275 276@deftypefun {uint8_t *} u8_cpy_alloc (const uint8_t *@var{s}, size_t @var{n}) 277@deftypefunx {uint16_t *} u16_cpy_alloc (const uint16_t *@var{s}, size_t @var{n}) 278@deftypefunx {uint32_t *} u32_cpy_alloc (const uint32_t *@var{s}, size_t @var{n}) 279Makes a freshly allocated copy of @var{s}, of length @var{n}. 
280@end deftypefun 281 282@node Elementary string functions on NUL terminated strings 283@section Elementary string functions on NUL terminated strings 284 285@menu 286* Iterating over a NUL terminated Unicode string:: 287* Length:: 288* Copying a NUL terminated Unicode string:: 289* Comparing NUL terminated Unicode strings:: 290* Duplicating a NUL terminated Unicode string:: 291* Searching for a character in a NUL terminated Unicode string:: 292* Searching for a substring:: 293* Tokenizing:: 294@end menu 295 296@node Iterating over a NUL terminated Unicode string 297@subsection Iterating over a NUL terminated Unicode string 298 299The following functions inspect and return details about the first character 300in a Unicode string. 301 302@deftypefun int u8_strmblen (const uint8_t *@var{s}) 303@deftypefunx int u16_strmblen (const uint16_t *@var{s}) 304@deftypefunx int u32_strmblen (const uint32_t *@var{s}) 305Returns the length (number of units) of the first character in @var{s}. 306Returns 0 if it is the NUL character. Returns -1 upon failure. 307@end deftypefun 308 309@cindex iterating 310@deftypefun int u8_strmbtouc (ucs4_t *@var{puc}, const uint8_t *@var{s}) 311@deftypefunx int u16_strmbtouc (ucs4_t *@var{puc}, const uint16_t *@var{s}) 312@deftypefunx int u32_strmbtouc (ucs4_t *@var{puc}, const uint32_t *@var{s}) 313Returns the length (number of units) of the first character in @var{s}, 314putting its @code{ucs4_t} representation in @code{*@var{puc}}. Returns 0 315if it is the NUL character. Returns -1 upon failure. 316@end deftypefun 317 318@deftypefun {const uint8_t *} u8_next (ucs4_t *@var{puc}, const uint8_t *@var{s}) 319@deftypefunx {const uint16_t *} u16_next (ucs4_t *@var{puc}, const uint16_t *@var{s}) 320@deftypefunx {const uint32_t *} u32_next (ucs4_t *@var{puc}, const uint32_t *@var{s}) 321Forward iteration step. Advances the pointer past the next character, 322or returns NULL if the end of the string has been reached. 
 Puts the 323character's @code{ucs4_t} representation in @code{*@var{puc}}. 324@end deftypefun 325 326The following function inspects and returns details about the previous 327character in a Unicode string. 328 329@deftypefun {const uint8_t *} u8_prev (ucs4_t *@var{puc}, const uint8_t *@var{s}, const uint8_t *@var{start}) 330@deftypefunx {const uint16_t *} u16_prev (ucs4_t *@var{puc}, const uint16_t *@var{s}, const uint16_t *@var{start}) 331@deftypefunx {const uint32_t *} u32_prev (ucs4_t *@var{puc}, const uint32_t *@var{s}, const uint32_t *@var{start}) 332Backward iteration step. Advances the pointer to point to the previous 333character (the one that ends at @code{@var{s}}), or returns NULL if the 334beginning of the string (specified by @code{@var{start}}) has been reached. 335Puts the character's @code{ucs4_t} representation in @code{*@var{puc}}. 336Note that this function works only on well-formed Unicode strings. 337@end deftypefun 338 339@node Length 340@subsection Length of a NUL terminated Unicode string 341 342The following functions determine the length of a Unicode string. 343 344@deftypefun size_t u8_strlen (const uint8_t *@var{s}) 345@deftypefunx size_t u16_strlen (const uint16_t *@var{s}) 346@deftypefunx size_t u32_strlen (const uint32_t *@var{s}) 347Returns the number of units in @var{s}. 348 349This function is similar to @posixfunc{strlen} and @posixfunc{wcslen}, except 350that it operates on Unicode strings. 351@end deftypefun 352 353@deftypefun size_t u8_strnlen (const uint8_t *@var{s}, size_t @var{maxlen}) 354@deftypefunx size_t u16_strnlen (const uint16_t *@var{s}, size_t @var{maxlen}) 355@deftypefunx size_t u32_strnlen (const uint32_t *@var{s}, size_t @var{maxlen}) 356Returns the number of units in @var{s}, but at most @var{maxlen}. 357 358This function is similar to @posixfunc{strnlen} and @posixfunc{wcsnlen}, except 359that it operates on Unicode strings. 
360@end deftypefun 361 362@node Copying a NUL terminated Unicode string 363@subsection Copying a NUL terminated Unicode string 364 365@cindex copying 366The following functions copy portions of Unicode strings in memory. 367 368@deftypefun {uint8_t *} u8_strcpy (uint8_t *@var{dest}, const uint8_t *@var{src}) 369@deftypefunx {uint16_t *} u16_strcpy (uint16_t *@var{dest}, const uint16_t *@var{src}) 370@deftypefunx {uint32_t *} u32_strcpy (uint32_t *@var{dest}, const uint32_t *@var{src}) 371Copies @var{src} to @var{dest}. 372 373This function is similar to @posixfunc{strcpy} and @posixfunc{wcscpy}, except 374that it operates on Unicode strings. 375@end deftypefun 376 377@deftypefun {uint8_t *} u8_stpcpy (uint8_t *@var{dest}, const uint8_t *@var{src}) 378@deftypefunx {uint16_t *} u16_stpcpy (uint16_t *@var{dest}, const uint16_t *@var{src}) 379@deftypefunx {uint32_t *} u32_stpcpy (uint32_t *@var{dest}, const uint32_t *@var{src}) 380Copies @var{src} to @var{dest}, returning the address of the terminating NUL 381in @var{dest}. 382 383This function is similar to @posixfunc{stpcpy}, except that it operates on 384Unicode strings. 385@end deftypefun 386 387@deftypefun {uint8_t *} u8_strncpy (uint8_t *@var{dest}, const uint8_t *@var{src}, size_t @var{n}) 388@deftypefunx {uint16_t *} u16_strncpy (uint16_t *@var{dest}, const uint16_t *@var{src}, size_t @var{n}) 389@deftypefunx {uint32_t *} u32_strncpy (uint32_t *@var{dest}, const uint32_t *@var{src}, size_t @var{n}) 390Copies no more than @var{n} units of @var{src} to @var{dest}. 391 392This function is similar to @posixfunc{strncpy} and @posixfunc{wcsncpy}, except 393that it operates on Unicode strings. 
394@end deftypefun 395 396@deftypefun {uint8_t *} u8_stpncpy (uint8_t *@var{dest}, const uint8_t *@var{src}, size_t @var{n}) 397@deftypefunx {uint16_t *} u16_stpncpy (uint16_t *@var{dest}, const uint16_t *@var{src}, size_t @var{n}) 398@deftypefunx {uint32_t *} u32_stpncpy (uint32_t *@var{dest}, const uint32_t *@var{src}, size_t @var{n}) 399Copies no more than @var{n} units of @var{src} to @var{dest}. Returns a 400pointer past the last non-NUL unit written into @var{dest}. In other words, 401if the units written into @var{dest} include a NUL, the return value is the 402address of the first such NUL unit, otherwise it is 403@code{@var{dest} + @var{n}}. 404 405This function is similar to @posixfunc{stpncpy}, except that it operates on 406Unicode strings. 407@end deftypefun 408 409@deftypefun {uint8_t *} u8_strcat (uint8_t *@var{dest}, const uint8_t *@var{src}) 410@deftypefunx {uint16_t *} u16_strcat (uint16_t *@var{dest}, const uint16_t *@var{src}) 411@deftypefunx {uint32_t *} u32_strcat (uint32_t *@var{dest}, const uint32_t *@var{src}) 412Appends @var{src} onto @var{dest}. 413 414This function is similar to @posixfunc{strcat} and @posixfunc{wcscat}, except 415that it operates on Unicode strings. 416@end deftypefun 417 418@deftypefun {uint8_t *} u8_strncat (uint8_t *@var{dest}, const uint8_t *@var{src}, size_t @var{n}) 419@deftypefunx {uint16_t *} u16_strncat (uint16_t *@var{dest}, const uint16_t *@var{src}, size_t @var{n}) 420@deftypefunx {uint32_t *} u32_strncat (uint32_t *@var{dest}, const uint32_t *@var{src}, size_t @var{n}) 421Appends no more than @var{n} units of @var{src} onto @var{dest}. 422 423This function is similar to @posixfunc{strncat} and @posixfunc{wcsncat}, except 424that it operates on Unicode strings. 425@end deftypefun 426 427@node Comparing NUL terminated Unicode strings 428@subsection Comparing NUL terminated Unicode strings 429 430@cindex comparing 431The following functions compare two Unicode strings. 
432 433@deftypefun int u8_strcmp (const uint8_t *@var{s1}, const uint8_t *@var{s2}) 434@deftypefunx int u16_strcmp (const uint16_t *@var{s1}, const uint16_t *@var{s2}) 435@deftypefunx int u32_strcmp (const uint32_t *@var{s1}, const uint32_t *@var{s2}) 436Compares @var{s1} and @var{s2}, lexicographically. 437Returns a negative value if @var{s1} compares smaller than @var{s2}, 438a positive value if @var{s1} compares larger than @var{s2}, or 0 if 439they compare equal. 440 441This function is similar to @posixfunc{strcmp} and @posixfunc{wcscmp}, except 442that it operates on Unicode strings. 443@end deftypefun 444 445@cindex comparing, with collation rules 446@deftypefun int u8_strcoll (const uint8_t *@var{s1}, const uint8_t *@var{s2}) 447@deftypefunx int u16_strcoll (const uint16_t *@var{s1}, const uint16_t *@var{s2}) 448@deftypefunx int u32_strcoll (const uint32_t *@var{s1}, const uint32_t *@var{s2}) 449Compares @var{s1} and @var{s2} using the collation rules of the current 450locale. 451Returns -1 if @var{s1} < @var{s2}, 0 if @var{s1} = @var{s2}, 1 if 452@var{s1} > @var{s2}. Upon failure, sets @code{errno} and returns any value. 453 454This function is similar to @posixfunc{strcoll} and @posixfunc{wcscoll}, except 455that it operates on Unicode strings. 456 457Note that this function may consider different canonical normalizations 458of the same string as having a large distance. It is therefore better to 459use the function @code{u8_normcoll} instead of this one; see @ref{uninorm.h}. 460@end deftypefun 461 462@deftypefun int u8_strncmp (const uint8_t *@var{s1}, const uint8_t *@var{s2}, size_t @var{n}) 463@deftypefunx int u16_strncmp (const uint16_t *@var{s1}, const uint16_t *@var{s2}, size_t @var{n}) 464@deftypefunx int u32_strncmp (const uint32_t *@var{s1}, const uint32_t *@var{s2}, size_t @var{n}) 465Compares no more than @var{n} units of @var{s1} and @var{s2}. 
466 467This function is similar to @posixfunc{strncmp} and @posixfunc{wcsncmp}, except 468that it operates on Unicode strings. 469@end deftypefun 470 471@node Duplicating a NUL terminated Unicode string 472@subsection Duplicating a NUL terminated Unicode string 473 474@cindex duplicating 475The following function allocates a duplicate of a Unicode string. 476 477@deftypefun {uint8_t *} u8_strdup (const uint8_t *@var{s}) 478@deftypefunx {uint16_t *} u16_strdup (const uint16_t *@var{s}) 479@deftypefunx {uint32_t *} u32_strdup (const uint32_t *@var{s}) 480Duplicates @var{s}, returning an identical malloc'd string. 481 482This function is similar to @posixfunc{strdup} and @posixfunc{wcsdup}, except 483that it operates on Unicode strings. 484@end deftypefun 485 486@node Searching for a character in a NUL terminated Unicode string 487@subsection Searching for a character in a NUL terminated Unicode string 488 489@cindex searching, for a character 490The following functions search for a given Unicode character. 491 492@deftypefun {uint8_t *} u8_strchr (const uint8_t *@var{str}, ucs4_t @var{uc}) 493@deftypefunx {uint16_t *} u16_strchr (const uint16_t *@var{str}, ucs4_t @var{uc}) 494@deftypefunx {uint32_t *} u32_strchr (const uint32_t *@var{str}, ucs4_t @var{uc}) 495Finds the first occurrence of @var{uc} in @var{str}. 496 497This function is similar to @posixfunc{strchr} and @posixfunc{wcschr}, except 498that it operates on Unicode strings. 499@end deftypefun 500 501@deftypefun {uint8_t *} u8_strrchr (const uint8_t *@var{str}, ucs4_t @var{uc}) 502@deftypefunx {uint16_t *} u16_strrchr (const uint16_t *@var{str}, ucs4_t @var{uc}) 503@deftypefunx {uint32_t *} u32_strrchr (const uint32_t *@var{str}, ucs4_t @var{uc}) 504Finds the last occurrence of @var{uc} in @var{str}. 505 506This function is similar to @posixfunc{strrchr} and @posixfunc{wcsrchr}, except 507that it operates on Unicode strings. 
508@end deftypefun 509 510The following functions search for the first occurrence of some Unicode 511character in or outside a given set of Unicode characters. 512 513@deftypefun size_t u8_strcspn (const uint8_t *@var{str}, const uint8_t *@var{reject}) 514@deftypefunx size_t u16_strcspn (const uint16_t *@var{str}, const uint16_t *@var{reject}) 515@deftypefunx size_t u32_strcspn (const uint32_t *@var{str}, const uint32_t *@var{reject}) 516Returns the length of the initial segment of @var{str} which consists entirely 517of Unicode characters not in @var{reject}. 518 519This function is similar to @posixfunc{strcspn} and @posixfunc{wcscspn}, except 520that it operates on Unicode strings. 521@end deftypefun 522 523@deftypefun size_t u8_strspn (const uint8_t *@var{str}, const uint8_t *@var{accept}) 524@deftypefunx size_t u16_strspn (const uint16_t *@var{str}, const uint16_t *@var{accept}) 525@deftypefunx size_t u32_strspn (const uint32_t *@var{str}, const uint32_t *@var{accept}) 526Returns the length of the initial segment of @var{str} which consists entirely 527of Unicode characters in @var{accept}. 528 529This function is similar to @posixfunc{strspn} and @posixfunc{wcsspn}, except 530that it operates on Unicode strings. 531@end deftypefun 532 533@deftypefun {uint8_t *} u8_strpbrk (const uint8_t *@var{str}, const uint8_t *@var{accept}) 534@deftypefunx {uint16_t *} u16_strpbrk (const uint16_t *@var{str}, const uint16_t *@var{accept}) 535@deftypefunx {uint32_t *} u32_strpbrk (const uint32_t *@var{str}, const uint32_t *@var{accept}) 536Finds the first occurrence in @var{str} of any character in @var{accept}. 537 538This function is similar to @posixfunc{strpbrk} and @posixfunc{wcspbrk}, except 539that it operates on Unicode strings. 
540@end deftypefun 541 542@node Searching for a substring 543@subsection Searching for a substring in a NUL terminated Unicode string 544 545@cindex searching, for a substring 546The following functions search whether a given Unicode string is a substring 547of another Unicode string. 548 549@deftypefun {uint8_t *} u8_strstr (const uint8_t *@var{haystack}, const uint8_t *@var{needle}) 550@deftypefunx {uint16_t *} u16_strstr (const uint16_t *@var{haystack}, const uint16_t *@var{needle}) 551@deftypefunx {uint32_t *} u32_strstr (const uint32_t *@var{haystack}, const uint32_t *@var{needle}) 552Finds the first occurrence of @var{needle} in @var{haystack}. 553 554This function is similar to @posixfunc{strstr} and @posixfunc{wcsstr}, except 555that it operates on Unicode strings. 556@end deftypefun 557 558@deftypefun bool u8_startswith (const uint8_t *@var{str}, const uint8_t *@var{prefix}) 559@deftypefunx bool u16_startswith (const uint16_t *@var{str}, const uint16_t *@var{prefix}) 560@deftypefunx bool u32_startswith (const uint32_t *@var{str}, const uint32_t *@var{prefix}) 561Tests whether @var{str} starts with @var{prefix}. 562@end deftypefun 563 564@deftypefun bool u8_endswith (const uint8_t *@var{str}, const uint8_t *@var{suffix}) 565@deftypefunx bool u16_endswith (const uint16_t *@var{str}, const uint16_t *@var{suffix}) 566@deftypefunx bool u32_endswith (const uint32_t *@var{str}, const uint32_t *@var{suffix}) 567Tests whether @var{str} ends with @var{suffix}. 568@end deftypefun 569 570@node Tokenizing 571@subsection Tokenizing a NUL terminated Unicode string 572 573The following function does one step in tokenizing a Unicode string. 
574 575@deftypefun {uint8_t *} u8_strtok (uint8_t *@var{str}, const uint8_t *@var{delim}, uint8_t **@var{ptr}) 576@deftypefunx {uint16_t *} u16_strtok (uint16_t *@var{str}, const uint16_t *@var{delim}, uint16_t **@var{ptr}) 577@deftypefunx {uint32_t *} u32_strtok (uint32_t *@var{str}, const uint32_t *@var{delim}, uint32_t **@var{ptr}) 578Divides @var{str} into tokens separated by characters in @var{delim}. 579 580This function is similar to @posixfunc{strtok_r} and @posixfunc{wcstok}, except 581that it operates on Unicode strings. Its interface is actually more similar to 582@code{wcstok} than to @code{strtok}. 583@end deftypefun 584