1 /* GLIB - Library of useful routines for C programming
2 * Copyright (C) 1995-1997 Peter Mattis, Spencer Kimball and Josh MacDonald
3 *
4 * This library is free software; you can redistribute it and/or
5 * modify it under the terms of the GNU Lesser General Public
6 * License as published by the Free Software Foundation; either
7 * version 2.1 of the License, or (at your option) any later version.
8 *
9 * This library is distributed in the hope that it will be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
12 * Lesser General Public License for more details.
13 *
14 * You should have received a copy of the GNU Lesser General Public
15 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
16 */
17
18 /*
19 * Modified by the GLib Team and others 1997-2000. See the AUTHORS
20 * file for a list of people on the GLib Team. See the ChangeLog
21 * files for a list of changes. These files are distributed with
22 * GLib at ftp://ftp.gtk.org/pub/gtk/.
23 */
24
25 /*
26 * MT safe
27 */
28
29 #include "config.h"
30
31 #include <stdarg.h>
32 #include <stdio.h>
33 #include <stdlib.h>
34 #include <locale.h>
35 #include <string.h>
36 #include <locale.h>
37 #include <errno.h>
38 #include <garray.h>
39 #include <ctype.h> /* For tolower() */
40
41 #ifdef HAVE_XLOCALE_H
42 /* Needed on BSD/OS X for e.g. strtod_l */
43 #include <xlocale.h>
44 #endif
45
46 #ifdef G_OS_WIN32
47 #include <windows.h>
48 #endif
49
50 /* do not include <unistd.h> here, it may interfere with g_strsignal() */
51
52 #include "gstrfuncs.h"
53
54 #include "gprintf.h"
55 #include "gprintfint.h"
56 #include "glibintl.h"
57
58
59 /**
60 * SECTION:string_utils
61 * @title: String Utility Functions
62 * @short_description: various string-related functions
63 *
64 * This section describes a number of utility functions for creating,
65 * duplicating, and manipulating strings.
66 *
67 * Note that the functions g_printf(), g_fprintf(), g_sprintf(),
68 * g_vprintf(), g_vfprintf(), g_vsprintf() and g_vasprintf()
69 * are declared in the header `gprintf.h` which is not included in `glib.h`
70 * (otherwise using `glib.h` would drag in `stdio.h`), so you'll have to
71 * explicitly include `<glib/gprintf.h>` in order to use the GLib
72 * printf() functions.
73 *
74 * ## String precision pitfalls # {#string-precision}
75 *
76 * While you may use the printf() functions to format UTF-8 strings,
77 * notice that the precision of a \%Ns parameter is interpreted
78 * as the number of bytes, not characters to print. On top of that,
79 * the GNU libc implementation of the printf() functions has the
80 * "feature" that it checks that the string given for the \%Ns
81 * parameter consists of a whole number of characters in the current
 * encoding. So, unless you are sure you are always going to be in a
 * UTF-8 locale or you know your text is restricted to ASCII, avoid
84 * using \%Ns. If your intention is to format strings for a
85 * certain number of columns, then \%Ns is not a correct solution
86 * anyway, since it fails to take wide characters (see g_unichar_iswide())
87 * into account.
88 *
89 * Note also that there are various printf() parameters which are platform
90 * dependent. GLib provides platform independent macros for these parameters
91 * which should be used instead. A common example is %G_GUINT64_FORMAT, which
92 * should be used instead of `%llu` or similar parameters for formatting
93 * 64-bit integers. These macros are all named `G_*_FORMAT`; see
94 * [Basic Types][glib-Basic-Types].
95 */
96
97 /**
98 * g_ascii_isalnum:
99 * @c: any character
100 *
101 * Determines whether a character is alphanumeric.
102 *
103 * Unlike the standard C library isalnum() function, this only
104 * recognizes standard ASCII letters and ignores the locale,
105 * returning %FALSE for all non-ASCII characters. Also, unlike
106 * the standard library function, this takes a char, not an int,
107 * so don't call it on %EOF, but no need to cast to #guchar before
108 * passing a possibly non-ASCII character in.
109 *
110 * Returns: %TRUE if @c is an ASCII alphanumeric character
111 */
112
113 /**
114 * g_ascii_isalpha:
115 * @c: any character
116 *
117 * Determines whether a character is alphabetic (i.e. a letter).
118 *
119 * Unlike the standard C library isalpha() function, this only
120 * recognizes standard ASCII letters and ignores the locale,
121 * returning %FALSE for all non-ASCII characters. Also, unlike
122 * the standard library function, this takes a char, not an int,
123 * so don't call it on %EOF, but no need to cast to #guchar before
124 * passing a possibly non-ASCII character in.
125 *
126 * Returns: %TRUE if @c is an ASCII alphabetic character
127 */
128
129 /**
130 * g_ascii_iscntrl:
131 * @c: any character
132 *
133 * Determines whether a character is a control character.
134 *
135 * Unlike the standard C library iscntrl() function, this only
136 * recognizes standard ASCII control characters and ignores the
137 * locale, returning %FALSE for all non-ASCII characters. Also,
138 * unlike the standard library function, this takes a char, not
139 * an int, so don't call it on %EOF, but no need to cast to #guchar
140 * before passing a possibly non-ASCII character in.
141 *
142 * Returns: %TRUE if @c is an ASCII control character.
143 */
144
145 /**
146 * g_ascii_isdigit:
147 * @c: any character
148 *
 * Determines whether a character is a digit (0-9).
150 *
151 * Unlike the standard C library isdigit() function, this takes
152 * a char, not an int, so don't call it on %EOF, but no need to
153 * cast to #guchar before passing a possibly non-ASCII character in.
154 *
155 * Returns: %TRUE if @c is an ASCII digit.
156 */
157
158 /**
159 * g_ascii_isgraph:
160 * @c: any character
161 *
162 * Determines whether a character is a printing character and not a space.
163 *
164 * Unlike the standard C library isgraph() function, this only
165 * recognizes standard ASCII characters and ignores the locale,
166 * returning %FALSE for all non-ASCII characters. Also, unlike
167 * the standard library function, this takes a char, not an int,
168 * so don't call it on %EOF, but no need to cast to #guchar before
169 * passing a possibly non-ASCII character in.
170 *
171 * Returns: %TRUE if @c is an ASCII printing character other than space.
172 */
173
174 /**
175 * g_ascii_islower:
176 * @c: any character
177 *
178 * Determines whether a character is an ASCII lower case letter.
179 *
180 * Unlike the standard C library islower() function, this only
181 * recognizes standard ASCII letters and ignores the locale,
182 * returning %FALSE for all non-ASCII characters. Also, unlike
183 * the standard library function, this takes a char, not an int,
184 * so don't call it on %EOF, but no need to worry about casting
185 * to #guchar before passing a possibly non-ASCII character in.
186 *
187 * Returns: %TRUE if @c is an ASCII lower case letter
188 */
189
190 /**
191 * g_ascii_isprint:
192 * @c: any character
193 *
194 * Determines whether a character is a printing character.
195 *
196 * Unlike the standard C library isprint() function, this only
197 * recognizes standard ASCII characters and ignores the locale,
198 * returning %FALSE for all non-ASCII characters. Also, unlike
199 * the standard library function, this takes a char, not an int,
200 * so don't call it on %EOF, but no need to cast to #guchar before
201 * passing a possibly non-ASCII character in.
202 *
203 * Returns: %TRUE if @c is an ASCII printing character.
204 */
205
206 /**
207 * g_ascii_ispunct:
208 * @c: any character
209 *
210 * Determines whether a character is a punctuation character.
211 *
 * Unlike the standard C library ispunct() function, this only
 * recognizes standard ASCII punctuation and ignores the locale,
214 * returning %FALSE for all non-ASCII characters. Also, unlike
215 * the standard library function, this takes a char, not an int,
216 * so don't call it on %EOF, but no need to cast to #guchar before
217 * passing a possibly non-ASCII character in.
218 *
219 * Returns: %TRUE if @c is an ASCII punctuation character.
220 */
221
222 /**
223 * g_ascii_isspace:
224 * @c: any character
225 *
226 * Determines whether a character is a white-space character.
227 *
228 * Unlike the standard C library isspace() function, this only
229 * recognizes standard ASCII white-space and ignores the locale,
230 * returning %FALSE for all non-ASCII characters. Also, unlike
231 * the standard library function, this takes a char, not an int,
232 * so don't call it on %EOF, but no need to cast to #guchar before
233 * passing a possibly non-ASCII character in.
234 *
235 * Returns: %TRUE if @c is an ASCII white-space character
236 */
237
238 /**
239 * g_ascii_isupper:
240 * @c: any character
241 *
242 * Determines whether a character is an ASCII upper case letter.
243 *
244 * Unlike the standard C library isupper() function, this only
245 * recognizes standard ASCII letters and ignores the locale,
246 * returning %FALSE for all non-ASCII characters. Also, unlike
247 * the standard library function, this takes a char, not an int,
248 * so don't call it on %EOF, but no need to worry about casting
249 * to #guchar before passing a possibly non-ASCII character in.
250 *
251 * Returns: %TRUE if @c is an ASCII upper case letter
252 */
253
254 /**
255 * g_ascii_isxdigit:
256 * @c: any character
257 *
258 * Determines whether a character is a hexadecimal-digit character.
259 *
260 * Unlike the standard C library isxdigit() function, this takes
261 * a char, not an int, so don't call it on %EOF, but no need to
262 * cast to #guchar before passing a possibly non-ASCII character in.
263 *
264 * Returns: %TRUE if @c is an ASCII hexadecimal-digit character.
265 */
266
267 /**
268 * G_ASCII_DTOSTR_BUF_SIZE:
269 *
270 * A good size for a buffer to be passed into g_ascii_dtostr().
271 * It is guaranteed to be enough for all output of that function
272 * on systems with 64bit IEEE-compatible doubles.
273 *
274 * The typical usage would be something like:
275 * |[<!-- language="C" -->
276 * char buf[G_ASCII_DTOSTR_BUF_SIZE];
277 *
278 * fprintf (out, "value=%s\n", g_ascii_dtostr (buf, sizeof (buf), value));
279 * ]|
280 */
281
282 /**
283 * g_strstrip:
284 * @string: a string to remove the leading and trailing whitespace from
285 *
286 * Removes leading and trailing whitespace from a string.
287 * See g_strchomp() and g_strchug().
288 *
289 * Returns: @string
290 */
291
292 /**
293 * G_STR_DELIMITERS:
294 *
295 * The standard delimiters, used in g_strdelimit().
296 */
297
/*
 * Per-character classification bitmasks, indexed by unsigned character
 * code. Only the first 128 (ASCII) entries are spelled out; the
 * remaining 128 elements are implicitly zero-initialized, so bytes
 * >= 0x80 have no class bits set.
 *
 * NOTE(review): the individual bits presumably correspond to the
 * character-class flags declared in gstrfuncs.h -- confirm there before
 * editing any value.
 */
static const guint16 ascii_table_data[256] = {
  /* 0x00 - 0x07 */
  0x004, 0x004, 0x004, 0x004, 0x004, 0x004, 0x004, 0x004,
  /* 0x08 - 0x0f */
  0x004, 0x104, 0x104, 0x004, 0x104, 0x104, 0x004, 0x004,
  /* 0x10 - 0x17 */
  0x004, 0x004, 0x004, 0x004, 0x004, 0x004, 0x004, 0x004,
  /* 0x18 - 0x1f */
  0x004, 0x004, 0x004, 0x004, 0x004, 0x004, 0x004, 0x004,
  /* 0x20 - 0x27 */
  0x140, 0x0d0, 0x0d0, 0x0d0, 0x0d0, 0x0d0, 0x0d0, 0x0d0,
  /* 0x28 - 0x2f */
  0x0d0, 0x0d0, 0x0d0, 0x0d0, 0x0d0, 0x0d0, 0x0d0, 0x0d0,
  /* 0x30 - 0x37 */
  0x459, 0x459, 0x459, 0x459, 0x459, 0x459, 0x459, 0x459,
  /* 0x38 - 0x3f */
  0x459, 0x459, 0x0d0, 0x0d0, 0x0d0, 0x0d0, 0x0d0, 0x0d0,
  /* 0x40 - 0x47 */
  0x0d0, 0x653, 0x653, 0x653, 0x653, 0x653, 0x653, 0x253,
  /* 0x48 - 0x4f */
  0x253, 0x253, 0x253, 0x253, 0x253, 0x253, 0x253, 0x253,
  /* 0x50 - 0x57 */
  0x253, 0x253, 0x253, 0x253, 0x253, 0x253, 0x253, 0x253,
  /* 0x58 - 0x5f */
  0x253, 0x253, 0x253, 0x0d0, 0x0d0, 0x0d0, 0x0d0, 0x0d0,
  /* 0x60 - 0x67 */
  0x0d0, 0x473, 0x473, 0x473, 0x473, 0x473, 0x473, 0x073,
  /* 0x68 - 0x6f */
  0x073, 0x073, 0x073, 0x073, 0x073, 0x073, 0x073, 0x073,
  /* 0x70 - 0x77 */
  0x073, 0x073, 0x073, 0x073, 0x073, 0x073, 0x073, 0x073,
  /* 0x78 - 0x7f */
  0x073, 0x073, 0x073, 0x0d0, 0x0d0, 0x0d0, 0x0d0, 0x004
  /* the upper 128 are all zeroes */
};

/* Non-static, read-only pointer through which the table above is exposed
 * outside this translation unit. */
const guint16 * const g_ascii_table = ascii_table_data;
319
/*
 * Select the extended-locale ("*_l" / uselocale()) code paths only when
 * every one of the required functions is flagged as available by its
 * HAVE_* feature macro. If any is missing, the portable fallbacks that
 * are guarded by "#ifndef USE_XLOCALE" / "#else" are compiled instead.
 */
#if defined (HAVE_NEWLOCALE) && \
    defined (HAVE_USELOCALE) && \
    defined (HAVE_STRTOD_L) && \
    defined (HAVE_STRTOULL_L) && \
    defined (HAVE_STRTOLL_L)
#define USE_XLOCALE 1
#endif
327
328 #ifdef USE_XLOCALE
329 static locale_t
get_C_locale(void)330 get_C_locale (void)
331 {
332 static gsize initialized = FALSE;
333 static locale_t C_locale = NULL;
334
335 if (g_once_init_enter (&initialized))
336 {
337 C_locale = newlocale (LC_ALL_MASK, "C", NULL);
338 g_once_init_leave (&initialized, TRUE);
339 }
340
341 return C_locale;
342 }
343 #endif
344
345 /**
346 * g_strdup:
347 * @str: (nullable): the string to duplicate
348 *
349 * Duplicates a string. If @str is %NULL it returns %NULL.
350 * The returned string should be freed with g_free()
351 * when no longer needed.
352 *
353 * Returns: a newly-allocated copy of @str
354 */
355 gchar*
g_strdup(const gchar * str)356 g_strdup (const gchar *str)
357 {
358 gchar *new_str;
359 gsize length;
360
361 if (str)
362 {
363 length = strlen (str) + 1;
364 new_str = g_new (char, length);
365 memcpy (new_str, str, length);
366 }
367 else
368 new_str = NULL;
369
370 return new_str;
371 }
372
373 /**
374 * g_memdup:
375 * @mem: the memory to copy.
376 * @byte_size: the number of bytes to copy.
377 *
378 * Allocates @byte_size bytes of memory, and copies @byte_size bytes into it
379 * from @mem. If @mem is %NULL it returns %NULL.
380 *
381 * Returns: a pointer to the newly-allocated copy of the memory, or %NULL if @mem
382 * is %NULL.
383 * Deprecated: 2.68: Use g_memdup2() instead, as it accepts a #gsize argument
384 * for @byte_size, avoiding the possibility of overflow in a #gsize → #guint
385 * conversion
386 */
387 gpointer
g_memdup(gconstpointer mem,guint byte_size)388 g_memdup (gconstpointer mem,
389 guint byte_size)
390 {
391 gpointer new_mem;
392
393 if (mem && byte_size != 0)
394 {
395 new_mem = g_malloc (byte_size);
396 memcpy (new_mem, mem, byte_size);
397 }
398 else
399 new_mem = NULL;
400
401 return new_mem;
402 }
403
404 /**
405 * g_memdup2:
406 * @mem: (nullable): the memory to copy.
407 * @byte_size: the number of bytes to copy.
408 *
409 * Allocates @byte_size bytes of memory, and copies @byte_size bytes into it
410 * from @mem. If @mem is %NULL it returns %NULL.
411 *
412 * This replaces g_memdup(), which was prone to integer overflows when
413 * converting the argument from a #gsize to a #guint.
414 *
415 * Returns: (nullable): a pointer to the newly-allocated copy of the memory,
416 * or %NULL if @mem is %NULL.
417 * Since: 2.68
418 */
419 gpointer
g_memdup2(gconstpointer mem,gsize byte_size)420 g_memdup2 (gconstpointer mem,
421 gsize byte_size)
422 {
423 gpointer new_mem;
424
425 if (mem && byte_size != 0)
426 {
427 new_mem = g_malloc (byte_size);
428 memcpy (new_mem, mem, byte_size);
429 }
430 else
431 new_mem = NULL;
432
433 return new_mem;
434 }
435
436 /**
437 * g_strndup:
438 * @str: the string to duplicate
439 * @n: the maximum number of bytes to copy from @str
440 *
441 * Duplicates the first @n bytes of a string, returning a newly-allocated
442 * buffer @n + 1 bytes long which will always be nul-terminated. If @str
443 * is less than @n bytes long the buffer is padded with nuls. If @str is
444 * %NULL it returns %NULL. The returned value should be freed when no longer
445 * needed.
446 *
447 * To copy a number of characters from a UTF-8 encoded string,
448 * use g_utf8_strncpy() instead.
449 *
450 * Returns: a newly-allocated buffer containing the first @n bytes
451 * of @str, nul-terminated
452 */
453 gchar*
g_strndup(const gchar * str,gsize n)454 g_strndup (const gchar *str,
455 gsize n)
456 {
457 gchar *new_str;
458
459 if (str)
460 {
461 new_str = g_new (gchar, n + 1);
462 strncpy (new_str, str, n);
463 new_str[n] = '\0';
464 }
465 else
466 new_str = NULL;
467
468 return new_str;
469 }
470
471 /**
472 * g_strnfill:
473 * @length: the length of the new string
474 * @fill_char: the byte to fill the string with
475 *
476 * Creates a new string @length bytes long filled with @fill_char.
477 * The returned string should be freed when no longer needed.
478 *
 * Returns: a newly-allocated string filled with @fill_char
480 */
481 gchar*
g_strnfill(gsize length,gchar fill_char)482 g_strnfill (gsize length,
483 gchar fill_char)
484 {
485 gchar *str;
486
487 str = g_new (gchar, length + 1);
488 memset (str, (guchar)fill_char, length);
489 str[length] = '\0';
490
491 return str;
492 }
493
494 /**
495 * g_stpcpy:
496 * @dest: destination buffer.
497 * @src: source string.
498 *
 * Copies a nul-terminated string into the dest buffer, including the
 * trailing nul, and returns a pointer to the trailing nul byte.
501 * This is useful for concatenating multiple strings together
502 * without having to repeatedly scan for the end.
503 *
504 * Returns: a pointer to trailing nul byte.
505 **/
506 gchar *
g_stpcpy(gchar * dest,const gchar * src)507 g_stpcpy (gchar *dest,
508 const gchar *src)
509 {
510 #ifdef HAVE_STPCPY
511 g_return_val_if_fail (dest != NULL, NULL);
512 g_return_val_if_fail (src != NULL, NULL);
513 return stpcpy (dest, src);
514 #else
515 gchar *d = dest;
516 const gchar *s = src;
517
518 g_return_val_if_fail (dest != NULL, NULL);
519 g_return_val_if_fail (src != NULL, NULL);
520 do
521 *d++ = *s;
522 while (*s++ != '\0');
523
524 return d - 1;
525 #endif
526 }
527
528 /**
529 * g_strdup_vprintf:
530 * @format: (not nullable): a standard printf() format string, but notice
531 * [string precision pitfalls][string-precision]
532 * @args: the list of parameters to insert into the format string
533 *
534 * Similar to the standard C vsprintf() function but safer, since it
535 * calculates the maximum space required and allocates memory to hold
536 * the result. The returned string should be freed with g_free() when
537 * no longer needed.
538 *
539 * The returned string is guaranteed to be non-NULL, unless @format
540 * contains `%lc` or `%ls` conversions, which can fail if no multibyte
541 * representation is available for the given character.
542 *
543 * See also g_vasprintf(), which offers the same functionality, but
544 * additionally returns the length of the allocated string.
545 *
546 * Returns: a newly-allocated string holding the result
547 */
548 gchar*
g_strdup_vprintf(const gchar * format,va_list args)549 g_strdup_vprintf (const gchar *format,
550 va_list args)
551 {
552 gchar *string = NULL;
553
554 g_vasprintf (&string, format, args);
555
556 return string;
557 }
558
559 /**
560 * g_strdup_printf:
561 * @format: (not nullable): a standard printf() format string, but notice
562 * [string precision pitfalls][string-precision]
563 * @...: the parameters to insert into the format string
564 *
565 * Similar to the standard C sprintf() function but safer, since it
566 * calculates the maximum space required and allocates memory to hold
567 * the result. The returned string should be freed with g_free() when no
568 * longer needed.
569 *
570 * The returned string is guaranteed to be non-NULL, unless @format
571 * contains `%lc` or `%ls` conversions, which can fail if no multibyte
572 * representation is available for the given character.
573 *
574 * Returns: a newly-allocated string holding the result
575 */
576 gchar*
g_strdup_printf(const gchar * format,...)577 g_strdup_printf (const gchar *format,
578 ...)
579 {
580 gchar *buffer;
581 va_list args;
582
583 va_start (args, format);
584 buffer = g_strdup_vprintf (format, args);
585 va_end (args);
586
587 return buffer;
588 }
589
590 /**
591 * g_strconcat:
592 * @string1: the first string to add, which must not be %NULL
593 * @...: a %NULL-terminated list of strings to append to the string
594 *
595 * Concatenates all of the given strings into one long string. The
596 * returned string should be freed with g_free() when no longer needed.
597 *
598 * The variable argument list must end with %NULL. If you forget the %NULL,
599 * g_strconcat() will start appending random memory junk to your string.
600 *
601 * Note that this function is usually not the right function to use to
602 * assemble a translated message from pieces, since proper translation
603 * often requires the pieces to be reordered.
604 *
605 * Returns: a newly-allocated string containing all the string arguments
606 */
607 gchar*
g_strconcat(const gchar * string1,...)608 g_strconcat (const gchar *string1, ...)
609 {
610 gsize l;
611 va_list args;
612 gchar *s;
613 gchar *concat;
614 gchar *ptr;
615
616 if (!string1)
617 return NULL;
618
619 l = 1 + strlen (string1);
620 va_start (args, string1);
621 s = va_arg (args, gchar*);
622 while (s)
623 {
624 l += strlen (s);
625 s = va_arg (args, gchar*);
626 }
627 va_end (args);
628
629 concat = g_new (gchar, l);
630 ptr = concat;
631
632 ptr = g_stpcpy (ptr, string1);
633 va_start (args, string1);
634 s = va_arg (args, gchar*);
635 while (s)
636 {
637 ptr = g_stpcpy (ptr, s);
638 s = va_arg (args, gchar*);
639 }
640 va_end (args);
641
642 return concat;
643 }
644
645 /**
646 * g_strtod:
647 * @nptr: the string to convert to a numeric value.
648 * @endptr: (out) (transfer none) (optional): if non-%NULL, it returns the
649 * character after the last character used in the conversion.
650 *
651 * Converts a string to a #gdouble value.
652 * It calls the standard strtod() function to handle the conversion, but
653 * if the string is not completely converted it attempts the conversion
654 * again with g_ascii_strtod(), and returns the best match.
655 *
656 * This function should seldom be used. The normal situation when reading
657 * numbers not for human consumption is to use g_ascii_strtod(). Only when
658 * you know that you must expect both locale formatted and C formatted numbers
659 * should you use this. Make sure that you don't pass strings such as comma
660 * separated lists of values, since the commas may be interpreted as a decimal
661 * point in some locales, causing unexpected results.
662 *
663 * Returns: the #gdouble value.
664 **/
665 gdouble
g_strtod(const gchar * nptr,gchar ** endptr)666 g_strtod (const gchar *nptr,
667 gchar **endptr)
668 {
669 gchar *fail_pos_1;
670 gchar *fail_pos_2;
671 gdouble val_1;
672 gdouble val_2 = 0;
673
674 g_return_val_if_fail (nptr != NULL, 0);
675
676 fail_pos_1 = NULL;
677 fail_pos_2 = NULL;
678
679 val_1 = strtod (nptr, &fail_pos_1);
680
681 if (fail_pos_1 && fail_pos_1[0] != 0)
682 val_2 = g_ascii_strtod (nptr, &fail_pos_2);
683
684 if (!fail_pos_1 || fail_pos_1[0] == 0 || fail_pos_1 >= fail_pos_2)
685 {
686 if (endptr)
687 *endptr = fail_pos_1;
688 return val_1;
689 }
690 else
691 {
692 if (endptr)
693 *endptr = fail_pos_2;
694 return val_2;
695 }
696 }
697
698 /**
699 * g_ascii_strtod:
700 * @nptr: the string to convert to a numeric value.
701 * @endptr: (out) (transfer none) (optional): if non-%NULL, it returns the
702 * character after the last character used in the conversion.
703 *
704 * Converts a string to a #gdouble value.
705 *
706 * This function behaves like the standard strtod() function
707 * does in the C locale. It does this without actually changing
708 * the current locale, since that would not be thread-safe.
709 * A limitation of the implementation is that this function
710 * will still accept localized versions of infinities and NANs.
711 *
712 * This function is typically used when reading configuration
713 * files or other non-user input that should be locale independent.
714 * To handle input from the user you should normally use the
715 * locale-sensitive system strtod() function.
716 *
717 * To convert from a #gdouble to a string in a locale-insensitive
718 * way, use g_ascii_dtostr().
719 *
720 * If the correct value would cause overflow, plus or minus %HUGE_VAL
721 * is returned (according to the sign of the value), and %ERANGE is
722 * stored in %errno. If the correct value would cause underflow,
723 * zero is returned and %ERANGE is stored in %errno.
724 *
725 * This function resets %errno before calling strtod() so that
726 * you can reliably detect overflow and underflow.
727 *
728 * Returns: the #gdouble value.
729 */
gdouble
g_ascii_strtod (const gchar *nptr,
                gchar      **endptr)
{
#ifdef USE_XLOCALE

  /* Extended-locale build: hand the work to strtod_l() with the cached
   * "C" locale object. errno is cleared first so that the caller can
   * tell whether this call set it. */
  g_return_val_if_fail (nptr != NULL, 0);

  errno = 0;

  return strtod_l (nptr, endptr, get_C_locale ());

#else

  /*
   * Fallback build: only plain strtod() is used, which honours the
   * current locale's decimal point. When that decimal point is not ".",
   * the numeric prefix of @nptr is pre-scanned here, copied into a
   * scratch buffer with '.' swapped for the locale's decimal point,
   * parsed there, and the resulting end position is translated back
   * into @nptr.
   */
  gchar *fail_pos;
  gdouble val;
#ifndef __BIONIC__
  struct lconv *locale_data;
#endif
  const char *decimal_point;
  gsize decimal_point_len;
  const char *p, *decimal_point_pos;
  const char *end = NULL; /* Silence gcc */
  int strtod_errno;

  g_return_val_if_fail (nptr != NULL, 0);

  fail_pos = NULL;

  /* Look up the locale's decimal point; on Bionic it is hard-coded to
   * "." instead of being queried through localeconv(). */
#ifndef __BIONIC__
  locale_data = localeconv ();
  decimal_point = locale_data->decimal_point;
  decimal_point_len = strlen (decimal_point);
#else
  decimal_point = ".";
  decimal_point_len = 1;
#endif

  g_assert (decimal_point_len != 0);

  /* decimal_point_pos: position of the '.' inside @nptr, if one is found.
   * end: one past the last character of the pre-scanned number. Both
   * stay NULL when no pre-scan takes place. */
  decimal_point_pos = NULL;
  end = NULL;

  /* The pre-scan is only needed when the locale's decimal point is
   * something other than the single character ".". */
  if (decimal_point[0] != '.' ||
      decimal_point[1] != 0)
    {
      p = nptr;
      /* Skip leading space */
      while (g_ascii_isspace (*p))
        p++;

      /* Skip leading optional sign */
      if (*p == '+' || *p == '-')
        p++;

      if (p[0] == '0' &&
          (p[1] == 'x' || p[1] == 'X'))
        {
          p += 2;
          /* HEX - find the (optional) decimal point */

          while (g_ascii_isxdigit (*p))
            p++;

          if (*p == '.')
            decimal_point_pos = p++;

          while (g_ascii_isxdigit (*p))
            p++;

          /* Optional binary exponent: marker, sign, decimal digits.
           * Each of the three steps is tested independently of the
           * others. */
          if (*p == 'p' || *p == 'P')
            p++;
          if (*p == '+' || *p == '-')
            p++;
          while (g_ascii_isdigit (*p))
            p++;

          end = p;
        }
      else if (g_ascii_isdigit (*p) || *p == '.')
        {
          /* Decimal: integer digits, optional '.', fraction digits. */
          while (g_ascii_isdigit (*p))
            p++;

          if (*p == '.')
            decimal_point_pos = p++;

          while (g_ascii_isdigit (*p))
            p++;

          /* Optional decimal exponent: marker, sign, digits. Each of
           * the three steps is tested independently of the others. */
          if (*p == 'e' || *p == 'E')
            p++;
          if (*p == '+' || *p == '-')
            p++;
          while (g_ascii_isdigit (*p))
            p++;

          end = p;
        }
      /* For the other cases, we need not convert the decimal point */
    }

  if (decimal_point_pos)
    {
      char *copy, *c;

      /* We need to convert the '.' to the locale specific decimal point */
      copy = g_malloc (end - nptr + 1 + decimal_point_len);

      /* Assemble: [text before '.'] [locale decimal point] [text after
       * '.' up to end] [nul]. */
      c = copy;
      memcpy (c, nptr, decimal_point_pos - nptr);
      c += decimal_point_pos - nptr;
      memcpy (c, decimal_point, decimal_point_len);
      c += decimal_point_len;
      memcpy (c, decimal_point_pos + 1, end - (decimal_point_pos + 1));
      c += end - (decimal_point_pos + 1);
      *c = 0;

      /* Capture errno immediately after strtod(); it is written back
       * just before returning. */
      errno = 0;
      val = strtod (copy, &fail_pos);
      strtod_errno = errno;

      if (fail_pos)
        {
          /* Translate the stop position from the copy back into @nptr.
           * Offsets beyond the decimal point are shifted by the extra
           * bytes the locale decimal point occupies compared with the
           * single '.' in the original. */
          if (fail_pos - copy > decimal_point_pos - nptr)
            fail_pos = (char *)nptr + (fail_pos - copy) - (decimal_point_len - 1);
          else
            fail_pos = (char *)nptr + (fail_pos - copy);
        }

      g_free (copy);

    }
  else if (end)
    {
      char *copy;

      /* A number was pre-scanned but contains no '.'. Parse a copy
       * holding exactly the scanned span, so strtod() cannot read past
       * `end` (presumably to keep it from consuming a locale decimal
       * point that follows the digits -- confirm). */
      copy = g_malloc (end - (char *)nptr + 1);
      memcpy (copy, nptr, end - nptr);
      *(copy + (end - (char *)nptr)) = 0;

      errno = 0;
      val = strtod (copy, &fail_pos);
      strtod_errno = errno;

      if (fail_pos)
        {
          /* Same offset in @nptr, since nothing was inserted. */
          fail_pos = (char *)nptr + (fail_pos - copy);
        }

      g_free (copy);
    }
  else
    {
      /* No pre-scan result: either the locale already uses "." or the
       * input does not start like a plain/hex number. Parse in place. */
      errno = 0;
      val = strtod (nptr, &fail_pos);
      strtod_errno = errno;
    }

  if (endptr)
    *endptr = fail_pos;

  /* Report the errno value observed right after strtod(), regardless of
   * what the calls made since then may have done to it. */
  errno = strtod_errno;

  return val;
#endif
}
897
898
899 /**
900 * g_ascii_dtostr:
901 * @buffer: A buffer to place the resulting string in
902 * @buf_len: The length of the buffer.
903 * @d: The #gdouble to convert
904 *
905 * Converts a #gdouble to a string, using the '.' as
906 * decimal point.
907 *
908 * This function generates enough precision that converting
909 * the string back using g_ascii_strtod() gives the same machine-number
910 * (on machines with IEEE compatible 64bit doubles). It is
911 * guaranteed that the size of the resulting string will never
912 * be larger than @G_ASCII_DTOSTR_BUF_SIZE bytes, including the terminating
913 * nul character, which is always added.
914 *
915 * Returns: The pointer to the buffer with the converted string.
916 **/
917 gchar *
g_ascii_dtostr(gchar * buffer,gint buf_len,gdouble d)918 g_ascii_dtostr (gchar *buffer,
919 gint buf_len,
920 gdouble d)
921 {
922 return g_ascii_formatd (buffer, buf_len, "%.17g", d);
923 }
924
925 #pragma GCC diagnostic push
926 #pragma GCC diagnostic ignored "-Wformat-nonliteral"
927
928 /**
929 * g_ascii_formatd:
930 * @buffer: A buffer to place the resulting string in
931 * @buf_len: The length of the buffer.
 * @format: The printf()-style format to use for
 *   converting.
934 * @d: The #gdouble to convert
935 *
936 * Converts a #gdouble to a string, using the '.' as
937 * decimal point. To format the number you pass in
938 * a printf()-style format string. Allowed conversion
939 * specifiers are 'e', 'E', 'f', 'F', 'g' and 'G'.
940 *
941 * The returned buffer is guaranteed to be nul-terminated.
942 *
 * If you just want to serialize the value into a
944 * string, use g_ascii_dtostr().
945 *
946 * Returns: The pointer to the buffer with the converted string.
947 */
948 gchar *
g_ascii_formatd(gchar * buffer,gint buf_len,const gchar * format,gdouble d)949 g_ascii_formatd (gchar *buffer,
950 gint buf_len,
951 const gchar *format,
952 gdouble d)
953 {
954 #ifdef USE_XLOCALE
955 locale_t old_locale;
956
957 old_locale = uselocale (get_C_locale ());
958 _g_snprintf (buffer, buf_len, format, d);
959 uselocale (old_locale);
960
961 return buffer;
962 #else
963 #ifndef __BIONIC__
964 struct lconv *locale_data;
965 #endif
966 const char *decimal_point;
967 gsize decimal_point_len;
968 gchar *p;
969 int rest_len;
970 gchar format_char;
971
972 g_return_val_if_fail (buffer != NULL, NULL);
973 g_return_val_if_fail (format[0] == '%', NULL);
974 g_return_val_if_fail (strpbrk (format + 1, "'l%") == NULL, NULL);
975
976 format_char = format[strlen (format) - 1];
977
978 g_return_val_if_fail (format_char == 'e' || format_char == 'E' ||
979 format_char == 'f' || format_char == 'F' ||
980 format_char == 'g' || format_char == 'G',
981 NULL);
982
983 if (format[0] != '%')
984 return NULL;
985
986 if (strpbrk (format + 1, "'l%"))
987 return NULL;
988
989 if (!(format_char == 'e' || format_char == 'E' ||
990 format_char == 'f' || format_char == 'F' ||
991 format_char == 'g' || format_char == 'G'))
992 return NULL;
993
994 _g_snprintf (buffer, buf_len, format, d);
995
996 #ifndef __BIONIC__
997 locale_data = localeconv ();
998 decimal_point = locale_data->decimal_point;
999 decimal_point_len = strlen (decimal_point);
1000 #else
1001 decimal_point = ".";
1002 decimal_point_len = 1;
1003 #endif
1004
1005 g_assert (decimal_point_len != 0);
1006
1007 if (decimal_point[0] != '.' ||
1008 decimal_point[1] != 0)
1009 {
1010 p = buffer;
1011
1012 while (g_ascii_isspace (*p))
1013 p++;
1014
1015 if (*p == '+' || *p == '-')
1016 p++;
1017
1018 while (isdigit ((guchar)*p))
1019 p++;
1020
1021 if (strncmp (p, decimal_point, decimal_point_len) == 0)
1022 {
1023 *p = '.';
1024 p++;
1025 if (decimal_point_len > 1)
1026 {
1027 rest_len = strlen (p + (decimal_point_len - 1));
1028 memmove (p, p + (decimal_point_len - 1), rest_len);
1029 p[rest_len] = 0;
1030 }
1031 }
1032 }
1033
1034 return buffer;
1035 #endif
1036 }
1037 #pragma GCC diagnostic pop
1038
/*
 * Character helpers that compare against literal ASCII characters and
 * therefore do not consult the locale; used by the integer parser below.
 * They are plain macros that evaluate their argument more than once, so
 * only pass expressions without side effects.
 */
#define ISSPACE(c)              ((c) == ' ' || (c) == '\f' || (c) == '\n' || \
                                 (c) == '\r' || (c) == '\t' || (c) == '\v')
#define ISUPPER(c)              ((c) >= 'A' && (c) <= 'Z')
#define ISLOWER(c)              ((c) >= 'a' && (c) <= 'z')
#define ISALPHA(c)              (ISUPPER (c) || ISLOWER (c))
#define TOUPPER(c)              (ISLOWER (c) ? (c) - 'a' + 'A' : (c))
#define TOLOWER(c)              (ISUPPER (c) ? (c) - 'A' + 'a' : (c))
1046
1047 #ifndef USE_XLOCALE
1048
1049 static guint64
g_parse_long_long(const gchar * nptr,const gchar ** endptr,guint base,gboolean * negative)1050 g_parse_long_long (const gchar *nptr,
1051 const gchar **endptr,
1052 guint base,
1053 gboolean *negative)
1054 {
  /* this code is based on the strtol(3) code from GNU libc released under
1056 * the GNU Lesser General Public License.
1057 *
1058 * Copyright (C) 1991,92,94,95,96,97,98,99,2000,01,02
1059 * Free Software Foundation, Inc.
1060 */
1061 gboolean overflow;
1062 guint64 cutoff;
1063 guint64 cutlim;
1064 guint64 ui64;
1065 const gchar *s, *save;
1066 guchar c;
1067
1068 g_return_val_if_fail (nptr != NULL, 0);
1069
1070 *negative = FALSE;
1071 if (base == 1 || base > 36)
1072 {
1073 errno = EINVAL;
1074 if (endptr)
1075 *endptr = nptr;
1076 return 0;
1077 }
1078
1079 save = s = nptr;
1080
1081 /* Skip white space. */
1082 while (ISSPACE (*s))
1083 ++s;
1084
1085 if (G_UNLIKELY (!*s))
1086 goto noconv;
1087
1088 /* Check for a sign. */
1089 if (*s == '-')
1090 {
1091 *negative = TRUE;
1092 ++s;
1093 }
1094 else if (*s == '+')
1095 ++s;
1096
1097 /* Recognize number prefix and if BASE is zero, figure it out ourselves. */
1098 if (*s == '0')
1099 {
1100 if ((base == 0 || base == 16) && TOUPPER (s[1]) == 'X')
1101 {
1102 s += 2;
1103 base = 16;
1104 }
1105 else if (base == 0)
1106 base = 8;
1107 }
1108 else if (base == 0)
1109 base = 10;
1110
1111 /* Save the pointer so we can check later if anything happened. */
1112 save = s;
1113 cutoff = G_MAXUINT64 / base;
1114 cutlim = G_MAXUINT64 % base;
1115
1116 overflow = FALSE;
1117 ui64 = 0;
1118 c = *s;
1119 for (; c; c = *++s)
1120 {
1121 if (c >= '0' && c <= '9')
1122 c -= '0';
1123 else if (ISALPHA (c))
1124 c = TOUPPER (c) - 'A' + 10;
1125 else
1126 break;
1127 if (c >= base)
1128 break;
1129 /* Check for overflow. */
1130 if (ui64 > cutoff || (ui64 == cutoff && c > cutlim))
1131 overflow = TRUE;
1132 else
1133 {
1134 ui64 *= base;
1135 ui64 += c;
1136 }
1137 }
1138
1139 /* Check if anything actually happened. */
1140 if (s == save)
1141 goto noconv;
1142
1143 /* Store in ENDPTR the address of one character
1144 past the last character we converted. */
1145 if (endptr)
1146 *endptr = s;
1147
1148 if (G_UNLIKELY (overflow))
1149 {
1150 errno = ERANGE;
1151 return G_MAXUINT64;
1152 }
1153
1154 return ui64;
1155
1156 noconv:
1157 /* We must handle a special case here: the base is 0 or 16 and the
1158 first two characters are '0' and 'x', but the rest are no
1159 hexadecimal digits. This is no error case. We return 0 and
1160 ENDPTR points to the `x`. */
1161 if (endptr)
1162 {
1163 if (save - nptr >= 2 && TOUPPER (save[-1]) == 'X'
1164 && save[-2] == '0')
1165 *endptr = &save[-1];
1166 else
1167 /* There was no number to convert. */
1168 *endptr = nptr;
1169 }
1170 return 0;
1171 }
1172 #endif /* !USE_XLOCALE */
1173
1174 /**
1175 * g_ascii_strtoull:
1176 * @nptr: the string to convert to a numeric value.
1177 * @endptr: (out) (transfer none) (optional): if non-%NULL, it returns the
1178 * character after the last character used in the conversion.
1179 * @base: to be used for the conversion, 2..36 or 0
1180 *
1181 * Converts a string to a #guint64 value.
1182 * This function behaves like the standard strtoull() function
1183 * does in the C locale. It does this without actually
1184 * changing the current locale, since that would not be
1185 * thread-safe.
1186 *
1187 * Note that input with a leading minus sign (`-`) is accepted, and will return
1188 * the negation of the parsed number, unless that would overflow a #guint64.
1189 * Critically, this means you cannot assume that a short fixed length input will
1190 * never result in a low return value, as the input could have a leading `-`.
1191 *
1192 * This function is typically used when reading configuration
1193 * files or other non-user input that should be locale independent.
1194 * To handle input from the user you should normally use the
1195 * locale-sensitive system strtoull() function.
1196 *
1197 * If the correct value would cause overflow, %G_MAXUINT64
1198 * is returned, and `ERANGE` is stored in `errno`.
1199 * If the base is outside the valid range, zero is returned, and
1200 * `EINVAL` is stored in `errno`.
1201 * If the string conversion fails, zero is returned, and @endptr returns
1202 * @nptr (if @endptr is non-%NULL).
1203 *
1204 * Returns: the #guint64 value or zero on error.
1205 *
1206 * Since: 2.2
1207 */
1208 guint64
g_ascii_strtoull(const gchar * nptr,gchar ** endptr,guint base)1209 g_ascii_strtoull (const gchar *nptr,
1210 gchar **endptr,
1211 guint base)
1212 {
1213 #ifdef USE_XLOCALE
1214 return strtoull_l (nptr, endptr, base, get_C_locale ());
1215 #else
1216 gboolean negative;
1217 guint64 result;
1218
1219 result = g_parse_long_long (nptr, (const gchar **) endptr, base, &negative);
1220
1221 /* Return the result of the appropriate sign. */
1222 return negative ? -result : result;
1223 #endif
1224 }
1225
1226 /**
1227 * g_ascii_strtoll:
1228 * @nptr: the string to convert to a numeric value.
1229 * @endptr: (out) (transfer none) (optional): if non-%NULL, it returns the
1230 * character after the last character used in the conversion.
1231 * @base: to be used for the conversion, 2..36 or 0
1232 *
1233 * Converts a string to a #gint64 value.
1234 * This function behaves like the standard strtoll() function
1235 * does in the C locale. It does this without actually
1236 * changing the current locale, since that would not be
1237 * thread-safe.
1238 *
1239 * This function is typically used when reading configuration
1240 * files or other non-user input that should be locale independent.
1241 * To handle input from the user you should normally use the
1242 * locale-sensitive system strtoll() function.
1243 *
1244 * If the correct value would cause overflow, %G_MAXINT64 or %G_MININT64
1245 * is returned, and `ERANGE` is stored in `errno`.
1246 * If the base is outside the valid range, zero is returned, and
1247 * `EINVAL` is stored in `errno`. If the
1248 * string conversion fails, zero is returned, and @endptr returns @nptr
1249 * (if @endptr is non-%NULL).
1250 *
1251 * Returns: the #gint64 value or zero on error.
1252 *
1253 * Since: 2.12
1254 */
1255 gint64
g_ascii_strtoll(const gchar * nptr,gchar ** endptr,guint base)1256 g_ascii_strtoll (const gchar *nptr,
1257 gchar **endptr,
1258 guint base)
1259 {
1260 #ifdef USE_XLOCALE
1261 return strtoll_l (nptr, endptr, base, get_C_locale ());
1262 #else
1263 gboolean negative;
1264 guint64 result;
1265
1266 result = g_parse_long_long (nptr, (const gchar **) endptr, base, &negative);
1267
1268 if (negative && result > (guint64) G_MININT64)
1269 {
1270 errno = ERANGE;
1271 return G_MININT64;
1272 }
1273 else if (!negative && result > (guint64) G_MAXINT64)
1274 {
1275 errno = ERANGE;
1276 return G_MAXINT64;
1277 }
1278 else if (negative)
1279 return - (gint64) result;
1280 else
1281 return (gint64) result;
1282 #endif
1283 }
1284
1285 /**
1286 * g_strerror:
1287 * @errnum: the system error number. See the standard C %errno
1288 * documentation
1289 *
1290 * Returns a string corresponding to the given error code, e.g. "no
1291 * such process". Unlike strerror(), this always returns a string in
1292 * UTF-8 encoding, and the pointer is guaranteed to remain valid for
1293 * the lifetime of the process.
1294 *
1295 * Note that the string may be translated according to the current locale.
1296 *
1297 * The value of %errno will not be changed by this function. However, it may
1298 * be changed by intermediate function calls, so you should save its value
1299 * as soon as the call returns:
1300 * |[
1301 * int saved_errno;
1302 *
1303 * ret = read (blah);
1304 * saved_errno = errno;
1305 *
1306 * g_strerror (saved_errno);
1307 * ]|
1308 *
1309 * Returns: a UTF-8 string describing the error code. If the error code
1310 * is unknown, it returns a string like "unknown error (<code>)".
1311 */
1312 const gchar *
g_strerror(gint errnum)1313 g_strerror (gint errnum)
1314 {
1315 static GHashTable *errors;
1316 G_LOCK_DEFINE_STATIC (errors);
1317 const gchar *msg;
1318 gint saved_errno = errno;
1319
1320 G_LOCK (errors);
1321 if (errors)
1322 msg = g_hash_table_lookup (errors, GINT_TO_POINTER (errnum));
1323 else
1324 {
1325 errors = g_hash_table_new (NULL, NULL);
1326 msg = NULL;
1327 }
1328
1329 if (!msg)
1330 {
1331 gchar buf[1024];
1332 GError *error = NULL;
1333
1334 #if defined(G_OS_WIN32)
1335 strerror_s (buf, sizeof (buf), errnum);
1336 msg = buf;
1337 #elif defined(HAVE_STRERROR_R)
1338 /* Match the condition in strerror_r(3) for glibc */
1339 # if defined(STRERROR_R_CHAR_P)
1340 msg = strerror_r (errnum, buf, sizeof (buf));
1341 # else
1342 (void) strerror_r (errnum, buf, sizeof (buf));
1343 msg = buf;
1344 # endif /* HAVE_STRERROR_R */
1345 #else
1346 g_strlcpy (buf, strerror (errnum), sizeof (buf));
1347 msg = buf;
1348 #endif
1349 if (!g_get_console_charset (NULL))
1350 {
1351 msg = g_locale_to_utf8 (msg, -1, NULL, NULL, &error);
1352 if (error)
1353 g_print ("%s\n", error->message);
1354 }
1355 else if (msg == (const gchar *)buf)
1356 msg = g_strdup (buf);
1357
1358 g_hash_table_insert (errors, GINT_TO_POINTER (errnum), (char *) msg);
1359 }
1360 G_UNLOCK (errors);
1361
1362 errno = saved_errno;
1363 return msg;
1364 }
1365
1366 /**
1367 * g_strsignal:
1368 * @signum: the signal number. See the `signal` documentation
1369 *
1370 * Returns a string describing the given signal, e.g. "Segmentation fault".
1371 * You should use this function in preference to strsignal(), because it
1372 * returns a string in UTF-8 encoding, and since not all platforms support
1373 * the strsignal() function.
1374 *
1375 * Returns: a UTF-8 string describing the signal. If the signal is unknown,
1376 * it returns "unknown signal (<signum>)".
1377 */
1378 const gchar *
g_strsignal(gint signum)1379 g_strsignal (gint signum)
1380 {
1381 gchar *msg;
1382 gchar *tofree;
1383 const gchar *ret;
1384
1385 msg = tofree = NULL;
1386
1387 #ifdef HAVE_STRSIGNAL
1388 msg = strsignal (signum);
1389 if (!g_get_console_charset (NULL))
1390 msg = tofree = g_locale_to_utf8 (msg, -1, NULL, NULL, NULL);
1391 #endif
1392
1393 if (!msg)
1394 msg = tofree = g_strdup_printf ("unknown signal (%d)", signum);
1395 ret = g_intern_string (msg);
1396 g_free (tofree);
1397
1398 return ret;
1399 }
1400
1401 /* Functions g_strlcpy and g_strlcat were originally developed by
1402 * Todd C. Miller <Todd.Miller@courtesan.com> to simplify writing secure code.
1403 * See http://www.openbsd.org/cgi-bin/man.cgi?query=strlcpy
1404 * for more information.
1405 */
1406
1407 #ifdef HAVE_STRLCPY
1408 /* Use the native ones, if available; they might be implemented in assembly */
1409 gsize
g_strlcpy(gchar * dest,const gchar * src,gsize dest_size)1410 g_strlcpy (gchar *dest,
1411 const gchar *src,
1412 gsize dest_size)
1413 {
1414 g_return_val_if_fail (dest != NULL, 0);
1415 g_return_val_if_fail (src != NULL, 0);
1416
1417 return strlcpy (dest, src, dest_size);
1418 }
1419
1420 gsize
g_strlcat(gchar * dest,const gchar * src,gsize dest_size)1421 g_strlcat (gchar *dest,
1422 const gchar *src,
1423 gsize dest_size)
1424 {
1425 g_return_val_if_fail (dest != NULL, 0);
1426 g_return_val_if_fail (src != NULL, 0);
1427
1428 return strlcat (dest, src, dest_size);
1429 }
1430
1431 #else /* ! HAVE_STRLCPY */
1432 /**
1433 * g_strlcpy:
1434 * @dest: destination buffer
1435 * @src: source buffer
1436 * @dest_size: length of @dest in bytes
1437 *
1438 * Portability wrapper that calls strlcpy() on systems which have it,
1439 * and emulates strlcpy() otherwise. Copies @src to @dest; @dest is
1440 * guaranteed to be nul-terminated; @src must be nul-terminated;
1441 * @dest_size is the buffer size, not the number of bytes to copy.
1442 *
1443 * At most @dest_size - 1 characters will be copied. Always nul-terminates
1444 * (unless @dest_size is 0). This function does not allocate memory. Unlike
1445 * strncpy(), this function doesn't pad @dest (so it's often faster). It
1446 * returns the size of the attempted result, strlen (src), so if
1447 * @retval >= @dest_size, truncation occurred.
1448 *
1449 * Caveat: strlcpy() is supposedly more secure than strcpy() or strncpy(),
1450 * but if you really want to avoid screwups, g_strdup() is an even better
1451 * idea.
1452 *
1453 * Returns: length of @src
1454 */
1455 gsize
g_strlcpy(gchar * dest,const gchar * src,gsize dest_size)1456 g_strlcpy (gchar *dest,
1457 const gchar *src,
1458 gsize dest_size)
1459 {
1460 gchar *d = dest;
1461 const gchar *s = src;
1462 gsize n = dest_size;
1463
1464 g_return_val_if_fail (dest != NULL, 0);
1465 g_return_val_if_fail (src != NULL, 0);
1466
1467 /* Copy as many bytes as will fit */
1468 if (n != 0 && --n != 0)
1469 do
1470 {
1471 gchar c = *s++;
1472
1473 *d++ = c;
1474 if (c == 0)
1475 break;
1476 }
1477 while (--n != 0);
1478
1479 /* If not enough room in dest, add NUL and traverse rest of src */
1480 if (n == 0)
1481 {
1482 if (dest_size != 0)
1483 *d = 0;
1484 while (*s++)
1485 ;
1486 }
1487
1488 return s - src - 1; /* count does not include NUL */
1489 }
1490
1491 /**
1492 * g_strlcat:
1493 * @dest: destination buffer, already containing one nul-terminated string
1494 * @src: source buffer
1495 * @dest_size: length of @dest buffer in bytes (not length of existing string
1496 * inside @dest)
1497 *
1498 * Portability wrapper that calls strlcat() on systems which have it,
1499 * and emulates it otherwise. Appends nul-terminated @src string to @dest,
1500 * guaranteeing nul-termination for @dest. The total size of @dest won't
1501 * exceed @dest_size.
1502 *
1503 * At most @dest_size - 1 characters will be copied. Unlike strncat(),
1504 * @dest_size is the full size of dest, not the space left over. This
1505 * function does not allocate memory. It always nul-terminates (unless
1506 * @dest_size == 0 or there were no nul characters in the @dest_size
1507 * characters of dest to start with).
1508 *
1509 * Caveat: this is supposedly a more secure alternative to strcat() or
1510 * strncat(), but for real security g_strconcat() is harder to mess up.
1511 *
1512 * Returns: size of attempted result, which is MIN (dest_size, strlen
1513 * (original dest)) + strlen (src), so if retval >= dest_size,
1514 * truncation occurred.
1515 */
1516 gsize
g_strlcat(gchar * dest,const gchar * src,gsize dest_size)1517 g_strlcat (gchar *dest,
1518 const gchar *src,
1519 gsize dest_size)
1520 {
1521 gchar *d = dest;
1522 const gchar *s = src;
1523 gsize bytes_left = dest_size;
1524 gsize dlength; /* Logically, MIN (strlen (d), dest_size) */
1525
1526 g_return_val_if_fail (dest != NULL, 0);
1527 g_return_val_if_fail (src != NULL, 0);
1528
1529 /* Find the end of dst and adjust bytes left but don't go past end */
1530 while (*d != 0 && bytes_left-- != 0)
1531 d++;
1532 dlength = d - dest;
1533 bytes_left = dest_size - dlength;
1534
1535 if (bytes_left == 0)
1536 return dlength + strlen (s);
1537
1538 while (*s != 0)
1539 {
1540 if (bytes_left != 1)
1541 {
1542 *d++ = *s;
1543 bytes_left--;
1544 }
1545 s++;
1546 }
1547 *d = 0;
1548
1549 return dlength + (s - src); /* count does not include NUL */
1550 }
1551 #endif /* ! HAVE_STRLCPY */
1552
1553 /**
1554 * g_ascii_strdown:
1555 * @str: a string
1556 * @len: length of @str in bytes, or -1 if @str is nul-terminated
1557 *
1558 * Converts all upper case ASCII letters to lower case ASCII letters.
1559 *
1560 * Returns: a newly-allocated string, with all the upper case
1561 * characters in @str converted to lower case, with semantics that
1562 * exactly match g_ascii_tolower(). (Note that this is unlike the
1563 * old g_strdown(), which modified the string in place.)
1564 */
1565 gchar*
g_ascii_strdown(const gchar * str,gssize len)1566 g_ascii_strdown (const gchar *str,
1567 gssize len)
1568 {
1569 gchar *result, *s;
1570
1571 g_return_val_if_fail (str != NULL, NULL);
1572
1573 if (len < 0)
1574 len = (gssize) strlen (str);
1575
1576 result = g_strndup (str, (gsize) len);
1577 for (s = result; *s; s++)
1578 *s = g_ascii_tolower (*s);
1579
1580 return result;
1581 }
1582
1583 /**
1584 * g_ascii_strup:
1585 * @str: a string
1586 * @len: length of @str in bytes, or -1 if @str is nul-terminated
1587 *
1588 * Converts all lower case ASCII letters to upper case ASCII letters.
1589 *
1590 * Returns: a newly allocated string, with all the lower case
1591 * characters in @str converted to upper case, with semantics that
1592 * exactly match g_ascii_toupper(). (Note that this is unlike the
1593 * old g_strup(), which modified the string in place.)
1594 */
1595 gchar*
g_ascii_strup(const gchar * str,gssize len)1596 g_ascii_strup (const gchar *str,
1597 gssize len)
1598 {
1599 gchar *result, *s;
1600
1601 g_return_val_if_fail (str != NULL, NULL);
1602
1603 if (len < 0)
1604 len = (gssize) strlen (str);
1605
1606 result = g_strndup (str, (gsize) len);
1607 for (s = result; *s; s++)
1608 *s = g_ascii_toupper (*s);
1609
1610 return result;
1611 }
1612
1613 /**
1614 * g_str_is_ascii:
1615 * @str: a string
1616 *
1617 * Determines if a string is pure ASCII. A string is pure ASCII if it
1618 * contains no bytes with the high bit set.
1619 *
1620 * Returns: %TRUE if @str is ASCII
1621 *
1622 * Since: 2.40
1623 */
1624 gboolean
g_str_is_ascii(const gchar * str)1625 g_str_is_ascii (const gchar *str)
1626 {
1627 gsize i;
1628
1629 for (i = 0; str[i]; i++)
1630 if (str[i] & 0x80)
1631 return FALSE;
1632
1633 return TRUE;
1634 }
1635
1636 /**
1637 * g_strdown:
1638 * @string: the string to convert.
1639 *
1640 * Converts a string to lower case.
1641 *
1642 * Returns: the string
1643 *
1644 * Deprecated:2.2: This function is totally broken for the reasons discussed
1645 * in the g_strncasecmp() docs - use g_ascii_strdown() or g_utf8_strdown()
1646 * instead.
1647 **/
1648 gchar*
g_strdown(gchar * string)1649 g_strdown (gchar *string)
1650 {
1651 guchar *s;
1652
1653 g_return_val_if_fail (string != NULL, NULL);
1654
1655 s = (guchar *) string;
1656
1657 while (*s)
1658 {
1659 if (isupper (*s))
1660 *s = tolower (*s);
1661 s++;
1662 }
1663
1664 return (gchar *) string;
1665 }
1666
1667 /**
1668 * g_strup:
1669 * @string: the string to convert
1670 *
1671 * Converts a string to upper case.
1672 *
1673 * Returns: the string
1674 *
1675 * Deprecated:2.2: This function is totally broken for the reasons
1676 * discussed in the g_strncasecmp() docs - use g_ascii_strup()
1677 * or g_utf8_strup() instead.
1678 */
1679 gchar*
g_strup(gchar * string)1680 g_strup (gchar *string)
1681 {
1682 guchar *s;
1683
1684 g_return_val_if_fail (string != NULL, NULL);
1685
1686 s = (guchar *) string;
1687
1688 while (*s)
1689 {
1690 if (islower (*s))
1691 *s = toupper (*s);
1692 s++;
1693 }
1694
1695 return (gchar *) string;
1696 }
1697
1698 /**
1699 * g_strreverse:
1700 * @string: the string to reverse
1701 *
1702 * Reverses all of the bytes in a string. For example,
1703 * `g_strreverse ("abcdef")` will result in "fedcba".
1704 *
1705 * Note that g_strreverse() doesn't work on UTF-8 strings
1706 * containing multibyte characters. For that purpose, use
1707 * g_utf8_strreverse().
1708 *
1709 * Returns: the same pointer passed in as @string
1710 */
1711 gchar*
g_strreverse(gchar * string)1712 g_strreverse (gchar *string)
1713 {
1714 g_return_val_if_fail (string != NULL, NULL);
1715
1716 if (*string)
1717 {
1718 gchar *h, *t;
1719
1720 h = string;
1721 t = string + strlen (string) - 1;
1722
1723 while (h < t)
1724 {
1725 gchar c;
1726
1727 c = *h;
1728 *h = *t;
1729 h++;
1730 *t = c;
1731 t--;
1732 }
1733 }
1734
1735 return string;
1736 }
1737
1738 /**
1739 * g_ascii_tolower:
1740 * @c: any character
1741 *
1742 * Convert a character to ASCII lower case.
1743 *
1744 * Unlike the standard C library tolower() function, this only
1745 * recognizes standard ASCII letters and ignores the locale, returning
1746 * all non-ASCII characters unchanged, even if they are lower case
1747 * letters in a particular character set. Also unlike the standard
1748 * library function, this takes and returns a char, not an int, so
1749 * don't call it on %EOF but no need to worry about casting to #guchar
1750 * before passing a possibly non-ASCII character in.
1751 *
1752 * Returns: the result of converting @c to lower case. If @c is
1753 * not an ASCII upper case letter, @c is returned unchanged.
1754 */
1755 gchar
g_ascii_tolower(gchar c)1756 g_ascii_tolower (gchar c)
1757 {
1758 return g_ascii_isupper (c) ? c - 'A' + 'a' : c;
1759 }
1760
1761 /**
1762 * g_ascii_toupper:
1763 * @c: any character
1764 *
1765 * Convert a character to ASCII upper case.
1766 *
1767 * Unlike the standard C library toupper() function, this only
1768 * recognizes standard ASCII letters and ignores the locale, returning
1769 * all non-ASCII characters unchanged, even if they are upper case
1770 * letters in a particular character set. Also unlike the standard
1771 * library function, this takes and returns a char, not an int, so
1772 * don't call it on %EOF but no need to worry about casting to #guchar
1773 * before passing a possibly non-ASCII character in.
1774 *
1775 * Returns: the result of converting @c to upper case. If @c is not
1776 * an ASCII lower case letter, @c is returned unchanged.
1777 */
1778 gchar
g_ascii_toupper(gchar c)1779 g_ascii_toupper (gchar c)
1780 {
1781 return g_ascii_islower (c) ? c - 'a' + 'A' : c;
1782 }
1783
1784 /**
1785 * g_ascii_digit_value:
1786 * @c: an ASCII character
1787 *
1788 * Determines the numeric value of a character as a decimal digit.
1789 * Differs from g_unichar_digit_value() because it takes a char, so
1790 * there's no worry about sign extension if characters are signed.
1791 *
1792 * Returns: If @c is a decimal digit (according to g_ascii_isdigit()),
1793 * its numeric value. Otherwise, -1.
1794 */
1795 int
g_ascii_digit_value(gchar c)1796 g_ascii_digit_value (gchar c)
1797 {
1798 if (g_ascii_isdigit (c))
1799 return c - '0';
1800 return -1;
1801 }
1802
1803 /**
1804 * g_ascii_xdigit_value:
1805 * @c: an ASCII character.
1806 *
1807 * Determines the numeric value of a character as a hexadecimal
1808 * digit. Differs from g_unichar_xdigit_value() because it takes
1809 * a char, so there's no worry about sign extension if characters
1810 * are signed.
1811 *
1812 * Returns: If @c is a hex digit (according to g_ascii_isxdigit()),
1813 * its numeric value. Otherwise, -1.
1814 */
1815 int
g_ascii_xdigit_value(gchar c)1816 g_ascii_xdigit_value (gchar c)
1817 {
1818 if (c >= 'A' && c <= 'F')
1819 return c - 'A' + 10;
1820 if (c >= 'a' && c <= 'f')
1821 return c - 'a' + 10;
1822 return g_ascii_digit_value (c);
1823 }
1824
1825 /**
1826 * g_ascii_strcasecmp:
1827 * @s1: string to compare with @s2
1828 * @s2: string to compare with @s1
1829 *
1830 * Compare two strings, ignoring the case of ASCII characters.
1831 *
1832 * Unlike the BSD strcasecmp() function, this only recognizes standard
1833 * ASCII letters and ignores the locale, treating all non-ASCII
1834 * bytes as if they are not letters.
1835 *
1836 * This function should be used only on strings that are known to be
1837 * in encodings where the bytes corresponding to ASCII letters always
1838 * represent themselves. This includes UTF-8 and the ISO-8859-*
1839 * charsets, but not for instance double-byte encodings like the
1840 * Windows Codepage 932, where the trailing bytes of double-byte
1841 * characters include all ASCII letters. If you compare two CP932
1842 * strings using this function, you will get false matches.
1843 *
1844 * Both @s1 and @s2 must be non-%NULL.
1845 *
1846 * Returns: 0 if the strings match, a negative value if @s1 < @s2,
1847 * or a positive value if @s1 > @s2.
1848 */
1849 gint
g_ascii_strcasecmp(const gchar * s1,const gchar * s2)1850 g_ascii_strcasecmp (const gchar *s1,
1851 const gchar *s2)
1852 {
1853 gint c1, c2;
1854
1855 g_return_val_if_fail (s1 != NULL, 0);
1856 g_return_val_if_fail (s2 != NULL, 0);
1857
1858 while (*s1 && *s2)
1859 {
1860 c1 = (gint)(guchar) TOLOWER (*s1);
1861 c2 = (gint)(guchar) TOLOWER (*s2);
1862 if (c1 != c2)
1863 return (c1 - c2);
1864 s1++; s2++;
1865 }
1866
1867 return (((gint)(guchar) *s1) - ((gint)(guchar) *s2));
1868 }
1869
1870 /**
1871 * g_ascii_strncasecmp:
1872 * @s1: string to compare with @s2
1873 * @s2: string to compare with @s1
1874 * @n: number of characters to compare
1875 *
1876 * Compare @s1 and @s2, ignoring the case of ASCII characters and any
1877 * characters after the first @n in each string.
1878 *
1879 * Unlike the BSD strcasecmp() function, this only recognizes standard
1880 * ASCII letters and ignores the locale, treating all non-ASCII
1881 * characters as if they are not letters.
1882 *
1883 * The same warning as in g_ascii_strcasecmp() applies: Use this
1884 * function only on strings known to be in encodings where bytes
1885 * corresponding to ASCII letters always represent themselves.
1886 *
1887 * Returns: 0 if the strings match, a negative value if @s1 < @s2,
1888 * or a positive value if @s1 > @s2.
1889 */
1890 gint
g_ascii_strncasecmp(const gchar * s1,const gchar * s2,gsize n)1891 g_ascii_strncasecmp (const gchar *s1,
1892 const gchar *s2,
1893 gsize n)
1894 {
1895 gint c1, c2;
1896
1897 g_return_val_if_fail (s1 != NULL, 0);
1898 g_return_val_if_fail (s2 != NULL, 0);
1899
1900 while (n && *s1 && *s2)
1901 {
1902 n -= 1;
1903 c1 = (gint)(guchar) TOLOWER (*s1);
1904 c2 = (gint)(guchar) TOLOWER (*s2);
1905 if (c1 != c2)
1906 return (c1 - c2);
1907 s1++; s2++;
1908 }
1909
1910 if (n)
1911 return (((gint) (guchar) *s1) - ((gint) (guchar) *s2));
1912 else
1913 return 0;
1914 }
1915
1916 /**
1917 * g_strcasecmp:
1918 * @s1: a string
1919 * @s2: a string to compare with @s1
1920 *
1921 * A case-insensitive string comparison, corresponding to the standard
1922 * strcasecmp() function on platforms which support it.
1923 *
1924 * Returns: 0 if the strings match, a negative value if @s1 < @s2,
1925 * or a positive value if @s1 > @s2.
1926 *
1927 * Deprecated:2.2: See g_strncasecmp() for a discussion of why this
1928 * function is deprecated and how to replace it.
1929 */
1930 gint
g_strcasecmp(const gchar * s1,const gchar * s2)1931 g_strcasecmp (const gchar *s1,
1932 const gchar *s2)
1933 {
1934 #ifdef HAVE_STRCASECMP
1935 g_return_val_if_fail (s1 != NULL, 0);
1936 g_return_val_if_fail (s2 != NULL, 0);
1937
1938 return strcasecmp (s1, s2);
1939 #else
1940 gint c1, c2;
1941
1942 g_return_val_if_fail (s1 != NULL, 0);
1943 g_return_val_if_fail (s2 != NULL, 0);
1944
1945 while (*s1 && *s2)
1946 {
1947 /* According to A. Cox, some platforms have islower's that
1948 * don't work right on non-uppercase
1949 */
1950 c1 = isupper ((guchar)*s1) ? tolower ((guchar)*s1) : *s1;
1951 c2 = isupper ((guchar)*s2) ? tolower ((guchar)*s2) : *s2;
1952 if (c1 != c2)
1953 return (c1 - c2);
1954 s1++; s2++;
1955 }
1956
1957 return (((gint)(guchar) *s1) - ((gint)(guchar) *s2));
1958 #endif
1959 }
1960
1961 /**
1962 * g_strncasecmp:
1963 * @s1: a string
1964 * @s2: a string to compare with @s1
1965 * @n: the maximum number of characters to compare
1966 *
1967 * A case-insensitive string comparison, corresponding to the standard
1968 * strncasecmp() function on platforms which support it. It is similar
1969 * to g_strcasecmp() except it only compares the first @n characters of
1970 * the strings.
1971 *
1972 * Returns: 0 if the strings match, a negative value if @s1 < @s2,
1973 * or a positive value if @s1 > @s2.
1974 *
1975 * Deprecated:2.2: The problem with g_strncasecmp() is that it does
1976 * the comparison by calling toupper()/tolower(). These functions
1977 * are locale-specific and operate on single bytes. However, it is
1978 * impossible to handle things correctly from an internationalization
1979 * standpoint by operating on bytes, since characters may be multibyte.
1980 * Thus g_strncasecmp() is broken if your string is guaranteed to be
1981 * ASCII, since it is locale-sensitive, and it's broken if your string
1982 * is localized, since it doesn't work on many encodings at all,
1983 * including UTF-8, EUC-JP, etc.
1984 *
1985 * There are therefore two replacement techniques: g_ascii_strncasecmp(),
1986 * which only works on ASCII and is not locale-sensitive, and
1987 * g_utf8_casefold() followed by strcmp() on the resulting strings,
1988 * which is good for case-insensitive sorting of UTF-8.
1989 */
1990 gint
g_strncasecmp(const gchar * s1,const gchar * s2,guint n)1991 g_strncasecmp (const gchar *s1,
1992 const gchar *s2,
1993 guint n)
1994 {
1995 #ifdef HAVE_STRNCASECMP
1996 return strncasecmp (s1, s2, n);
1997 #else
1998 gint c1, c2;
1999
2000 g_return_val_if_fail (s1 != NULL, 0);
2001 g_return_val_if_fail (s2 != NULL, 0);
2002
2003 while (n && *s1 && *s2)
2004 {
2005 n -= 1;
2006 /* According to A. Cox, some platforms have islower's that
2007 * don't work right on non-uppercase
2008 */
2009 c1 = isupper ((guchar)*s1) ? tolower ((guchar)*s1) : *s1;
2010 c2 = isupper ((guchar)*s2) ? tolower ((guchar)*s2) : *s2;
2011 if (c1 != c2)
2012 return (c1 - c2);
2013 s1++; s2++;
2014 }
2015
2016 if (n)
2017 return (((gint) (guchar) *s1) - ((gint) (guchar) *s2));
2018 else
2019 return 0;
2020 #endif
2021 }
2022
2023 /**
2024 * g_strdelimit:
2025 * @string: the string to convert
2026 * @delimiters: (nullable): a string containing the current delimiters,
2027 * or %NULL to use the standard delimiters defined in #G_STR_DELIMITERS
2028 * @new_delimiter: the new delimiter character
2029 *
2030 * Converts any delimiter characters in @string to @new_delimiter.
2031 *
2032 * Any characters in @string which are found in @delimiters are
2033 * changed to the @new_delimiter character. Modifies @string in place,
2034 * and returns @string itself, not a copy.
2035 *
2036 * The return value is to allow nesting such as:
2037 *
2038 * |[<!-- language="C" -->
2039 * g_ascii_strup (g_strdelimit (str, "abc", '?'))
2040 * ]|
2041 *
2042 * In order to modify a copy, you may use g_strdup():
2043 *
2044 * |[<!-- language="C" -->
2045 * reformatted = g_strdelimit (g_strdup (const_str), "abc", '?');
2046 * ...
2047 * g_free (reformatted);
2048 * ]|
2049 *
2050 * Returns: the modified @string
2051 */
2052 gchar *
g_strdelimit(gchar * string,const gchar * delimiters,gchar new_delim)2053 g_strdelimit (gchar *string,
2054 const gchar *delimiters,
2055 gchar new_delim)
2056 {
2057 gchar *c;
2058
2059 g_return_val_if_fail (string != NULL, NULL);
2060
2061 if (!delimiters)
2062 delimiters = G_STR_DELIMITERS;
2063
2064 for (c = string; *c; c++)
2065 {
2066 if (strchr (delimiters, *c))
2067 *c = new_delim;
2068 }
2069
2070 return string;
2071 }
2072
2073 /**
2074 * g_strcanon:
2075 * @string: a nul-terminated array of bytes
2076 * @valid_chars: bytes permitted in @string
2077 * @substitutor: replacement character for disallowed bytes
2078 *
2079 * For each character in @string, if the character is not in @valid_chars,
2080 * replaces the character with @substitutor.
2081 *
 * Modifies @string in place, and returns @string itself, not a copy. The
2083 * return value is to allow nesting such as:
2084 *
2085 * |[<!-- language="C" -->
2086 * g_ascii_strup (g_strcanon (str, "abc", '?'))
2087 * ]|
2088 *
2089 * In order to modify a copy, you may use g_strdup():
2090 *
2091 * |[<!-- language="C" -->
2092 * reformatted = g_strcanon (g_strdup (const_str), "abc", '?');
2093 * ...
2094 * g_free (reformatted);
2095 * ]|
2096 *
2097 * Returns: the modified @string
2098 */
2099 gchar *
g_strcanon(gchar * string,const gchar * valid_chars,gchar substitutor)2100 g_strcanon (gchar *string,
2101 const gchar *valid_chars,
2102 gchar substitutor)
2103 {
2104 gchar *c;
2105
2106 g_return_val_if_fail (string != NULL, NULL);
2107 g_return_val_if_fail (valid_chars != NULL, NULL);
2108
2109 for (c = string; *c; c++)
2110 {
2111 if (!strchr (valid_chars, *c))
2112 *c = substitutor;
2113 }
2114
2115 return string;
2116 }
2117
2118 /**
2119 * g_strcompress:
2120 * @source: a string to compress
2121 *
2122 * Replaces all escaped characters with their one byte equivalent.
2123 *
2124 * This function does the reverse conversion of g_strescape().
2125 *
2126 * Returns: a newly-allocated copy of @source with all escaped
2127 * character compressed
2128 */
2129 gchar *
g_strcompress(const gchar * source)2130 g_strcompress (const gchar *source)
2131 {
2132 const gchar *p = source, *octal;
2133 gchar *dest;
2134 gchar *q;
2135
2136 g_return_val_if_fail (source != NULL, NULL);
2137
2138 dest = g_malloc (strlen (source) + 1);
2139 q = dest;
2140
2141 while (*p)
2142 {
2143 if (*p == '\\')
2144 {
2145 p++;
2146 switch (*p)
2147 {
2148 case '\0':
2149 g_warning ("g_strcompress: trailing \\");
2150 goto out;
2151 case '0': case '1': case '2': case '3': case '4':
2152 case '5': case '6': case '7':
2153 *q = 0;
2154 octal = p;
2155 while ((p < octal + 3) && (*p >= '0') && (*p <= '7'))
2156 {
2157 *q = (*q * 8) + (*p - '0');
2158 p++;
2159 }
2160 q++;
2161 p--;
2162 break;
2163 case 'b':
2164 *q++ = '\b';
2165 break;
2166 case 'f':
2167 *q++ = '\f';
2168 break;
2169 case 'n':
2170 *q++ = '\n';
2171 break;
2172 case 'r':
2173 *q++ = '\r';
2174 break;
2175 case 't':
2176 *q++ = '\t';
2177 break;
2178 case 'v':
2179 *q++ = '\v';
2180 break;
2181 default: /* Also handles \" and \\ */
2182 *q++ = *p;
2183 break;
2184 }
2185 }
2186 else
2187 *q++ = *p;
2188 p++;
2189 }
2190 out:
2191 *q = 0;
2192
2193 return dest;
2194 }
2195
2196 /**
2197 * g_strescape:
2198 * @source: a string to escape
2199 * @exceptions: (nullable): a string of characters not to escape in @source
2200 *
2201 * Escapes the special characters '\b', '\f', '\n', '\r', '\t', '\v', '\'
2202 * and '"' in the string @source by inserting a '\' before
2203 * them. Additionally all characters in the range 0x01-0x1F (everything
2204 * below SPACE) and in the range 0x7F-0xFF (all non-ASCII chars) are
2205 * replaced with a '\' followed by their octal representation.
2206 * Characters supplied in @exceptions are not escaped.
2207 *
2208 * g_strcompress() does the reverse conversion.
2209 *
2210 * Returns: a newly-allocated copy of @source with certain
2211 * characters escaped. See above.
2212 */
2213 gchar *
g_strescape(const gchar * source,const gchar * exceptions)2214 g_strescape (const gchar *source,
2215 const gchar *exceptions)
2216 {
2217 const guchar *p;
2218 gchar *dest;
2219 gchar *q;
2220 guchar excmap[256];
2221
2222 g_return_val_if_fail (source != NULL, NULL);
2223
2224 p = (guchar *) source;
2225 /* Each source byte needs maximally four destination chars (\777) */
2226 q = dest = g_malloc (strlen (source) * 4 + 1);
2227
2228 memset (excmap, 0, 256);
2229 if (exceptions)
2230 {
2231 guchar *e = (guchar *) exceptions;
2232
2233 while (*e)
2234 {
2235 excmap[*e] = 1;
2236 e++;
2237 }
2238 }
2239
2240 while (*p)
2241 {
2242 if (excmap[*p])
2243 *q++ = *p;
2244 else
2245 {
2246 switch (*p)
2247 {
2248 case '\b':
2249 *q++ = '\\';
2250 *q++ = 'b';
2251 break;
2252 case '\f':
2253 *q++ = '\\';
2254 *q++ = 'f';
2255 break;
2256 case '\n':
2257 *q++ = '\\';
2258 *q++ = 'n';
2259 break;
2260 case '\r':
2261 *q++ = '\\';
2262 *q++ = 'r';
2263 break;
2264 case '\t':
2265 *q++ = '\\';
2266 *q++ = 't';
2267 break;
2268 case '\v':
2269 *q++ = '\\';
2270 *q++ = 'v';
2271 break;
2272 case '\\':
2273 *q++ = '\\';
2274 *q++ = '\\';
2275 break;
2276 case '"':
2277 *q++ = '\\';
2278 *q++ = '"';
2279 break;
2280 default:
2281 if ((*p < ' ') || (*p >= 0177))
2282 {
2283 *q++ = '\\';
2284 *q++ = '0' + (((*p) >> 6) & 07);
2285 *q++ = '0' + (((*p) >> 3) & 07);
2286 *q++ = '0' + ((*p) & 07);
2287 }
2288 else
2289 *q++ = *p;
2290 break;
2291 }
2292 }
2293 p++;
2294 }
2295 *q = 0;
2296 return dest;
2297 }
2298
2299 /**
2300 * g_strchug:
2301 * @string: a string to remove the leading whitespace from
2302 *
2303 * Removes leading whitespace from a string, by moving the rest
2304 * of the characters forward.
2305 *
2306 * This function doesn't allocate or reallocate any memory;
2307 * it modifies @string in place. Therefore, it cannot be used on
2308 * statically allocated strings.
2309 *
2310 * The pointer to @string is returned to allow the nesting of functions.
2311 *
2312 * Also see g_strchomp() and g_strstrip().
2313 *
2314 * Returns: @string
2315 */
2316 gchar *
g_strchug(gchar * string)2317 g_strchug (gchar *string)
2318 {
2319 guchar *start;
2320
2321 g_return_val_if_fail (string != NULL, NULL);
2322
2323 for (start = (guchar*) string; *start && g_ascii_isspace (*start); start++)
2324 ;
2325
2326 memmove (string, start, strlen ((gchar *) start) + 1);
2327
2328 return string;
2329 }
2330
2331 /**
2332 * g_strchomp:
2333 * @string: a string to remove the trailing whitespace from
2334 *
2335 * Removes trailing whitespace from a string.
2336 *
2337 * This function doesn't allocate or reallocate any memory;
2338 * it modifies @string in place. Therefore, it cannot be used
2339 * on statically allocated strings.
2340 *
2341 * The pointer to @string is returned to allow the nesting of functions.
2342 *
2343 * Also see g_strchug() and g_strstrip().
2344 *
2345 * Returns: @string
2346 */
2347 gchar *
g_strchomp(gchar * string)2348 g_strchomp (gchar *string)
2349 {
2350 gsize len;
2351
2352 g_return_val_if_fail (string != NULL, NULL);
2353
2354 len = strlen (string);
2355 while (len--)
2356 {
2357 if (g_ascii_isspace ((guchar) string[len]))
2358 string[len] = '\0';
2359 else
2360 break;
2361 }
2362
2363 return string;
2364 }
2365
2366 /**
2367 * g_strsplit:
2368 * @string: a string to split
2369 * @delimiter: a string which specifies the places at which to split
2370 * the string. The delimiter is not included in any of the resulting
2371 * strings, unless @max_tokens is reached.
2372 * @max_tokens: the maximum number of pieces to split @string into.
2373 * If this is less than 1, the string is split completely.
2374 *
2375 * Splits a string into a maximum of @max_tokens pieces, using the given
2376 * @delimiter. If @max_tokens is reached, the remainder of @string is
2377 * appended to the last token.
2378 *
2379 * As an example, the result of g_strsplit (":a:bc::d:", ":", -1) is a
2380 * %NULL-terminated vector containing the six strings "", "a", "bc", "", "d"
2381 * and "".
2382 *
2383 * As a special case, the result of splitting the empty string "" is an empty
2384 * vector, not a vector containing a single string. The reason for this
2385 * special case is that being able to represent an empty vector is typically
2386 * more useful than consistent handling of empty elements. If you do need
2387 * to represent empty elements, you'll need to check for the empty string
2388 * before calling g_strsplit().
2389 *
2390 * Returns: a newly-allocated %NULL-terminated array of strings. Use
2391 * g_strfreev() to free it.
2392 */
2393 gchar**
g_strsplit(const gchar * string,const gchar * delimiter,gint max_tokens)2394 g_strsplit (const gchar *string,
2395 const gchar *delimiter,
2396 gint max_tokens)
2397 {
2398 char *s;
2399 const gchar *remainder;
2400 GPtrArray *string_list;
2401
2402 g_return_val_if_fail (string != NULL, NULL);
2403 g_return_val_if_fail (delimiter != NULL, NULL);
2404 g_return_val_if_fail (delimiter[0] != '\0', NULL);
2405
2406 if (max_tokens < 1)
2407 max_tokens = G_MAXINT;
2408
2409 string_list = g_ptr_array_new ();
2410 remainder = string;
2411 s = strstr (remainder, delimiter);
2412 if (s)
2413 {
2414 gsize delimiter_len = strlen (delimiter);
2415
2416 while (--max_tokens && s)
2417 {
2418 gsize len;
2419
2420 len = s - remainder;
2421 g_ptr_array_add (string_list, g_strndup (remainder, len));
2422 remainder = s + delimiter_len;
2423 s = strstr (remainder, delimiter);
2424 }
2425 }
2426 if (*string)
2427 g_ptr_array_add (string_list, g_strdup (remainder));
2428
2429 g_ptr_array_add (string_list, NULL);
2430
2431 return (char **) g_ptr_array_free (string_list, FALSE);
2432 }
2433
2434 /**
2435 * g_strsplit_set:
2436 * @string: The string to be tokenized
2437 * @delimiters: A nul-terminated string containing bytes that are used
2438 * to split the string (it can accept an empty string, which will result
2439 * in no string splitting).
2440 * @max_tokens: The maximum number of tokens to split @string into.
2441 * If this is less than 1, the string is split completely
2442 *
2443 * Splits @string into a number of tokens not containing any of the characters
 * in @delimiters. A token is the (possibly empty) longest string that does not
2445 * contain any of the characters in @delimiters. If @max_tokens is reached, the
2446 * remainder is appended to the last token.
2447 *
2448 * For example the result of g_strsplit_set ("abc:def/ghi", ":/", -1) is a
2449 * %NULL-terminated vector containing the three strings "abc", "def",
2450 * and "ghi".
2451 *
2452 * The result of g_strsplit_set (":def/ghi:", ":/", -1) is a %NULL-terminated
2453 * vector containing the four strings "", "def", "ghi", and "".
2454 *
2455 * As a special case, the result of splitting the empty string "" is an empty
2456 * vector, not a vector containing a single string. The reason for this
2457 * special case is that being able to represent an empty vector is typically
2458 * more useful than consistent handling of empty elements. If you do need
2459 * to represent empty elements, you'll need to check for the empty string
2460 * before calling g_strsplit_set().
2461 *
2462 * Note that this function works on bytes not characters, so it can't be used
2463 * to delimit UTF-8 strings for anything but ASCII characters.
2464 *
2465 * Returns: a newly-allocated %NULL-terminated array of strings. Use
2466 * g_strfreev() to free it.
2467 *
2468 * Since: 2.4
2469 **/
2470 gchar **
g_strsplit_set(const gchar * string,const gchar * delimiters,gint max_tokens)2471 g_strsplit_set (const gchar *string,
2472 const gchar *delimiters,
2473 gint max_tokens)
2474 {
2475 guint8 delim_table[256]; /* 1 = index is a separator; 0 otherwise */
2476 GSList *tokens, *list;
2477 gint n_tokens;
2478 const gchar *s;
2479 const gchar *current;
2480 gchar *token;
2481 gchar **result;
2482
2483 g_return_val_if_fail (string != NULL, NULL);
2484 g_return_val_if_fail (delimiters != NULL, NULL);
2485
2486 if (max_tokens < 1)
2487 max_tokens = G_MAXINT;
2488
2489 if (*string == '\0')
2490 {
2491 result = g_new (char *, 1);
2492 result[0] = NULL;
2493 return result;
2494 }
2495
2496 /* Check if each character in @string is a separator, by indexing by the
2497 * character value into the @delim_table, which has value 1 stored at an index
2498 * if that index is a separator. */
2499 memset (delim_table, FALSE, sizeof (delim_table));
2500 for (s = delimiters; *s != '\0'; ++s)
2501 delim_table[*(guchar *)s] = TRUE;
2502
2503 tokens = NULL;
2504 n_tokens = 0;
2505
2506 s = current = string;
2507 while (*s != '\0')
2508 {
2509 if (delim_table[*(guchar *)s] && n_tokens + 1 < max_tokens)
2510 {
2511 token = g_strndup (current, s - current);
2512 tokens = g_slist_prepend (tokens, token);
2513 ++n_tokens;
2514
2515 current = s + 1;
2516 }
2517
2518 ++s;
2519 }
2520
2521 token = g_strndup (current, s - current);
2522 tokens = g_slist_prepend (tokens, token);
2523 ++n_tokens;
2524
2525 result = g_new (gchar *, n_tokens + 1);
2526
2527 result[n_tokens] = NULL;
2528 for (list = tokens; list != NULL; list = list->next)
2529 result[--n_tokens] = list->data;
2530
2531 g_slist_free (tokens);
2532
2533 return result;
2534 }
2535
2536 /**
2537 * GStrv:
2538 *
2539 * A typedef alias for gchar**. This is mostly useful when used together with
2540 * g_auto().
2541 */
2542
2543 /**
2544 * g_strfreev:
2545 * @str_array: (nullable): a %NULL-terminated array of strings to free
2546 *
2547 * Frees a %NULL-terminated array of strings, as well as each
2548 * string it contains.
2549 *
2550 * If @str_array is %NULL, this function simply returns.
2551 */
2552 void
g_strfreev(gchar ** str_array)2553 g_strfreev (gchar **str_array)
2554 {
2555 if (str_array)
2556 {
2557 gsize i;
2558
2559 for (i = 0; str_array[i] != NULL; i++)
2560 g_free (str_array[i]);
2561
2562 g_free (str_array);
2563 }
2564 }
2565
2566 /**
2567 * g_strdupv:
2568 * @str_array: (nullable): a %NULL-terminated array of strings
2569 *
2570 * Copies %NULL-terminated array of strings. The copy is a deep copy;
2571 * the new array should be freed by first freeing each string, then
2572 * the array itself. g_strfreev() does this for you. If called
2573 * on a %NULL value, g_strdupv() simply returns %NULL.
2574 *
2575 * Returns: (nullable): a new %NULL-terminated array of strings.
2576 */
2577 gchar**
g_strdupv(gchar ** str_array)2578 g_strdupv (gchar **str_array)
2579 {
2580 if (str_array)
2581 {
2582 gsize i;
2583 gchar **retval;
2584
2585 i = 0;
2586 while (str_array[i])
2587 ++i;
2588
2589 retval = g_new (gchar*, i + 1);
2590
2591 i = 0;
2592 while (str_array[i])
2593 {
2594 retval[i] = g_strdup (str_array[i]);
2595 ++i;
2596 }
2597 retval[i] = NULL;
2598
2599 return retval;
2600 }
2601 else
2602 return NULL;
2603 }
2604
2605 /**
2606 * g_strjoinv:
2607 * @separator: (nullable): a string to insert between each of the
2608 * strings, or %NULL
2609 * @str_array: a %NULL-terminated array of strings to join
2610 *
2611 * Joins a number of strings together to form one long string, with the
2612 * optional @separator inserted between each of them. The returned string
2613 * should be freed with g_free().
2614 *
2615 * If @str_array has no items, the return value will be an
2616 * empty string. If @str_array contains a single item, @separator will not
2617 * appear in the resulting string.
2618 *
2619 * Returns: a newly-allocated string containing all of the strings joined
2620 * together, with @separator between them
2621 */
2622 gchar*
g_strjoinv(const gchar * separator,gchar ** str_array)2623 g_strjoinv (const gchar *separator,
2624 gchar **str_array)
2625 {
2626 gchar *string;
2627 gchar *ptr;
2628
2629 g_return_val_if_fail (str_array != NULL, NULL);
2630
2631 if (separator == NULL)
2632 separator = "";
2633
2634 if (*str_array)
2635 {
2636 gsize i;
2637 gsize len;
2638 gsize separator_len;
2639
2640 separator_len = strlen (separator);
2641 /* First part, getting length */
2642 len = 1 + strlen (str_array[0]);
2643 for (i = 1; str_array[i] != NULL; i++)
2644 len += strlen (str_array[i]);
2645 len += separator_len * (i - 1);
2646
2647 /* Second part, building string */
2648 string = g_new (gchar, len);
2649 ptr = g_stpcpy (string, *str_array);
2650 for (i = 1; str_array[i] != NULL; i++)
2651 {
2652 ptr = g_stpcpy (ptr, separator);
2653 ptr = g_stpcpy (ptr, str_array[i]);
2654 }
2655 }
2656 else
2657 string = g_strdup ("");
2658
2659 return string;
2660 }
2661
2662 /**
2663 * g_strjoin:
2664 * @separator: (nullable): a string to insert between each of the
2665 * strings, or %NULL
2666 * @...: a %NULL-terminated list of strings to join
2667 *
2668 * Joins a number of strings together to form one long string, with the
2669 * optional @separator inserted between each of them. The returned string
2670 * should be freed with g_free().
2671 *
2672 * Returns: a newly-allocated string containing all of the strings joined
2673 * together, with @separator between them
2674 */
2675 gchar*
g_strjoin(const gchar * separator,...)2676 g_strjoin (const gchar *separator,
2677 ...)
2678 {
2679 gchar *string, *s;
2680 va_list args;
2681 gsize len;
2682 gsize separator_len;
2683 gchar *ptr;
2684
2685 if (separator == NULL)
2686 separator = "";
2687
2688 separator_len = strlen (separator);
2689
2690 va_start (args, separator);
2691
2692 s = va_arg (args, gchar*);
2693
2694 if (s)
2695 {
2696 /* First part, getting length */
2697 len = 1 + strlen (s);
2698
2699 s = va_arg (args, gchar*);
2700 while (s)
2701 {
2702 len += separator_len + strlen (s);
2703 s = va_arg (args, gchar*);
2704 }
2705 va_end (args);
2706
2707 /* Second part, building string */
2708 string = g_new (gchar, len);
2709
2710 va_start (args, separator);
2711
2712 s = va_arg (args, gchar*);
2713 ptr = g_stpcpy (string, s);
2714
2715 s = va_arg (args, gchar*);
2716 while (s)
2717 {
2718 ptr = g_stpcpy (ptr, separator);
2719 ptr = g_stpcpy (ptr, s);
2720 s = va_arg (args, gchar*);
2721 }
2722 }
2723 else
2724 string = g_strdup ("");
2725
2726 va_end (args);
2727
2728 return string;
2729 }
2730
2731
2732 /**
2733 * g_strstr_len:
2734 * @haystack: a nul-terminated string
2735 * @haystack_len: the maximum length of @haystack in bytes. A length of -1
2736 * can be used to mean "search the entire string", like `strstr()`.
2737 * @needle: the string to search for
2738 *
2739 * Searches the string @haystack for the first occurrence
2740 * of the string @needle, limiting the length of the search
2741 * to @haystack_len.
2742 *
2743 * Returns: a pointer to the found occurrence, or
2744 * %NULL if not found.
2745 */
2746 gchar *
g_strstr_len(const gchar * haystack,gssize haystack_len,const gchar * needle)2747 g_strstr_len (const gchar *haystack,
2748 gssize haystack_len,
2749 const gchar *needle)
2750 {
2751 g_return_val_if_fail (haystack != NULL, NULL);
2752 g_return_val_if_fail (needle != NULL, NULL);
2753
2754 if (haystack_len < 0)
2755 return strstr (haystack, needle);
2756 else
2757 {
2758 const gchar *p = haystack;
2759 gsize needle_len = strlen (needle);
2760 gsize haystack_len_unsigned = haystack_len;
2761 const gchar *end;
2762 gsize i;
2763
2764 if (needle_len == 0)
2765 return (gchar *)haystack;
2766
2767 if (haystack_len_unsigned < needle_len)
2768 return NULL;
2769
2770 end = haystack + haystack_len - needle_len;
2771
2772 while (p <= end && *p)
2773 {
2774 for (i = 0; i < needle_len; i++)
2775 if (p[i] != needle[i])
2776 goto next;
2777
2778 return (gchar *)p;
2779
2780 next:
2781 p++;
2782 }
2783
2784 return NULL;
2785 }
2786 }
2787
2788 /**
2789 * g_strrstr:
2790 * @haystack: a nul-terminated string
2791 * @needle: the nul-terminated string to search for
2792 *
2793 * Searches the string @haystack for the last occurrence
2794 * of the string @needle.
2795 *
2796 * Returns: a pointer to the found occurrence, or
2797 * %NULL if not found.
2798 */
2799 gchar *
g_strrstr(const gchar * haystack,const gchar * needle)2800 g_strrstr (const gchar *haystack,
2801 const gchar *needle)
2802 {
2803 gsize i;
2804 gsize needle_len;
2805 gsize haystack_len;
2806 const gchar *p;
2807
2808 g_return_val_if_fail (haystack != NULL, NULL);
2809 g_return_val_if_fail (needle != NULL, NULL);
2810
2811 needle_len = strlen (needle);
2812 haystack_len = strlen (haystack);
2813
2814 if (needle_len == 0)
2815 return (gchar *)haystack;
2816
2817 if (haystack_len < needle_len)
2818 return NULL;
2819
2820 p = haystack + haystack_len - needle_len;
2821
2822 while (p >= haystack)
2823 {
2824 for (i = 0; i < needle_len; i++)
2825 if (p[i] != needle[i])
2826 goto next;
2827
2828 return (gchar *)p;
2829
2830 next:
2831 p--;
2832 }
2833
2834 return NULL;
2835 }
2836
2837 /**
2838 * g_strrstr_len:
2839 * @haystack: a nul-terminated string
2840 * @haystack_len: the maximum length of @haystack in bytes. A length of -1
2841 * can be used to mean "search the entire string", like g_strrstr().
2842 * @needle: the nul-terminated string to search for
2843 *
2844 * Searches the string @haystack for the last occurrence
2845 * of the string @needle, limiting the length of the search
2846 * to @haystack_len.
2847 *
2848 * Returns: a pointer to the found occurrence, or
2849 * %NULL if not found.
2850 */
2851 gchar *
g_strrstr_len(const gchar * haystack,gssize haystack_len,const gchar * needle)2852 g_strrstr_len (const gchar *haystack,
2853 gssize haystack_len,
2854 const gchar *needle)
2855 {
2856 g_return_val_if_fail (haystack != NULL, NULL);
2857 g_return_val_if_fail (needle != NULL, NULL);
2858
2859 if (haystack_len < 0)
2860 return g_strrstr (haystack, needle);
2861 else
2862 {
2863 gsize needle_len = strlen (needle);
2864 const gchar *haystack_max = haystack + haystack_len;
2865 const gchar *p = haystack;
2866 gsize i;
2867
2868 while (p < haystack_max && *p)
2869 p++;
2870
2871 if (p < haystack + needle_len)
2872 return NULL;
2873
2874 p -= needle_len;
2875
2876 while (p >= haystack)
2877 {
2878 for (i = 0; i < needle_len; i++)
2879 if (p[i] != needle[i])
2880 goto next;
2881
2882 return (gchar *)p;
2883
2884 next:
2885 p--;
2886 }
2887
2888 return NULL;
2889 }
2890 }
2891
2892
2893 /**
2894 * g_str_has_suffix:
2895 * @str: a nul-terminated string
2896 * @suffix: the nul-terminated suffix to look for
2897 *
2898 * Looks whether the string @str ends with @suffix.
2899 *
 * Returns: %TRUE if @str ends with @suffix, %FALSE otherwise.
2901 *
2902 * Since: 2.2
2903 */
2904 gboolean
g_str_has_suffix(const gchar * str,const gchar * suffix)2905 g_str_has_suffix (const gchar *str,
2906 const gchar *suffix)
2907 {
2908 gsize str_len;
2909 gsize suffix_len;
2910
2911 g_return_val_if_fail (str != NULL, FALSE);
2912 g_return_val_if_fail (suffix != NULL, FALSE);
2913
2914 str_len = strlen (str);
2915 suffix_len = strlen (suffix);
2916
2917 if (str_len < suffix_len)
2918 return FALSE;
2919
2920 return strcmp (str + str_len - suffix_len, suffix) == 0;
2921 }
2922
2923 /**
2924 * g_str_has_prefix:
2925 * @str: a nul-terminated string
2926 * @prefix: the nul-terminated prefix to look for
2927 *
2928 * Looks whether the string @str begins with @prefix.
2929 *
2930 * Returns: %TRUE if @str begins with @prefix, %FALSE otherwise.
2931 *
2932 * Since: 2.2
2933 */
2934 gboolean
g_str_has_prefix(const gchar * str,const gchar * prefix)2935 g_str_has_prefix (const gchar *str,
2936 const gchar *prefix)
2937 {
2938 g_return_val_if_fail (str != NULL, FALSE);
2939 g_return_val_if_fail (prefix != NULL, FALSE);
2940
2941 return strncmp (str, prefix, strlen (prefix)) == 0;
2942 }
2943
2944 /**
2945 * g_strv_length:
2946 * @str_array: a %NULL-terminated array of strings
2947 *
2948 * Returns the length of the given %NULL-terminated
2949 * string array @str_array. @str_array must not be %NULL.
2950 *
2951 * Returns: length of @str_array.
2952 *
2953 * Since: 2.6
2954 */
2955 guint
g_strv_length(gchar ** str_array)2956 g_strv_length (gchar **str_array)
2957 {
2958 guint i = 0;
2959
2960 g_return_val_if_fail (str_array != NULL, 0);
2961
2962 while (str_array[i])
2963 ++i;
2964
2965 return i;
2966 }
2967
2968 static void
index_add_folded(GPtrArray * array,const gchar * start,const gchar * end)2969 index_add_folded (GPtrArray *array,
2970 const gchar *start,
2971 const gchar *end)
2972 {
2973 gchar *normal;
2974
2975 normal = g_utf8_normalize (start, end - start, G_NORMALIZE_ALL_COMPOSE);
2976
2977 /* TODO: Invent time machine. Converse with Mustafa Ataturk... */
2978 if (strstr (normal, "ı") || strstr (normal, "İ"))
2979 {
2980 gchar *s = normal;
2981 GString *tmp;
2982
2983 tmp = g_string_new (NULL);
2984
2985 while (*s)
2986 {
2987 gchar *i, *I, *e;
2988
2989 i = strstr (s, "ı");
2990 I = strstr (s, "İ");
2991
2992 if (!i && !I)
2993 break;
2994 else if (i && !I)
2995 e = i;
2996 else if (I && !i)
2997 e = I;
2998 else if (i < I)
2999 e = i;
3000 else
3001 e = I;
3002
3003 g_string_append_len (tmp, s, e - s);
3004 g_string_append_c (tmp, 'i');
3005 s = g_utf8_next_char (e);
3006 }
3007
3008 g_string_append (tmp, s);
3009 g_free (normal);
3010 normal = g_string_free (tmp, FALSE);
3011 }
3012
3013 g_ptr_array_add (array, g_utf8_casefold (normal, -1));
3014 g_free (normal);
3015 }
3016
3017 static gchar **
split_words(const gchar * value)3018 split_words (const gchar *value)
3019 {
3020 const gchar *start = NULL;
3021 GPtrArray *result;
3022 const gchar *s;
3023
3024 result = g_ptr_array_new ();
3025
3026 for (s = value; *s; s = g_utf8_next_char (s))
3027 {
3028 gunichar c = g_utf8_get_char (s);
3029
3030 if (start == NULL)
3031 {
3032 if (g_unichar_isalnum (c) || g_unichar_ismark (c))
3033 start = s;
3034 }
3035 else
3036 {
3037 if (!g_unichar_isalnum (c) && !g_unichar_ismark (c))
3038 {
3039 index_add_folded (result, start, s);
3040 start = NULL;
3041 }
3042 }
3043 }
3044
3045 if (start)
3046 index_add_folded (result, start, s);
3047
3048 g_ptr_array_add (result, NULL);
3049
3050 return (gchar **) g_ptr_array_free (result, FALSE);
3051 }
3052
3053 /**
3054 * g_str_tokenize_and_fold:
3055 * @string: a string
3056 * @translit_locale: (nullable): the language code (like 'de' or
3057 * 'en_GB') from which @string originates
3058 * @ascii_alternates: (out) (transfer full) (array zero-terminated=1): a
3059 * return location for ASCII alternates
3060 *
3061 * Tokenises @string and performs folding on each token.
3062 *
3063 * A token is a non-empty sequence of alphanumeric characters in the
3064 * source string, separated by non-alphanumeric characters. An
3065 * "alphanumeric" character for this purpose is one that matches
3066 * g_unichar_isalnum() or g_unichar_ismark().
3067 *
3068 * Each token is then (Unicode) normalised and case-folded. If
3069 * @ascii_alternates is non-%NULL and some of the returned tokens
3070 * contain non-ASCII characters, ASCII alternatives will be generated.
3071 *
3072 * The number of ASCII alternatives that are generated and the method
3073 * for doing so is unspecified, but @translit_locale (if specified) may
3074 * improve the transliteration if the language of the source string is
3075 * known.
3076 *
3077 * Returns: (transfer full) (array zero-terminated=1): the folded tokens
3078 *
3079 * Since: 2.40
3080 **/
3081 gchar **
g_str_tokenize_and_fold(const gchar * string,const gchar * translit_locale,gchar *** ascii_alternates)3082 g_str_tokenize_and_fold (const gchar *string,
3083 const gchar *translit_locale,
3084 gchar ***ascii_alternates)
3085 {
3086 gchar **result;
3087
3088 g_return_val_if_fail (string != NULL, NULL);
3089
3090 if (ascii_alternates && g_str_is_ascii (string))
3091 {
3092 *ascii_alternates = g_new0 (gchar *, 0 + 1);
3093 ascii_alternates = NULL;
3094 }
3095
3096 result = split_words (string);
3097
3098 if (ascii_alternates)
3099 {
3100 gint i, j, n;
3101
3102 n = g_strv_length (result);
3103 *ascii_alternates = g_new (gchar *, n + 1);
3104 j = 0;
3105
3106 for (i = 0; i < n; i++)
3107 {
3108 if (!g_str_is_ascii (result[i]))
3109 {
3110 gchar *composed;
3111 gchar *ascii;
3112 gint k;
3113
3114 composed = g_utf8_normalize (result[i], -1, G_NORMALIZE_ALL_COMPOSE);
3115
3116 ascii = g_str_to_ascii (composed, translit_locale);
3117
3118 /* Only accept strings that are now entirely alnums */
3119 for (k = 0; ascii[k]; k++)
3120 if (!g_ascii_isalnum (ascii[k]))
3121 break;
3122
3123 if (ascii[k] == '\0')
3124 /* Made it to the end... */
3125 (*ascii_alternates)[j++] = ascii;
3126 else
3127 g_free (ascii);
3128
3129 g_free (composed);
3130 }
3131 }
3132
3133 (*ascii_alternates)[j] = NULL;
3134 }
3135
3136 return result;
3137 }
3138
3139 /**
3140 * g_str_match_string:
3141 * @search_term: the search term from the user
3142 * @potential_hit: the text that may be a hit
3143 * @accept_alternates: %TRUE to accept ASCII alternates
3144 *
3145 * Checks if a search conducted for @search_term should match
3146 * @potential_hit.
3147 *
3148 * This function calls g_str_tokenize_and_fold() on both
3149 * @search_term and @potential_hit. ASCII alternates are never taken
3150 * for @search_term but will be taken for @potential_hit according to
3151 * the value of @accept_alternates.
3152 *
3153 * A hit occurs when each folded token in @search_term is a prefix of a
3154 * folded token from @potential_hit.
3155 *
3156 * Depending on how you're performing the search, it will typically be
3157 * faster to call g_str_tokenize_and_fold() on each string in
3158 * your corpus and build an index on the returned folded tokens, then
3159 * call g_str_tokenize_and_fold() on the search term and
3160 * perform lookups into that index.
3161 *
3162 * As some examples, searching for ‘fred’ would match the potential hit
3163 * ‘Smith, Fred’ and also ‘Frédéric’. Searching for ‘Fréd’ would match
3164 * ‘Frédéric’ but not ‘Frederic’ (due to the one-directional nature of
3165 * accent matching). Searching ‘fo’ would match ‘Foo’ and ‘Bar Foo
3166 * Baz’, but not ‘SFO’ (because no word has ‘fo’ as a prefix).
3167 *
3168 * Returns: %TRUE if @potential_hit is a hit
3169 *
3170 * Since: 2.40
3171 **/
3172 gboolean
g_str_match_string(const gchar * search_term,const gchar * potential_hit,gboolean accept_alternates)3173 g_str_match_string (const gchar *search_term,
3174 const gchar *potential_hit,
3175 gboolean accept_alternates)
3176 {
3177 gchar **alternates = NULL;
3178 gchar **term_tokens;
3179 gchar **hit_tokens;
3180 gboolean matched;
3181 gint i, j;
3182
3183 g_return_val_if_fail (search_term != NULL, FALSE);
3184 g_return_val_if_fail (potential_hit != NULL, FALSE);
3185
3186 term_tokens = g_str_tokenize_and_fold (search_term, NULL, NULL);
3187 hit_tokens = g_str_tokenize_and_fold (potential_hit, NULL, accept_alternates ? &alternates : NULL);
3188
3189 matched = TRUE;
3190
3191 for (i = 0; term_tokens[i]; i++)
3192 {
3193 for (j = 0; hit_tokens[j]; j++)
3194 if (g_str_has_prefix (hit_tokens[j], term_tokens[i]))
3195 goto one_matched;
3196
3197 if (accept_alternates)
3198 for (j = 0; alternates[j]; j++)
3199 if (g_str_has_prefix (alternates[j], term_tokens[i]))
3200 goto one_matched;
3201
3202 matched = FALSE;
3203 break;
3204
3205 one_matched:
3206 continue;
3207 }
3208
3209 g_strfreev (term_tokens);
3210 g_strfreev (hit_tokens);
3211 g_strfreev (alternates);
3212
3213 return matched;
3214 }
3215
3216 /**
3217 * g_strv_contains:
3218 * @strv: a %NULL-terminated array of strings
3219 * @str: a string
3220 *
3221 * Checks if @strv contains @str. @strv must not be %NULL.
3222 *
3223 * Returns: %TRUE if @str is an element of @strv, according to g_str_equal().
3224 *
3225 * Since: 2.44
3226 */
3227 gboolean
g_strv_contains(const gchar * const * strv,const gchar * str)3228 g_strv_contains (const gchar * const *strv,
3229 const gchar *str)
3230 {
3231 g_return_val_if_fail (strv != NULL, FALSE);
3232 g_return_val_if_fail (str != NULL, FALSE);
3233
3234 for (; *strv != NULL; strv++)
3235 {
3236 if (g_str_equal (str, *strv))
3237 return TRUE;
3238 }
3239
3240 return FALSE;
3241 }
3242
3243 /**
3244 * g_strv_equal:
3245 * @strv1: a %NULL-terminated array of strings
3246 * @strv2: another %NULL-terminated array of strings
3247 *
3248 * Checks if @strv1 and @strv2 contain exactly the same elements in exactly the
3249 * same order. Elements are compared using g_str_equal(). To match independently
3250 * of order, sort the arrays first (using g_qsort_with_data() or similar).
3251 *
 * Two empty arrays are considered equal. Neither @strv1 nor @strv2 may be
3253 * %NULL.
3254 *
3255 * Returns: %TRUE if @strv1 and @strv2 are equal
3256 * Since: 2.60
3257 */
3258 gboolean
g_strv_equal(const gchar * const * strv1,const gchar * const * strv2)3259 g_strv_equal (const gchar * const *strv1,
3260 const gchar * const *strv2)
3261 {
3262 g_return_val_if_fail (strv1 != NULL, FALSE);
3263 g_return_val_if_fail (strv2 != NULL, FALSE);
3264
3265 if (strv1 == strv2)
3266 return TRUE;
3267
3268 for (; *strv1 != NULL && *strv2 != NULL; strv1++, strv2++)
3269 {
3270 if (!g_str_equal (*strv1, *strv2))
3271 return FALSE;
3272 }
3273
3274 return (*strv1 == NULL && *strv2 == NULL);
3275 }
3276
3277 static gboolean
str_has_sign(const gchar * str)3278 str_has_sign (const gchar *str)
3279 {
3280 return str[0] == '-' || str[0] == '+';
3281 }
3282
3283 static gboolean
str_has_hex_prefix(const gchar * str)3284 str_has_hex_prefix (const gchar *str)
3285 {
3286 return str[0] == '0' && g_ascii_tolower (str[1]) == 'x';
3287 }
3288
3289 /**
3290 * g_ascii_string_to_signed:
3291 * @str: a string
3292 * @base: base of a parsed number
3293 * @min: a lower bound (inclusive)
3294 * @max: an upper bound (inclusive)
3295 * @out_num: (out) (optional): a return location for a number
3296 * @error: a return location for #GError
3297 *
3298 * A convenience function for converting a string to a signed number.
3299 *
3300 * This function assumes that @str contains only a number of the given
3301 * @base that is within inclusive bounds limited by @min and @max. If
3302 * this is true, then the converted number is stored in @out_num. An
3303 * empty string is not a valid input. A string with leading or
3304 * trailing whitespace is also an invalid input.
3305 *
3306 * @base can be between 2 and 36 inclusive. Hexadecimal numbers must
3307 * not be prefixed with "0x" or "0X". Such a problem does not exist
3308 * for octal numbers, since they were usually prefixed with a zero
3309 * which does not change the value of the parsed number.
3310 *
3311 * Parsing failures result in an error with the %G_NUMBER_PARSER_ERROR
3312 * domain. If the input is invalid, the error code will be
3313 * %G_NUMBER_PARSER_ERROR_INVALID. If the parsed number is out of
3314 * bounds - %G_NUMBER_PARSER_ERROR_OUT_OF_BOUNDS.
3315 *
3316 * See g_ascii_strtoll() if you have more complex needs such as
3317 * parsing a string which starts with a number, but then has other
3318 * characters.
3319 *
3320 * Returns: %TRUE if @str was a number, otherwise %FALSE.
3321 *
3322 * Since: 2.54
3323 */
3324 gboolean
g_ascii_string_to_signed(const gchar * str,guint base,gint64 min,gint64 max,gint64 * out_num,GError ** error)3325 g_ascii_string_to_signed (const gchar *str,
3326 guint base,
3327 gint64 min,
3328 gint64 max,
3329 gint64 *out_num,
3330 GError **error)
3331 {
3332 gint64 number;
3333 const gchar *end_ptr = NULL;
3334 gint saved_errno = 0;
3335
3336 g_return_val_if_fail (str != NULL, FALSE);
3337 g_return_val_if_fail (base >= 2 && base <= 36, FALSE);
3338 g_return_val_if_fail (min <= max, FALSE);
3339 g_return_val_if_fail (error == NULL || *error == NULL, FALSE);
3340
3341 if (str[0] == '\0')
3342 {
3343 g_set_error_literal (error,
3344 G_NUMBER_PARSER_ERROR, G_NUMBER_PARSER_ERROR_INVALID,
3345 _("Empty string is not a number"));
3346 return FALSE;
3347 }
3348
3349 errno = 0;
3350 number = g_ascii_strtoll (str, (gchar **)&end_ptr, base);
3351 saved_errno = errno;
3352
3353 if (/* We do not allow leading whitespace, but g_ascii_strtoll
3354 * accepts it and just skips it, so we need to check for it
3355 * ourselves.
3356 */
3357 g_ascii_isspace (str[0]) ||
3358 /* We don't support hexadecimal numbers prefixed with 0x or
3359 * 0X.
3360 */
3361 (base == 16 &&
3362 (str_has_sign (str) ? str_has_hex_prefix (str + 1) : str_has_hex_prefix (str))) ||
3363 (saved_errno != 0 && saved_errno != ERANGE) ||
3364 end_ptr == NULL ||
3365 *end_ptr != '\0')
3366 {
3367 g_set_error (error,
3368 G_NUMBER_PARSER_ERROR, G_NUMBER_PARSER_ERROR_INVALID,
3369 _("“%s” is not a signed number"), str);
3370 return FALSE;
3371 }
3372 if (saved_errno == ERANGE || number < min || number > max)
3373 {
3374 gchar *min_str = g_strdup_printf ("%" G_GINT64_FORMAT, min);
3375 gchar *max_str = g_strdup_printf ("%" G_GINT64_FORMAT, max);
3376
3377 g_set_error (error,
3378 G_NUMBER_PARSER_ERROR, G_NUMBER_PARSER_ERROR_OUT_OF_BOUNDS,
3379 _("Number “%s” is out of bounds [%s, %s]"),
3380 str, min_str, max_str);
3381 g_free (min_str);
3382 g_free (max_str);
3383 return FALSE;
3384 }
3385 if (out_num != NULL)
3386 *out_num = number;
3387 return TRUE;
3388 }
3389
3390 /**
3391 * g_ascii_string_to_unsigned:
3392 * @str: a string
3393 * @base: base of a parsed number
3394 * @min: a lower bound (inclusive)
3395 * @max: an upper bound (inclusive)
3396 * @out_num: (out) (optional): a return location for a number
3397 * @error: a return location for #GError
3398 *
3399 * A convenience function for converting a string to an unsigned number.
3400 *
3401 * This function assumes that @str contains only a number of the given
3402 * @base that is within inclusive bounds limited by @min and @max. If
3403 * this is true, then the converted number is stored in @out_num. An
3404 * empty string is not a valid input. A string with leading or
3405 * trailing whitespace is also an invalid input. A string with a leading sign
3406 * (`-` or `+`) is not a valid input for the unsigned parser.
3407 *
3408 * @base can be between 2 and 36 inclusive. Hexadecimal numbers must
3409 * not be prefixed with "0x" or "0X". Such a problem does not exist
3410 * for octal numbers, since they were usually prefixed with a zero
3411 * which does not change the value of the parsed number.
3412 *
3413 * Parsing failures result in an error with the %G_NUMBER_PARSER_ERROR
3414 * domain. If the input is invalid, the error code will be
3415 * %G_NUMBER_PARSER_ERROR_INVALID. If the parsed number is out of
3416 * bounds - %G_NUMBER_PARSER_ERROR_OUT_OF_BOUNDS.
3417 *
3418 * See g_ascii_strtoull() if you have more complex needs such as
3419 * parsing a string which starts with a number, but then has other
3420 * characters.
3421 *
3422 * Returns: %TRUE if @str was a number, otherwise %FALSE.
3423 *
3424 * Since: 2.54
3425 */
3426 gboolean
g_ascii_string_to_unsigned(const gchar * str,guint base,guint64 min,guint64 max,guint64 * out_num,GError ** error)3427 g_ascii_string_to_unsigned (const gchar *str,
3428 guint base,
3429 guint64 min,
3430 guint64 max,
3431 guint64 *out_num,
3432 GError **error)
3433 {
3434 guint64 number;
3435 const gchar *end_ptr = NULL;
3436 gint saved_errno = 0;
3437
3438 g_return_val_if_fail (str != NULL, FALSE);
3439 g_return_val_if_fail (base >= 2 && base <= 36, FALSE);
3440 g_return_val_if_fail (min <= max, FALSE);
3441 g_return_val_if_fail (error == NULL || *error == NULL, FALSE);
3442
3443 if (str[0] == '\0')
3444 {
3445 g_set_error_literal (error,
3446 G_NUMBER_PARSER_ERROR, G_NUMBER_PARSER_ERROR_INVALID,
3447 _("Empty string is not a number"));
3448 return FALSE;
3449 }
3450
3451 errno = 0;
3452 number = g_ascii_strtoull (str, (gchar **)&end_ptr, base);
3453 saved_errno = errno;
3454
3455 if (/* We do not allow leading whitespace, but g_ascii_strtoull
3456 * accepts it and just skips it, so we need to check for it
3457 * ourselves.
3458 */
3459 g_ascii_isspace (str[0]) ||
3460 /* Unsigned number should have no sign.
3461 */
3462 str_has_sign (str) ||
3463 /* We don't support hexadecimal numbers prefixed with 0x or
3464 * 0X.
3465 */
3466 (base == 16 && str_has_hex_prefix (str)) ||
3467 (saved_errno != 0 && saved_errno != ERANGE) ||
3468 end_ptr == NULL ||
3469 *end_ptr != '\0')
3470 {
3471 g_set_error (error,
3472 G_NUMBER_PARSER_ERROR, G_NUMBER_PARSER_ERROR_INVALID,
3473 _("“%s” is not an unsigned number"), str);
3474 return FALSE;
3475 }
3476 if (saved_errno == ERANGE || number < min || number > max)
3477 {
3478 gchar *min_str = g_strdup_printf ("%" G_GUINT64_FORMAT, min);
3479 gchar *max_str = g_strdup_printf ("%" G_GUINT64_FORMAT, max);
3480
3481 g_set_error (error,
3482 G_NUMBER_PARSER_ERROR, G_NUMBER_PARSER_ERROR_OUT_OF_BOUNDS,
3483 _("Number “%s” is out of bounds [%s, %s]"),
3484 str, min_str, max_str);
3485 g_free (min_str);
3486 g_free (max_str);
3487 return FALSE;
3488 }
3489 if (out_num != NULL)
3490 *out_num = number;
3491 return TRUE;
3492 }
3493
/* NOTE(review): G_DEFINE_QUARK is declared outside this chunk. Presumably it
 * expands to the definition of g_number_parser_error_quark(), returning the
 * quark for the string "g-number-parser-error-quark" that backs the
 * G_NUMBER_PARSER_ERROR domain used by the parsers in this file; confirm
 * against the macro's definition.
 */
3494 G_DEFINE_QUARK (g-number-parser-error-quark, g_number_parser_error)
3495