1 /* GLIB - Library of useful routines for C programming
2  * Copyright (C) 1995-1997  Peter Mattis, Spencer Kimball and Josh MacDonald
3  *
4  * This library is free software; you can redistribute it and/or
5  * modify it under the terms of the GNU Lesser General Public
6  * License as published by the Free Software Foundation; either
7  * version 2.1 of the License, or (at your option) any later version.
8  *
9  * This library is distributed in the hope that it will be useful,
10  * but WITHOUT ANY WARRANTY; without even the implied warranty of
11  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
12  * Lesser General Public License for more details.
13  *
14  * You should have received a copy of the GNU Lesser General Public
15  * License along with this library; if not, see <http://www.gnu.org/licenses/>.
16  */
17 
18 /*
19  * Modified by the GLib Team and others 1997-2000.  See the AUTHORS
20  * file for a list of people on the GLib Team.  See the ChangeLog
21  * files for a list of changes.  These files are distributed with
22  * GLib at ftp://ftp.gtk.org/pub/gtk/.
23  */
24 
25 /*
26  * MT safe
27  */
28 
29 #include "config.h"
30 
31 #include <stdarg.h>
32 #include <stdio.h>
33 #include <stdlib.h>
34 #include <locale.h>
35 #include <string.h>
36 #include <locale.h>
37 #include <errno.h>
38 #include <garray.h>
39 #include <ctype.h>              /* For tolower() */
40 
41 #ifdef HAVE_XLOCALE_H
42 /* Needed on BSD/OS X for e.g. strtod_l */
43 #include <xlocale.h>
44 #endif
45 
46 #ifdef G_OS_WIN32
47 #include <windows.h>
48 #endif
49 
50 /* do not include <unistd.h> here, it may interfere with g_strsignal() */
51 
52 #include "gstrfuncs.h"
53 
54 #include "gprintf.h"
55 #include "gprintfint.h"
56 #include "glibintl.h"
57 
58 
59 /**
60  * SECTION:string_utils
61  * @title: String Utility Functions
62  * @short_description: various string-related functions
63  *
64  * This section describes a number of utility functions for creating,
65  * duplicating, and manipulating strings.
66  *
67  * Note that the functions g_printf(), g_fprintf(), g_sprintf(),
68  * g_vprintf(), g_vfprintf(), g_vsprintf() and g_vasprintf()
69  * are declared in the header `gprintf.h` which is not included in `glib.h`
70  * (otherwise using `glib.h` would drag in `stdio.h`), so you'll have to
71  * explicitly include `<glib/gprintf.h>` in order to use the GLib
72  * printf() functions.
73  *
74  * ## String precision pitfalls # {#string-precision}
75  *
76  * While you may use the printf() functions to format UTF-8 strings,
77  * notice that the precision of a \%Ns parameter is interpreted
78  * as the number of bytes, not characters to print. On top of that,
79  * the GNU libc implementation of the printf() functions has the
80  * "feature" that it checks that the string given for the \%Ns
81  * parameter consists of a whole number of characters in the current
 * encoding. So, unless you are sure you are always going to be in a
 * UTF-8 locale or you know your text is restricted to ASCII, avoid
84  * using \%Ns. If your intention is to format strings for a
85  * certain number of columns, then \%Ns is not a correct solution
86  * anyway, since it fails to take wide characters (see g_unichar_iswide())
87  * into account.
88  *
89  * Note also that there are various printf() parameters which are platform
90  * dependent. GLib provides platform independent macros for these parameters
91  * which should be used instead. A common example is %G_GUINT64_FORMAT, which
92  * should be used instead of `%llu` or similar parameters for formatting
93  * 64-bit integers. These macros are all named `G_*_FORMAT`; see
94  * [Basic Types][glib-Basic-Types].
95  */
96 
97 /**
98  * g_ascii_isalnum:
99  * @c: any character
100  *
101  * Determines whether a character is alphanumeric.
102  *
103  * Unlike the standard C library isalnum() function, this only
104  * recognizes standard ASCII letters and ignores the locale,
105  * returning %FALSE for all non-ASCII characters. Also, unlike
106  * the standard library function, this takes a char, not an int,
107  * so don't call it on %EOF, but no need to cast to #guchar before
108  * passing a possibly non-ASCII character in.
109  *
110  * Returns: %TRUE if @c is an ASCII alphanumeric character
111  */
112 
113 /**
114  * g_ascii_isalpha:
115  * @c: any character
116  *
117  * Determines whether a character is alphabetic (i.e. a letter).
118  *
119  * Unlike the standard C library isalpha() function, this only
120  * recognizes standard ASCII letters and ignores the locale,
121  * returning %FALSE for all non-ASCII characters. Also, unlike
122  * the standard library function, this takes a char, not an int,
123  * so don't call it on %EOF, but no need to cast to #guchar before
124  * passing a possibly non-ASCII character in.
125  *
126  * Returns: %TRUE if @c is an ASCII alphabetic character
127  */
128 
129 /**
130  * g_ascii_iscntrl:
131  * @c: any character
132  *
133  * Determines whether a character is a control character.
134  *
135  * Unlike the standard C library iscntrl() function, this only
136  * recognizes standard ASCII control characters and ignores the
137  * locale, returning %FALSE for all non-ASCII characters. Also,
138  * unlike the standard library function, this takes a char, not
139  * an int, so don't call it on %EOF, but no need to cast to #guchar
140  * before passing a possibly non-ASCII character in.
141  *
142  * Returns: %TRUE if @c is an ASCII control character.
143  */
144 
145 /**
146  * g_ascii_isdigit:
147  * @c: any character
148  *
 * Determines whether a character is a digit (0-9).
150  *
151  * Unlike the standard C library isdigit() function, this takes
 * a char, not an int, so don't call it on %EOF, but no need to
153  * cast to #guchar before passing a possibly non-ASCII character in.
154  *
155  * Returns: %TRUE if @c is an ASCII digit.
156  */
157 
158 /**
159  * g_ascii_isgraph:
160  * @c: any character
161  *
162  * Determines whether a character is a printing character and not a space.
163  *
164  * Unlike the standard C library isgraph() function, this only
165  * recognizes standard ASCII characters and ignores the locale,
166  * returning %FALSE for all non-ASCII characters. Also, unlike
167  * the standard library function, this takes a char, not an int,
168  * so don't call it on %EOF, but no need to cast to #guchar before
169  * passing a possibly non-ASCII character in.
170  *
171  * Returns: %TRUE if @c is an ASCII printing character other than space.
172  */
173 
174 /**
175  * g_ascii_islower:
176  * @c: any character
177  *
178  * Determines whether a character is an ASCII lower case letter.
179  *
180  * Unlike the standard C library islower() function, this only
181  * recognizes standard ASCII letters and ignores the locale,
182  * returning %FALSE for all non-ASCII characters. Also, unlike
183  * the standard library function, this takes a char, not an int,
184  * so don't call it on %EOF, but no need to worry about casting
185  * to #guchar before passing a possibly non-ASCII character in.
186  *
187  * Returns: %TRUE if @c is an ASCII lower case letter
188  */
189 
190 /**
191  * g_ascii_isprint:
192  * @c: any character
193  *
194  * Determines whether a character is a printing character.
195  *
196  * Unlike the standard C library isprint() function, this only
197  * recognizes standard ASCII characters and ignores the locale,
198  * returning %FALSE for all non-ASCII characters. Also, unlike
199  * the standard library function, this takes a char, not an int,
200  * so don't call it on %EOF, but no need to cast to #guchar before
201  * passing a possibly non-ASCII character in.
202  *
203  * Returns: %TRUE if @c is an ASCII printing character.
204  */
205 
206 /**
207  * g_ascii_ispunct:
208  * @c: any character
209  *
210  * Determines whether a character is a punctuation character.
211  *
212  * Unlike the standard C library ispunct() function, this only
 * recognizes standard ASCII punctuation and ignores the locale,
214  * returning %FALSE for all non-ASCII characters. Also, unlike
215  * the standard library function, this takes a char, not an int,
216  * so don't call it on %EOF, but no need to cast to #guchar before
217  * passing a possibly non-ASCII character in.
218  *
219  * Returns: %TRUE if @c is an ASCII punctuation character.
220  */
221 
222 /**
223  * g_ascii_isspace:
224  * @c: any character
225  *
226  * Determines whether a character is a white-space character.
227  *
228  * Unlike the standard C library isspace() function, this only
229  * recognizes standard ASCII white-space and ignores the locale,
230  * returning %FALSE for all non-ASCII characters. Also, unlike
231  * the standard library function, this takes a char, not an int,
232  * so don't call it on %EOF, but no need to cast to #guchar before
233  * passing a possibly non-ASCII character in.
234  *
235  * Returns: %TRUE if @c is an ASCII white-space character
236  */
237 
238 /**
239  * g_ascii_isupper:
240  * @c: any character
241  *
242  * Determines whether a character is an ASCII upper case letter.
243  *
244  * Unlike the standard C library isupper() function, this only
245  * recognizes standard ASCII letters and ignores the locale,
246  * returning %FALSE for all non-ASCII characters. Also, unlike
247  * the standard library function, this takes a char, not an int,
248  * so don't call it on %EOF, but no need to worry about casting
249  * to #guchar before passing a possibly non-ASCII character in.
250  *
251  * Returns: %TRUE if @c is an ASCII upper case letter
252  */
253 
254 /**
255  * g_ascii_isxdigit:
256  * @c: any character
257  *
258  * Determines whether a character is a hexadecimal-digit character.
259  *
260  * Unlike the standard C library isxdigit() function, this takes
261  * a char, not an int, so don't call it on %EOF, but no need to
262  * cast to #guchar before passing a possibly non-ASCII character in.
263  *
264  * Returns: %TRUE if @c is an ASCII hexadecimal-digit character.
265  */
266 
267 /**
268  * G_ASCII_DTOSTR_BUF_SIZE:
269  *
270  * A good size for a buffer to be passed into g_ascii_dtostr().
271  * It is guaranteed to be enough for all output of that function
 * on systems with 64-bit IEEE-compatible doubles.
273  *
274  * The typical usage would be something like:
275  * |[<!-- language="C" -->
276  *   char buf[G_ASCII_DTOSTR_BUF_SIZE];
277  *
278  *   fprintf (out, "value=%s\n", g_ascii_dtostr (buf, sizeof (buf), value));
279  * ]|
280  */
281 
282 /**
283  * g_strstrip:
284  * @string: a string to remove the leading and trailing whitespace from
285  *
286  * Removes leading and trailing whitespace from a string.
287  * See g_strchomp() and g_strchug().
288  *
289  * Returns: @string
290  */
291 
292 /**
293  * G_STR_DELIMITERS:
294  *
295  * The standard delimiters, used in g_strdelimit().
296  */
297 
/*
 * Per-byte character-class bitmask table, indexed by the unsigned value of
 * a byte. Only the 128 ASCII entries are written out; the array is declared
 * with 256 elements, so the remaining entries are implicitly zero.
 *
 * NOTE(review): the individual bits presumably correspond to the GAsciiType
 * flags tested by the g_ascii_is*() macros in gstrfuncs.h -- confirm there
 * before changing any value.
 */
static const guint16 ascii_table_data[256] = {
  0x004, 0x004, 0x004, 0x004, 0x004, 0x004, 0x004, 0x004, /* 0x00 - 0x07 */
  0x004, 0x104, 0x104, 0x004, 0x104, 0x104, 0x004, 0x004, /* 0x08 - 0x0f */
  0x004, 0x004, 0x004, 0x004, 0x004, 0x004, 0x004, 0x004, /* 0x10 - 0x17 */
  0x004, 0x004, 0x004, 0x004, 0x004, 0x004, 0x004, 0x004, /* 0x18 - 0x1f */
  0x140, 0x0d0, 0x0d0, 0x0d0, 0x0d0, 0x0d0, 0x0d0, 0x0d0, /* 0x20 - 0x27 */
  0x0d0, 0x0d0, 0x0d0, 0x0d0, 0x0d0, 0x0d0, 0x0d0, 0x0d0, /* 0x28 - 0x2f */
  0x459, 0x459, 0x459, 0x459, 0x459, 0x459, 0x459, 0x459, /* 0x30 - 0x37 */
  0x459, 0x459, 0x0d0, 0x0d0, 0x0d0, 0x0d0, 0x0d0, 0x0d0, /* 0x38 - 0x3f */
  0x0d0, 0x653, 0x653, 0x653, 0x653, 0x653, 0x653, 0x253, /* 0x40 - 0x47 */
  0x253, 0x253, 0x253, 0x253, 0x253, 0x253, 0x253, 0x253, /* 0x48 - 0x4f */
  0x253, 0x253, 0x253, 0x253, 0x253, 0x253, 0x253, 0x253, /* 0x50 - 0x57 */
  0x253, 0x253, 0x253, 0x0d0, 0x0d0, 0x0d0, 0x0d0, 0x0d0, /* 0x58 - 0x5f */
  0x0d0, 0x473, 0x473, 0x473, 0x473, 0x473, 0x473, 0x073, /* 0x60 - 0x67 */
  0x073, 0x073, 0x073, 0x073, 0x073, 0x073, 0x073, 0x073, /* 0x68 - 0x6f */
  0x073, 0x073, 0x073, 0x073, 0x073, 0x073, 0x073, 0x073, /* 0x70 - 0x77 */
  0x073, 0x073, 0x073, 0x0d0, 0x0d0, 0x0d0, 0x0d0, 0x004  /* 0x78 - 0x7f */
  /* the upper 128 are all zeroes */
};

/* Externally visible, read-only pointer to the table above. */
const guint16 * const g_ascii_table = ascii_table_data;
319 
/*
 * USE_XLOCALE selects the code paths below that pass an explicit "C" locale
 * object (see get_C_locale()) instead of post-processing locale-dependent
 * results. It is only defined when every one of the HAVE_* feature macros
 * listed here is defined.
 */
#if defined (HAVE_NEWLOCALE) && \
    defined (HAVE_USELOCALE) && \
    defined (HAVE_STRTOD_L) && \
    defined (HAVE_STRTOULL_L) && \
    defined (HAVE_STRTOLL_L)
#define USE_XLOCALE 1
#endif
327 
328 #ifdef USE_XLOCALE
329 static locale_t
get_C_locale(void)330 get_C_locale (void)
331 {
332   static gsize initialized = FALSE;
333   static locale_t C_locale = NULL;
334 
335   if (g_once_init_enter (&initialized))
336     {
337       C_locale = newlocale (LC_ALL_MASK, "C", NULL);
338       g_once_init_leave (&initialized, TRUE);
339     }
340 
341   return C_locale;
342 }
343 #endif
344 
345 /**
346  * g_strdup:
347  * @str: (nullable): the string to duplicate
348  *
349  * Duplicates a string. If @str is %NULL it returns %NULL.
350  * The returned string should be freed with g_free()
351  * when no longer needed.
352  *
353  * Returns: a newly-allocated copy of @str
354  */
355 gchar*
g_strdup(const gchar * str)356 g_strdup (const gchar *str)
357 {
358   gchar *new_str;
359   gsize length;
360 
361   if (str)
362     {
363       length = strlen (str) + 1;
364       new_str = g_new (char, length);
365       memcpy (new_str, str, length);
366     }
367   else
368     new_str = NULL;
369 
370   return new_str;
371 }
372 
373 /**
374  * g_memdup:
375  * @mem: the memory to copy.
376  * @byte_size: the number of bytes to copy.
377  *
378  * Allocates @byte_size bytes of memory, and copies @byte_size bytes into it
379  * from @mem. If @mem is %NULL it returns %NULL.
380  *
381  * Returns: a pointer to the newly-allocated copy of the memory, or %NULL if @mem
382  *  is %NULL.
383  * Deprecated: 2.68: Use g_memdup2() instead, as it accepts a #gsize argument
384  *     for @byte_size, avoiding the possibility of overflow in a #gsize → #guint
385  *     conversion
386  */
387 gpointer
g_memdup(gconstpointer mem,guint byte_size)388 g_memdup (gconstpointer mem,
389           guint         byte_size)
390 {
391   gpointer new_mem;
392 
393   if (mem && byte_size != 0)
394     {
395       new_mem = g_malloc (byte_size);
396       memcpy (new_mem, mem, byte_size);
397     }
398   else
399     new_mem = NULL;
400 
401   return new_mem;
402 }
403 
404 /**
405  * g_memdup2:
406  * @mem: (nullable): the memory to copy.
407  * @byte_size: the number of bytes to copy.
408  *
409  * Allocates @byte_size bytes of memory, and copies @byte_size bytes into it
410  * from @mem. If @mem is %NULL it returns %NULL.
411  *
412  * This replaces g_memdup(), which was prone to integer overflows when
413  * converting the argument from a #gsize to a #guint.
414  *
415  * Returns: (nullable): a pointer to the newly-allocated copy of the memory,
416  *    or %NULL if @mem is %NULL.
417  * Since: 2.68
418  */
419 gpointer
g_memdup2(gconstpointer mem,gsize byte_size)420 g_memdup2 (gconstpointer mem,
421            gsize         byte_size)
422 {
423   gpointer new_mem;
424 
425   if (mem && byte_size != 0)
426     {
427       new_mem = g_malloc (byte_size);
428       memcpy (new_mem, mem, byte_size);
429     }
430   else
431     new_mem = NULL;
432 
433   return new_mem;
434 }
435 
436 /**
437  * g_strndup:
438  * @str: the string to duplicate
439  * @n: the maximum number of bytes to copy from @str
440  *
441  * Duplicates the first @n bytes of a string, returning a newly-allocated
442  * buffer @n + 1 bytes long which will always be nul-terminated. If @str
443  * is less than @n bytes long the buffer is padded with nuls. If @str is
444  * %NULL it returns %NULL. The returned value should be freed when no longer
445  * needed.
446  *
447  * To copy a number of characters from a UTF-8 encoded string,
448  * use g_utf8_strncpy() instead.
449  *
450  * Returns: a newly-allocated buffer containing the first @n bytes
451  *     of @str, nul-terminated
452  */
453 gchar*
g_strndup(const gchar * str,gsize n)454 g_strndup (const gchar *str,
455            gsize        n)
456 {
457   gchar *new_str;
458 
459   if (str)
460     {
461       new_str = g_new (gchar, n + 1);
462       strncpy (new_str, str, n);
463       new_str[n] = '\0';
464     }
465   else
466     new_str = NULL;
467 
468   return new_str;
469 }
470 
471 /**
472  * g_strnfill:
473  * @length: the length of the new string
474  * @fill_char: the byte to fill the string with
475  *
476  * Creates a new string @length bytes long filled with @fill_char.
477  * The returned string should be freed when no longer needed.
478  *
479  * Returns: a newly-allocated string filled the @fill_char
480  */
481 gchar*
g_strnfill(gsize length,gchar fill_char)482 g_strnfill (gsize length,
483             gchar fill_char)
484 {
485   gchar *str;
486 
487   str = g_new (gchar, length + 1);
488   memset (str, (guchar)fill_char, length);
489   str[length] = '\0';
490 
491   return str;
492 }
493 
494 /**
495  * g_stpcpy:
496  * @dest: destination buffer.
497  * @src: source string.
498  *
499  * Copies a nul-terminated string into the dest buffer, include the
500  * trailing nul, and return a pointer to the trailing nul byte.
501  * This is useful for concatenating multiple strings together
502  * without having to repeatedly scan for the end.
503  *
504  * Returns: a pointer to trailing nul byte.
505  **/
506 gchar *
g_stpcpy(gchar * dest,const gchar * src)507 g_stpcpy (gchar       *dest,
508           const gchar *src)
509 {
510 #ifdef HAVE_STPCPY
511   g_return_val_if_fail (dest != NULL, NULL);
512   g_return_val_if_fail (src != NULL, NULL);
513   return stpcpy (dest, src);
514 #else
515   gchar *d = dest;
516   const gchar *s = src;
517 
518   g_return_val_if_fail (dest != NULL, NULL);
519   g_return_val_if_fail (src != NULL, NULL);
520   do
521     *d++ = *s;
522   while (*s++ != '\0');
523 
524   return d - 1;
525 #endif
526 }
527 
528 /**
529  * g_strdup_vprintf:
530  * @format: (not nullable): a standard printf() format string, but notice
531  *     [string precision pitfalls][string-precision]
532  * @args: the list of parameters to insert into the format string
533  *
534  * Similar to the standard C vsprintf() function but safer, since it
535  * calculates the maximum space required and allocates memory to hold
536  * the result. The returned string should be freed with g_free() when
537  * no longer needed.
538  *
539  * The returned string is guaranteed to be non-NULL, unless @format
540  * contains `%lc` or `%ls` conversions, which can fail if no multibyte
541  * representation is available for the given character.
542  *
543  * See also g_vasprintf(), which offers the same functionality, but
544  * additionally returns the length of the allocated string.
545  *
546  * Returns: a newly-allocated string holding the result
547  */
548 gchar*
g_strdup_vprintf(const gchar * format,va_list args)549 g_strdup_vprintf (const gchar *format,
550                   va_list      args)
551 {
552   gchar *string = NULL;
553 
554   g_vasprintf (&string, format, args);
555 
556   return string;
557 }
558 
559 /**
560  * g_strdup_printf:
561  * @format: (not nullable): a standard printf() format string, but notice
562  *     [string precision pitfalls][string-precision]
563  * @...: the parameters to insert into the format string
564  *
565  * Similar to the standard C sprintf() function but safer, since it
566  * calculates the maximum space required and allocates memory to hold
567  * the result. The returned string should be freed with g_free() when no
568  * longer needed.
569  *
570  * The returned string is guaranteed to be non-NULL, unless @format
571  * contains `%lc` or `%ls` conversions, which can fail if no multibyte
572  * representation is available for the given character.
573  *
574  * Returns: a newly-allocated string holding the result
575  */
576 gchar*
g_strdup_printf(const gchar * format,...)577 g_strdup_printf (const gchar *format,
578                  ...)
579 {
580   gchar *buffer;
581   va_list args;
582 
583   va_start (args, format);
584   buffer = g_strdup_vprintf (format, args);
585   va_end (args);
586 
587   return buffer;
588 }
589 
590 /**
591  * g_strconcat:
592  * @string1: the first string to add, which must not be %NULL
593  * @...: a %NULL-terminated list of strings to append to the string
594  *
595  * Concatenates all of the given strings into one long string. The
596  * returned string should be freed with g_free() when no longer needed.
597  *
598  * The variable argument list must end with %NULL. If you forget the %NULL,
599  * g_strconcat() will start appending random memory junk to your string.
600  *
601  * Note that this function is usually not the right function to use to
602  * assemble a translated message from pieces, since proper translation
603  * often requires the pieces to be reordered.
604  *
605  * Returns: a newly-allocated string containing all the string arguments
606  */
607 gchar*
g_strconcat(const gchar * string1,...)608 g_strconcat (const gchar *string1, ...)
609 {
610   gsize   l;
611   va_list args;
612   gchar   *s;
613   gchar   *concat;
614   gchar   *ptr;
615 
616   if (!string1)
617     return NULL;
618 
619   l = 1 + strlen (string1);
620   va_start (args, string1);
621   s = va_arg (args, gchar*);
622   while (s)
623     {
624       l += strlen (s);
625       s = va_arg (args, gchar*);
626     }
627   va_end (args);
628 
629   concat = g_new (gchar, l);
630   ptr = concat;
631 
632   ptr = g_stpcpy (ptr, string1);
633   va_start (args, string1);
634   s = va_arg (args, gchar*);
635   while (s)
636     {
637       ptr = g_stpcpy (ptr, s);
638       s = va_arg (args, gchar*);
639     }
640   va_end (args);
641 
642   return concat;
643 }
644 
645 /**
646  * g_strtod:
647  * @nptr:    the string to convert to a numeric value.
648  * @endptr:  (out) (transfer none) (optional): if non-%NULL, it returns the
649  *           character after the last character used in the conversion.
650  *
651  * Converts a string to a #gdouble value.
652  * It calls the standard strtod() function to handle the conversion, but
653  * if the string is not completely converted it attempts the conversion
654  * again with g_ascii_strtod(), and returns the best match.
655  *
656  * This function should seldom be used. The normal situation when reading
657  * numbers not for human consumption is to use g_ascii_strtod(). Only when
658  * you know that you must expect both locale formatted and C formatted numbers
659  * should you use this. Make sure that you don't pass strings such as comma
660  * separated lists of values, since the commas may be interpreted as a decimal
661  * point in some locales, causing unexpected results.
662  *
663  * Returns: the #gdouble value.
664  **/
665 gdouble
g_strtod(const gchar * nptr,gchar ** endptr)666 g_strtod (const gchar *nptr,
667           gchar      **endptr)
668 {
669   gchar *fail_pos_1;
670   gchar *fail_pos_2;
671   gdouble val_1;
672   gdouble val_2 = 0;
673 
674   g_return_val_if_fail (nptr != NULL, 0);
675 
676   fail_pos_1 = NULL;
677   fail_pos_2 = NULL;
678 
679   val_1 = strtod (nptr, &fail_pos_1);
680 
681   if (fail_pos_1 && fail_pos_1[0] != 0)
682     val_2 = g_ascii_strtod (nptr, &fail_pos_2);
683 
684   if (!fail_pos_1 || fail_pos_1[0] == 0 || fail_pos_1 >= fail_pos_2)
685     {
686       if (endptr)
687         *endptr = fail_pos_1;
688       return val_1;
689     }
690   else
691     {
692       if (endptr)
693         *endptr = fail_pos_2;
694       return val_2;
695     }
696 }
697 
698 /**
699  * g_ascii_strtod:
700  * @nptr:    the string to convert to a numeric value.
701  * @endptr:  (out) (transfer none) (optional): if non-%NULL, it returns the
702  *           character after the last character used in the conversion.
703  *
704  * Converts a string to a #gdouble value.
705  *
706  * This function behaves like the standard strtod() function
707  * does in the C locale. It does this without actually changing
708  * the current locale, since that would not be thread-safe.
709  * A limitation of the implementation is that this function
710  * will still accept localized versions of infinities and NANs.
711  *
712  * This function is typically used when reading configuration
713  * files or other non-user input that should be locale independent.
714  * To handle input from the user you should normally use the
715  * locale-sensitive system strtod() function.
716  *
717  * To convert from a #gdouble to a string in a locale-insensitive
718  * way, use g_ascii_dtostr().
719  *
720  * If the correct value would cause overflow, plus or minus %HUGE_VAL
721  * is returned (according to the sign of the value), and %ERANGE is
722  * stored in %errno. If the correct value would cause underflow,
723  * zero is returned and %ERANGE is stored in %errno.
724  *
725  * This function resets %errno before calling strtod() so that
726  * you can reliably detect overflow and underflow.
727  *
728  * Returns: the #gdouble value.
729  */
730 gdouble
g_ascii_strtod(const gchar * nptr,gchar ** endptr)731 g_ascii_strtod (const gchar *nptr,
732                 gchar      **endptr)
733 {
734 #ifdef USE_XLOCALE
735 
736   g_return_val_if_fail (nptr != NULL, 0);
737 
738   errno = 0;
739 
740   return strtod_l (nptr, endptr, get_C_locale ());
741 
742 #else
743 
744   gchar *fail_pos;
745   gdouble val;
746 #ifndef __BIONIC__
747   struct lconv *locale_data;
748 #endif
749   const char *decimal_point;
750   gsize decimal_point_len;
751   const char *p, *decimal_point_pos;
752   const char *end = NULL; /* Silence gcc */
753   int strtod_errno;
754 
755   g_return_val_if_fail (nptr != NULL, 0);
756 
757   fail_pos = NULL;
758 
759 #ifndef __BIONIC__
760   locale_data = localeconv ();
761   decimal_point = locale_data->decimal_point;
762   decimal_point_len = strlen (decimal_point);
763 #else
764   decimal_point = ".";
765   decimal_point_len = 1;
766 #endif
767 
768   g_assert (decimal_point_len != 0);
769 
770   decimal_point_pos = NULL;
771   end = NULL;
772 
773   if (decimal_point[0] != '.' ||
774       decimal_point[1] != 0)
775     {
776       p = nptr;
777       /* Skip leading space */
778       while (g_ascii_isspace (*p))
779         p++;
780 
781       /* Skip leading optional sign */
782       if (*p == '+' || *p == '-')
783         p++;
784 
785       if (p[0] == '0' &&
786           (p[1] == 'x' || p[1] == 'X'))
787         {
788           p += 2;
789           /* HEX - find the (optional) decimal point */
790 
791           while (g_ascii_isxdigit (*p))
792             p++;
793 
794           if (*p == '.')
795             decimal_point_pos = p++;
796 
797           while (g_ascii_isxdigit (*p))
798             p++;
799 
800           if (*p == 'p' || *p == 'P')
801             p++;
802           if (*p == '+' || *p == '-')
803             p++;
804           while (g_ascii_isdigit (*p))
805             p++;
806 
807           end = p;
808         }
809       else if (g_ascii_isdigit (*p) || *p == '.')
810         {
811           while (g_ascii_isdigit (*p))
812             p++;
813 
814           if (*p == '.')
815             decimal_point_pos = p++;
816 
817           while (g_ascii_isdigit (*p))
818             p++;
819 
820           if (*p == 'e' || *p == 'E')
821             p++;
822           if (*p == '+' || *p == '-')
823             p++;
824           while (g_ascii_isdigit (*p))
825             p++;
826 
827           end = p;
828         }
829       /* For the other cases, we need not convert the decimal point */
830     }
831 
832   if (decimal_point_pos)
833     {
834       char *copy, *c;
835 
836       /* We need to convert the '.' to the locale specific decimal point */
837       copy = g_malloc (end - nptr + 1 + decimal_point_len);
838 
839       c = copy;
840       memcpy (c, nptr, decimal_point_pos - nptr);
841       c += decimal_point_pos - nptr;
842       memcpy (c, decimal_point, decimal_point_len);
843       c += decimal_point_len;
844       memcpy (c, decimal_point_pos + 1, end - (decimal_point_pos + 1));
845       c += end - (decimal_point_pos + 1);
846       *c = 0;
847 
848       errno = 0;
849       val = strtod (copy, &fail_pos);
850       strtod_errno = errno;
851 
852       if (fail_pos)
853         {
854           if (fail_pos - copy > decimal_point_pos - nptr)
855             fail_pos = (char *)nptr + (fail_pos - copy) - (decimal_point_len - 1);
856           else
857             fail_pos = (char *)nptr + (fail_pos - copy);
858         }
859 
860       g_free (copy);
861 
862     }
863   else if (end)
864     {
865       char *copy;
866 
867       copy = g_malloc (end - (char *)nptr + 1);
868       memcpy (copy, nptr, end - nptr);
869       *(copy + (end - (char *)nptr)) = 0;
870 
871       errno = 0;
872       val = strtod (copy, &fail_pos);
873       strtod_errno = errno;
874 
875       if (fail_pos)
876         {
877           fail_pos = (char *)nptr + (fail_pos - copy);
878         }
879 
880       g_free (copy);
881     }
882   else
883     {
884       errno = 0;
885       val = strtod (nptr, &fail_pos);
886       strtod_errno = errno;
887     }
888 
889   if (endptr)
890     *endptr = fail_pos;
891 
892   errno = strtod_errno;
893 
894   return val;
895 #endif
896 }
897 
898 
899 /**
900  * g_ascii_dtostr:
901  * @buffer: A buffer to place the resulting string in
902  * @buf_len: The length of the buffer.
903  * @d: The #gdouble to convert
904  *
905  * Converts a #gdouble to a string, using the '.' as
906  * decimal point.
907  *
908  * This function generates enough precision that converting
909  * the string back using g_ascii_strtod() gives the same machine-number
910  * (on machines with IEEE compatible 64bit doubles). It is
911  * guaranteed that the size of the resulting string will never
912  * be larger than @G_ASCII_DTOSTR_BUF_SIZE bytes, including the terminating
913  * nul character, which is always added.
914  *
915  * Returns: The pointer to the buffer with the converted string.
916  **/
917 gchar *
g_ascii_dtostr(gchar * buffer,gint buf_len,gdouble d)918 g_ascii_dtostr (gchar       *buffer,
919                 gint         buf_len,
920                 gdouble      d)
921 {
922   return g_ascii_formatd (buffer, buf_len, "%.17g", d);
923 }
924 
925 #pragma GCC diagnostic push
926 #pragma GCC diagnostic ignored "-Wformat-nonliteral"
927 
928 /**
929  * g_ascii_formatd:
930  * @buffer: A buffer to place the resulting string in
931  * @buf_len: The length of the buffer.
932  * @format: The printf()-style format to use for the
933  *          code to use for converting.
934  * @d: The #gdouble to convert
935  *
936  * Converts a #gdouble to a string, using the '.' as
937  * decimal point. To format the number you pass in
938  * a printf()-style format string. Allowed conversion
939  * specifiers are 'e', 'E', 'f', 'F', 'g' and 'G'.
940  *
941  * The returned buffer is guaranteed to be nul-terminated.
942  *
943  * If you just want to want to serialize the value into a
944  * string, use g_ascii_dtostr().
945  *
946  * Returns: The pointer to the buffer with the converted string.
947  */
948 gchar *
g_ascii_formatd(gchar * buffer,gint buf_len,const gchar * format,gdouble d)949 g_ascii_formatd (gchar       *buffer,
950                  gint         buf_len,
951                  const gchar *format,
952                  gdouble      d)
953 {
954 #ifdef USE_XLOCALE
955   locale_t old_locale;
956 
957   old_locale = uselocale (get_C_locale ());
958    _g_snprintf (buffer, buf_len, format, d);
959   uselocale (old_locale);
960 
961   return buffer;
962 #else
963 #ifndef __BIONIC__
964   struct lconv *locale_data;
965 #endif
966   const char *decimal_point;
967   gsize decimal_point_len;
968   gchar *p;
969   int rest_len;
970   gchar format_char;
971 
972   g_return_val_if_fail (buffer != NULL, NULL);
973   g_return_val_if_fail (format[0] == '%', NULL);
974   g_return_val_if_fail (strpbrk (format + 1, "'l%") == NULL, NULL);
975 
976   format_char = format[strlen (format) - 1];
977 
978   g_return_val_if_fail (format_char == 'e' || format_char == 'E' ||
979                         format_char == 'f' || format_char == 'F' ||
980                         format_char == 'g' || format_char == 'G',
981                         NULL);
982 
983   if (format[0] != '%')
984     return NULL;
985 
986   if (strpbrk (format + 1, "'l%"))
987     return NULL;
988 
989   if (!(format_char == 'e' || format_char == 'E' ||
990         format_char == 'f' || format_char == 'F' ||
991         format_char == 'g' || format_char == 'G'))
992     return NULL;
993 
994   _g_snprintf (buffer, buf_len, format, d);
995 
996 #ifndef __BIONIC__
997   locale_data = localeconv ();
998   decimal_point = locale_data->decimal_point;
999   decimal_point_len = strlen (decimal_point);
1000 #else
1001   decimal_point = ".";
1002   decimal_point_len = 1;
1003 #endif
1004 
1005   g_assert (decimal_point_len != 0);
1006 
1007   if (decimal_point[0] != '.' ||
1008       decimal_point[1] != 0)
1009     {
1010       p = buffer;
1011 
1012       while (g_ascii_isspace (*p))
1013         p++;
1014 
1015       if (*p == '+' || *p == '-')
1016         p++;
1017 
1018       while (isdigit ((guchar)*p))
1019         p++;
1020 
1021       if (strncmp (p, decimal_point, decimal_point_len) == 0)
1022         {
1023           *p = '.';
1024           p++;
1025           if (decimal_point_len > 1)
1026             {
1027               rest_len = strlen (p + (decimal_point_len - 1));
1028               memmove (p, p + (decimal_point_len - 1), rest_len);
1029               p[rest_len] = 0;
1030             }
1031         }
1032     }
1033 
1034   return buffer;
1035 #endif
1036 }
1037 #pragma GCC diagnostic pop
1038 
/* Locale-independent ASCII classification and case-mapping helpers.
 * Every macro may evaluate its argument more than once, so only pass
 * expressions without side effects.
 */
#define ISSPACE(c)  ((c) == ' '  || (c) == '\t' || (c) == '\n' || \
                     (c) == '\v' || (c) == '\f' || (c) == '\r')
#define ISUPPER(c)  ('A' <= (c) && (c) <= 'Z')
#define ISLOWER(c)  ('a' <= (c) && (c) <= 'z')
#define ISALPHA(c)  (ISLOWER (c) || ISUPPER (c))
#define TOUPPER(c)  (ISLOWER (c) ? (c) - 'a' + 'A' : (c))
#define TOLOWER(c)  (ISUPPER (c) ? (c) - 'A' + 'a' : (c))
1046 
1047 #ifndef USE_XLOCALE
1048 
1049 static guint64
g_parse_long_long(const gchar * nptr,const gchar ** endptr,guint base,gboolean * negative)1050 g_parse_long_long (const gchar  *nptr,
1051                    const gchar **endptr,
1052                    guint         base,
1053                    gboolean     *negative)
1054 {
1055   /* this code is based on on the strtol(3) code from GNU libc released under
1056    * the GNU Lesser General Public License.
1057    *
1058    * Copyright (C) 1991,92,94,95,96,97,98,99,2000,01,02
1059    *        Free Software Foundation, Inc.
1060    */
1061   gboolean overflow;
1062   guint64 cutoff;
1063   guint64 cutlim;
1064   guint64 ui64;
1065   const gchar *s, *save;
1066   guchar c;
1067 
1068   g_return_val_if_fail (nptr != NULL, 0);
1069 
1070   *negative = FALSE;
1071   if (base == 1 || base > 36)
1072     {
1073       errno = EINVAL;
1074       if (endptr)
1075         *endptr = nptr;
1076       return 0;
1077     }
1078 
1079   save = s = nptr;
1080 
1081   /* Skip white space.  */
1082   while (ISSPACE (*s))
1083     ++s;
1084 
1085   if (G_UNLIKELY (!*s))
1086     goto noconv;
1087 
1088   /* Check for a sign.  */
1089   if (*s == '-')
1090     {
1091       *negative = TRUE;
1092       ++s;
1093     }
1094   else if (*s == '+')
1095     ++s;
1096 
1097   /* Recognize number prefix and if BASE is zero, figure it out ourselves.  */
1098   if (*s == '0')
1099     {
1100       if ((base == 0 || base == 16) && TOUPPER (s[1]) == 'X')
1101         {
1102           s += 2;
1103           base = 16;
1104         }
1105       else if (base == 0)
1106         base = 8;
1107     }
1108   else if (base == 0)
1109     base = 10;
1110 
1111   /* Save the pointer so we can check later if anything happened.  */
1112   save = s;
1113   cutoff = G_MAXUINT64 / base;
1114   cutlim = G_MAXUINT64 % base;
1115 
1116   overflow = FALSE;
1117   ui64 = 0;
1118   c = *s;
1119   for (; c; c = *++s)
1120     {
1121       if (c >= '0' && c <= '9')
1122         c -= '0';
1123       else if (ISALPHA (c))
1124         c = TOUPPER (c) - 'A' + 10;
1125       else
1126         break;
1127       if (c >= base)
1128         break;
1129       /* Check for overflow.  */
1130       if (ui64 > cutoff || (ui64 == cutoff && c > cutlim))
1131         overflow = TRUE;
1132       else
1133         {
1134           ui64 *= base;
1135           ui64 += c;
1136         }
1137     }
1138 
1139   /* Check if anything actually happened.  */
1140   if (s == save)
1141     goto noconv;
1142 
1143   /* Store in ENDPTR the address of one character
1144      past the last character we converted.  */
1145   if (endptr)
1146     *endptr = s;
1147 
1148   if (G_UNLIKELY (overflow))
1149     {
1150       errno = ERANGE;
1151       return G_MAXUINT64;
1152     }
1153 
1154   return ui64;
1155 
1156  noconv:
1157   /* We must handle a special case here: the base is 0 or 16 and the
1158      first two characters are '0' and 'x', but the rest are no
1159      hexadecimal digits.  This is no error case.  We return 0 and
1160      ENDPTR points to the `x`.  */
1161   if (endptr)
1162     {
1163       if (save - nptr >= 2 && TOUPPER (save[-1]) == 'X'
1164           && save[-2] == '0')
1165         *endptr = &save[-1];
1166       else
1167         /*  There was no number to convert.  */
1168         *endptr = nptr;
1169     }
1170   return 0;
1171 }
1172 #endif /* !USE_XLOCALE */
1173 
1174 /**
1175  * g_ascii_strtoull:
1176  * @nptr:    the string to convert to a numeric value.
1177  * @endptr:  (out) (transfer none) (optional): if non-%NULL, it returns the
1178  *           character after the last character used in the conversion.
1179  * @base:    to be used for the conversion, 2..36 or 0
1180  *
1181  * Converts a string to a #guint64 value.
1182  * This function behaves like the standard strtoull() function
1183  * does in the C locale. It does this without actually
1184  * changing the current locale, since that would not be
1185  * thread-safe.
1186  *
1187  * Note that input with a leading minus sign (`-`) is accepted, and will return
1188  * the negation of the parsed number, unless that would overflow a #guint64.
1189  * Critically, this means you cannot assume that a short fixed length input will
1190  * never result in a low return value, as the input could have a leading `-`.
1191  *
1192  * This function is typically used when reading configuration
1193  * files or other non-user input that should be locale independent.
1194  * To handle input from the user you should normally use the
1195  * locale-sensitive system strtoull() function.
1196  *
1197  * If the correct value would cause overflow, %G_MAXUINT64
1198  * is returned, and `ERANGE` is stored in `errno`.
1199  * If the base is outside the valid range, zero is returned, and
1200  * `EINVAL` is stored in `errno`.
1201  * If the string conversion fails, zero is returned, and @endptr returns
1202  * @nptr (if @endptr is non-%NULL).
1203  *
1204  * Returns: the #guint64 value or zero on error.
1205  *
1206  * Since: 2.2
1207  */
1208 guint64
g_ascii_strtoull(const gchar * nptr,gchar ** endptr,guint base)1209 g_ascii_strtoull (const gchar *nptr,
1210                   gchar      **endptr,
1211                   guint        base)
1212 {
1213 #ifdef USE_XLOCALE
1214   return strtoull_l (nptr, endptr, base, get_C_locale ());
1215 #else
1216   gboolean negative;
1217   guint64 result;
1218 
1219   result = g_parse_long_long (nptr, (const gchar **) endptr, base, &negative);
1220 
1221   /* Return the result of the appropriate sign.  */
1222   return negative ? -result : result;
1223 #endif
1224 }
1225 
1226 /**
1227  * g_ascii_strtoll:
1228  * @nptr:    the string to convert to a numeric value.
1229  * @endptr:  (out) (transfer none) (optional): if non-%NULL, it returns the
1230  *           character after the last character used in the conversion.
1231  * @base:    to be used for the conversion, 2..36 or 0
1232  *
1233  * Converts a string to a #gint64 value.
1234  * This function behaves like the standard strtoll() function
1235  * does in the C locale. It does this without actually
1236  * changing the current locale, since that would not be
1237  * thread-safe.
1238  *
1239  * This function is typically used when reading configuration
1240  * files or other non-user input that should be locale independent.
1241  * To handle input from the user you should normally use the
1242  * locale-sensitive system strtoll() function.
1243  *
1244  * If the correct value would cause overflow, %G_MAXINT64 or %G_MININT64
1245  * is returned, and `ERANGE` is stored in `errno`.
1246  * If the base is outside the valid range, zero is returned, and
1247  * `EINVAL` is stored in `errno`. If the
1248  * string conversion fails, zero is returned, and @endptr returns @nptr
1249  * (if @endptr is non-%NULL).
1250  *
1251  * Returns: the #gint64 value or zero on error.
1252  *
1253  * Since: 2.12
1254  */
1255 gint64
g_ascii_strtoll(const gchar * nptr,gchar ** endptr,guint base)1256 g_ascii_strtoll (const gchar *nptr,
1257                  gchar      **endptr,
1258                  guint        base)
1259 {
1260 #ifdef USE_XLOCALE
1261   return strtoll_l (nptr, endptr, base, get_C_locale ());
1262 #else
1263   gboolean negative;
1264   guint64 result;
1265 
1266   result = g_parse_long_long (nptr, (const gchar **) endptr, base, &negative);
1267 
1268   if (negative && result > (guint64) G_MININT64)
1269     {
1270       errno = ERANGE;
1271       return G_MININT64;
1272     }
1273   else if (!negative && result > (guint64) G_MAXINT64)
1274     {
1275       errno = ERANGE;
1276       return G_MAXINT64;
1277     }
1278   else if (negative)
1279     return - (gint64) result;
1280   else
1281     return (gint64) result;
1282 #endif
1283 }
1284 
1285 /**
1286  * g_strerror:
1287  * @errnum: the system error number. See the standard C %errno
1288  *     documentation
1289  *
1290  * Returns a string corresponding to the given error code, e.g. "no
1291  * such process". Unlike strerror(), this always returns a string in
1292  * UTF-8 encoding, and the pointer is guaranteed to remain valid for
1293  * the lifetime of the process.
1294  *
1295  * Note that the string may be translated according to the current locale.
1296  *
1297  * The value of %errno will not be changed by this function. However, it may
1298  * be changed by intermediate function calls, so you should save its value
1299  * as soon as the call returns:
1300  * |[
1301  *   int saved_errno;
1302  *
1303  *   ret = read (blah);
1304  *   saved_errno = errno;
1305  *
1306  *   g_strerror (saved_errno);
1307  * ]|
1308  *
1309  * Returns: a UTF-8 string describing the error code. If the error code
1310  *     is unknown, it returns a string like "unknown error (<code>)".
1311  */
1312 const gchar *
g_strerror(gint errnum)1313 g_strerror (gint errnum)
1314 {
1315   static GHashTable *errors;
1316   G_LOCK_DEFINE_STATIC (errors);
1317   const gchar *msg;
1318   gint saved_errno = errno;
1319 
1320   G_LOCK (errors);
1321   if (errors)
1322     msg = g_hash_table_lookup (errors, GINT_TO_POINTER (errnum));
1323   else
1324     {
1325       errors = g_hash_table_new (NULL, NULL);
1326       msg = NULL;
1327     }
1328 
1329   if (!msg)
1330     {
1331       gchar buf[1024];
1332       GError *error = NULL;
1333 
1334 #if defined(G_OS_WIN32)
1335       strerror_s (buf, sizeof (buf), errnum);
1336       msg = buf;
1337 #elif defined(HAVE_STRERROR_R)
1338       /* Match the condition in strerror_r(3) for glibc */
1339 #  if defined(STRERROR_R_CHAR_P)
1340       msg = strerror_r (errnum, buf, sizeof (buf));
1341 #  else
1342       (void) strerror_r (errnum, buf, sizeof (buf));
1343       msg = buf;
1344 #  endif /* HAVE_STRERROR_R */
1345 #else
1346       g_strlcpy (buf, strerror (errnum), sizeof (buf));
1347       msg = buf;
1348 #endif
1349       if (!g_get_console_charset (NULL))
1350         {
1351           msg = g_locale_to_utf8 (msg, -1, NULL, NULL, &error);
1352           if (error)
1353             g_print ("%s\n", error->message);
1354         }
1355       else if (msg == (const gchar *)buf)
1356         msg = g_strdup (buf);
1357 
1358       g_hash_table_insert (errors, GINT_TO_POINTER (errnum), (char *) msg);
1359     }
1360   G_UNLOCK (errors);
1361 
1362   errno = saved_errno;
1363   return msg;
1364 }
1365 
1366 /**
1367  * g_strsignal:
1368  * @signum: the signal number. See the `signal` documentation
1369  *
1370  * Returns a string describing the given signal, e.g. "Segmentation fault".
1371  * You should use this function in preference to strsignal(), because it
1372  * returns a string in UTF-8 encoding, and since not all platforms support
1373  * the strsignal() function.
1374  *
1375  * Returns: a UTF-8 string describing the signal. If the signal is unknown,
1376  *     it returns "unknown signal (<signum>)".
1377  */
1378 const gchar *
g_strsignal(gint signum)1379 g_strsignal (gint signum)
1380 {
1381   gchar *msg;
1382   gchar *tofree;
1383   const gchar *ret;
1384 
1385   msg = tofree = NULL;
1386 
1387 #ifdef HAVE_STRSIGNAL
1388   msg = strsignal (signum);
1389   if (!g_get_console_charset (NULL))
1390     msg = tofree = g_locale_to_utf8 (msg, -1, NULL, NULL, NULL);
1391 #endif
1392 
1393   if (!msg)
1394     msg = tofree = g_strdup_printf ("unknown signal (%d)", signum);
1395   ret = g_intern_string (msg);
1396   g_free (tofree);
1397 
1398   return ret;
1399 }
1400 
1401 /* Functions g_strlcpy and g_strlcat were originally developed by
1402  * Todd C. Miller <Todd.Miller@courtesan.com> to simplify writing secure code.
1403  * See http://www.openbsd.org/cgi-bin/man.cgi?query=strlcpy
1404  * for more information.
1405  */
1406 
1407 #ifdef HAVE_STRLCPY
1408 /* Use the native ones, if available; they might be implemented in assembly */
1409 gsize
g_strlcpy(gchar * dest,const gchar * src,gsize dest_size)1410 g_strlcpy (gchar       *dest,
1411            const gchar *src,
1412            gsize        dest_size)
1413 {
1414   g_return_val_if_fail (dest != NULL, 0);
1415   g_return_val_if_fail (src  != NULL, 0);
1416 
1417   return strlcpy (dest, src, dest_size);
1418 }
1419 
1420 gsize
g_strlcat(gchar * dest,const gchar * src,gsize dest_size)1421 g_strlcat (gchar       *dest,
1422            const gchar *src,
1423            gsize        dest_size)
1424 {
1425   g_return_val_if_fail (dest != NULL, 0);
1426   g_return_val_if_fail (src  != NULL, 0);
1427 
1428   return strlcat (dest, src, dest_size);
1429 }
1430 
1431 #else /* ! HAVE_STRLCPY */
1432 /**
1433  * g_strlcpy:
1434  * @dest: destination buffer
1435  * @src: source buffer
1436  * @dest_size: length of @dest in bytes
1437  *
1438  * Portability wrapper that calls strlcpy() on systems which have it,
1439  * and emulates strlcpy() otherwise. Copies @src to @dest; @dest is
1440  * guaranteed to be nul-terminated; @src must be nul-terminated;
1441  * @dest_size is the buffer size, not the number of bytes to copy.
1442  *
1443  * At most @dest_size - 1 characters will be copied. Always nul-terminates
1444  * (unless @dest_size is 0). This function does not allocate memory. Unlike
1445  * strncpy(), this function doesn't pad @dest (so it's often faster). It
1446  * returns the size of the attempted result, strlen (src), so if
1447  * @retval >= @dest_size, truncation occurred.
1448  *
1449  * Caveat: strlcpy() is supposedly more secure than strcpy() or strncpy(),
1450  * but if you really want to avoid screwups, g_strdup() is an even better
1451  * idea.
1452  *
1453  * Returns: length of @src
1454  */
1455 gsize
g_strlcpy(gchar * dest,const gchar * src,gsize dest_size)1456 g_strlcpy (gchar       *dest,
1457            const gchar *src,
1458            gsize        dest_size)
1459 {
1460   gchar *d = dest;
1461   const gchar *s = src;
1462   gsize n = dest_size;
1463 
1464   g_return_val_if_fail (dest != NULL, 0);
1465   g_return_val_if_fail (src  != NULL, 0);
1466 
1467   /* Copy as many bytes as will fit */
1468   if (n != 0 && --n != 0)
1469     do
1470       {
1471         gchar c = *s++;
1472 
1473         *d++ = c;
1474         if (c == 0)
1475           break;
1476       }
1477     while (--n != 0);
1478 
1479   /* If not enough room in dest, add NUL and traverse rest of src */
1480   if (n == 0)
1481     {
1482       if (dest_size != 0)
1483         *d = 0;
1484       while (*s++)
1485         ;
1486     }
1487 
1488   return s - src - 1;  /* count does not include NUL */
1489 }
1490 
1491 /**
1492  * g_strlcat:
1493  * @dest: destination buffer, already containing one nul-terminated string
1494  * @src: source buffer
1495  * @dest_size: length of @dest buffer in bytes (not length of existing string
1496  *     inside @dest)
1497  *
1498  * Portability wrapper that calls strlcat() on systems which have it,
1499  * and emulates it otherwise. Appends nul-terminated @src string to @dest,
1500  * guaranteeing nul-termination for @dest. The total size of @dest won't
1501  * exceed @dest_size.
1502  *
1503  * At most @dest_size - 1 characters will be copied. Unlike strncat(),
1504  * @dest_size is the full size of dest, not the space left over. This
1505  * function does not allocate memory. It always nul-terminates (unless
1506  * @dest_size == 0 or there were no nul characters in the @dest_size
1507  * characters of dest to start with).
1508  *
1509  * Caveat: this is supposedly a more secure alternative to strcat() or
1510  * strncat(), but for real security g_strconcat() is harder to mess up.
1511  *
1512  * Returns: size of attempted result, which is MIN (dest_size, strlen
1513  *     (original dest)) + strlen (src), so if retval >= dest_size,
1514  *     truncation occurred.
1515  */
1516 gsize
g_strlcat(gchar * dest,const gchar * src,gsize dest_size)1517 g_strlcat (gchar       *dest,
1518            const gchar *src,
1519            gsize        dest_size)
1520 {
1521   gchar *d = dest;
1522   const gchar *s = src;
1523   gsize bytes_left = dest_size;
1524   gsize dlength;  /* Logically, MIN (strlen (d), dest_size) */
1525 
1526   g_return_val_if_fail (dest != NULL, 0);
1527   g_return_val_if_fail (src  != NULL, 0);
1528 
1529   /* Find the end of dst and adjust bytes left but don't go past end */
1530   while (*d != 0 && bytes_left-- != 0)
1531     d++;
1532   dlength = d - dest;
1533   bytes_left = dest_size - dlength;
1534 
1535   if (bytes_left == 0)
1536     return dlength + strlen (s);
1537 
1538   while (*s != 0)
1539     {
1540       if (bytes_left != 1)
1541         {
1542           *d++ = *s;
1543           bytes_left--;
1544         }
1545       s++;
1546     }
1547   *d = 0;
1548 
1549   return dlength + (s - src);  /* count does not include NUL */
1550 }
1551 #endif /* ! HAVE_STRLCPY */
1552 
1553 /**
1554  * g_ascii_strdown:
1555  * @str: a string
1556  * @len: length of @str in bytes, or -1 if @str is nul-terminated
1557  *
1558  * Converts all upper case ASCII letters to lower case ASCII letters.
1559  *
1560  * Returns: a newly-allocated string, with all the upper case
1561  *     characters in @str converted to lower case, with semantics that
1562  *     exactly match g_ascii_tolower(). (Note that this is unlike the
1563  *     old g_strdown(), which modified the string in place.)
1564  */
1565 gchar*
g_ascii_strdown(const gchar * str,gssize len)1566 g_ascii_strdown (const gchar *str,
1567                  gssize       len)
1568 {
1569   gchar *result, *s;
1570 
1571   g_return_val_if_fail (str != NULL, NULL);
1572 
1573   if (len < 0)
1574     len = (gssize) strlen (str);
1575 
1576   result = g_strndup (str, (gsize) len);
1577   for (s = result; *s; s++)
1578     *s = g_ascii_tolower (*s);
1579 
1580   return result;
1581 }
1582 
1583 /**
1584  * g_ascii_strup:
1585  * @str: a string
1586  * @len: length of @str in bytes, or -1 if @str is nul-terminated
1587  *
1588  * Converts all lower case ASCII letters to upper case ASCII letters.
1589  *
1590  * Returns: a newly allocated string, with all the lower case
1591  *     characters in @str converted to upper case, with semantics that
1592  *     exactly match g_ascii_toupper(). (Note that this is unlike the
1593  *     old g_strup(), which modified the string in place.)
1594  */
1595 gchar*
g_ascii_strup(const gchar * str,gssize len)1596 g_ascii_strup (const gchar *str,
1597                gssize       len)
1598 {
1599   gchar *result, *s;
1600 
1601   g_return_val_if_fail (str != NULL, NULL);
1602 
1603   if (len < 0)
1604     len = (gssize) strlen (str);
1605 
1606   result = g_strndup (str, (gsize) len);
1607   for (s = result; *s; s++)
1608     *s = g_ascii_toupper (*s);
1609 
1610   return result;
1611 }
1612 
1613 /**
1614  * g_str_is_ascii:
1615  * @str: a string
1616  *
1617  * Determines if a string is pure ASCII. A string is pure ASCII if it
1618  * contains no bytes with the high bit set.
1619  *
1620  * Returns: %TRUE if @str is ASCII
1621  *
1622  * Since: 2.40
1623  */
1624 gboolean
g_str_is_ascii(const gchar * str)1625 g_str_is_ascii (const gchar *str)
1626 {
1627   gsize i;
1628 
1629   for (i = 0; str[i]; i++)
1630     if (str[i] & 0x80)
1631       return FALSE;
1632 
1633   return TRUE;
1634 }
1635 
1636 /**
1637  * g_strdown:
1638  * @string: the string to convert.
1639  *
1640  * Converts a string to lower case.
1641  *
1642  * Returns: the string
1643  *
1644  * Deprecated:2.2: This function is totally broken for the reasons discussed
1645  * in the g_strncasecmp() docs - use g_ascii_strdown() or g_utf8_strdown()
1646  * instead.
1647  **/
1648 gchar*
g_strdown(gchar * string)1649 g_strdown (gchar *string)
1650 {
1651   guchar *s;
1652 
1653   g_return_val_if_fail (string != NULL, NULL);
1654 
1655   s = (guchar *) string;
1656 
1657   while (*s)
1658     {
1659       if (isupper (*s))
1660         *s = tolower (*s);
1661       s++;
1662     }
1663 
1664   return (gchar *) string;
1665 }
1666 
1667 /**
1668  * g_strup:
1669  * @string: the string to convert
1670  *
1671  * Converts a string to upper case.
1672  *
1673  * Returns: the string
1674  *
1675  * Deprecated:2.2: This function is totally broken for the reasons
1676  *     discussed in the g_strncasecmp() docs - use g_ascii_strup()
1677  *     or g_utf8_strup() instead.
1678  */
1679 gchar*
g_strup(gchar * string)1680 g_strup (gchar *string)
1681 {
1682   guchar *s;
1683 
1684   g_return_val_if_fail (string != NULL, NULL);
1685 
1686   s = (guchar *) string;
1687 
1688   while (*s)
1689     {
1690       if (islower (*s))
1691         *s = toupper (*s);
1692       s++;
1693     }
1694 
1695   return (gchar *) string;
1696 }
1697 
1698 /**
1699  * g_strreverse:
1700  * @string: the string to reverse
1701  *
1702  * Reverses all of the bytes in a string. For example,
1703  * `g_strreverse ("abcdef")` will result in "fedcba".
1704  *
1705  * Note that g_strreverse() doesn't work on UTF-8 strings
1706  * containing multibyte characters. For that purpose, use
1707  * g_utf8_strreverse().
1708  *
1709  * Returns: the same pointer passed in as @string
1710  */
1711 gchar*
g_strreverse(gchar * string)1712 g_strreverse (gchar *string)
1713 {
1714   g_return_val_if_fail (string != NULL, NULL);
1715 
1716   if (*string)
1717     {
1718       gchar *h, *t;
1719 
1720       h = string;
1721       t = string + strlen (string) - 1;
1722 
1723       while (h < t)
1724         {
1725           gchar c;
1726 
1727           c = *h;
1728           *h = *t;
1729           h++;
1730           *t = c;
1731           t--;
1732         }
1733     }
1734 
1735   return string;
1736 }
1737 
1738 /**
1739  * g_ascii_tolower:
1740  * @c: any character
1741  *
1742  * Convert a character to ASCII lower case.
1743  *
1744  * Unlike the standard C library tolower() function, this only
1745  * recognizes standard ASCII letters and ignores the locale, returning
1746  * all non-ASCII characters unchanged, even if they are lower case
1747  * letters in a particular character set. Also unlike the standard
1748  * library function, this takes and returns a char, not an int, so
1749  * don't call it on %EOF but no need to worry about casting to #guchar
1750  * before passing a possibly non-ASCII character in.
1751  *
1752  * Returns: the result of converting @c to lower case. If @c is
1753  *     not an ASCII upper case letter, @c is returned unchanged.
1754  */
1755 gchar
g_ascii_tolower(gchar c)1756 g_ascii_tolower (gchar c)
1757 {
1758   return g_ascii_isupper (c) ? c - 'A' + 'a' : c;
1759 }
1760 
1761 /**
1762  * g_ascii_toupper:
1763  * @c: any character
1764  *
1765  * Convert a character to ASCII upper case.
1766  *
1767  * Unlike the standard C library toupper() function, this only
1768  * recognizes standard ASCII letters and ignores the locale, returning
1769  * all non-ASCII characters unchanged, even if they are upper case
1770  * letters in a particular character set. Also unlike the standard
1771  * library function, this takes and returns a char, not an int, so
1772  * don't call it on %EOF but no need to worry about casting to #guchar
1773  * before passing a possibly non-ASCII character in.
1774  *
1775  * Returns: the result of converting @c to upper case. If @c is not
1776  *    an ASCII lower case letter, @c is returned unchanged.
1777  */
1778 gchar
g_ascii_toupper(gchar c)1779 g_ascii_toupper (gchar c)
1780 {
1781   return g_ascii_islower (c) ? c - 'a' + 'A' : c;
1782 }
1783 
1784 /**
1785  * g_ascii_digit_value:
1786  * @c: an ASCII character
1787  *
1788  * Determines the numeric value of a character as a decimal digit.
1789  * Differs from g_unichar_digit_value() because it takes a char, so
1790  * there's no worry about sign extension if characters are signed.
1791  *
1792  * Returns: If @c is a decimal digit (according to g_ascii_isdigit()),
1793  *    its numeric value. Otherwise, -1.
1794  */
1795 int
g_ascii_digit_value(gchar c)1796 g_ascii_digit_value (gchar c)
1797 {
1798   if (g_ascii_isdigit (c))
1799     return c - '0';
1800   return -1;
1801 }
1802 
1803 /**
1804  * g_ascii_xdigit_value:
1805  * @c: an ASCII character.
1806  *
1807  * Determines the numeric value of a character as a hexadecimal
1808  * digit. Differs from g_unichar_xdigit_value() because it takes
1809  * a char, so there's no worry about sign extension if characters
1810  * are signed.
1811  *
1812  * Returns: If @c is a hex digit (according to g_ascii_isxdigit()),
1813  *     its numeric value. Otherwise, -1.
1814  */
1815 int
g_ascii_xdigit_value(gchar c)1816 g_ascii_xdigit_value (gchar c)
1817 {
1818   if (c >= 'A' && c <= 'F')
1819     return c - 'A' + 10;
1820   if (c >= 'a' && c <= 'f')
1821     return c - 'a' + 10;
1822   return g_ascii_digit_value (c);
1823 }
1824 
1825 /**
1826  * g_ascii_strcasecmp:
1827  * @s1: string to compare with @s2
1828  * @s2: string to compare with @s1
1829  *
1830  * Compare two strings, ignoring the case of ASCII characters.
1831  *
1832  * Unlike the BSD strcasecmp() function, this only recognizes standard
1833  * ASCII letters and ignores the locale, treating all non-ASCII
1834  * bytes as if they are not letters.
1835  *
1836  * This function should be used only on strings that are known to be
1837  * in encodings where the bytes corresponding to ASCII letters always
1838  * represent themselves. This includes UTF-8 and the ISO-8859-*
1839  * charsets, but not for instance double-byte encodings like the
1840  * Windows Codepage 932, where the trailing bytes of double-byte
1841  * characters include all ASCII letters. If you compare two CP932
1842  * strings using this function, you will get false matches.
1843  *
1844  * Both @s1 and @s2 must be non-%NULL.
1845  *
1846  * Returns: 0 if the strings match, a negative value if @s1 < @s2,
1847  *     or a positive value if @s1 > @s2.
1848  */
1849 gint
g_ascii_strcasecmp(const gchar * s1,const gchar * s2)1850 g_ascii_strcasecmp (const gchar *s1,
1851                     const gchar *s2)
1852 {
1853   gint c1, c2;
1854 
1855   g_return_val_if_fail (s1 != NULL, 0);
1856   g_return_val_if_fail (s2 != NULL, 0);
1857 
1858   while (*s1 && *s2)
1859     {
1860       c1 = (gint)(guchar) TOLOWER (*s1);
1861       c2 = (gint)(guchar) TOLOWER (*s2);
1862       if (c1 != c2)
1863         return (c1 - c2);
1864       s1++; s2++;
1865     }
1866 
1867   return (((gint)(guchar) *s1) - ((gint)(guchar) *s2));
1868 }
1869 
1870 /**
1871  * g_ascii_strncasecmp:
1872  * @s1: string to compare with @s2
1873  * @s2: string to compare with @s1
1874  * @n: number of characters to compare
1875  *
1876  * Compare @s1 and @s2, ignoring the case of ASCII characters and any
1877  * characters after the first @n in each string.
1878  *
1879  * Unlike the BSD strcasecmp() function, this only recognizes standard
1880  * ASCII letters and ignores the locale, treating all non-ASCII
1881  * characters as if they are not letters.
1882  *
1883  * The same warning as in g_ascii_strcasecmp() applies: Use this
1884  * function only on strings known to be in encodings where bytes
1885  * corresponding to ASCII letters always represent themselves.
1886  *
1887  * Returns: 0 if the strings match, a negative value if @s1 < @s2,
1888  *     or a positive value if @s1 > @s2.
1889  */
1890 gint
g_ascii_strncasecmp(const gchar * s1,const gchar * s2,gsize n)1891 g_ascii_strncasecmp (const gchar *s1,
1892                      const gchar *s2,
1893                      gsize        n)
1894 {
1895   gint c1, c2;
1896 
1897   g_return_val_if_fail (s1 != NULL, 0);
1898   g_return_val_if_fail (s2 != NULL, 0);
1899 
1900   while (n && *s1 && *s2)
1901     {
1902       n -= 1;
1903       c1 = (gint)(guchar) TOLOWER (*s1);
1904       c2 = (gint)(guchar) TOLOWER (*s2);
1905       if (c1 != c2)
1906         return (c1 - c2);
1907       s1++; s2++;
1908     }
1909 
1910   if (n)
1911     return (((gint) (guchar) *s1) - ((gint) (guchar) *s2));
1912   else
1913     return 0;
1914 }
1915 
1916 /**
1917  * g_strcasecmp:
1918  * @s1: a string
1919  * @s2: a string to compare with @s1
1920  *
1921  * A case-insensitive string comparison, corresponding to the standard
1922  * strcasecmp() function on platforms which support it.
1923  *
1924  * Returns: 0 if the strings match, a negative value if @s1 < @s2,
1925  *     or a positive value if @s1 > @s2.
1926  *
1927  * Deprecated:2.2: See g_strncasecmp() for a discussion of why this
1928  *     function is deprecated and how to replace it.
1929  */
1930 gint
g_strcasecmp(const gchar * s1,const gchar * s2)1931 g_strcasecmp (const gchar *s1,
1932               const gchar *s2)
1933 {
1934 #ifdef HAVE_STRCASECMP
1935   g_return_val_if_fail (s1 != NULL, 0);
1936   g_return_val_if_fail (s2 != NULL, 0);
1937 
1938   return strcasecmp (s1, s2);
1939 #else
1940   gint c1, c2;
1941 
1942   g_return_val_if_fail (s1 != NULL, 0);
1943   g_return_val_if_fail (s2 != NULL, 0);
1944 
1945   while (*s1 && *s2)
1946     {
1947       /* According to A. Cox, some platforms have islower's that
1948        * don't work right on non-uppercase
1949        */
1950       c1 = isupper ((guchar)*s1) ? tolower ((guchar)*s1) : *s1;
1951       c2 = isupper ((guchar)*s2) ? tolower ((guchar)*s2) : *s2;
1952       if (c1 != c2)
1953         return (c1 - c2);
1954       s1++; s2++;
1955     }
1956 
1957   return (((gint)(guchar) *s1) - ((gint)(guchar) *s2));
1958 #endif
1959 }
1960 
1961 /**
1962  * g_strncasecmp:
1963  * @s1: a string
1964  * @s2: a string to compare with @s1
1965  * @n: the maximum number of characters to compare
1966  *
1967  * A case-insensitive string comparison, corresponding to the standard
1968  * strncasecmp() function on platforms which support it. It is similar
1969  * to g_strcasecmp() except it only compares the first @n characters of
1970  * the strings.
1971  *
1972  * Returns: 0 if the strings match, a negative value if @s1 < @s2,
1973  *     or a positive value if @s1 > @s2.
1974  *
1975  * Deprecated:2.2: The problem with g_strncasecmp() is that it does
1976  *     the comparison by calling toupper()/tolower(). These functions
1977  *     are locale-specific and operate on single bytes. However, it is
1978  *     impossible to handle things correctly from an internationalization
1979  *     standpoint by operating on bytes, since characters may be multibyte.
1980  *     Thus g_strncasecmp() is broken if your string is guaranteed to be
1981  *     ASCII, since it is locale-sensitive, and it's broken if your string
1982  *     is localized, since it doesn't work on many encodings at all,
1983  *     including UTF-8, EUC-JP, etc.
1984  *
1985  *     There are therefore two replacement techniques: g_ascii_strncasecmp(),
1986  *     which only works on ASCII and is not locale-sensitive, and
1987  *     g_utf8_casefold() followed by strcmp() on the resulting strings,
1988  *     which is good for case-insensitive sorting of UTF-8.
1989  */
1990 gint
g_strncasecmp(const gchar * s1,const gchar * s2,guint n)1991 g_strncasecmp (const gchar *s1,
1992                const gchar *s2,
1993                guint n)
1994 {
1995 #ifdef HAVE_STRNCASECMP
1996   return strncasecmp (s1, s2, n);
1997 #else
1998   gint c1, c2;
1999 
2000   g_return_val_if_fail (s1 != NULL, 0);
2001   g_return_val_if_fail (s2 != NULL, 0);
2002 
2003   while (n && *s1 && *s2)
2004     {
2005       n -= 1;
2006       /* According to A. Cox, some platforms have islower's that
2007        * don't work right on non-uppercase
2008        */
2009       c1 = isupper ((guchar)*s1) ? tolower ((guchar)*s1) : *s1;
2010       c2 = isupper ((guchar)*s2) ? tolower ((guchar)*s2) : *s2;
2011       if (c1 != c2)
2012         return (c1 - c2);
2013       s1++; s2++;
2014     }
2015 
2016   if (n)
2017     return (((gint) (guchar) *s1) - ((gint) (guchar) *s2));
2018   else
2019     return 0;
2020 #endif
2021 }
2022 
2023 /**
2024  * g_strdelimit:
2025  * @string: the string to convert
2026  * @delimiters: (nullable): a string containing the current delimiters,
2027  *     or %NULL to use the standard delimiters defined in #G_STR_DELIMITERS
2028  * @new_delimiter: the new delimiter character
2029  *
2030  * Converts any delimiter characters in @string to @new_delimiter.
2031  *
2032  * Any characters in @string which are found in @delimiters are
2033  * changed to the @new_delimiter character. Modifies @string in place,
2034  * and returns @string itself, not a copy.
2035  *
2036  * The return value is to allow nesting such as:
2037  *
2038  * |[<!-- language="C" -->
2039  *   g_ascii_strup (g_strdelimit (str, "abc", '?'))
2040  * ]|
2041  *
2042  * In order to modify a copy, you may use g_strdup():
2043  *
2044  * |[<!-- language="C" -->
2045  *   reformatted = g_strdelimit (g_strdup (const_str), "abc", '?');
2046  *   ...
2047  *   g_free (reformatted);
2048  * ]|
2049  *
2050  * Returns: the modified @string
2051  */
2052 gchar *
g_strdelimit(gchar * string,const gchar * delimiters,gchar new_delim)2053 g_strdelimit (gchar       *string,
2054               const gchar *delimiters,
2055               gchar        new_delim)
2056 {
2057   gchar *c;
2058 
2059   g_return_val_if_fail (string != NULL, NULL);
2060 
2061   if (!delimiters)
2062     delimiters = G_STR_DELIMITERS;
2063 
2064   for (c = string; *c; c++)
2065     {
2066       if (strchr (delimiters, *c))
2067         *c = new_delim;
2068     }
2069 
2070   return string;
2071 }
2072 
2073 /**
2074  * g_strcanon:
2075  * @string: a nul-terminated array of bytes
2076  * @valid_chars: bytes permitted in @string
2077  * @substitutor: replacement character for disallowed bytes
2078  *
2079  * For each character in @string, if the character is not in @valid_chars,
2080  * replaces the character with @substitutor.
2081  *
2082  * Modifies @string in place, and return @string itself, not a copy. The
2083  * return value is to allow nesting such as:
2084  *
2085  * |[<!-- language="C" -->
2086  *   g_ascii_strup (g_strcanon (str, "abc", '?'))
2087  * ]|
2088  *
2089  * In order to modify a copy, you may use g_strdup():
2090  *
2091  * |[<!-- language="C" -->
2092  *   reformatted = g_strcanon (g_strdup (const_str), "abc", '?');
2093  *   ...
2094  *   g_free (reformatted);
2095  * ]|
2096  *
2097  * Returns: the modified @string
2098  */
2099 gchar *
g_strcanon(gchar * string,const gchar * valid_chars,gchar substitutor)2100 g_strcanon (gchar       *string,
2101             const gchar *valid_chars,
2102             gchar        substitutor)
2103 {
2104   gchar *c;
2105 
2106   g_return_val_if_fail (string != NULL, NULL);
2107   g_return_val_if_fail (valid_chars != NULL, NULL);
2108 
2109   for (c = string; *c; c++)
2110     {
2111       if (!strchr (valid_chars, *c))
2112         *c = substitutor;
2113     }
2114 
2115   return string;
2116 }
2117 
2118 /**
2119  * g_strcompress:
2120  * @source: a string to compress
2121  *
2122  * Replaces all escaped characters with their one byte equivalent.
2123  *
2124  * This function does the reverse conversion of g_strescape().
2125  *
2126  * Returns: a newly-allocated copy of @source with all escaped
2127  *     character compressed
2128  */
2129 gchar *
g_strcompress(const gchar * source)2130 g_strcompress (const gchar *source)
2131 {
2132   const gchar *p = source, *octal;
2133   gchar *dest;
2134   gchar *q;
2135 
2136   g_return_val_if_fail (source != NULL, NULL);
2137 
2138   dest = g_malloc (strlen (source) + 1);
2139   q = dest;
2140 
2141   while (*p)
2142     {
2143       if (*p == '\\')
2144         {
2145           p++;
2146           switch (*p)
2147             {
2148             case '\0':
2149               g_warning ("g_strcompress: trailing \\");
2150               goto out;
2151             case '0':  case '1':  case '2':  case '3':  case '4':
2152             case '5':  case '6':  case '7':
2153               *q = 0;
2154               octal = p;
2155               while ((p < octal + 3) && (*p >= '0') && (*p <= '7'))
2156                 {
2157                   *q = (*q * 8) + (*p - '0');
2158                   p++;
2159                 }
2160               q++;
2161               p--;
2162               break;
2163             case 'b':
2164               *q++ = '\b';
2165               break;
2166             case 'f':
2167               *q++ = '\f';
2168               break;
2169             case 'n':
2170               *q++ = '\n';
2171               break;
2172             case 'r':
2173               *q++ = '\r';
2174               break;
2175             case 't':
2176               *q++ = '\t';
2177               break;
2178             case 'v':
2179               *q++ = '\v';
2180               break;
2181             default:            /* Also handles \" and \\ */
2182               *q++ = *p;
2183               break;
2184             }
2185         }
2186       else
2187         *q++ = *p;
2188       p++;
2189     }
2190 out:
2191   *q = 0;
2192 
2193   return dest;
2194 }
2195 
2196 /**
2197  * g_strescape:
2198  * @source: a string to escape
2199  * @exceptions: (nullable): a string of characters not to escape in @source
2200  *
2201  * Escapes the special characters '\b', '\f', '\n', '\r', '\t', '\v', '\'
2202  * and '"' in the string @source by inserting a '\' before
2203  * them. Additionally all characters in the range 0x01-0x1F (everything
2204  * below SPACE) and in the range 0x7F-0xFF (all non-ASCII chars) are
2205  * replaced with a '\' followed by their octal representation.
2206  * Characters supplied in @exceptions are not escaped.
2207  *
2208  * g_strcompress() does the reverse conversion.
2209  *
2210  * Returns: a newly-allocated copy of @source with certain
2211  *     characters escaped. See above.
2212  */
2213 gchar *
g_strescape(const gchar * source,const gchar * exceptions)2214 g_strescape (const gchar *source,
2215              const gchar *exceptions)
2216 {
2217   const guchar *p;
2218   gchar *dest;
2219   gchar *q;
2220   guchar excmap[256];
2221 
2222   g_return_val_if_fail (source != NULL, NULL);
2223 
2224   p = (guchar *) source;
2225   /* Each source byte needs maximally four destination chars (\777) */
2226   q = dest = g_malloc (strlen (source) * 4 + 1);
2227 
2228   memset (excmap, 0, 256);
2229   if (exceptions)
2230     {
2231       guchar *e = (guchar *) exceptions;
2232 
2233       while (*e)
2234         {
2235           excmap[*e] = 1;
2236           e++;
2237         }
2238     }
2239 
2240   while (*p)
2241     {
2242       if (excmap[*p])
2243         *q++ = *p;
2244       else
2245         {
2246           switch (*p)
2247             {
2248             case '\b':
2249               *q++ = '\\';
2250               *q++ = 'b';
2251               break;
2252             case '\f':
2253               *q++ = '\\';
2254               *q++ = 'f';
2255               break;
2256             case '\n':
2257               *q++ = '\\';
2258               *q++ = 'n';
2259               break;
2260             case '\r':
2261               *q++ = '\\';
2262               *q++ = 'r';
2263               break;
2264             case '\t':
2265               *q++ = '\\';
2266               *q++ = 't';
2267               break;
2268             case '\v':
2269               *q++ = '\\';
2270               *q++ = 'v';
2271               break;
2272             case '\\':
2273               *q++ = '\\';
2274               *q++ = '\\';
2275               break;
2276             case '"':
2277               *q++ = '\\';
2278               *q++ = '"';
2279               break;
2280             default:
2281               if ((*p < ' ') || (*p >= 0177))
2282                 {
2283                   *q++ = '\\';
2284                   *q++ = '0' + (((*p) >> 6) & 07);
2285                   *q++ = '0' + (((*p) >> 3) & 07);
2286                   *q++ = '0' + ((*p) & 07);
2287                 }
2288               else
2289                 *q++ = *p;
2290               break;
2291             }
2292         }
2293       p++;
2294     }
2295   *q = 0;
2296   return dest;
2297 }
2298 
2299 /**
2300  * g_strchug:
2301  * @string: a string to remove the leading whitespace from
2302  *
2303  * Removes leading whitespace from a string, by moving the rest
2304  * of the characters forward.
2305  *
2306  * This function doesn't allocate or reallocate any memory;
2307  * it modifies @string in place. Therefore, it cannot be used on
2308  * statically allocated strings.
2309  *
2310  * The pointer to @string is returned to allow the nesting of functions.
2311  *
2312  * Also see g_strchomp() and g_strstrip().
2313  *
2314  * Returns: @string
2315  */
2316 gchar *
g_strchug(gchar * string)2317 g_strchug (gchar *string)
2318 {
2319   guchar *start;
2320 
2321   g_return_val_if_fail (string != NULL, NULL);
2322 
2323   for (start = (guchar*) string; *start && g_ascii_isspace (*start); start++)
2324     ;
2325 
2326   memmove (string, start, strlen ((gchar *) start) + 1);
2327 
2328   return string;
2329 }
2330 
2331 /**
2332  * g_strchomp:
2333  * @string: a string to remove the trailing whitespace from
2334  *
2335  * Removes trailing whitespace from a string.
2336  *
2337  * This function doesn't allocate or reallocate any memory;
2338  * it modifies @string in place. Therefore, it cannot be used
2339  * on statically allocated strings.
2340  *
2341  * The pointer to @string is returned to allow the nesting of functions.
2342  *
2343  * Also see g_strchug() and g_strstrip().
2344  *
2345  * Returns: @string
2346  */
2347 gchar *
g_strchomp(gchar * string)2348 g_strchomp (gchar *string)
2349 {
2350   gsize len;
2351 
2352   g_return_val_if_fail (string != NULL, NULL);
2353 
2354   len = strlen (string);
2355   while (len--)
2356     {
2357       if (g_ascii_isspace ((guchar) string[len]))
2358         string[len] = '\0';
2359       else
2360         break;
2361     }
2362 
2363   return string;
2364 }
2365 
2366 /**
2367  * g_strsplit:
2368  * @string: a string to split
2369  * @delimiter: a string which specifies the places at which to split
2370  *     the string. The delimiter is not included in any of the resulting
2371  *     strings, unless @max_tokens is reached.
2372  * @max_tokens: the maximum number of pieces to split @string into.
2373  *     If this is less than 1, the string is split completely.
2374  *
2375  * Splits a string into a maximum of @max_tokens pieces, using the given
2376  * @delimiter. If @max_tokens is reached, the remainder of @string is
2377  * appended to the last token.
2378  *
2379  * As an example, the result of g_strsplit (":a:bc::d:", ":", -1) is a
2380  * %NULL-terminated vector containing the six strings "", "a", "bc", "", "d"
2381  * and "".
2382  *
2383  * As a special case, the result of splitting the empty string "" is an empty
2384  * vector, not a vector containing a single string. The reason for this
2385  * special case is that being able to represent an empty vector is typically
2386  * more useful than consistent handling of empty elements. If you do need
2387  * to represent empty elements, you'll need to check for the empty string
2388  * before calling g_strsplit().
2389  *
2390  * Returns: a newly-allocated %NULL-terminated array of strings. Use
2391  *    g_strfreev() to free it.
2392  */
2393 gchar**
g_strsplit(const gchar * string,const gchar * delimiter,gint max_tokens)2394 g_strsplit (const gchar *string,
2395             const gchar *delimiter,
2396             gint         max_tokens)
2397 {
2398   char *s;
2399   const gchar *remainder;
2400   GPtrArray *string_list;
2401 
2402   g_return_val_if_fail (string != NULL, NULL);
2403   g_return_val_if_fail (delimiter != NULL, NULL);
2404   g_return_val_if_fail (delimiter[0] != '\0', NULL);
2405 
2406   if (max_tokens < 1)
2407     max_tokens = G_MAXINT;
2408 
2409   string_list = g_ptr_array_new ();
2410   remainder = string;
2411   s = strstr (remainder, delimiter);
2412   if (s)
2413     {
2414       gsize delimiter_len = strlen (delimiter);
2415 
2416       while (--max_tokens && s)
2417         {
2418           gsize len;
2419 
2420           len = s - remainder;
2421           g_ptr_array_add (string_list, g_strndup (remainder, len));
2422           remainder = s + delimiter_len;
2423           s = strstr (remainder, delimiter);
2424         }
2425     }
2426   if (*string)
2427     g_ptr_array_add (string_list, g_strdup (remainder));
2428 
2429   g_ptr_array_add (string_list, NULL);
2430 
2431   return (char **) g_ptr_array_free (string_list, FALSE);
2432 }
2433 
2434 /**
2435  * g_strsplit_set:
2436  * @string: The string to be tokenized
2437  * @delimiters: A nul-terminated string containing bytes that are used
2438  *     to split the string (it can accept an empty string, which will result
2439  *     in no string splitting).
2440  * @max_tokens: The maximum number of tokens to split @string into.
2441  *     If this is less than 1, the string is split completely
2442  *
2443  * Splits @string into a number of tokens not containing any of the characters
2444  * in @delimiter. A token is the (possibly empty) longest string that does not
2445  * contain any of the characters in @delimiters. If @max_tokens is reached, the
2446  * remainder is appended to the last token.
2447  *
2448  * For example the result of g_strsplit_set ("abc:def/ghi", ":/", -1) is a
2449  * %NULL-terminated vector containing the three strings "abc", "def",
2450  * and "ghi".
2451  *
2452  * The result of g_strsplit_set (":def/ghi:", ":/", -1) is a %NULL-terminated
2453  * vector containing the four strings "", "def", "ghi", and "".
2454  *
2455  * As a special case, the result of splitting the empty string "" is an empty
2456  * vector, not a vector containing a single string. The reason for this
2457  * special case is that being able to represent an empty vector is typically
2458  * more useful than consistent handling of empty elements. If you do need
2459  * to represent empty elements, you'll need to check for the empty string
2460  * before calling g_strsplit_set().
2461  *
2462  * Note that this function works on bytes not characters, so it can't be used
2463  * to delimit UTF-8 strings for anything but ASCII characters.
2464  *
2465  * Returns: a newly-allocated %NULL-terminated array of strings. Use
2466  *    g_strfreev() to free it.
2467  *
2468  * Since: 2.4
2469  **/
2470 gchar **
g_strsplit_set(const gchar * string,const gchar * delimiters,gint max_tokens)2471 g_strsplit_set (const gchar *string,
2472                 const gchar *delimiters,
2473                 gint         max_tokens)
2474 {
2475   guint8 delim_table[256]; /* 1 = index is a separator; 0 otherwise */
2476   GSList *tokens, *list;
2477   gint n_tokens;
2478   const gchar *s;
2479   const gchar *current;
2480   gchar *token;
2481   gchar **result;
2482 
2483   g_return_val_if_fail (string != NULL, NULL);
2484   g_return_val_if_fail (delimiters != NULL, NULL);
2485 
2486   if (max_tokens < 1)
2487     max_tokens = G_MAXINT;
2488 
2489   if (*string == '\0')
2490     {
2491       result = g_new (char *, 1);
2492       result[0] = NULL;
2493       return result;
2494     }
2495 
2496   /* Check if each character in @string is a separator, by indexing by the
2497    * character value into the @delim_table, which has value 1 stored at an index
2498    * if that index is a separator. */
2499   memset (delim_table, FALSE, sizeof (delim_table));
2500   for (s = delimiters; *s != '\0'; ++s)
2501     delim_table[*(guchar *)s] = TRUE;
2502 
2503   tokens = NULL;
2504   n_tokens = 0;
2505 
2506   s = current = string;
2507   while (*s != '\0')
2508     {
2509       if (delim_table[*(guchar *)s] && n_tokens + 1 < max_tokens)
2510         {
2511           token = g_strndup (current, s - current);
2512           tokens = g_slist_prepend (tokens, token);
2513           ++n_tokens;
2514 
2515           current = s + 1;
2516         }
2517 
2518       ++s;
2519     }
2520 
2521   token = g_strndup (current, s - current);
2522   tokens = g_slist_prepend (tokens, token);
2523   ++n_tokens;
2524 
2525   result = g_new (gchar *, n_tokens + 1);
2526 
2527   result[n_tokens] = NULL;
2528   for (list = tokens; list != NULL; list = list->next)
2529     result[--n_tokens] = list->data;
2530 
2531   g_slist_free (tokens);
2532 
2533   return result;
2534 }
2535 
2536 /**
2537  * GStrv:
2538  *
2539  * A typedef alias for gchar**. This is mostly useful when used together with
2540  * g_auto().
2541  */
2542 
2543 /**
2544  * g_strfreev:
2545  * @str_array: (nullable): a %NULL-terminated array of strings to free
2546  *
2547  * Frees a %NULL-terminated array of strings, as well as each
2548  * string it contains.
2549  *
2550  * If @str_array is %NULL, this function simply returns.
2551  */
2552 void
g_strfreev(gchar ** str_array)2553 g_strfreev (gchar **str_array)
2554 {
2555   if (str_array)
2556     {
2557       gsize i;
2558 
2559       for (i = 0; str_array[i] != NULL; i++)
2560         g_free (str_array[i]);
2561 
2562       g_free (str_array);
2563     }
2564 }
2565 
2566 /**
2567  * g_strdupv:
2568  * @str_array: (nullable): a %NULL-terminated array of strings
2569  *
2570  * Copies %NULL-terminated array of strings. The copy is a deep copy;
2571  * the new array should be freed by first freeing each string, then
2572  * the array itself. g_strfreev() does this for you. If called
2573  * on a %NULL value, g_strdupv() simply returns %NULL.
2574  *
2575  * Returns: (nullable): a new %NULL-terminated array of strings.
2576  */
2577 gchar**
g_strdupv(gchar ** str_array)2578 g_strdupv (gchar **str_array)
2579 {
2580   if (str_array)
2581     {
2582       gsize i;
2583       gchar **retval;
2584 
2585       i = 0;
2586       while (str_array[i])
2587         ++i;
2588 
2589       retval = g_new (gchar*, i + 1);
2590 
2591       i = 0;
2592       while (str_array[i])
2593         {
2594           retval[i] = g_strdup (str_array[i]);
2595           ++i;
2596         }
2597       retval[i] = NULL;
2598 
2599       return retval;
2600     }
2601   else
2602     return NULL;
2603 }
2604 
2605 /**
2606  * g_strjoinv:
2607  * @separator: (nullable): a string to insert between each of the
2608  *     strings, or %NULL
2609  * @str_array: a %NULL-terminated array of strings to join
2610  *
2611  * Joins a number of strings together to form one long string, with the
2612  * optional @separator inserted between each of them. The returned string
2613  * should be freed with g_free().
2614  *
2615  * If @str_array has no items, the return value will be an
2616  * empty string. If @str_array contains a single item, @separator will not
2617  * appear in the resulting string.
2618  *
2619  * Returns: a newly-allocated string containing all of the strings joined
2620  *     together, with @separator between them
2621  */
2622 gchar*
g_strjoinv(const gchar * separator,gchar ** str_array)2623 g_strjoinv (const gchar  *separator,
2624             gchar       **str_array)
2625 {
2626   gchar *string;
2627   gchar *ptr;
2628 
2629   g_return_val_if_fail (str_array != NULL, NULL);
2630 
2631   if (separator == NULL)
2632     separator = "";
2633 
2634   if (*str_array)
2635     {
2636       gsize i;
2637       gsize len;
2638       gsize separator_len;
2639 
2640       separator_len = strlen (separator);
2641       /* First part, getting length */
2642       len = 1 + strlen (str_array[0]);
2643       for (i = 1; str_array[i] != NULL; i++)
2644         len += strlen (str_array[i]);
2645       len += separator_len * (i - 1);
2646 
2647       /* Second part, building string */
2648       string = g_new (gchar, len);
2649       ptr = g_stpcpy (string, *str_array);
2650       for (i = 1; str_array[i] != NULL; i++)
2651         {
2652           ptr = g_stpcpy (ptr, separator);
2653           ptr = g_stpcpy (ptr, str_array[i]);
2654         }
2655       }
2656   else
2657     string = g_strdup ("");
2658 
2659   return string;
2660 }
2661 
2662 /**
2663  * g_strjoin:
2664  * @separator: (nullable): a string to insert between each of the
2665  *     strings, or %NULL
2666  * @...: a %NULL-terminated list of strings to join
2667  *
2668  * Joins a number of strings together to form one long string, with the
2669  * optional @separator inserted between each of them. The returned string
2670  * should be freed with g_free().
2671  *
2672  * Returns: a newly-allocated string containing all of the strings joined
2673  *     together, with @separator between them
2674  */
2675 gchar*
g_strjoin(const gchar * separator,...)2676 g_strjoin (const gchar *separator,
2677            ...)
2678 {
2679   gchar *string, *s;
2680   va_list args;
2681   gsize len;
2682   gsize separator_len;
2683   gchar *ptr;
2684 
2685   if (separator == NULL)
2686     separator = "";
2687 
2688   separator_len = strlen (separator);
2689 
2690   va_start (args, separator);
2691 
2692   s = va_arg (args, gchar*);
2693 
2694   if (s)
2695     {
2696       /* First part, getting length */
2697       len = 1 + strlen (s);
2698 
2699       s = va_arg (args, gchar*);
2700       while (s)
2701         {
2702           len += separator_len + strlen (s);
2703           s = va_arg (args, gchar*);
2704         }
2705       va_end (args);
2706 
2707       /* Second part, building string */
2708       string = g_new (gchar, len);
2709 
2710       va_start (args, separator);
2711 
2712       s = va_arg (args, gchar*);
2713       ptr = g_stpcpy (string, s);
2714 
2715       s = va_arg (args, gchar*);
2716       while (s)
2717         {
2718           ptr = g_stpcpy (ptr, separator);
2719           ptr = g_stpcpy (ptr, s);
2720           s = va_arg (args, gchar*);
2721         }
2722     }
2723   else
2724     string = g_strdup ("");
2725 
2726   va_end (args);
2727 
2728   return string;
2729 }
2730 
2731 
2732 /**
2733  * g_strstr_len:
2734  * @haystack: a nul-terminated string
2735  * @haystack_len: the maximum length of @haystack in bytes. A length of -1
2736  *     can be used to mean "search the entire string", like `strstr()`.
2737  * @needle: the string to search for
2738  *
2739  * Searches the string @haystack for the first occurrence
2740  * of the string @needle, limiting the length of the search
2741  * to @haystack_len.
2742  *
2743  * Returns: a pointer to the found occurrence, or
2744  *    %NULL if not found.
2745  */
2746 gchar *
g_strstr_len(const gchar * haystack,gssize haystack_len,const gchar * needle)2747 g_strstr_len (const gchar *haystack,
2748               gssize       haystack_len,
2749               const gchar *needle)
2750 {
2751   g_return_val_if_fail (haystack != NULL, NULL);
2752   g_return_val_if_fail (needle != NULL, NULL);
2753 
2754   if (haystack_len < 0)
2755     return strstr (haystack, needle);
2756   else
2757     {
2758       const gchar *p = haystack;
2759       gsize needle_len = strlen (needle);
2760       gsize haystack_len_unsigned = haystack_len;
2761       const gchar *end;
2762       gsize i;
2763 
2764       if (needle_len == 0)
2765         return (gchar *)haystack;
2766 
2767       if (haystack_len_unsigned < needle_len)
2768         return NULL;
2769 
2770       end = haystack + haystack_len - needle_len;
2771 
2772       while (p <= end && *p)
2773         {
2774           for (i = 0; i < needle_len; i++)
2775             if (p[i] != needle[i])
2776               goto next;
2777 
2778           return (gchar *)p;
2779 
2780         next:
2781           p++;
2782         }
2783 
2784       return NULL;
2785     }
2786 }
2787 
2788 /**
2789  * g_strrstr:
2790  * @haystack: a nul-terminated string
2791  * @needle: the nul-terminated string to search for
2792  *
2793  * Searches the string @haystack for the last occurrence
2794  * of the string @needle.
2795  *
2796  * Returns: a pointer to the found occurrence, or
2797  *    %NULL if not found.
2798  */
2799 gchar *
g_strrstr(const gchar * haystack,const gchar * needle)2800 g_strrstr (const gchar *haystack,
2801            const gchar *needle)
2802 {
2803   gsize i;
2804   gsize needle_len;
2805   gsize haystack_len;
2806   const gchar *p;
2807 
2808   g_return_val_if_fail (haystack != NULL, NULL);
2809   g_return_val_if_fail (needle != NULL, NULL);
2810 
2811   needle_len = strlen (needle);
2812   haystack_len = strlen (haystack);
2813 
2814   if (needle_len == 0)
2815     return (gchar *)haystack;
2816 
2817   if (haystack_len < needle_len)
2818     return NULL;
2819 
2820   p = haystack + haystack_len - needle_len;
2821 
2822   while (p >= haystack)
2823     {
2824       for (i = 0; i < needle_len; i++)
2825         if (p[i] != needle[i])
2826           goto next;
2827 
2828       return (gchar *)p;
2829 
2830     next:
2831       p--;
2832     }
2833 
2834   return NULL;
2835 }
2836 
2837 /**
2838  * g_strrstr_len:
2839  * @haystack: a nul-terminated string
2840  * @haystack_len: the maximum length of @haystack in bytes. A length of -1
2841  *     can be used to mean "search the entire string", like g_strrstr().
2842  * @needle: the nul-terminated string to search for
2843  *
2844  * Searches the string @haystack for the last occurrence
2845  * of the string @needle, limiting the length of the search
2846  * to @haystack_len.
2847  *
2848  * Returns: a pointer to the found occurrence, or
2849  *    %NULL if not found.
2850  */
2851 gchar *
g_strrstr_len(const gchar * haystack,gssize haystack_len,const gchar * needle)2852 g_strrstr_len (const gchar *haystack,
2853                gssize        haystack_len,
2854                const gchar *needle)
2855 {
2856   g_return_val_if_fail (haystack != NULL, NULL);
2857   g_return_val_if_fail (needle != NULL, NULL);
2858 
2859   if (haystack_len < 0)
2860     return g_strrstr (haystack, needle);
2861   else
2862     {
2863       gsize needle_len = strlen (needle);
2864       const gchar *haystack_max = haystack + haystack_len;
2865       const gchar *p = haystack;
2866       gsize i;
2867 
2868       while (p < haystack_max && *p)
2869         p++;
2870 
2871       if (p < haystack + needle_len)
2872         return NULL;
2873 
2874       p -= needle_len;
2875 
2876       while (p >= haystack)
2877         {
2878           for (i = 0; i < needle_len; i++)
2879             if (p[i] != needle[i])
2880               goto next;
2881 
2882           return (gchar *)p;
2883 
2884         next:
2885           p--;
2886         }
2887 
2888       return NULL;
2889     }
2890 }
2891 
2892 
2893 /**
2894  * g_str_has_suffix:
2895  * @str: a nul-terminated string
2896  * @suffix: the nul-terminated suffix to look for
2897  *
2898  * Looks whether the string @str ends with @suffix.
2899  *
2900  * Returns: %TRUE if @str end with @suffix, %FALSE otherwise.
2901  *
2902  * Since: 2.2
2903  */
2904 gboolean
g_str_has_suffix(const gchar * str,const gchar * suffix)2905 g_str_has_suffix (const gchar *str,
2906                   const gchar *suffix)
2907 {
2908   gsize str_len;
2909   gsize suffix_len;
2910 
2911   g_return_val_if_fail (str != NULL, FALSE);
2912   g_return_val_if_fail (suffix != NULL, FALSE);
2913 
2914   str_len = strlen (str);
2915   suffix_len = strlen (suffix);
2916 
2917   if (str_len < suffix_len)
2918     return FALSE;
2919 
2920   return strcmp (str + str_len - suffix_len, suffix) == 0;
2921 }
2922 
2923 /**
2924  * g_str_has_prefix:
2925  * @str: a nul-terminated string
2926  * @prefix: the nul-terminated prefix to look for
2927  *
2928  * Looks whether the string @str begins with @prefix.
2929  *
2930  * Returns: %TRUE if @str begins with @prefix, %FALSE otherwise.
2931  *
2932  * Since: 2.2
2933  */
2934 gboolean
g_str_has_prefix(const gchar * str,const gchar * prefix)2935 g_str_has_prefix (const gchar *str,
2936                   const gchar *prefix)
2937 {
2938   g_return_val_if_fail (str != NULL, FALSE);
2939   g_return_val_if_fail (prefix != NULL, FALSE);
2940 
2941   return strncmp (str, prefix, strlen (prefix)) == 0;
2942 }
2943 
2944 /**
2945  * g_strv_length:
2946  * @str_array: a %NULL-terminated array of strings
2947  *
2948  * Returns the length of the given %NULL-terminated
2949  * string array @str_array. @str_array must not be %NULL.
2950  *
2951  * Returns: length of @str_array.
2952  *
2953  * Since: 2.6
2954  */
2955 guint
g_strv_length(gchar ** str_array)2956 g_strv_length (gchar **str_array)
2957 {
2958   guint i = 0;
2959 
2960   g_return_val_if_fail (str_array != NULL, 0);
2961 
2962   while (str_array[i])
2963     ++i;
2964 
2965   return i;
2966 }
2967 
2968 static void
index_add_folded(GPtrArray * array,const gchar * start,const gchar * end)2969 index_add_folded (GPtrArray   *array,
2970                   const gchar *start,
2971                   const gchar *end)
2972 {
2973   gchar *normal;
2974 
2975   normal = g_utf8_normalize (start, end - start, G_NORMALIZE_ALL_COMPOSE);
2976 
2977   /* TODO: Invent time machine.  Converse with Mustafa Ataturk... */
2978   if (strstr (normal, "ı") || strstr (normal, "İ"))
2979     {
2980       gchar *s = normal;
2981       GString *tmp;
2982 
2983       tmp = g_string_new (NULL);
2984 
2985       while (*s)
2986         {
2987           gchar *i, *I, *e;
2988 
2989           i = strstr (s, "ı");
2990           I = strstr (s, "İ");
2991 
2992           if (!i && !I)
2993             break;
2994           else if (i && !I)
2995             e = i;
2996           else if (I && !i)
2997             e = I;
2998           else if (i < I)
2999             e = i;
3000           else
3001             e = I;
3002 
3003           g_string_append_len (tmp, s, e - s);
3004           g_string_append_c (tmp, 'i');
3005           s = g_utf8_next_char (e);
3006         }
3007 
3008       g_string_append (tmp, s);
3009       g_free (normal);
3010       normal = g_string_free (tmp, FALSE);
3011     }
3012 
3013   g_ptr_array_add (array, g_utf8_casefold (normal, -1));
3014   g_free (normal);
3015 }
3016 
3017 static gchar **
split_words(const gchar * value)3018 split_words (const gchar *value)
3019 {
3020   const gchar *start = NULL;
3021   GPtrArray *result;
3022   const gchar *s;
3023 
3024   result = g_ptr_array_new ();
3025 
3026   for (s = value; *s; s = g_utf8_next_char (s))
3027     {
3028       gunichar c = g_utf8_get_char (s);
3029 
3030       if (start == NULL)
3031         {
3032           if (g_unichar_isalnum (c) || g_unichar_ismark (c))
3033             start = s;
3034         }
3035       else
3036         {
3037           if (!g_unichar_isalnum (c) && !g_unichar_ismark (c))
3038             {
3039               index_add_folded (result, start, s);
3040               start = NULL;
3041             }
3042         }
3043     }
3044 
3045   if (start)
3046     index_add_folded (result, start, s);
3047 
3048   g_ptr_array_add (result, NULL);
3049 
3050   return (gchar **) g_ptr_array_free (result, FALSE);
3051 }
3052 
3053 /**
3054  * g_str_tokenize_and_fold:
3055  * @string: a string
3056  * @translit_locale: (nullable): the language code (like 'de' or
3057  *   'en_GB') from which @string originates
3058  * @ascii_alternates: (out) (transfer full) (array zero-terminated=1): a
3059  *   return location for ASCII alternates
3060  *
3061  * Tokenises @string and performs folding on each token.
3062  *
3063  * A token is a non-empty sequence of alphanumeric characters in the
3064  * source string, separated by non-alphanumeric characters.  An
3065  * "alphanumeric" character for this purpose is one that matches
3066  * g_unichar_isalnum() or g_unichar_ismark().
3067  *
3068  * Each token is then (Unicode) normalised and case-folded.  If
3069  * @ascii_alternates is non-%NULL and some of the returned tokens
3070  * contain non-ASCII characters, ASCII alternatives will be generated.
3071  *
3072  * The number of ASCII alternatives that are generated and the method
3073  * for doing so is unspecified, but @translit_locale (if specified) may
3074  * improve the transliteration if the language of the source string is
3075  * known.
3076  *
3077  * Returns: (transfer full) (array zero-terminated=1): the folded tokens
3078  *
3079  * Since: 2.40
3080  **/
3081 gchar **
g_str_tokenize_and_fold(const gchar * string,const gchar * translit_locale,gchar *** ascii_alternates)3082 g_str_tokenize_and_fold (const gchar   *string,
3083                          const gchar   *translit_locale,
3084                          gchar       ***ascii_alternates)
3085 {
3086   gchar **result;
3087 
3088   g_return_val_if_fail (string != NULL, NULL);
3089 
3090   if (ascii_alternates && g_str_is_ascii (string))
3091     {
3092       *ascii_alternates = g_new0 (gchar *, 0 + 1);
3093       ascii_alternates = NULL;
3094     }
3095 
3096   result = split_words (string);
3097 
3098   if (ascii_alternates)
3099     {
3100       gint i, j, n;
3101 
3102       n = g_strv_length (result);
3103       *ascii_alternates = g_new (gchar *, n + 1);
3104       j = 0;
3105 
3106       for (i = 0; i < n; i++)
3107         {
3108           if (!g_str_is_ascii (result[i]))
3109             {
3110               gchar *composed;
3111               gchar *ascii;
3112               gint k;
3113 
3114               composed = g_utf8_normalize (result[i], -1, G_NORMALIZE_ALL_COMPOSE);
3115 
3116               ascii = g_str_to_ascii (composed, translit_locale);
3117 
3118               /* Only accept strings that are now entirely alnums */
3119               for (k = 0; ascii[k]; k++)
3120                 if (!g_ascii_isalnum (ascii[k]))
3121                   break;
3122 
3123               if (ascii[k] == '\0')
3124                 /* Made it to the end... */
3125                 (*ascii_alternates)[j++] = ascii;
3126               else
3127                 g_free (ascii);
3128 
3129               g_free (composed);
3130             }
3131         }
3132 
3133       (*ascii_alternates)[j] = NULL;
3134     }
3135 
3136   return result;
3137 }
3138 
3139 /**
3140  * g_str_match_string:
3141  * @search_term: the search term from the user
3142  * @potential_hit: the text that may be a hit
3143  * @accept_alternates: %TRUE to accept ASCII alternates
3144  *
3145  * Checks if a search conducted for @search_term should match
3146  * @potential_hit.
3147  *
3148  * This function calls g_str_tokenize_and_fold() on both
3149  * @search_term and @potential_hit.  ASCII alternates are never taken
3150  * for @search_term but will be taken for @potential_hit according to
3151  * the value of @accept_alternates.
3152  *
3153  * A hit occurs when each folded token in @search_term is a prefix of a
3154  * folded token from @potential_hit.
3155  *
3156  * Depending on how you're performing the search, it will typically be
3157  * faster to call g_str_tokenize_and_fold() on each string in
3158  * your corpus and build an index on the returned folded tokens, then
3159  * call g_str_tokenize_and_fold() on the search term and
3160  * perform lookups into that index.
3161  *
3162  * As some examples, searching for ‘fred’ would match the potential hit
3163  * ‘Smith, Fred’ and also ‘Frédéric’.  Searching for ‘Fréd’ would match
3164  * ‘Frédéric’ but not ‘Frederic’ (due to the one-directional nature of
3165  * accent matching).  Searching ‘fo’ would match ‘Foo’ and ‘Bar Foo
3166  * Baz’, but not ‘SFO’ (because no word has ‘fo’ as a prefix).
3167  *
3168  * Returns: %TRUE if @potential_hit is a hit
3169  *
3170  * Since: 2.40
3171  **/
3172 gboolean
g_str_match_string(const gchar * search_term,const gchar * potential_hit,gboolean accept_alternates)3173 g_str_match_string (const gchar *search_term,
3174                     const gchar *potential_hit,
3175                     gboolean     accept_alternates)
3176 {
3177   gchar **alternates = NULL;
3178   gchar **term_tokens;
3179   gchar **hit_tokens;
3180   gboolean matched;
3181   gint i, j;
3182 
3183   g_return_val_if_fail (search_term != NULL, FALSE);
3184   g_return_val_if_fail (potential_hit != NULL, FALSE);
3185 
3186   term_tokens = g_str_tokenize_and_fold (search_term, NULL, NULL);
3187   hit_tokens = g_str_tokenize_and_fold (potential_hit, NULL, accept_alternates ? &alternates : NULL);
3188 
3189   matched = TRUE;
3190 
3191   for (i = 0; term_tokens[i]; i++)
3192     {
3193       for (j = 0; hit_tokens[j]; j++)
3194         if (g_str_has_prefix (hit_tokens[j], term_tokens[i]))
3195           goto one_matched;
3196 
3197       if (accept_alternates)
3198         for (j = 0; alternates[j]; j++)
3199           if (g_str_has_prefix (alternates[j], term_tokens[i]))
3200             goto one_matched;
3201 
3202       matched = FALSE;
3203       break;
3204 
3205 one_matched:
3206       continue;
3207     }
3208 
3209   g_strfreev (term_tokens);
3210   g_strfreev (hit_tokens);
3211   g_strfreev (alternates);
3212 
3213   return matched;
3214 }
3215 
3216 /**
3217  * g_strv_contains:
3218  * @strv: a %NULL-terminated array of strings
3219  * @str: a string
3220  *
3221  * Checks if @strv contains @str. @strv must not be %NULL.
3222  *
3223  * Returns: %TRUE if @str is an element of @strv, according to g_str_equal().
3224  *
3225  * Since: 2.44
3226  */
3227 gboolean
g_strv_contains(const gchar * const * strv,const gchar * str)3228 g_strv_contains (const gchar * const *strv,
3229                  const gchar         *str)
3230 {
3231   g_return_val_if_fail (strv != NULL, FALSE);
3232   g_return_val_if_fail (str != NULL, FALSE);
3233 
3234   for (; *strv != NULL; strv++)
3235     {
3236       if (g_str_equal (str, *strv))
3237         return TRUE;
3238     }
3239 
3240   return FALSE;
3241 }
3242 
3243 /**
3244  * g_strv_equal:
3245  * @strv1: a %NULL-terminated array of strings
3246  * @strv2: another %NULL-terminated array of strings
3247  *
3248  * Checks if @strv1 and @strv2 contain exactly the same elements in exactly the
3249  * same order. Elements are compared using g_str_equal(). To match independently
3250  * of order, sort the arrays first (using g_qsort_with_data() or similar).
3251  *
3252  * Two empty arrays are considered equal. Neither @strv1 not @strv2 may be
3253  * %NULL.
3254  *
3255  * Returns: %TRUE if @strv1 and @strv2 are equal
3256  * Since: 2.60
3257  */
3258 gboolean
g_strv_equal(const gchar * const * strv1,const gchar * const * strv2)3259 g_strv_equal (const gchar * const *strv1,
3260               const gchar * const *strv2)
3261 {
3262   g_return_val_if_fail (strv1 != NULL, FALSE);
3263   g_return_val_if_fail (strv2 != NULL, FALSE);
3264 
3265   if (strv1 == strv2)
3266     return TRUE;
3267 
3268   for (; *strv1 != NULL && *strv2 != NULL; strv1++, strv2++)
3269     {
3270       if (!g_str_equal (*strv1, *strv2))
3271         return FALSE;
3272     }
3273 
3274   return (*strv1 == NULL && *strv2 == NULL);
3275 }
3276 
3277 static gboolean
str_has_sign(const gchar * str)3278 str_has_sign (const gchar *str)
3279 {
3280   return str[0] == '-' || str[0] == '+';
3281 }
3282 
3283 static gboolean
str_has_hex_prefix(const gchar * str)3284 str_has_hex_prefix (const gchar *str)
3285 {
3286   return str[0] == '0' && g_ascii_tolower (str[1]) == 'x';
3287 }
3288 
3289 /**
3290  * g_ascii_string_to_signed:
3291  * @str: a string
3292  * @base: base of a parsed number
3293  * @min: a lower bound (inclusive)
3294  * @max: an upper bound (inclusive)
3295  * @out_num: (out) (optional): a return location for a number
3296  * @error: a return location for #GError
3297  *
3298  * A convenience function for converting a string to a signed number.
3299  *
3300  * This function assumes that @str contains only a number of the given
3301  * @base that is within inclusive bounds limited by @min and @max. If
3302  * this is true, then the converted number is stored in @out_num. An
3303  * empty string is not a valid input. A string with leading or
3304  * trailing whitespace is also an invalid input.
3305  *
3306  * @base can be between 2 and 36 inclusive. Hexadecimal numbers must
3307  * not be prefixed with "0x" or "0X". Such a problem does not exist
3308  * for octal numbers, since they were usually prefixed with a zero
3309  * which does not change the value of the parsed number.
3310  *
3311  * Parsing failures result in an error with the %G_NUMBER_PARSER_ERROR
3312  * domain. If the input is invalid, the error code will be
3313  * %G_NUMBER_PARSER_ERROR_INVALID. If the parsed number is out of
3314  * bounds - %G_NUMBER_PARSER_ERROR_OUT_OF_BOUNDS.
3315  *
3316  * See g_ascii_strtoll() if you have more complex needs such as
3317  * parsing a string which starts with a number, but then has other
3318  * characters.
3319  *
3320  * Returns: %TRUE if @str was a number, otherwise %FALSE.
3321  *
3322  * Since: 2.54
3323  */
3324 gboolean
g_ascii_string_to_signed(const gchar * str,guint base,gint64 min,gint64 max,gint64 * out_num,GError ** error)3325 g_ascii_string_to_signed (const gchar  *str,
3326                           guint         base,
3327                           gint64        min,
3328                           gint64        max,
3329                           gint64       *out_num,
3330                           GError      **error)
3331 {
3332   gint64 number;
3333   const gchar *end_ptr = NULL;
3334   gint saved_errno = 0;
3335 
3336   g_return_val_if_fail (str != NULL, FALSE);
3337   g_return_val_if_fail (base >= 2 && base <= 36, FALSE);
3338   g_return_val_if_fail (min <= max, FALSE);
3339   g_return_val_if_fail (error == NULL || *error == NULL, FALSE);
3340 
3341   if (str[0] == '\0')
3342     {
3343       g_set_error_literal (error,
3344                            G_NUMBER_PARSER_ERROR, G_NUMBER_PARSER_ERROR_INVALID,
3345                            _("Empty string is not a number"));
3346       return FALSE;
3347     }
3348 
3349   errno = 0;
3350   number = g_ascii_strtoll (str, (gchar **)&end_ptr, base);
3351   saved_errno = errno;
3352 
3353   if (/* We do not allow leading whitespace, but g_ascii_strtoll
3354        * accepts it and just skips it, so we need to check for it
3355        * ourselves.
3356        */
3357       g_ascii_isspace (str[0]) ||
3358       /* We don't support hexadecimal numbers prefixed with 0x or
3359        * 0X.
3360        */
3361       (base == 16 &&
3362        (str_has_sign (str) ? str_has_hex_prefix (str + 1) : str_has_hex_prefix (str))) ||
3363       (saved_errno != 0 && saved_errno != ERANGE) ||
3364       end_ptr == NULL ||
3365       *end_ptr != '\0')
3366     {
3367       g_set_error (error,
3368                    G_NUMBER_PARSER_ERROR, G_NUMBER_PARSER_ERROR_INVALID,
3369                    _("“%s” is not a signed number"), str);
3370       return FALSE;
3371     }
3372   if (saved_errno == ERANGE || number < min || number > max)
3373     {
3374       gchar *min_str = g_strdup_printf ("%" G_GINT64_FORMAT, min);
3375       gchar *max_str = g_strdup_printf ("%" G_GINT64_FORMAT, max);
3376 
3377       g_set_error (error,
3378                    G_NUMBER_PARSER_ERROR, G_NUMBER_PARSER_ERROR_OUT_OF_BOUNDS,
3379                    _("Number “%s” is out of bounds [%s, %s]"),
3380                    str, min_str, max_str);
3381       g_free (min_str);
3382       g_free (max_str);
3383       return FALSE;
3384     }
3385   if (out_num != NULL)
3386     *out_num = number;
3387   return TRUE;
3388 }
3389 
3390 /**
3391  * g_ascii_string_to_unsigned:
3392  * @str: a string
3393  * @base: base of a parsed number
3394  * @min: a lower bound (inclusive)
3395  * @max: an upper bound (inclusive)
3396  * @out_num: (out) (optional): a return location for a number
3397  * @error: a return location for #GError
3398  *
3399  * A convenience function for converting a string to an unsigned number.
3400  *
3401  * This function assumes that @str contains only a number of the given
3402  * @base that is within inclusive bounds limited by @min and @max. If
3403  * this is true, then the converted number is stored in @out_num. An
3404  * empty string is not a valid input. A string with leading or
3405  * trailing whitespace is also an invalid input. A string with a leading sign
3406  * (`-` or `+`) is not a valid input for the unsigned parser.
3407  *
3408  * @base can be between 2 and 36 inclusive. Hexadecimal numbers must
3409  * not be prefixed with "0x" or "0X". Such a problem does not exist
3410  * for octal numbers, since they were usually prefixed with a zero
3411  * which does not change the value of the parsed number.
3412  *
3413  * Parsing failures result in an error with the %G_NUMBER_PARSER_ERROR
3414  * domain. If the input is invalid, the error code will be
3415  * %G_NUMBER_PARSER_ERROR_INVALID. If the parsed number is out of
3416  * bounds - %G_NUMBER_PARSER_ERROR_OUT_OF_BOUNDS.
3417  *
3418  * See g_ascii_strtoull() if you have more complex needs such as
3419  * parsing a string which starts with a number, but then has other
3420  * characters.
3421  *
3422  * Returns: %TRUE if @str was a number, otherwise %FALSE.
3423  *
3424  * Since: 2.54
3425  */
3426 gboolean
g_ascii_string_to_unsigned(const gchar * str,guint base,guint64 min,guint64 max,guint64 * out_num,GError ** error)3427 g_ascii_string_to_unsigned (const gchar  *str,
3428                             guint         base,
3429                             guint64       min,
3430                             guint64       max,
3431                             guint64      *out_num,
3432                             GError      **error)
3433 {
3434   guint64 number;
3435   const gchar *end_ptr = NULL;
3436   gint saved_errno = 0;
3437 
3438   g_return_val_if_fail (str != NULL, FALSE);
3439   g_return_val_if_fail (base >= 2 && base <= 36, FALSE);
3440   g_return_val_if_fail (min <= max, FALSE);
3441   g_return_val_if_fail (error == NULL || *error == NULL, FALSE);
3442 
3443   if (str[0] == '\0')
3444     {
3445       g_set_error_literal (error,
3446                            G_NUMBER_PARSER_ERROR, G_NUMBER_PARSER_ERROR_INVALID,
3447                            _("Empty string is not a number"));
3448       return FALSE;
3449     }
3450 
3451   errno = 0;
3452   number = g_ascii_strtoull (str, (gchar **)&end_ptr, base);
3453   saved_errno = errno;
3454 
3455   if (/* We do not allow leading whitespace, but g_ascii_strtoull
3456        * accepts it and just skips it, so we need to check for it
3457        * ourselves.
3458        */
3459       g_ascii_isspace (str[0]) ||
3460       /* Unsigned number should have no sign.
3461        */
3462       str_has_sign (str) ||
3463       /* We don't support hexadecimal numbers prefixed with 0x or
3464        * 0X.
3465        */
3466       (base == 16 && str_has_hex_prefix (str)) ||
3467       (saved_errno != 0 && saved_errno != ERANGE) ||
3468       end_ptr == NULL ||
3469       *end_ptr != '\0')
3470     {
3471       g_set_error (error,
3472                    G_NUMBER_PARSER_ERROR, G_NUMBER_PARSER_ERROR_INVALID,
3473                    _("“%s” is not an unsigned number"), str);
3474       return FALSE;
3475     }
3476   if (saved_errno == ERANGE || number < min || number > max)
3477     {
3478       gchar *min_str = g_strdup_printf ("%" G_GUINT64_FORMAT, min);
3479       gchar *max_str = g_strdup_printf ("%" G_GUINT64_FORMAT, max);
3480 
3481       g_set_error (error,
3482                    G_NUMBER_PARSER_ERROR, G_NUMBER_PARSER_ERROR_OUT_OF_BOUNDS,
3483                    _("Number “%s” is out of bounds [%s, %s]"),
3484                    str, min_str, max_str);
3485       g_free (min_str);
3486       g_free (max_str);
3487       return FALSE;
3488     }
3489   if (out_num != NULL)
3490     *out_num = number;
3491   return TRUE;
3492 }
3493 
/* Error-domain quark for the number parsers.  g_ascii_string_to_signed()
 * and g_ascii_string_to_unsigned() both report their failures in the
 * G_NUMBER_PARSER_ERROR domain; presumably that macro resolves to the
 * g_number_parser_error_quark() accessor generated here for the string
 * "g-number-parser-error-quark" -- confirm against gstrfuncs.h and the
 * definition of G_DEFINE_QUARK.
 */
G_DEFINE_QUARK (g-number-parser-error-quark, g_number_parser_error)
3495