1 /* GLIB - Library of useful routines for C programming
2 *
3 * gconvert.c: Convert between character sets using iconv
4 * Copyright Red Hat Inc., 2000
5 * Authors: Havoc Pennington <hp@redhat.com>, Owen Taylor <otaylor@redhat.com>
6 *
7 * This library is free software; you can redistribute it and/or
8 * modify it under the terms of the GNU Lesser General Public
9 * License as published by the Free Software Foundation; either
10 * version 2.1 of the License, or (at your option) any later version.
11 *
12 * This library is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 * Lesser General Public License for more details.
16 *
17 * You should have received a copy of the GNU Lesser General Public
18 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
19 */
20
21 #include "config.h"
22 #include "glibconfig.h"
23
24 #ifndef G_OS_WIN32
25 #include <iconv.h>
26 #endif
27 #include <errno.h>
28 #include <stdio.h>
29 #include <string.h>
30 #include <stdlib.h>
31
32 #ifdef G_OS_WIN32
33 #include "win_iconv.c"
34 #endif
35
36 #ifdef G_PLATFORM_WIN32
37 #define STRICT
38 #include <windows.h>
39 #undef STRICT
40 #endif
41
42 #include "gconvert.h"
43 #include "gconvertprivate.h"
44
45 #include "gcharsetprivate.h"
46 #include "gslist.h"
47 #include "gstrfuncs.h"
48 #include "gtestutils.h"
49 #include "gthread.h"
50 #include "gthreadprivate.h"
51 #include "gunicode.h"
52 #include "gfileutils.h"
53 #include "genviron.h"
54
55 #include "glibintl.h"
56
57
58 /**
59 * SECTION:conversions
60 * @title: Character Set Conversion
61 * @short_description: convert strings between different character sets
62 *
 * The g_convert() family of functions wraps the functionality of iconv().
64 * In addition to pure character set conversions, GLib has functions to
65 * deal with the extra complications of encodings for file names.
66 *
67 * ## File Name Encodings
68 *
69 * Historically, UNIX has not had a defined encoding for file names:
70 * a file name is valid as long as it does not have path separators
71 * in it ("/"). However, displaying file names may require conversion:
72 * from the character set in which they were created, to the character
73 * set in which the application operates. Consider the Spanish file name
74 * "Presentación.sxi". If the application which created it uses
75 * ISO-8859-1 for its encoding,
76 * |[
77 * Character: P r e s e n t a c i ó n . s x i
78 * Hex code: 50 72 65 73 65 6e 74 61 63 69 f3 6e 2e 73 78 69
79 * ]|
 * However, if the application uses UTF-8, the actual file name on
81 * disk would look like this:
82 * |[
83 * Character: P r e s e n t a c i ó n . s x i
84 * Hex code: 50 72 65 73 65 6e 74 61 63 69 c3 b3 6e 2e 73 78 69
85 * ]|
 * GLib uses UTF-8 for its strings, and GUI toolkits like GTK+ that use
87 * GLib do the same thing. If you get a file name from the file system,
88 * for example, from readdir() or from g_dir_read_name(), and you wish
89 * to display the file name to the user, you will need to convert it
90 * into UTF-8. The opposite case is when the user types the name of a
91 * file they wish to save: the toolkit will give you that string in
92 * UTF-8 encoding, and you will need to convert it to the character
93 * set used for file names before you can create the file with open()
94 * or fopen().
95 *
96 * By default, GLib assumes that file names on disk are in UTF-8
97 * encoding. This is a valid assumption for file systems which
98 * were created relatively recently: most applications use UTF-8
99 * encoding for their strings, and that is also what they use for
100 * the file names they create. However, older file systems may
101 * still contain file names created in "older" encodings, such as
102 * ISO-8859-1. In this case, for compatibility reasons, you may want
103 * to instruct GLib to use that particular encoding for file names
104 * rather than UTF-8. You can do this by specifying the encoding for
105 * file names in the [`G_FILENAME_ENCODING`][G_FILENAME_ENCODING]
106 * environment variable. For example, if your installation uses
107 * ISO-8859-1 for file names, you can put this in your `~/.profile`:
108 * |[
109 * export G_FILENAME_ENCODING=ISO-8859-1
110 * ]|
111 * GLib provides the functions g_filename_to_utf8() and
112 * g_filename_from_utf8() to perform the necessary conversions.
113 * These functions convert file names from the encoding specified
114 * in `G_FILENAME_ENCODING` to UTF-8 and vice-versa. This
115 * [diagram][file-name-encodings-diagram] illustrates how
116 * these functions are used to convert between UTF-8 and the
117 * encoding for file names in the file system.
118 *
 * ## Conversion between file name encodings # {#file-name-encodings-diagram}
120 *
121 * ![](file-name-encodings.png)
122 *
123 * ## Checklist for Application Writers
124 *
125 * This section is a practical summary of the detailed
126 * things to do to make sure your applications process file
127 * name encodings correctly.
128 *
129 * 1. If you get a file name from the file system from a function
130 * such as readdir() or gtk_file_chooser_get_filename(), you do
131 * not need to do any conversion to pass that file name to
132 * functions like open(), rename(), or fopen() -- those are "raw"
133 * file names which the file system understands.
134 *
135 * 2. If you need to display a file name, convert it to UTF-8 first
136 * by using g_filename_to_utf8(). If conversion fails, display a
137 * string like "Unknown file name". Do not convert this string back
138 * into the encoding used for file names if you wish to pass it to
139 * the file system; use the original file name instead.
140 *
141 * For example, the document window of a word processor could display
142 * "Unknown file name" in its title bar but still let the user save
143 * the file, as it would keep the raw file name internally. This
144 * can happen if the user has not set the `G_FILENAME_ENCODING`
 * environment variable even though they have files whose names are
146 * not encoded in UTF-8.
147 *
148 * 3. If your user interface lets the user type a file name for saving
149 * or renaming, convert it to the encoding used for file names in
150 * the file system by using g_filename_from_utf8(). Pass the converted
151 * file name to functions like fopen(). If conversion fails, ask the
152 * user to enter a different file name. This can happen if the user
153 * types Japanese characters when `G_FILENAME_ENCODING` is set to
154 * `ISO-8859-1`, for example.
155 */
156
157 /* We try to terminate strings in unknown charsets with this many zero bytes
158 * to ensure that multibyte strings really are nul-terminated when we return
159 * them from g_convert() and friends.
160 */
161 #define NUL_TERMINATOR_LENGTH 4
162
G_DEFINE_QUARK(g_convert_error,g_convert_error)163 G_DEFINE_QUARK (g_convert_error, g_convert_error)
164
165 static gboolean
166 try_conversion (const char *to_codeset,
167 const char *from_codeset,
168 iconv_t *cd)
169 {
170 *cd = iconv_open (to_codeset, from_codeset);
171
172 if (*cd == (iconv_t)-1 && errno == EINVAL)
173 return FALSE;
174 else
175 return TRUE;
176 }
177
178 static gboolean
try_to_aliases(const char ** to_aliases,const char * from_codeset,iconv_t * cd)179 try_to_aliases (const char **to_aliases,
180 const char *from_codeset,
181 iconv_t *cd)
182 {
183 if (to_aliases)
184 {
185 const char **p = to_aliases;
186 while (*p)
187 {
188 if (try_conversion (*p, from_codeset, cd))
189 return TRUE;
190
191 p++;
192 }
193 }
194
195 return FALSE;
196 }
197
198 /**
199 * g_iconv_open: (skip)
200 * @to_codeset: destination codeset
201 * @from_codeset: source codeset
202 *
203 * Same as the standard UNIX routine iconv_open(), but
204 * may be implemented via libiconv on UNIX flavors that lack
205 * a native implementation.
206 *
207 * GLib provides g_convert() and g_locale_to_utf8() which are likely
208 * more convenient than the raw iconv wrappers.
209 *
210 * Returns: a "conversion descriptor", or (GIConv)-1 if
211 * opening the converter failed.
212 **/
213 GIConv
g_iconv_open(const gchar * to_codeset,const gchar * from_codeset)214 g_iconv_open (const gchar *to_codeset,
215 const gchar *from_codeset)
216 {
217 iconv_t cd;
218
219 if (!try_conversion (to_codeset, from_codeset, &cd))
220 {
221 const char **to_aliases = _g_charset_get_aliases (to_codeset);
222 const char **from_aliases = _g_charset_get_aliases (from_codeset);
223
224 if (from_aliases)
225 {
226 const char **p = from_aliases;
227 while (*p)
228 {
229 if (try_conversion (to_codeset, *p, &cd))
230 goto out;
231
232 if (try_to_aliases (to_aliases, *p, &cd))
233 goto out;
234
235 p++;
236 }
237 }
238
239 if (try_to_aliases (to_aliases, from_codeset, &cd))
240 goto out;
241 }
242
243 out:
244 return (cd == (iconv_t)-1) ? (GIConv)-1 : (GIConv)cd;
245 }
246
247 /**
248 * g_iconv: (skip)
249 * @converter: conversion descriptor from g_iconv_open()
250 * @inbuf: bytes to convert
251 * @inbytes_left: inout parameter, bytes remaining to convert in @inbuf
252 * @outbuf: converted output bytes
253 * @outbytes_left: inout parameter, bytes available to fill in @outbuf
254 *
255 * Same as the standard UNIX routine iconv(), but
256 * may be implemented via libiconv on UNIX flavors that lack
257 * a native implementation.
258 *
259 * GLib provides g_convert() and g_locale_to_utf8() which are likely
260 * more convenient than the raw iconv wrappers.
261 *
262 * Returns: count of non-reversible conversions, or -1 on error
263 **/
264 gsize
g_iconv(GIConv converter,gchar ** inbuf,gsize * inbytes_left,gchar ** outbuf,gsize * outbytes_left)265 g_iconv (GIConv converter,
266 gchar **inbuf,
267 gsize *inbytes_left,
268 gchar **outbuf,
269 gsize *outbytes_left)
270 {
271 iconv_t cd = (iconv_t)converter;
272
273 return iconv (cd, inbuf, inbytes_left, outbuf, outbytes_left);
274 }
275
276 /**
277 * g_iconv_close: (skip)
278 * @converter: a conversion descriptor from g_iconv_open()
279 *
280 * Same as the standard UNIX routine iconv_close(), but
281 * may be implemented via libiconv on UNIX flavors that lack
282 * a native implementation. Should be called to clean up
283 * the conversion descriptor from g_iconv_open() when
284 * you are done converting things.
285 *
286 * GLib provides g_convert() and g_locale_to_utf8() which are likely
287 * more convenient than the raw iconv wrappers.
288 *
289 * Returns: -1 on error, 0 on success
290 **/
291 gint
g_iconv_close(GIConv converter)292 g_iconv_close (GIConv converter)
293 {
294 iconv_t cd = (iconv_t)converter;
295
296 return iconv_close (cd);
297 }
298
299 static GIConv
open_converter(const gchar * to_codeset,const gchar * from_codeset,GError ** error)300 open_converter (const gchar *to_codeset,
301 const gchar *from_codeset,
302 GError **error)
303 {
304 GIConv cd;
305
306 cd = g_iconv_open (to_codeset, from_codeset);
307
308 if (cd == (GIConv) -1)
309 {
310 /* Something went wrong. */
311 if (error)
312 {
313 if (errno == EINVAL)
314 g_set_error (error, G_CONVERT_ERROR, G_CONVERT_ERROR_NO_CONVERSION,
315 _("Conversion from character set “%s” to “%s” is not supported"),
316 from_codeset, to_codeset);
317 else
318 g_set_error (error, G_CONVERT_ERROR, G_CONVERT_ERROR_FAILED,
319 _("Could not open converter from “%s” to “%s”"),
320 from_codeset, to_codeset);
321 }
322 }
323
324 return cd;
325 }
326
327 static int
close_converter(GIConv cd)328 close_converter (GIConv cd)
329 {
330 if (cd == (GIConv) -1)
331 return 0;
332
333 return g_iconv_close (cd);
334 }
335
336 /**
337 * g_convert_with_iconv: (skip)
338 * @str: (array length=len) (element-type guint8):
339 * the string to convert.
340 * @len: the length of the string in bytes, or -1 if the string is
341 * nul-terminated (Note that some encodings may allow nul
342 * bytes to occur inside strings. In that case, using -1
343 * for the @len parameter is unsafe)
344 * @converter: conversion descriptor from g_iconv_open()
345 * @bytes_read: (out) (optional): location to store the number of bytes in
346 * the input string that were successfully converted, or %NULL.
347 * Even if the conversion was successful, this may be
348 * less than @len if there were partial characters
349 * at the end of the input. If the error
350 * #G_CONVERT_ERROR_ILLEGAL_SEQUENCE occurs, the value
351 * stored will be the byte offset after the last valid
352 * input sequence.
353 * @bytes_written: (out) (optional): the number of bytes stored in
354 * the output buffer (not including the terminating nul).
355 * @error: location to store the error occurring, or %NULL to ignore
356 * errors. Any of the errors in #GConvertError may occur.
357 *
358 * Converts a string from one character set to another.
359 *
360 * Note that you should use g_iconv() for streaming conversions.
361 * Despite the fact that @bytes_read can return information about partial
362 * characters, the g_convert_... functions are not generally suitable
363 * for streaming. If the underlying converter maintains internal state,
364 * then this won't be preserved across successive calls to g_convert(),
365 * g_convert_with_iconv() or g_convert_with_fallback(). (An example of
366 * this is the GNU C converter for CP1255 which does not emit a base
367 * character until it knows that the next character is not a mark that
368 * could combine with the base character.)
369 *
370 * Returns: (array length=bytes_written) (element-type guint8) (transfer full):
371 * If the conversion was successful, a newly allocated buffer
372 * containing the converted string, which must be freed with
373 * g_free(). Otherwise %NULL and @error will be set.
374 **/
375 gchar*
g_convert_with_iconv(const gchar * str,gssize len,GIConv converter,gsize * bytes_read,gsize * bytes_written,GError ** error)376 g_convert_with_iconv (const gchar *str,
377 gssize len,
378 GIConv converter,
379 gsize *bytes_read,
380 gsize *bytes_written,
381 GError **error)
382 {
383 gchar *dest;
384 gchar *outp;
385 const gchar *p;
386 gsize inbytes_remaining;
387 gsize outbytes_remaining;
388 gsize err;
389 gsize outbuf_size;
390 gboolean have_error = FALSE;
391 gboolean done = FALSE;
392 gboolean reset = FALSE;
393
394 g_return_val_if_fail (converter != (GIConv) -1, NULL);
395
396 if (len < 0)
397 len = strlen (str);
398
399 p = str;
400 inbytes_remaining = len;
401 outbuf_size = len + NUL_TERMINATOR_LENGTH;
402
403 outbytes_remaining = outbuf_size - NUL_TERMINATOR_LENGTH;
404 outp = dest = g_malloc (outbuf_size);
405
406 while (!done && !have_error)
407 {
408 if (reset)
409 err = g_iconv (converter, NULL, &inbytes_remaining, &outp, &outbytes_remaining);
410 else
411 err = g_iconv (converter, (char **)&p, &inbytes_remaining, &outp, &outbytes_remaining);
412
413 if (err == (gsize) -1)
414 {
415 switch (errno)
416 {
417 case EINVAL:
418 /* Incomplete text, do not report an error */
419 done = TRUE;
420 break;
421 case E2BIG:
422 {
423 gsize used = outp - dest;
424
425 outbuf_size *= 2;
426 dest = g_realloc (dest, outbuf_size);
427
428 outp = dest + used;
429 outbytes_remaining = outbuf_size - used - NUL_TERMINATOR_LENGTH;
430 }
431 break;
432 case EILSEQ:
433 g_set_error_literal (error, G_CONVERT_ERROR, G_CONVERT_ERROR_ILLEGAL_SEQUENCE,
434 _("Invalid byte sequence in conversion input"));
435 have_error = TRUE;
436 break;
437 default:
438 {
439 int errsv = errno;
440
441 g_set_error (error, G_CONVERT_ERROR, G_CONVERT_ERROR_FAILED,
442 _("Error during conversion: %s"),
443 g_strerror (errsv));
444 }
445 have_error = TRUE;
446 break;
447 }
448 }
449 else
450 {
451 if (!reset)
452 {
453 /* call g_iconv with NULL inbuf to cleanup shift state */
454 reset = TRUE;
455 inbytes_remaining = 0;
456 }
457 else
458 done = TRUE;
459 }
460 }
461
462 memset (outp, 0, NUL_TERMINATOR_LENGTH);
463
464 if (bytes_read)
465 *bytes_read = p - str;
466 else
467 {
468 if ((p - str) != len)
469 {
470 if (!have_error)
471 {
472 g_set_error_literal (error, G_CONVERT_ERROR, G_CONVERT_ERROR_PARTIAL_INPUT,
473 _("Partial character sequence at end of input"));
474 have_error = TRUE;
475 }
476 }
477 }
478
479 if (bytes_written)
480 *bytes_written = outp - dest; /* Doesn't include '\0' */
481
482 if (have_error)
483 {
484 g_free (dest);
485 return NULL;
486 }
487 else
488 return dest;
489 }
490
491 /**
492 * g_convert:
493 * @str: (array length=len) (element-type guint8):
494 * the string to convert.
495 * @len: the length of the string in bytes, or -1 if the string is
496 * nul-terminated (Note that some encodings may allow nul
497 * bytes to occur inside strings. In that case, using -1
498 * for the @len parameter is unsafe)
499 * @to_codeset: name of character set into which to convert @str
500 * @from_codeset: character set of @str.
501 * @bytes_read: (out) (optional): location to store the number of bytes in
502 * the input string that were successfully converted, or %NULL.
503 * Even if the conversion was successful, this may be
504 * less than @len if there were partial characters
505 * at the end of the input. If the error
506 * #G_CONVERT_ERROR_ILLEGAL_SEQUENCE occurs, the value
507 * stored will be the byte offset after the last valid
508 * input sequence.
509 * @bytes_written: (out) (optional): the number of bytes stored in
510 * the output buffer (not including the terminating nul).
511 * @error: location to store the error occurring, or %NULL to ignore
512 * errors. Any of the errors in #GConvertError may occur.
513 *
514 * Converts a string from one character set to another.
515 *
516 * Note that you should use g_iconv() for streaming conversions.
517 * Despite the fact that @bytes_read can return information about partial
518 * characters, the g_convert_... functions are not generally suitable
519 * for streaming. If the underlying converter maintains internal state,
520 * then this won't be preserved across successive calls to g_convert(),
521 * g_convert_with_iconv() or g_convert_with_fallback(). (An example of
522 * this is the GNU C converter for CP1255 which does not emit a base
523 * character until it knows that the next character is not a mark that
524 * could combine with the base character.)
525 *
526 * Using extensions such as "//TRANSLIT" may not work (or may not work
527 * well) on many platforms. Consider using g_str_to_ascii() instead.
528 *
529 * Returns: (array length=bytes_written) (element-type guint8) (transfer full):
530 * If the conversion was successful, a newly allocated buffer
531 * containing the converted string, which must be freed with g_free().
532 * Otherwise %NULL and @error will be set.
533 **/
534 gchar*
g_convert(const gchar * str,gssize len,const gchar * to_codeset,const gchar * from_codeset,gsize * bytes_read,gsize * bytes_written,GError ** error)535 g_convert (const gchar *str,
536 gssize len,
537 const gchar *to_codeset,
538 const gchar *from_codeset,
539 gsize *bytes_read,
540 gsize *bytes_written,
541 GError **error)
542 {
543 gchar *res;
544 GIConv cd;
545
546 g_return_val_if_fail (str != NULL, NULL);
547 g_return_val_if_fail (to_codeset != NULL, NULL);
548 g_return_val_if_fail (from_codeset != NULL, NULL);
549
550 cd = open_converter (to_codeset, from_codeset, error);
551
552 if (cd == (GIConv) -1)
553 {
554 if (bytes_read)
555 *bytes_read = 0;
556
557 if (bytes_written)
558 *bytes_written = 0;
559
560 return NULL;
561 }
562
563 res = g_convert_with_iconv (str, len, cd,
564 bytes_read, bytes_written,
565 error);
566
567 close_converter (cd);
568
569 return res;
570 }
571
572 /**
573 * g_convert_with_fallback:
574 * @str: (array length=len) (element-type guint8):
575 * the string to convert.
576 * @len: the length of the string in bytes, or -1 if the string is
577 * nul-terminated (Note that some encodings may allow nul
578 * bytes to occur inside strings. In that case, using -1
579 * for the @len parameter is unsafe)
580 * @to_codeset: name of character set into which to convert @str
581 * @from_codeset: character set of @str.
582 * @fallback: UTF-8 string to use in place of characters not
583 * present in the target encoding. (The string must be
584 * representable in the target encoding).
585 * If %NULL, characters not in the target encoding will
586 * be represented as Unicode escapes \uxxxx or \Uxxxxyyyy.
587 * @bytes_read: (out) (optional): location to store the number of bytes in
588 * the input string that were successfully converted, or %NULL.
589 * Even if the conversion was successful, this may be
590 * less than @len if there were partial characters
591 * at the end of the input.
592 * @bytes_written: (out) (optional): the number of bytes stored in
593 * the output buffer (not including the terminating nul).
594 * @error: location to store the error occurring, or %NULL to ignore
595 * errors. Any of the errors in #GConvertError may occur.
596 *
597 * Converts a string from one character set to another, possibly
598 * including fallback sequences for characters not representable
599 * in the output. Note that it is not guaranteed that the specification
600 * for the fallback sequences in @fallback will be honored. Some
601 * systems may do an approximate conversion from @from_codeset
602 * to @to_codeset in their iconv() functions,
603 * in which case GLib will simply return that approximate conversion.
604 *
605 * Note that you should use g_iconv() for streaming conversions.
606 * Despite the fact that @bytes_read can return information about partial
607 * characters, the g_convert_... functions are not generally suitable
608 * for streaming. If the underlying converter maintains internal state,
609 * then this won't be preserved across successive calls to g_convert(),
610 * g_convert_with_iconv() or g_convert_with_fallback(). (An example of
611 * this is the GNU C converter for CP1255 which does not emit a base
612 * character until it knows that the next character is not a mark that
613 * could combine with the base character.)
614 *
615 * Returns: (array length=bytes_written) (element-type guint8) (transfer full):
616 * If the conversion was successful, a newly allocated buffer
617 * containing the converted string, which must be freed with g_free().
618 * Otherwise %NULL and @error will be set.
619 **/
620 gchar*
g_convert_with_fallback(const gchar * str,gssize len,const gchar * to_codeset,const gchar * from_codeset,const gchar * fallback,gsize * bytes_read,gsize * bytes_written,GError ** error)621 g_convert_with_fallback (const gchar *str,
622 gssize len,
623 const gchar *to_codeset,
624 const gchar *from_codeset,
625 const gchar *fallback,
626 gsize *bytes_read,
627 gsize *bytes_written,
628 GError **error)
629 {
630 gchar *utf8;
631 gchar *dest;
632 gchar *outp;
633 const gchar *insert_str = NULL;
634 const gchar *p;
635 gsize inbytes_remaining;
636 const gchar *save_p = NULL;
637 gsize save_inbytes = 0;
638 gsize outbytes_remaining;
639 gsize err;
640 GIConv cd;
641 gsize outbuf_size;
642 gboolean have_error = FALSE;
643 gboolean done = FALSE;
644
645 GError *local_error = NULL;
646
647 g_return_val_if_fail (str != NULL, NULL);
648 g_return_val_if_fail (to_codeset != NULL, NULL);
649 g_return_val_if_fail (from_codeset != NULL, NULL);
650
651 if (len < 0)
652 len = strlen (str);
653
654 /* Try an exact conversion; we only proceed if this fails
655 * due to an illegal sequence in the input string.
656 */
657 dest = g_convert (str, len, to_codeset, from_codeset,
658 bytes_read, bytes_written, &local_error);
659 if (!local_error)
660 return dest;
661
662 if (!g_error_matches (local_error, G_CONVERT_ERROR, G_CONVERT_ERROR_ILLEGAL_SEQUENCE))
663 {
664 g_propagate_error (error, local_error);
665 return NULL;
666 }
667 else
668 g_error_free (local_error);
669
670 local_error = NULL;
671
672 /* No go; to proceed, we need a converter from "UTF-8" to
673 * to_codeset, and the string as UTF-8.
674 */
675 cd = open_converter (to_codeset, "UTF-8", error);
676 if (cd == (GIConv) -1)
677 {
678 if (bytes_read)
679 *bytes_read = 0;
680
681 if (bytes_written)
682 *bytes_written = 0;
683
684 return NULL;
685 }
686
687 utf8 = g_convert (str, len, "UTF-8", from_codeset,
688 bytes_read, &inbytes_remaining, error);
689 if (!utf8)
690 {
691 close_converter (cd);
692 if (bytes_written)
693 *bytes_written = 0;
694 return NULL;
695 }
696
697 /* Now the heart of the code. We loop through the UTF-8 string, and
698 * whenever we hit an offending character, we form fallback, convert
699 * the fallback to the target codeset, and then go back to
700 * converting the original string after finishing with the fallback.
701 *
702 * The variables save_p and save_inbytes store the input state
703 * for the original string while we are converting the fallback
704 */
705 p = utf8;
706
707 outbuf_size = len + NUL_TERMINATOR_LENGTH;
708 outbytes_remaining = outbuf_size - NUL_TERMINATOR_LENGTH;
709 outp = dest = g_malloc (outbuf_size);
710
711 while (!done && !have_error)
712 {
713 gsize inbytes_tmp = inbytes_remaining;
714 err = g_iconv (cd, (char **)&p, &inbytes_tmp, &outp, &outbytes_remaining);
715 inbytes_remaining = inbytes_tmp;
716
717 if (err == (gsize) -1)
718 {
719 switch (errno)
720 {
721 case EINVAL:
722 g_assert_not_reached();
723 break;
724 case E2BIG:
725 {
726 gsize used = outp - dest;
727
728 outbuf_size *= 2;
729 dest = g_realloc (dest, outbuf_size);
730
731 outp = dest + used;
732 outbytes_remaining = outbuf_size - used - NUL_TERMINATOR_LENGTH;
733
734 break;
735 }
736 case EILSEQ:
737 if (save_p)
738 {
739 /* Error converting fallback string - fatal
740 */
741 g_set_error (error, G_CONVERT_ERROR, G_CONVERT_ERROR_ILLEGAL_SEQUENCE,
742 _("Cannot convert fallback “%s” to codeset “%s”"),
743 insert_str, to_codeset);
744 have_error = TRUE;
745 break;
746 }
747 else if (p)
748 {
749 if (!fallback)
750 {
751 gunichar ch = g_utf8_get_char (p);
752 insert_str = g_strdup_printf (ch < 0x10000 ? "\\u%04x" : "\\U%08x",
753 ch);
754 }
755 else
756 insert_str = fallback;
757
758 save_p = g_utf8_next_char (p);
759 save_inbytes = inbytes_remaining - (save_p - p);
760 p = insert_str;
761 inbytes_remaining = strlen (p);
762 break;
763 }
764 /* if p is null */
765 G_GNUC_FALLTHROUGH;
766 default:
767 {
768 int errsv = errno;
769
770 g_set_error (error, G_CONVERT_ERROR, G_CONVERT_ERROR_FAILED,
771 _("Error during conversion: %s"),
772 g_strerror (errsv));
773 }
774
775 have_error = TRUE;
776 break;
777 }
778 }
779 else
780 {
781 if (save_p)
782 {
783 if (!fallback)
784 g_free ((gchar *)insert_str);
785 p = save_p;
786 inbytes_remaining = save_inbytes;
787 save_p = NULL;
788 }
789 else if (p)
790 {
791 /* call g_iconv with NULL inbuf to cleanup shift state */
792 p = NULL;
793 inbytes_remaining = 0;
794 }
795 else
796 done = TRUE;
797 }
798 }
799
800 /* Cleanup
801 */
802 memset (outp, 0, NUL_TERMINATOR_LENGTH);
803
804 close_converter (cd);
805
806 if (bytes_written)
807 *bytes_written = outp - dest; /* Doesn't include '\0' */
808
809 g_free (utf8);
810
811 if (have_error)
812 {
813 if (save_p && !fallback)
814 g_free ((gchar *)insert_str);
815 g_free (dest);
816 return NULL;
817 }
818 else
819 return dest;
820 }
821
822 /*
823 * g_locale_to_utf8
824 *
825 *
826 */
827
828 /*
829 * Validate @string as UTF-8. @len can be negative if @string is
830 * nul-terminated, or a non-negative value in bytes. If @string ends in an
831 * incomplete sequence, or contains any illegal sequences or nul codepoints,
832 * %NULL will be returned and the error set to
833 * %G_CONVERT_ERROR_ILLEGAL_SEQUENCE.
834 * On success, @bytes_read and @bytes_written, if provided, will be set to
835 * the number of bytes in @string up to @len or the terminating nul byte.
836 * On error, @bytes_read will be set to the byte offset after the last valid
837 * and non-nul UTF-8 sequence in @string, and @bytes_written will be set to 0.
838 */
839 static gchar *
strdup_len(const gchar * string,gssize len,gsize * bytes_read,gsize * bytes_written,GError ** error)840 strdup_len (const gchar *string,
841 gssize len,
842 gsize *bytes_read,
843 gsize *bytes_written,
844 GError **error)
845 {
846 gsize real_len;
847 const gchar *end_valid;
848
849 if (!g_utf8_validate (string, len, &end_valid))
850 {
851 if (bytes_read)
852 *bytes_read = end_valid - string;
853 if (bytes_written)
854 *bytes_written = 0;
855
856 g_set_error_literal (error, G_CONVERT_ERROR, G_CONVERT_ERROR_ILLEGAL_SEQUENCE,
857 _("Invalid byte sequence in conversion input"));
858 return NULL;
859 }
860
861 real_len = end_valid - string;
862
863 if (bytes_read)
864 *bytes_read = real_len;
865 if (bytes_written)
866 *bytes_written = real_len;
867
868 return g_strndup (string, real_len);
869 }
870
871 typedef enum
872 {
873 CONVERT_CHECK_NO_NULS_IN_INPUT = 1 << 0,
874 CONVERT_CHECK_NO_NULS_IN_OUTPUT = 1 << 1
875 } ConvertCheckFlags;
876
877 /*
878 * Convert from @string in the encoding identified by @from_codeset,
 * returning a string in the encoding identified by @to_codeset.
880 * @len can be negative if @string is nul-terminated, or a non-negative
881 * value in bytes. Flags defined in #ConvertCheckFlags can be set in @flags
882 * to check the input, the output, or both, for embedded nul bytes.
883 * On success, @bytes_read, if provided, will be set to the number of bytes
884 * in @string up to @len or the terminating nul byte, and @bytes_written, if
885 * provided, will be set to the number of output bytes written into the
886 * returned buffer, excluding the terminating nul sequence.
887 * On error, @bytes_read will be set to the byte offset after the last valid
888 * sequence in @string, and @bytes_written will be set to 0.
889 */
890 static gchar *
convert_checked(const gchar * string,gssize len,const gchar * to_codeset,const gchar * from_codeset,ConvertCheckFlags flags,gsize * bytes_read,gsize * bytes_written,GError ** error)891 convert_checked (const gchar *string,
892 gssize len,
893 const gchar *to_codeset,
894 const gchar *from_codeset,
895 ConvertCheckFlags flags,
896 gsize *bytes_read,
897 gsize *bytes_written,
898 GError **error)
899 {
900 gchar *out;
901 gsize outbytes;
902
903 if ((flags & CONVERT_CHECK_NO_NULS_IN_INPUT) && len > 0)
904 {
905 const gchar *early_nul = memchr (string, '\0', len);
906 if (early_nul != NULL)
907 {
908 if (bytes_read)
909 *bytes_read = early_nul - string;
910 if (bytes_written)
911 *bytes_written = 0;
912
913 g_set_error_literal (error, G_CONVERT_ERROR, G_CONVERT_ERROR_ILLEGAL_SEQUENCE,
914 _("Embedded NUL byte in conversion input"));
915 return NULL;
916 }
917 }
918
919 out = g_convert (string, len, to_codeset, from_codeset,
920 bytes_read, &outbytes, error);
921 if (out == NULL)
922 {
923 if (bytes_written)
924 *bytes_written = 0;
925 return NULL;
926 }
927
928 if ((flags & CONVERT_CHECK_NO_NULS_IN_OUTPUT)
929 && memchr (out, '\0', outbytes) != NULL)
930 {
931 g_free (out);
932 if (bytes_written)
933 *bytes_written = 0;
934 g_set_error_literal (error, G_CONVERT_ERROR, G_CONVERT_ERROR_EMBEDDED_NUL,
935 _("Embedded NUL byte in conversion output"));
936 return NULL;
937 }
938
939 if (bytes_written)
940 *bytes_written = outbytes;
941 return out;
942 }
943
944 /**
945 * g_locale_to_utf8:
946 * @opsysstring: (array length=len) (element-type guint8): a string in the
947 * encoding of the current locale. On Windows
948 * this means the system codepage.
949 * @len: the length of the string, or -1 if the string is
950 * nul-terminated (Note that some encodings may allow nul
951 * bytes to occur inside strings. In that case, using -1
952 * for the @len parameter is unsafe)
953 * @bytes_read: (out) (optional): location to store the number of bytes in the
954 * input string that were successfully converted, or %NULL.
955 * Even if the conversion was successful, this may be
956 * less than @len if there were partial characters
957 * at the end of the input. If the error
958 * %G_CONVERT_ERROR_ILLEGAL_SEQUENCE occurs, the value
959 * stored will be the byte offset after the last valid
960 * input sequence.
961 * @bytes_written: (out) (optional): the number of bytes stored in the output
962 * buffer (not including the terminating nul).
963 * @error: location to store the error occurring, or %NULL to ignore
964 * errors. Any of the errors in #GConvertError may occur.
965 *
966 * Converts a string which is in the encoding used for strings by
967 * the C runtime (usually the same as that used by the operating
968 * system) in the [current locale][setlocale] into a UTF-8 string.
969 *
970 * If the source encoding is not UTF-8 and the conversion output contains a
971 * nul character, the error %G_CONVERT_ERROR_EMBEDDED_NUL is set and the
972 * function returns %NULL.
973 * If the source encoding is UTF-8, an embedded nul character is treated with
974 * the %G_CONVERT_ERROR_ILLEGAL_SEQUENCE error for backward compatibility with
975 * earlier versions of this library. Use g_convert() to produce output that
976 * may contain embedded nul characters.
977 *
978 * Returns: (type utf8): The converted string, or %NULL on an error.
979 **/
980 gchar *
g_locale_to_utf8(const gchar * opsysstring,gssize len,gsize * bytes_read,gsize * bytes_written,GError ** error)981 g_locale_to_utf8 (const gchar *opsysstring,
982 gssize len,
983 gsize *bytes_read,
984 gsize *bytes_written,
985 GError **error)
986 {
987 const char *charset;
988
989 if (g_get_charset (&charset))
990 return strdup_len (opsysstring, len, bytes_read, bytes_written, error);
991 else
992 return convert_checked (opsysstring, len, "UTF-8", charset,
993 CONVERT_CHECK_NO_NULS_IN_OUTPUT,
994 bytes_read, bytes_written, error);
995 }
996
/*
 * Behaves exactly like g_locale_to_utf8(), except that the charset is
 * retrieved from _g_get_time_charset() (which uses LC_TIME).
 *
 * Returns: The converted string, or %NULL on an error.
 */
1003 gchar *
_g_time_locale_to_utf8(const gchar * opsysstring,gssize len,gsize * bytes_read,gsize * bytes_written,GError ** error)1004 _g_time_locale_to_utf8 (const gchar *opsysstring,
1005 gssize len,
1006 gsize *bytes_read,
1007 gsize *bytes_written,
1008 GError **error)
1009 {
1010 const char *charset;
1011
1012 if (_g_get_time_charset (&charset))
1013 return strdup_len (opsysstring, len, bytes_read, bytes_written, error);
1014 else
1015 return convert_checked (opsysstring, len, "UTF-8", charset,
1016 CONVERT_CHECK_NO_NULS_IN_OUTPUT,
1017 bytes_read, bytes_written, error);
1018 }
1019
/*
 * Behaves exactly like g_locale_to_utf8(), except that the charset is
 * retrieved from _g_get_ctype_charset() (which uses LC_CTYPE).
 *
 * Returns: The converted string, or %NULL on an error.
 */
1026 gchar *
_g_ctype_locale_to_utf8(const gchar * opsysstring,gssize len,gsize * bytes_read,gsize * bytes_written,GError ** error)1027 _g_ctype_locale_to_utf8 (const gchar *opsysstring,
1028 gssize len,
1029 gsize *bytes_read,
1030 gsize *bytes_written,
1031 GError **error)
1032 {
1033 const char *charset;
1034
1035 if (_g_get_ctype_charset (&charset))
1036 return strdup_len (opsysstring, len, bytes_read, bytes_written, error);
1037 else
1038 return convert_checked (opsysstring, len, "UTF-8", charset,
1039 CONVERT_CHECK_NO_NULS_IN_OUTPUT,
1040 bytes_read, bytes_written, error);
1041 }
1042
1043 /**
1044 * g_locale_from_utf8:
1045 * @utf8string: a UTF-8 encoded string
1046 * @len: the length of the string, or -1 if the string is
1047 * nul-terminated.
1048 * @bytes_read: (out) (optional): location to store the number of bytes in the
1049 * input string that were successfully converted, or %NULL.
1050 * Even if the conversion was successful, this may be
1051 * less than @len if there were partial characters
1052 * at the end of the input. If the error
1053 * %G_CONVERT_ERROR_ILLEGAL_SEQUENCE occurs, the value
1054 * stored will be the byte offset after the last valid
1055 * input sequence.
1056 * @bytes_written: (out) (optional): the number of bytes stored in the output
1057 * buffer (not including the terminating nul).
1058 * @error: location to store the error occurring, or %NULL to ignore
1059 * errors. Any of the errors in #GConvertError may occur.
1060 *
1061 * Converts a string from UTF-8 to the encoding used for strings by
1062 * the C runtime (usually the same as that used by the operating
1063 * system) in the [current locale][setlocale]. On Windows this means
1064 * the system codepage.
1065 *
1066 * The input string shall not contain nul characters even if the @len
1067 * argument is positive. A nul character found inside the string will result
1068 * in error %G_CONVERT_ERROR_ILLEGAL_SEQUENCE. Use g_convert() to convert
1069 * input that may contain embedded nul characters.
1070 *
1071 * Returns: (array length=bytes_written) (element-type guint8) (transfer full):
1072 * A newly-allocated buffer containing the converted string,
1073 * or %NULL on an error, and error will be set.
1074 **/
1075 gchar *
g_locale_from_utf8(const gchar * utf8string,gssize len,gsize * bytes_read,gsize * bytes_written,GError ** error)1076 g_locale_from_utf8 (const gchar *utf8string,
1077 gssize len,
1078 gsize *bytes_read,
1079 gsize *bytes_written,
1080 GError **error)
1081 {
1082 const gchar *charset;
1083
1084 if (g_get_charset (&charset))
1085 return strdup_len (utf8string, len, bytes_read, bytes_written, error);
1086 else
1087 return convert_checked (utf8string, len, charset, "UTF-8",
1088 CONVERT_CHECK_NO_NULS_IN_INPUT,
1089 bytes_read, bytes_written, error);
1090 }
1091
1092 #ifndef G_PLATFORM_WIN32
1093
1094 typedef struct _GFilenameCharsetCache GFilenameCharsetCache;
1095
1096 struct _GFilenameCharsetCache {
1097 gboolean is_utf8;
1098 gchar *charset;
1099 gchar **filename_charsets;
1100 };
1101
1102 static void
filename_charset_cache_free(gpointer data)1103 filename_charset_cache_free (gpointer data)
1104 {
1105 GFilenameCharsetCache *cache = data;
1106 g_free (cache->charset);
1107 g_strfreev (cache->filename_charsets);
1108 g_free (cache);
1109 }
1110
1111 /**
1112 * g_get_filename_charsets:
1113 * @filename_charsets: (out) (transfer none) (array zero-terminated=1):
1114 * return location for the %NULL-terminated list of encoding names
1115 *
1116 * Determines the preferred character sets used for filenames.
 * The first character set in @filename_charsets is the filename encoding, the
1118 * subsequent character sets are used when trying to generate a displayable
1119 * representation of a filename, see g_filename_display_name().
1120 *
1121 * On Unix, the character sets are determined by consulting the
1122 * environment variables `G_FILENAME_ENCODING` and `G_BROKEN_FILENAMES`.
1123 * On Windows, the character set used in the GLib API is always UTF-8
1124 * and said environment variables have no effect.
1125 *
1126 * `G_FILENAME_ENCODING` may be set to a comma-separated list of
1127 * character set names. The special token "\@locale" is taken
1128 * to mean the character set for the [current locale][setlocale].
1129 * If `G_FILENAME_ENCODING` is not set, but `G_BROKEN_FILENAMES` is,
1130 * the character set of the current locale is taken as the filename
1131 * encoding. If neither environment variable is set, UTF-8 is taken
1132 * as the filename encoding, but the character set of the current locale
1133 * is also put in the list of encodings.
1134 *
 * The returned @filename_charsets belong to GLib and must not be freed.
1136 *
1137 * Note that on Unix, regardless of the locale character set or
1138 * `G_FILENAME_ENCODING` value, the actual file names present
1139 * on a system might be in any random encoding or just gibberish.
1140 *
1141 * Returns: %TRUE if the filename encoding is UTF-8.
1142 *
1143 * Since: 2.6
1144 */
1145 gboolean
g_get_filename_charsets(const gchar *** filename_charsets)1146 g_get_filename_charsets (const gchar ***filename_charsets)
1147 {
1148 static GPrivate cache_private = G_PRIVATE_INIT (filename_charset_cache_free);
1149 GFilenameCharsetCache *cache = g_private_get (&cache_private);
1150 const gchar *charset;
1151
1152 if (!cache)
1153 cache = g_private_set_alloc0 (&cache_private, sizeof (GFilenameCharsetCache));
1154
1155 g_get_charset (&charset);
1156
1157 if (!(cache->charset && strcmp (cache->charset, charset) == 0))
1158 {
1159 const gchar *new_charset;
1160 const gchar *p;
1161 gint i;
1162
1163 g_free (cache->charset);
1164 g_strfreev (cache->filename_charsets);
1165 cache->charset = g_strdup (charset);
1166
1167 p = g_getenv ("G_FILENAME_ENCODING");
1168 if (p != NULL && p[0] != '\0')
1169 {
1170 cache->filename_charsets = g_strsplit (p, ",", 0);
1171 cache->is_utf8 = (strcmp (cache->filename_charsets[0], "UTF-8") == 0);
1172
1173 for (i = 0; cache->filename_charsets[i]; i++)
1174 {
1175 if (strcmp ("@locale", cache->filename_charsets[i]) == 0)
1176 {
1177 g_get_charset (&new_charset);
1178 g_free (cache->filename_charsets[i]);
1179 cache->filename_charsets[i] = g_strdup (new_charset);
1180 }
1181 }
1182 }
1183 else if (g_getenv ("G_BROKEN_FILENAMES") != NULL)
1184 {
1185 cache->filename_charsets = g_new0 (gchar *, 2);
1186 cache->is_utf8 = g_get_charset (&new_charset);
1187 cache->filename_charsets[0] = g_strdup (new_charset);
1188 }
1189 else
1190 {
1191 cache->filename_charsets = g_new0 (gchar *, 3);
1192 cache->is_utf8 = TRUE;
1193 cache->filename_charsets[0] = g_strdup ("UTF-8");
1194 if (!g_get_charset (&new_charset))
1195 cache->filename_charsets[1] = g_strdup (new_charset);
1196 }
1197 }
1198
1199 if (filename_charsets)
1200 *filename_charsets = (const gchar **)cache->filename_charsets;
1201
1202 return cache->is_utf8;
1203 }
1204
1205 #else /* G_PLATFORM_WIN32 */
1206
1207 gboolean
g_get_filename_charsets(const gchar *** filename_charsets)1208 g_get_filename_charsets (const gchar ***filename_charsets)
1209 {
1210 static const gchar *charsets[] = {
1211 "UTF-8",
1212 NULL
1213 };
1214
1215 #ifdef G_OS_WIN32
1216 /* On Windows GLib pretends that the filename charset is UTF-8 */
1217 if (filename_charsets)
1218 *filename_charsets = charsets;
1219
1220 return TRUE;
1221 #else
1222 gboolean result;
1223
1224 /* Cygwin works like before */
1225 result = g_get_charset (&(charsets[0]));
1226
1227 if (filename_charsets)
1228 *filename_charsets = charsets;
1229
1230 return result;
1231 #endif
1232 }
1233
1234 #endif /* G_PLATFORM_WIN32 */
1235
1236 static gboolean
get_filename_charset(const gchar ** filename_charset)1237 get_filename_charset (const gchar **filename_charset)
1238 {
1239 const gchar **charsets;
1240 gboolean is_utf8;
1241
1242 is_utf8 = g_get_filename_charsets (&charsets);
1243
1244 if (filename_charset)
1245 *filename_charset = charsets[0];
1246
1247 return is_utf8;
1248 }
1249
1250 /**
1251 * g_filename_to_utf8:
1252 * @opsysstring: (type filename): a string in the encoding for filenames
1253 * @len: the length of the string, or -1 if the string is
1254 * nul-terminated (Note that some encodings may allow nul
1255 * bytes to occur inside strings. In that case, using -1
1256 * for the @len parameter is unsafe)
1257 * @bytes_read: (out) (optional): location to store the number of bytes in the
1258 * input string that were successfully converted, or %NULL.
1259 * Even if the conversion was successful, this may be
1260 * less than @len if there were partial characters
1261 * at the end of the input. If the error
1262 * %G_CONVERT_ERROR_ILLEGAL_SEQUENCE occurs, the value
1263 * stored will be the byte offset after the last valid
1264 * input sequence.
1265 * @bytes_written: (out) (optional): the number of bytes stored in the output
1266 * buffer (not including the terminating nul).
1267 * @error: location to store the error occurring, or %NULL to ignore
1268 * errors. Any of the errors in #GConvertError may occur.
1269 *
1270 * Converts a string which is in the encoding used by GLib for
1271 * filenames into a UTF-8 string. Note that on Windows GLib uses UTF-8
1272 * for filenames; on other platforms, this function indirectly depends on
1273 * the [current locale][setlocale].
1274 *
1275 * The input string shall not contain nul characters even if the @len
1276 * argument is positive. A nul character found inside the string will result
1277 * in error %G_CONVERT_ERROR_ILLEGAL_SEQUENCE.
1278 * If the source encoding is not UTF-8 and the conversion output contains a
1279 * nul character, the error %G_CONVERT_ERROR_EMBEDDED_NUL is set and the
1280 * function returns %NULL. Use g_convert() to produce output that
1281 * may contain embedded nul characters.
1282 *
1283 * Returns: (type utf8): The converted string, or %NULL on an error.
1284 **/
1285 gchar*
g_filename_to_utf8(const gchar * opsysstring,gssize len,gsize * bytes_read,gsize * bytes_written,GError ** error)1286 g_filename_to_utf8 (const gchar *opsysstring,
1287 gssize len,
1288 gsize *bytes_read,
1289 gsize *bytes_written,
1290 GError **error)
1291 {
1292 const gchar *charset;
1293
1294 g_return_val_if_fail (opsysstring != NULL, NULL);
1295
1296 if (get_filename_charset (&charset))
1297 return strdup_len (opsysstring, len, bytes_read, bytes_written, error);
1298 else
1299 return convert_checked (opsysstring, len, "UTF-8", charset,
1300 CONVERT_CHECK_NO_NULS_IN_INPUT |
1301 CONVERT_CHECK_NO_NULS_IN_OUTPUT,
1302 bytes_read, bytes_written, error);
1303 }
1304
1305 /**
1306 * g_filename_from_utf8:
1307 * @utf8string: (type utf8): a UTF-8 encoded string.
1308 * @len: the length of the string, or -1 if the string is
1309 * nul-terminated.
1310 * @bytes_read: (out) (optional): location to store the number of bytes in
1311 * the input string that were successfully converted, or %NULL.
1312 * Even if the conversion was successful, this may be
1313 * less than @len if there were partial characters
1314 * at the end of the input. If the error
1315 * %G_CONVERT_ERROR_ILLEGAL_SEQUENCE occurs, the value
1316 * stored will be the byte offset after the last valid
1317 * input sequence.
1318 * @bytes_written: (out) (optional): the number of bytes stored in
1319 * the output buffer (not including the terminating nul).
1320 * @error: location to store the error occurring, or %NULL to ignore
1321 * errors. Any of the errors in #GConvertError may occur.
1322 *
1323 * Converts a string from UTF-8 to the encoding GLib uses for
1324 * filenames. Note that on Windows GLib uses UTF-8 for filenames;
1325 * on other platforms, this function indirectly depends on the
1326 * [current locale][setlocale].
1327 *
1328 * The input string shall not contain nul characters even if the @len
1329 * argument is positive. A nul character found inside the string will result
1330 * in error %G_CONVERT_ERROR_ILLEGAL_SEQUENCE. If the filename encoding is
1331 * not UTF-8 and the conversion output contains a nul character, the error
1332 * %G_CONVERT_ERROR_EMBEDDED_NUL is set and the function returns %NULL.
1333 *
1334 * Returns: (type filename):
1335 * The converted string, or %NULL on an error.
1336 **/
1337 gchar*
g_filename_from_utf8(const gchar * utf8string,gssize len,gsize * bytes_read,gsize * bytes_written,GError ** error)1338 g_filename_from_utf8 (const gchar *utf8string,
1339 gssize len,
1340 gsize *bytes_read,
1341 gsize *bytes_written,
1342 GError **error)
1343 {
1344 const gchar *charset;
1345
1346 if (get_filename_charset (&charset))
1347 return strdup_len (utf8string, len, bytes_read, bytes_written, error);
1348 else
1349 return convert_checked (utf8string, len, charset, "UTF-8",
1350 CONVERT_CHECK_NO_NULS_IN_INPUT |
1351 CONVERT_CHECK_NO_NULS_IN_OUTPUT,
1352 bytes_read, bytes_written, error);
1353 }
1354
/* Test if haystack has the needle prefix, comparing case
 * insensitively. haystack may be UTF-8, but needle must
 * contain only ASCII. */
1358 static gboolean
has_case_prefix(const gchar * haystack,const gchar * needle)1359 has_case_prefix (const gchar *haystack, const gchar *needle)
1360 {
1361 const gchar *h, *n;
1362
1363 /* Eat one character at a time. */
1364 h = haystack;
1365 n = needle;
1366
1367 while (*n && *h &&
1368 g_ascii_tolower (*n) == g_ascii_tolower (*h))
1369 {
1370 n++;
1371 h++;
1372 }
1373
1374 return *n == '\0';
1375 }
1376
/* Bit flags naming the escaping policies understood by g_escape_uri_string().
 * Each acceptable[] table entry is ANDed with one of these values (see the
 * ACCEPTABLE() macro); a non-zero result means the character is left
 * unescaped under that policy. */
typedef enum {
  UNSAFE_ALL        = 0x1,  /* Escape all unsafe characters   */
  UNSAFE_ALLOW_PLUS = 0x2,  /* Allows '+'  */
  UNSAFE_PATH       = 0x8,  /* Allows '/', '&', '=', ':', '@', '+', '$' and ',' */
  UNSAFE_HOST       = 0x10, /* Allows '/' and ':' and '@' */
  UNSAFE_SLASHES    = 0x20  /* Allows all characters except for '/' and '%' */
} UnsafeCharacterSet;
1384
/* Per-character escaping policy, indexed by (character - 32). Each entry is
 * a bitmask of UnsafeCharacterSet flags: a character is emitted unescaped
 * by g_escape_uri_string() when the entry has the requested flag set.
 * Entries of 0x00 (space and '%') are therefore always escaped. Bit 0x4 is
 * set in some entries, but no UnsafeCharacterSet value uses it. */
static const guchar acceptable[96] = {
  /* A table of the ASCII chars from space (32) to DEL (127) */
  /*      !    "    #    $    %    &    '    (    )    *    +    ,    -    .    / */
  0x00,0x3F,0x20,0x20,0x28,0x00,0x2C,0x3F,0x3F,0x3F,0x3F,0x2A,0x28,0x3F,0x3F,0x1C,
  /* 0    1    2    3    4    5    6    7    8    9    :    ;    <    =    >    ? */
  0x3F,0x3F,0x3F,0x3F,0x3F,0x3F,0x3F,0x3F,0x3F,0x3F,0x38,0x20,0x20,0x2C,0x20,0x20,
  /* @    A    B    C    D    E    F    G    H    I    J    K    L    M    N    O */
  0x38,0x3F,0x3F,0x3F,0x3F,0x3F,0x3F,0x3F,0x3F,0x3F,0x3F,0x3F,0x3F,0x3F,0x3F,0x3F,
  /* P    Q    R    S    T    U    V    W    X    Y    Z    [    \    ]    ^    _ */
  0x3F,0x3F,0x3F,0x3F,0x3F,0x3F,0x3F,0x3F,0x3F,0x3F,0x3F,0x20,0x20,0x20,0x20,0x3F,
  /* `    a    b    c    d    e    f    g    h    i    j    k    l    m    n    o */
  0x20,0x3F,0x3F,0x3F,0x3F,0x3F,0x3F,0x3F,0x3F,0x3F,0x3F,0x3F,0x3F,0x3F,0x3F,0x3F,
  /* p    q    r    s    t    u    v    w    x    y    z    {    |    }    ~  DEL */
  0x3F,0x3F,0x3F,0x3F,0x3F,0x3F,0x3F,0x3F,0x3F,0x3F,0x3F,0x20,0x20,0x20,0x3F,0x20
};
1400
/* Upper-case hexadecimal digits, indexed by nibble value, used to emit
 * "%XX" escapes. The array has exactly 16 elements, so the initializer's
 * terminating nul is not stored; do not treat it as a C string. */
static const gchar hex[16] = "0123456789ABCDEF";
1402
1403 /* Note: This escape function works on file: URIs, but if you want to
1404 * escape something else, please read RFC-2396 */
1405 static gchar *
g_escape_uri_string(const gchar * string,UnsafeCharacterSet mask)1406 g_escape_uri_string (const gchar *string,
1407 UnsafeCharacterSet mask)
1408 {
1409 #define ACCEPTABLE(a) ((a)>=32 && (a)<128 && (acceptable[(a)-32] & use_mask))
1410
1411 const gchar *p;
1412 gchar *q;
1413 gchar *result;
1414 int c;
1415 gint unacceptable;
1416 UnsafeCharacterSet use_mask;
1417
1418 g_return_val_if_fail (mask == UNSAFE_ALL
1419 || mask == UNSAFE_ALLOW_PLUS
1420 || mask == UNSAFE_PATH
1421 || mask == UNSAFE_HOST
1422 || mask == UNSAFE_SLASHES, NULL);
1423
1424 unacceptable = 0;
1425 use_mask = mask;
1426 for (p = string; *p != '\0'; p++)
1427 {
1428 c = (guchar) *p;
1429 if (!ACCEPTABLE (c))
1430 unacceptable++;
1431 }
1432
1433 result = g_malloc (p - string + unacceptable * 2 + 1);
1434
1435 use_mask = mask;
1436 for (q = result, p = string; *p != '\0'; p++)
1437 {
1438 c = (guchar) *p;
1439
1440 if (!ACCEPTABLE (c))
1441 {
1442 *q++ = '%'; /* means hex coming */
1443 *q++ = hex[c >> 4];
1444 *q++ = hex[c & 15];
1445 }
1446 else
1447 *q++ = *p;
1448 }
1449
1450 *q = '\0';
1451
1452 return result;
1453 }
1454
1455
1456 static gchar *
g_escape_file_uri(const gchar * hostname,const gchar * pathname)1457 g_escape_file_uri (const gchar *hostname,
1458 const gchar *pathname)
1459 {
1460 char *escaped_hostname = NULL;
1461 char *escaped_path;
1462 char *res;
1463
1464 #ifdef G_OS_WIN32
1465 char *p, *backslash;
1466
1467 /* Turn backslashes into forward slashes. That's what Netscape
1468 * does, and they are actually more or less equivalent in Windows.
1469 */
1470
1471 pathname = g_strdup (pathname);
1472 p = (char *) pathname;
1473
1474 while ((backslash = strchr (p, '\\')) != NULL)
1475 {
1476 *backslash = '/';
1477 p = backslash + 1;
1478 }
1479 #endif
1480
1481 if (hostname && *hostname != '\0')
1482 {
1483 escaped_hostname = g_escape_uri_string (hostname, UNSAFE_HOST);
1484 }
1485
1486 escaped_path = g_escape_uri_string (pathname, UNSAFE_PATH);
1487
1488 res = g_strconcat ("file://",
1489 (escaped_hostname) ? escaped_hostname : "",
1490 (*escaped_path != '/') ? "/" : "",
1491 escaped_path,
1492 NULL);
1493
1494 #ifdef G_OS_WIN32
1495 g_free ((char *) pathname);
1496 #endif
1497
1498 g_free (escaped_hostname);
1499 g_free (escaped_path);
1500
1501 return res;
1502 }
1503
/* Decodes the two hexadecimal digits at scanner[0] and scanner[1] into a
 * byte value. Returns -1 if either character is not a hex digit; the
 * second character is only inspected once the first has been accepted. */
static int
unescape_character (const char *scanner)
{
  int high;
  int low;

  high = g_ascii_xdigit_value (scanner[0]);
  if (high < 0)
    return -1;

  low = g_ascii_xdigit_value (scanner[1]);
  if (low < 0)
    return -1;

  return (high << 4) | low;
}
1520
1521 static gchar *
g_unescape_uri_string(const char * escaped,int len,const char * illegal_escaped_characters,gboolean ascii_must_not_be_escaped)1522 g_unescape_uri_string (const char *escaped,
1523 int len,
1524 const char *illegal_escaped_characters,
1525 gboolean ascii_must_not_be_escaped)
1526 {
1527 const gchar *in, *in_end;
1528 gchar *out, *result;
1529 int c;
1530
1531 if (escaped == NULL)
1532 return NULL;
1533
1534 if (len < 0)
1535 len = strlen (escaped);
1536
1537 result = g_malloc (len + 1);
1538
1539 out = result;
1540 for (in = escaped, in_end = escaped + len; in < in_end; in++)
1541 {
1542 c = *in;
1543
1544 if (c == '%')
1545 {
1546 /* catch partial escape sequences past the end of the substring */
1547 if (in + 3 > in_end)
1548 break;
1549
1550 c = unescape_character (in + 1);
1551
1552 /* catch bad escape sequences and NUL characters */
1553 if (c <= 0)
1554 break;
1555
1556 /* catch escaped ASCII */
1557 if (ascii_must_not_be_escaped && c <= 0x7F)
1558 break;
1559
1560 /* catch other illegal escaped characters */
1561 if (strchr (illegal_escaped_characters, c) != NULL)
1562 break;
1563
1564 in += 2;
1565 }
1566
1567 *out++ = c;
1568 }
1569
1570 g_assert (out - result <= len);
1571 *out = '\0';
1572
1573 if (in != in_end)
1574 {
1575 g_free (result);
1576 return NULL;
1577 }
1578
1579 return result;
1580 }
1581
1582 static gboolean
is_asciialphanum(gunichar c)1583 is_asciialphanum (gunichar c)
1584 {
1585 return c <= 0x7F && g_ascii_isalnum (c);
1586 }
1587
1588 static gboolean
is_asciialpha(gunichar c)1589 is_asciialpha (gunichar c)
1590 {
1591 return c <= 0x7F && g_ascii_isalpha (c);
1592 }
1593
1594 /* allows an empty string */
1595 static gboolean
hostname_validate(const char * hostname)1596 hostname_validate (const char *hostname)
1597 {
1598 const char *p;
1599 gunichar c, first_char, last_char;
1600
1601 p = hostname;
1602 if (*p == '\0')
1603 return TRUE;
1604 do
1605 {
1606 /* read in a label */
1607 c = g_utf8_get_char (p);
1608 p = g_utf8_next_char (p);
1609 if (!is_asciialphanum (c))
1610 return FALSE;
1611 first_char = c;
1612 do
1613 {
1614 last_char = c;
1615 c = g_utf8_get_char (p);
1616 p = g_utf8_next_char (p);
1617 }
1618 while (is_asciialphanum (c) || c == '-');
1619 if (last_char == '-')
1620 return FALSE;
1621
1622 /* if that was the last label, check that it was a toplabel */
1623 if (c == '\0' || (c == '.' && *p == '\0'))
1624 return is_asciialpha (first_char);
1625 }
1626 while (c == '.');
1627 return FALSE;
1628 }
1629
1630 /**
1631 * g_filename_from_uri:
1632 * @uri: a uri describing a filename (escaped, encoded in ASCII).
1633 * @hostname: (out) (optional) (nullable): Location to store hostname for the URI.
1634 * If there is no hostname in the URI, %NULL will be
1635 * stored in this location.
1636 * @error: location to store the error occurring, or %NULL to ignore
1637 * errors. Any of the errors in #GConvertError may occur.
1638 *
1639 * Converts an escaped ASCII-encoded URI to a local filename in the
1640 * encoding used for filenames.
1641 *
1642 * Returns: (type filename): a newly-allocated string holding
1643 * the resulting filename, or %NULL on an error.
1644 **/
1645 gchar *
g_filename_from_uri(const gchar * uri,gchar ** hostname,GError ** error)1646 g_filename_from_uri (const gchar *uri,
1647 gchar **hostname,
1648 GError **error)
1649 {
1650 const char *path_part;
1651 const char *host_part;
1652 char *unescaped_hostname;
1653 char *result;
1654 char *filename;
1655 int offs;
1656 #ifdef G_OS_WIN32
1657 char *p, *slash;
1658 #endif
1659
1660 if (hostname)
1661 *hostname = NULL;
1662
1663 if (!has_case_prefix (uri, "file:/"))
1664 {
1665 g_set_error (error, G_CONVERT_ERROR, G_CONVERT_ERROR_BAD_URI,
1666 _("The URI “%s” is not an absolute URI using the “file” scheme"),
1667 uri);
1668 return NULL;
1669 }
1670
1671 path_part = uri + strlen ("file:");
1672
1673 if (strchr (path_part, '#') != NULL)
1674 {
1675 g_set_error (error, G_CONVERT_ERROR, G_CONVERT_ERROR_BAD_URI,
1676 _("The local file URI “%s” may not include a “#”"),
1677 uri);
1678 return NULL;
1679 }
1680
1681 if (has_case_prefix (path_part, "///"))
1682 path_part += 2;
1683 else if (has_case_prefix (path_part, "//"))
1684 {
1685 path_part += 2;
1686 host_part = path_part;
1687
1688 path_part = strchr (path_part, '/');
1689
1690 if (path_part == NULL)
1691 {
1692 g_set_error (error, G_CONVERT_ERROR, G_CONVERT_ERROR_BAD_URI,
1693 _("The URI “%s” is invalid"),
1694 uri);
1695 return NULL;
1696 }
1697
1698 unescaped_hostname = g_unescape_uri_string (host_part, path_part - host_part, "", TRUE);
1699
1700 if (unescaped_hostname == NULL ||
1701 !hostname_validate (unescaped_hostname))
1702 {
1703 g_free (unescaped_hostname);
1704 g_set_error (error, G_CONVERT_ERROR, G_CONVERT_ERROR_BAD_URI,
1705 _("The hostname of the URI “%s” is invalid"),
1706 uri);
1707 return NULL;
1708 }
1709
1710 if (hostname)
1711 *hostname = unescaped_hostname;
1712 else
1713 g_free (unescaped_hostname);
1714 }
1715
1716 filename = g_unescape_uri_string (path_part, -1, "/", FALSE);
1717
1718 if (filename == NULL)
1719 {
1720 g_set_error (error, G_CONVERT_ERROR, G_CONVERT_ERROR_BAD_URI,
1721 _("The URI “%s” contains invalidly escaped characters"),
1722 uri);
1723 return NULL;
1724 }
1725
1726 offs = 0;
1727 #ifdef G_OS_WIN32
1728 /* Drop localhost */
1729 if (hostname && *hostname != NULL &&
1730 g_ascii_strcasecmp (*hostname, "localhost") == 0)
1731 {
1732 g_free (*hostname);
1733 *hostname = NULL;
1734 }
1735
1736 /* Turn slashes into backslashes, because that's the canonical spelling */
1737 p = filename;
1738 while ((slash = strchr (p, '/')) != NULL)
1739 {
1740 *slash = '\\';
1741 p = slash + 1;
1742 }
1743
1744 /* Windows URIs with a drive letter can be like "file://host/c:/foo"
1745 * or "file://host/c|/foo" (some Netscape versions). In those cases, start
1746 * the filename from the drive letter.
1747 */
1748 if (g_ascii_isalpha (filename[1]))
1749 {
1750 if (filename[2] == ':')
1751 offs = 1;
1752 else if (filename[2] == '|')
1753 {
1754 filename[2] = ':';
1755 offs = 1;
1756 }
1757 }
1758 #endif
1759
1760 result = g_strdup (filename + offs);
1761 g_free (filename);
1762
1763 return result;
1764 }
1765
1766 /**
1767 * g_filename_to_uri:
1768 * @filename: (type filename): an absolute filename specified in the GLib file
1769 * name encoding, which is the on-disk file name bytes on Unix, and UTF-8
1770 * on Windows
1771 * @hostname: (nullable): A UTF-8 encoded hostname, or %NULL for none.
1772 * @error: location to store the error occurring, or %NULL to ignore
1773 * errors. Any of the errors in #GConvertError may occur.
1774 *
1775 * Converts an absolute filename to an escaped ASCII-encoded URI, with the path
1776 * component following Section 3.3. of RFC 2396.
1777 *
1778 * Returns: a newly-allocated string holding the resulting
1779 * URI, or %NULL on an error.
1780 **/
1781 gchar *
g_filename_to_uri(const gchar * filename,const gchar * hostname,GError ** error)1782 g_filename_to_uri (const gchar *filename,
1783 const gchar *hostname,
1784 GError **error)
1785 {
1786 char *escaped_uri;
1787
1788 g_return_val_if_fail (filename != NULL, NULL);
1789
1790 if (!g_path_is_absolute (filename))
1791 {
1792 g_set_error (error, G_CONVERT_ERROR, G_CONVERT_ERROR_NOT_ABSOLUTE_PATH,
1793 _("The pathname “%s” is not an absolute path"),
1794 filename);
1795 return NULL;
1796 }
1797
1798 if (hostname &&
1799 !(g_utf8_validate (hostname, -1, NULL)
1800 && hostname_validate (hostname)))
1801 {
1802 g_set_error_literal (error, G_CONVERT_ERROR, G_CONVERT_ERROR_ILLEGAL_SEQUENCE,
1803 _("Invalid hostname"));
1804 return NULL;
1805 }
1806
1807 #ifdef G_OS_WIN32
1808 /* Don't use localhost unnecessarily */
1809 if (hostname && g_ascii_strcasecmp (hostname, "localhost") == 0)
1810 hostname = NULL;
1811 #endif
1812
1813 escaped_uri = g_escape_file_uri (hostname, filename);
1814
1815 return escaped_uri;
1816 }
1817
1818 /**
1819 * g_uri_list_extract_uris:
1820 * @uri_list: an URI list
1821 *
1822 * Splits an URI list conforming to the text/uri-list
1823 * mime type defined in RFC 2483 into individual URIs,
1824 * discarding any comments. The URIs are not validated.
1825 *
1826 * Returns: (transfer full): a newly allocated %NULL-terminated list
1827 * of strings holding the individual URIs. The array should be freed
1828 * with g_strfreev().
1829 *
1830 * Since: 2.6
1831 */
1832 gchar **
g_uri_list_extract_uris(const gchar * uri_list)1833 g_uri_list_extract_uris (const gchar *uri_list)
1834 {
1835 GPtrArray *uris;
1836 const gchar *p, *q;
1837
1838 uris = g_ptr_array_new ();
1839
1840 p = uri_list;
1841
1842 /* We don't actually try to validate the URI according to RFC
1843 * 2396, or even check for allowed characters - we just ignore
1844 * comments and trim whitespace off the ends. We also
1845 * allow LF delimination as well as the specified CRLF.
1846 *
1847 * We do allow comments like specified in RFC 2483.
1848 */
1849 while (p)
1850 {
1851 if (*p != '#')
1852 {
1853 while (g_ascii_isspace (*p))
1854 p++;
1855
1856 q = p;
1857 while (*q && (*q != '\n') && (*q != '\r'))
1858 q++;
1859
1860 if (q > p)
1861 {
1862 q--;
1863 while (q > p && g_ascii_isspace (*q))
1864 q--;
1865
1866 if (q > p)
1867 g_ptr_array_add (uris, g_strndup (p, q - p + 1));
1868 }
1869 }
1870 p = strchr (p, '\n');
1871 if (p)
1872 p++;
1873 }
1874
1875 g_ptr_array_add (uris, NULL);
1876
1877 return (gchar **) g_ptr_array_free (uris, FALSE);
1878 }
1879
1880 /**
1881 * g_filename_display_basename:
1882 * @filename: (type filename): an absolute pathname in the
1883 * GLib file name encoding
1884 *
1885 * Returns the display basename for the particular filename, guaranteed
1886 * to be valid UTF-8. The display name might not be identical to the filename,
1887 * for instance there might be problems converting it to UTF-8, and some files
1888 * can be translated in the display.
1889 *
1890 * If GLib cannot make sense of the encoding of @filename, as a last resort it
1891 * replaces unknown characters with U+FFFD, the Unicode replacement character.
1892 * You can search the result for the UTF-8 encoding of this character (which is
1893 * "\357\277\275" in octal notation) to find out if @filename was in an invalid
1894 * encoding.
1895 *
 * You must pass the whole absolute pathname to this function so that
1897 * translation of well known locations can be done.
1898 *
1899 * This function is preferred over g_filename_display_name() if you know the
1900 * whole path, as it allows translation.
1901 *
1902 * Returns: a newly allocated string containing
1903 * a rendition of the basename of the filename in valid UTF-8
1904 *
1905 * Since: 2.6
1906 **/
1907 gchar *
g_filename_display_basename(const gchar * filename)1908 g_filename_display_basename (const gchar *filename)
1909 {
1910 char *basename;
1911 char *display_name;
1912
1913 g_return_val_if_fail (filename != NULL, NULL);
1914
1915 basename = g_path_get_basename (filename);
1916 display_name = g_filename_display_name (basename);
1917 g_free (basename);
1918 return display_name;
1919 }
1920
1921 /**
1922 * g_filename_display_name:
1923 * @filename: (type filename): a pathname hopefully in the
1924 * GLib file name encoding
1925 *
1926 * Converts a filename into a valid UTF-8 string. The conversion is
1927 * not necessarily reversible, so you should keep the original around
1928 * and use the return value of this function only for display purposes.
1929 * Unlike g_filename_to_utf8(), the result is guaranteed to be non-%NULL
1930 * even if the filename actually isn't in the GLib file name encoding.
1931 *
1932 * If GLib cannot make sense of the encoding of @filename, as a last resort it
1933 * replaces unknown characters with U+FFFD, the Unicode replacement character.
1934 * You can search the result for the UTF-8 encoding of this character (which is
1935 * "\357\277\275" in octal notation) to find out if @filename was in an invalid
1936 * encoding.
1937 *
1938 * If you know the whole pathname of the file you should use
1939 * g_filename_display_basename(), since that allows location-based
1940 * translation of filenames.
1941 *
1942 * Returns: a newly allocated string containing
1943 * a rendition of the filename in valid UTF-8
1944 *
1945 * Since: 2.6
1946 **/
1947 gchar *
g_filename_display_name(const gchar * filename)1948 g_filename_display_name (const gchar *filename)
1949 {
1950 gint i;
1951 const gchar **charsets;
1952 gchar *display_name = NULL;
1953 gboolean is_utf8;
1954
1955 is_utf8 = g_get_filename_charsets (&charsets);
1956
1957 if (is_utf8)
1958 {
1959 if (g_utf8_validate (filename, -1, NULL))
1960 display_name = g_strdup (filename);
1961 }
1962
1963 if (!display_name)
1964 {
1965 /* Try to convert from the filename charsets to UTF-8.
1966 * Skip the first charset if it is UTF-8.
1967 */
1968 for (i = is_utf8 ? 1 : 0; charsets[i]; i++)
1969 {
1970 display_name = g_convert (filename, -1, "UTF-8", charsets[i],
1971 NULL, NULL, NULL);
1972
1973 if (display_name)
1974 break;
1975 }
1976 }
1977
1978 /* if all conversions failed, we replace invalid UTF-8
1979 * by a question mark
1980 */
1981 if (!display_name)
1982 display_name = g_utf8_make_valid (filename, -1);
1983
1984 return display_name;
1985 }
1986
1987 #ifdef G_OS_WIN32
1988
1989 /* Binary compatibility versions. Not for newly compiled code. */
1990
1991 _GLIB_EXTERN gchar *g_filename_to_utf8_utf8 (const gchar *opsysstring,
1992 gssize len,
1993 gsize *bytes_read,
1994 gsize *bytes_written,
1995 GError **error) G_GNUC_MALLOC;
1996 _GLIB_EXTERN gchar *g_filename_from_utf8_utf8 (const gchar *utf8string,
1997 gssize len,
1998 gsize *bytes_read,
1999 gsize *bytes_written,
2000 GError **error) G_GNUC_MALLOC;
2001 _GLIB_EXTERN gchar *g_filename_from_uri_utf8 (const gchar *uri,
2002 gchar **hostname,
2003 GError **error) G_GNUC_MALLOC;
2004 _GLIB_EXTERN gchar *g_filename_to_uri_utf8 (const gchar *filename,
2005 const gchar *hostname,
2006 GError **error) G_GNUC_MALLOC;
2007
2008 gchar *
g_filename_to_utf8_utf8(const gchar * opsysstring,gssize len,gsize * bytes_read,gsize * bytes_written,GError ** error)2009 g_filename_to_utf8_utf8 (const gchar *opsysstring,
2010 gssize len,
2011 gsize *bytes_read,
2012 gsize *bytes_written,
2013 GError **error)
2014 {
2015 return g_filename_to_utf8 (opsysstring, len, bytes_read, bytes_written, error);
2016 }
2017
2018 gchar *
g_filename_from_utf8_utf8(const gchar * utf8string,gssize len,gsize * bytes_read,gsize * bytes_written,GError ** error)2019 g_filename_from_utf8_utf8 (const gchar *utf8string,
2020 gssize len,
2021 gsize *bytes_read,
2022 gsize *bytes_written,
2023 GError **error)
2024 {
2025 return g_filename_from_utf8 (utf8string, len, bytes_read, bytes_written, error);
2026 }
2027
2028 gchar *
g_filename_from_uri_utf8(const gchar * uri,gchar ** hostname,GError ** error)2029 g_filename_from_uri_utf8 (const gchar *uri,
2030 gchar **hostname,
2031 GError **error)
2032 {
2033 return g_filename_from_uri (uri, hostname, error);
2034 }
2035
2036 gchar *
g_filename_to_uri_utf8(const gchar * filename,const gchar * hostname,GError ** error)2037 g_filename_to_uri_utf8 (const gchar *filename,
2038 const gchar *hostname,
2039 GError **error)
2040 {
2041 return g_filename_to_uri (filename, hostname, error);
2042 }
2043
2044 #endif
2045