1 /* Charset handling for GNU tar.
2 
3    Copyright 2004-2021 Free Software Foundation, Inc.
4 
5    This file is part of GNU tar.
6 
7    GNU tar is free software; you can redistribute it and/or modify
8    it under the terms of the GNU General Public License as published by
9    the Free Software Foundation; either version 3 of the License, or
10    (at your option) any later version.
11 
12    GNU tar is distributed in the hope that it will be useful,
13    but WITHOUT ANY WARRANTY; without even the implied warranty of
14    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15    GNU General Public License for more details.
16 
17    You should have received a copy of the GNU General Public License
18    along with this program.  If not, see <http://www.gnu.org/licenses/>.  */
19 
20 #include <system.h>
21 #include <quotearg.h>
22 #include <localcharset.h>
23 #include "common.h"
24 #ifdef HAVE_ICONV_H
25 # include <iconv.h>
26 #endif
27 
/* ICONV_CONST appears below as a qualifier in the type of the input
   pointer handed to iconv.  Default it to nothing when it has not
   already been defined.  */
#if !defined ICONV_CONST
# define ICONV_CONST
#endif

#if !defined HAVE_ICONV

/* Without HAVE_ICONV, replace the iconv interface with stand-ins so the
   rest of this file still compiles: the descriptor type becomes a plain
   int, opening a descriptor always yields (iconv_t) -1, converting
   always fails with errno set to ENOSYS, and closing always yields 0.
   None of the stand-in macros evaluates its arguments.  */

# undef iconv_t
# define iconv_t int

# undef iconv_open
# define iconv_open(tocode, fromcode) ((iconv_t) -1)

# undef iconv
# define iconv(cd, inbuf, inbytesleft, outbuf, outbytesleft) \
   (errno = ENOSYS, (size_t) -1)

# undef iconv_close
# define iconv_close(cd) 0

#endif
47 
48 
49 
50 
/* Conversion descriptors, one per direction: slot 0 converts from UTF-8
   to the locale's charset, slot 1 from the locale's charset to UTF-8.
   A slot holds (iconv_t) -1 until a descriptor has been opened for it.  */
static iconv_t conv_desc[2] = { (iconv_t) -1, (iconv_t) -1 };

/* Return the conversion descriptor for the requested direction: to
   UTF-8 if TO_UTF is true, from UTF-8 otherwise.  The descriptor is
   opened on first use and remembered in conv_desc.  If opening fails,
   the slot keeps (iconv_t) -1, that value is returned, and the next
   call tries to open it again.  */
static iconv_t
utf8_init (bool to_utf)
{
  iconv_t *slot = &conv_desc[to_utf ? 1 : 0];

  /* Already opened by an earlier call: reuse it.  */
  if (*slot != (iconv_t) -1)
    return *slot;

  *slot = (to_utf
	   ? iconv_open ("UTF-8", locale_charset ())
	   : iconv_open (locale_charset (), "UTF-8"));
  return *slot;
}
65 
66 bool
utf8_convert(bool to_utf,char const * input,char ** output)67 utf8_convert (bool to_utf, char const *input, char **output)
68 {
69   char ICONV_CONST *ib;
70   char *ob, *ret;
71   size_t inlen;
72   size_t outlen;
73   iconv_t cd = utf8_init (to_utf);
74 
75   if (cd == 0)
76     {
77       *output = xstrdup (input);
78       return true;
79     }
80   else if (cd == (iconv_t)-1)
81     return false;
82 
83   inlen = strlen (input) + 1;
84   outlen = inlen * MB_LEN_MAX + 1;
85   ob = ret = xmalloc (outlen);
86   ib = (char ICONV_CONST *) input;
87   /* According to POSIX, "if iconv() encounters a character in the input
88      buffer that is valid, but for which an identical character does not
89      exist in the target codeset, iconv() shall perform an
90      implementation-defined conversion on this character." It will "update
91      the variables pointed to by the arguments to reflect the extent of the
92      conversion and return the number of non-identical conversions performed".
93      On error, it returns -1.
94      In other words, non-zero return always indicates failure, either because
95      the input was not fully converted, or because it was converted in a
96      non-reversible way.
97    */
98   if (iconv (cd, &ib, &inlen, &ob, &outlen) != 0)
99     {
100       free (ret);
101       return false;
102     }
103   *ob = 0;
104   *output = ret;
105   return true;
106 }
107 
108 
/* Return true if the NUL-terminated string P consists solely of 7-bit
   ASCII bytes (an empty string qualifies).  Return false as soon as a
   byte with any bit above the low seven set is found.  */
bool
string_ascii_p (char const *p)
{
  char const *cursor = p;

  while (*cursor != '\0')
    {
      /* Look at the byte as unsigned so the result does not depend on
	 whether plain char is signed.  */
      if ((unsigned char) *cursor > 0x7f)
	return false;
      cursor++;
    }
  return true;
}
117