1 /*
2  * ProFTPD - mod_sftp UTF8 encoding
3  * Copyright (c) 2008-2017 TJ Saunders
4  *
5  * This program is free software; you can redistribute it and/or modify
6  * it under the terms of the GNU General Public License as published by
7  * the Free Software Foundation; either version 2 of the License, or
8  * (at your option) any later version.
9  *
10  * This program is distributed in the hope that it will be useful,
11  * but WITHOUT ANY WARRANTY; without even the implied warranty of
12  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
13  * GNU General Public License for more details.
14  *
15  * You should have received a copy of the GNU General Public License
16  * along with this program; if not, write to the Free Software
17  * Foundation, Inc., 51 Franklin Street, Suite 500, Boston, MA 02110-1335, USA.
18  *
19  * As a special exemption, TJ Saunders and other respective copyright holders
20  * give permission to link this program with OpenSSL, and distribute the
21  * resulting executable, without including the source code for OpenSSL in the
22  * source distribution.
23  */
24 
25 #include "mod_sftp.h"
26 #include "utf8.h"
27 
28 #ifdef HAVE_ICONV_H
29 # include <iconv.h>
30 #endif
31 
32 #ifdef HAVE_LANGINFO_H
33 # include <langinfo.h>
34 #endif
35 
/* Name of the local charset that strings are converted to/from UTF-8 with.
 * NULL until it is set by sftp_utf8_set_charset() or sftp_utf8_init().
 */
static const char *local_charset = NULL;

#if defined(PR_USE_NLS) && defined(HAVE_ICONV_H)
/* iconv(3) conversion handles; (iconv_t) -1 marks a handle that is not open.
 * decode_conv converts from UTF-8 to the local charset, and encode_conv
 * converts from the local charset to UTF-8 (see the iconv_open(3) calls in
 * sftp_utf8_init()).
 */
static iconv_t decode_conv = (iconv_t) -1;
static iconv_t encode_conv = (iconv_t) -1;
41 
/* Run a single iconv(3) conversion pass over inbuf using the descriptor conv.
 *
 * On entry, *inbuflen holds the number of input bytes to convert and
 * *outbuflen the space available in outbuf; both counters are handed to
 * iconv(3), which updates them as it consumes input and produces output.
 *
 * Returns 0 on success (including when there is no input to convert), or -1
 * if iconv(3) reports an error, with errno left as set by iconv(3).  When
 * built without HAVE_ICONV, always returns -1 with errno set to ENOSYS.
 */
static int utf8_convert(iconv_t conv, const char *inbuf, size_t *inbuflen,
    char *outbuf, size_t *outbuflen) {
# ifdef HAVE_ICONV
  size_t converted;

  /* Put the descriptor back into its initial state first, so that nothing
   * carries over from an earlier conversion.
   */
  (void) iconv(conv, NULL, NULL, NULL, NULL);

  if (*inbuflen == 0) {
    /* Nothing to convert. */
    return 0;
  }

  pr_signals_handle();

  /* The input buffer parameter of iconv(3) is a char ** on Linux/Mac OSX,
   * but a const char ** on Solaris/FreeBSD; hence the cast for the former.
   */
#if defined(LINUX) || defined(DARWIN6) || defined(DARWIN7) || \
    defined(DARWIN8) || defined(DARWIN9) || defined(DARWIN10) || \
    defined(DARWIN11) || defined(DARWIN12)
  converted = iconv(conv, (char **) &inbuf, inbuflen, &outbuf, outbuflen);
#else
  converted = iconv(conv, &inbuf, inbuflen, &outbuf, outbuflen);
#endif

  /* Exactly one iconv(3) call is made; no retry is attempted, whatever the
   * remaining input length is.
   *
   * Note: an errno of EILSEQ on failure can indicate a badly encoded string
   * OR (more likely) that the source character set given to iconv_open(3)
   * for this descriptor does not accurately describe the encoding of the
   * string, e.g. an ISO8859-1 filename handled by a descriptor that was
   * opened for US-ASCII.
   */
  return (converted == (size_t) -1) ? -1 : 0;

# else
  errno = ENOSYS;
  return -1;
# endif /* HAVE_ICONV */
}
#endif /* PR_USE_NLS && HAVE_ICONV_H */
93 
sftp_utf8_set_charset(const char * charset)94 int sftp_utf8_set_charset(const char *charset) {
95   int res;
96 
97   if (charset == NULL) {
98     errno = EINVAL;
99     return -1;
100   }
101 
102   if (local_charset) {
103     pr_trace_msg("sftp", 5,
104       "attempting to switch local charset from %s to %s", local_charset,
105       charset);
106 
107   } else {
108     pr_trace_msg("sftp", 5, "attempting to use %s as local charset", charset);
109   }
110 
111   (void) sftp_utf8_free();
112 
113   local_charset = pstrdup(permanent_pool, charset);
114 
115   res = sftp_utf8_init();
116   if (res < 0) {
117     pr_trace_msg("sftp", 1,
118       "failed to initialize encoding for local charset %s", charset);
119     local_charset = NULL;
120     return -1;
121   }
122 
123   return res;
124 }
125 
/* Close the encoding and decoding conversion handles, if they are open.
 *
 * Both handles are always marked as closed ((iconv_t) -1) afterwards, even
 * when iconv_close(3) fails for one of them.
 *
 * Returns 0 if every open handle was closed successfully (or none was open),
 * or -1 if closing either handle failed, with errno set to the error of the
 * most recent failed iconv_close(3) call.  When built without PR_USE_NLS and
 * HAVE_ICONV, always returns -1 with errno set to ENOSYS.
 */
int sftp_utf8_free(void) {
# if defined(PR_USE_NLS) && defined(HAVE_ICONV)
  int res = 0, xerrno = 0;

  /* Close the iconv handles. */
  if (encode_conv != (iconv_t) -1) {
    if (iconv_close(encode_conv) < 0) {
      xerrno = errno;

      pr_trace_msg("sftp", 1,
        "error closing encoding conversion handle from '%s' to '%s': %s",
          local_charset, "UTF-8", strerror(xerrno));
      res = -1;
    }

    encode_conv = (iconv_t) -1;
  }

  if (decode_conv != (iconv_t) -1) {
    /* Do not assign the result to res directly: a successful close here
     * must not hide a failure to close encode_conv above.
     */
    if (iconv_close(decode_conv) < 0) {
      xerrno = errno;

      pr_trace_msg("sftp", 1,
        "error closing decoding conversion handle from '%s' to '%s': %s",
          "UTF-8", local_charset, strerror(xerrno));
      res = -1;
    }

    decode_conv = (iconv_t) -1;
  }

  if (res < 0) {
    /* The trace calls may have changed errno; report the close error. */
    errno = xerrno;
  }

  return res;
# else
  errno = ENOSYS;
  return -1;
# endif
}
161 
/* Open the iconv(3) handles used to convert between the local charset and
 * UTF-8.
 *
 * If no local charset has been set yet, the one reported by
 * pr_encode_get_local_charset() is used.  Handles that are already open are
 * overwritten rather than closed, so callers that re-initialize should call
 * sftp_utf8_free() first (as sftp_utf8_set_charset() does).
 *
 * Returns 0 on success.  Returns -1 if either handle could not be opened,
 * with errno set to the iconv_open(3) error; in that case neither handle is
 * left open.  When built without PR_USE_NLS and HAVE_ICONV, always returns
 * -1 with errno set to ENOSYS.
 */
int sftp_utf8_init(void) {
#if defined(PR_USE_NLS) && defined(HAVE_ICONV)

  if (local_charset == NULL) {
    local_charset = pr_encode_get_local_charset();

  } else {
    pr_trace_msg("sftp", 3,
      "using '%s' as local charset for UTF8 conversion", local_charset);
  }

  /* Get the iconv handles. */
  encode_conv = iconv_open("UTF-8", local_charset);
  if (encode_conv == (iconv_t) -1) {
    /* Save errno so that the trace call cannot change what the caller sees. */
    int xerrno = errno;

    pr_trace_msg("sftp", 1, "error opening conversion handle from '%s' "
      "to '%s': %s", local_charset, "UTF-8", strerror(xerrno));

    errno = xerrno;
    return -1;
  }

  decode_conv = iconv_open(local_charset, "UTF-8");
  if (decode_conv == (iconv_t) -1) {
    int xerrno = errno;

    pr_trace_msg("sftp", 1, "error opening conversion handle from '%s' "
      "to '%s': %s", "UTF-8", local_charset, strerror(xerrno));

    /* Do not leave a half-initialized pair of handles behind. */
    (void) iconv_close(encode_conv);
    encode_conv = (iconv_t) -1;

    errno = xerrno;
    return -1;
  }

  return 0;
# else
  errno = ENOSYS;
  return -1;
#endif /* PR_USE_NLS && HAVE_ICONV */
}
201 
/* Convert the UTF-8 string str into the local charset.
 *
 * Returns NULL with errno set to EINVAL if p or str is NULL.  On a successful
 * conversion, returns a NUL-terminated copy allocated from p.  If no
 * conversion is possible or needed (the decoding handle is not open, the
 * local charset is itself UTF-8, or the conversion fails), str itself is
 * returned, unmodified.  The conversion goes through a fixed-size buffer of
 * PR_TUNABLE_PATH_MAX*2 bytes.
 *
 * When built without PR_USE_NLS and HAVE_ICONV_H, simply returns a copy of
 * str allocated from p.
 */
char *sftp_utf8_decode_str(pool *p, const char *str) {
#if defined(PR_USE_NLS) && defined(HAVE_ICONV_H)
  size_t inlen, inbuflen, outlen, outbuflen;
  char *inbuf, outbuf[PR_TUNABLE_PATH_MAX*2], *res = NULL;

  if (p == NULL ||
      str == NULL) {
    errno = EINVAL;
    return NULL;
  }

  if (decode_conv == (iconv_t) -1) {
    pr_trace_msg("sftp", 1, "decoding conversion handle is invalid, unable to "
      "decode UTF8 string");
    return (char *) str;
  }

  /* If the local charset matches the remote charset (i.e. local_charset is
   * "UTF-8"), then there's no point in converting; the charsets are the
   * same.  Indeed, on some libiconv implementations, attempting to
   * convert between the same charsets results in a tightly spinning CPU
   * (see Bug#3272).
   */
  if (local_charset != NULL &&
      strncasecmp(local_charset, "UTF-8", 6) == 0) {
    return (char *) str;
  }

  /* Convert the terminating NUL as well, so that the output is a complete
   * string.
   */
  inlen = strlen(str) + 1;
  inbuf = pcalloc(p, inlen);
  memcpy(inbuf, str, inlen);
  inbuflen = inlen;

  outbuflen = sizeof(outbuf);

  if (utf8_convert(decode_conv, inbuf, &inbuflen, outbuf, &outbuflen) < 0) {
    pr_trace_msg("sftp", 1, "error decoding string: %s", strerror(errno));

    if (pr_trace_get_level("sftp") >= 14) {
      /* Write out the string we tried (and failed) to decode, in hex. */
      size_t i, len, raw_len;
      char *raw_str;

      len = strlen(str);

      /* Each byte is rendered as "0xNN " (5 characters); one more byte is
       * needed for the terminating NUL.
       */
      raw_len = (len * 5) + 1;
      raw_str = pcalloc(p, raw_len);

      for (i = 0; i < len; i++) {
        /* Pass the full remaining space, so that the last element still has
         * room for its 5 characters plus the NUL and is not truncated.
         */
        pr_snprintf(raw_str + (i * 5), raw_len - (i * 5), "0x%02x ",
          (unsigned char) str[i]);
      }

      pr_trace_msg("sftp", 14, "unable to decode string (raw bytes): %s",
        raw_str);
    }

    return (char *) str;
  }

  /* outbuflen now holds the unused space; the rest is the converted text. */
  outlen = sizeof(outbuf) - outbuflen;
  res = pcalloc(p, outlen);
  memcpy(res, outbuf, outlen);

  return res;
#else
  return pstrdup(p, str);
#endif /* PR_USE_NLS && HAVE_ICONV_H */
}
270 
/* Convert the string str from the local charset into UTF-8.
 *
 * Returns NULL with errno set to EINVAL if p or str is NULL.  On a successful
 * conversion, or when the local charset is itself UTF-8 (no conversion
 * needed), returns a NUL-terminated copy allocated from p.  If the encoding
 * handle is not open or the conversion fails, str itself is returned,
 * unmodified.  The conversion goes through a fixed-size buffer of
 * PR_TUNABLE_PATH_MAX*2 bytes.
 *
 * When built without PR_USE_NLS and HAVE_ICONV_H, simply returns a copy of
 * str allocated from p.
 */
char *sftp_utf8_encode_str(pool *p, const char *str) {
#if defined(PR_USE_NLS) && defined(HAVE_ICONV_H)
  size_t inlen, inbuflen, outlen, outbuflen;
  char *inbuf, outbuf[PR_TUNABLE_PATH_MAX*2], *res;

  if (p == NULL ||
      str == NULL) {
    errno = EINVAL;
    return NULL;
  }

  if (encode_conv == (iconv_t) -1) {
    pr_trace_msg("sftp", 1, "encoding conversion handle is invalid, unable to "
      "encode UTF8 string");
    return (char *) str;
  }

  /* As in sftp_utf8_decode_str(): if the local charset is already "UTF-8",
   * there is nothing to convert, and on some libiconv implementations
   * converting between identical charsets results in a tightly spinning CPU
   * (see Bug#3272).  Hand back a copy from the pool, as a successful
   * conversion would.
   */
  if (local_charset != NULL &&
      strncasecmp(local_charset, "UTF-8", 6) == 0) {
    return pstrdup(p, str);
  }

  /* Convert the terminating NUL as well, so that the output is a complete
   * string.
   */
  inlen = strlen(str) + 1;
  inbuf = pcalloc(p, inlen);
  memcpy(inbuf, str, inlen);
  inbuflen = inlen;

  outbuflen = sizeof(outbuf);

  if (utf8_convert(encode_conv, inbuf, &inbuflen, outbuf, &outbuflen) < 0) {
    pr_trace_msg("sftp", 1, "error encoding string: %s", strerror(errno));

    if (pr_trace_get_level("sftp") >= 14) {
      /* Write out the string we tried (and failed) to encode, in hex. */
      size_t i, len, raw_len;
      char *raw_str;

      len = strlen(str);

      /* Each byte is rendered as "0xNN " (5 characters); one more byte is
       * needed for the terminating NUL.
       */
      raw_len = (len * 5) + 1;
      raw_str = pcalloc(p, raw_len);

      for (i = 0; i < len; i++) {
        /* Pass the full remaining space, so that the last element still has
         * room for its 5 characters plus the NUL and is not truncated.
         */
        pr_snprintf(raw_str + (i * 5), raw_len - (i * 5), "0x%02x ",
          (unsigned char) str[i]);
      }

      pr_trace_msg("sftp", 14, "unable to encode string (raw bytes): %s",
        raw_str);
    }

    return (char *) str;
  }

  /* outbuflen now holds the unused space; the rest is the converted text. */
  outlen = sizeof(outbuf) - outbuflen;
  res = pcalloc(p, outlen);
  memcpy(res, outbuf, outlen);

  return res;
#else
  return pstrdup(p, str);
#endif /* PR_USE_NLS && HAVE_ICONV_H */
}
329