1 /*
2  * ProFTPD - FTP server daemon
3  * Copyright (c) 2006-2015 The ProFTPD Project team
4  *
5  * This program is free software; you can redistribute it and/or modify
6  * it under the terms of the GNU General Public License as published by
7  * the Free Software Foundation; either version 2 of the License, or
8  * (at your option) any later version.
9  *
10  * This program is distributed in the hope that it will be useful,
11  * but WITHOUT ANY WARRANTY; without even the implied warranty of
12  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
13  * GNU General Public License for more details.
14  *
15  * You should have received a copy of the GNU General Public License
16  * along with this program; if not, write to the Free Software
17  * Foundation, Inc., 51 Franklin Street, Suite 500, Boston, MA 02110-1335, USA.
18  *
19  * As a special exemption, The ProFTPD Project team and other respective
20  * copyright holders give permission to link this program with OpenSSL, and
21  * distribute the resulting executable, without including the source code for
22  * OpenSSL in the source distribution.
23  */
24 
25 /* UTF8/charset encoding/decoding. */
26 
27 #include "conf.h"
28 
29 #ifdef PR_USE_NLS
30 
31 #ifdef HAVE_ICONV_H
32 # include <iconv.h>
33 #endif
34 
35 #ifdef HAVE_LANGINFO_H
36 # include <langinfo.h>
37 #endif
38 
39 #ifdef HAVE_ICONV_H
40 static iconv_t decode_conv = (iconv_t) -1;
41 static iconv_t encode_conv = (iconv_t) -1;
42 
43 static unsigned long encoding_policy = 0UL;
44 static const char *local_charset = NULL;
45 static const char *encoding = "UTF-8";
46 static int supports_telnet_iac = TRUE;
47 
48 static const char *trace_channel = "encode";
49 
/* Converts the bytes at inbuf into outbuf using the conversion descriptor
 * conv.
 *
 * On entry, *inbuflen is the number of input bytes and *outbuflen is the
 * capacity of outbuf; both counters are handed to iconv(3), which updates
 * them as it consumes input and produces output.
 *
 * Returns 0 on success (including when there is no input to convert), or -1
 * when iconv(3) reports an error, in which case errno is left as iconv(3)
 * set it.  When built without iconv support, always fails with ENOSYS.
 */
static int str_convert(iconv_t conv, const char *inbuf, size_t *inbuflen,
    char *outbuf, size_t *outbuflen) {
# ifdef HAVE_ICONV
  size_t converted;

  /* Put the descriptor back into its initial state before converting. */
  (void) iconv(conv, NULL, NULL, NULL, NULL);

  if (*inbuflen == 0) {
    /* No input, nothing to do. */
    return 0;
  }

  pr_signals_handle();

  /* Linux/Mac OSX declare the iconv(3) input buffer as char **, whereas
   * Solaris/FreeBSD declare it as const char **; cast where needed.
   */
#if defined(LINUX) || defined(DARWIN6) || defined(DARWIN7) || \
    defined(DARWIN8) || defined(DARWIN9) || defined(DARWIN10) || \
    defined(DARWIN11) || defined(DARWIN12)
  converted = iconv(conv, (char **) &inbuf, inbuflen, &outbuf, outbuflen);
#else
  converted = iconv(conv, &inbuf, inbuflen, &outbuf, outbuflen);
#endif

  /* Note: an errno of EILSEQ can mean a badly encoded string OR (more
   * likely) that the source character set given to iconv_open(3) for this
   * descriptor does not match the actual encoding of the string, e.g. an
   * ISO8859-1 filename handled by a descriptor opened for US-ASCII.
   */
  if (converted == (size_t) -1) {
    return -1;
  }

  return 0;
# else
  errno = ENOSYS;
  return -1;
# endif /* HAVE_ICONV */
}
99 #endif /* !HAVE_ICONV_H */
100 
101 #ifdef HAVE_ICONV
set_supports_telnet_iac(const char * codeset)102 static void set_supports_telnet_iac(const char *codeset) {
103 
104   /* The full list of character sets which use 0xFF could be obtained from
105    * the libiconv sources; for now, this list should contain the most
106    * commonly used character sets.
107    */
108 
109   if (strncasecmp(codeset, "CP1251", 7) == 0 ||
110       strncasecmp(codeset, "CP866", 6) == 0 ||
111       strncasecmp(codeset, "ISO-8859-1", 11) == 0 ||
112       strncasecmp(codeset, "KOI8-R", 7) == 0 ||
113       strncasecmp(codeset, "WINDOWS-1251", 13) == 0) {
114     supports_telnet_iac = FALSE;
115     return;
116   }
117 
118   supports_telnet_iac = TRUE;
119 }
120 #endif /* !HAVE_ICONV */
121 
/* Closes whichever iconv conversion handles are currently open and marks
 * them as closed.
 *
 * Returns 0 if every open handle was closed cleanly, or -1 if closing
 * either handle failed (the failure is reported on the trace channel).
 * When built without iconv support, fails with ENOSYS.
 */
int encode_free(void) {
# ifdef HAVE_ICONV
  int status = 0;

  /* Handle converting from the local charset to the encoding. */
  if (encode_conv != (iconv_t) -1) {
    int rc = iconv_close(encode_conv);

    encode_conv = (iconv_t) -1;

    if (rc < 0) {
      pr_trace_msg(trace_channel, 1,
        "error closing conversion handle from '%s' to '%s': %s",
        local_charset, encoding, strerror(errno));
      status = -1;
    }
  }

  /* Handle converting from the encoding to the local charset. */
  if (decode_conv != (iconv_t) -1) {
    int rc = iconv_close(decode_conv);

    decode_conv = (iconv_t) -1;

    if (rc < 0) {
      pr_trace_msg(trace_channel, 1,
        "error closing conversion handle from '%s' to '%s': %s",
        encoding, local_charset, strerror(errno));
      status = -1;
    }
  }

  return status;
# else
  errno = ENOSYS;
  return -1;
# endif
}
155 
encode_init(void)156 int encode_init(void) {
157 
158   if (encoding == NULL) {
159     pr_trace_msg(trace_channel, 3, "no encoding configured");
160     return 0;
161   }
162 
163   if (local_charset == NULL) {
164     local_charset = pr_encode_get_local_charset();
165   }
166 
167   pr_log_debug(DEBUG10, "using '%s' as local charset for %s conversion",
168     local_charset, encoding);
169 
170 # ifdef HAVE_ICONV
171 
172   /* If the local charset matches the remote charset, then there's no point
173    * in converting; the charsets are the same.  Indeed, on some libiconv
174    * implementations, attempting to convert between the same charsets results
175    * in a tightly spinning CPU, or worse (see Bug#3272).
176    */
177   if (strcasecmp(local_charset, encoding) != 0) {
178 
179     /* Get the iconv handles. */
180     encode_conv = iconv_open(encoding, local_charset);
181     if (encode_conv == (iconv_t) -1) {
182       int xerrno = errno;
183 
184       pr_log_pri(PR_LOG_NOTICE, "error opening conversion handle "
185         "from '%s' to '%s': %s", local_charset, encoding, strerror(xerrno));
186 
187       errno = xerrno;
188       return -1;
189     }
190 
191     decode_conv = iconv_open(local_charset, encoding);
192     if (decode_conv == (iconv_t) -1) {
193       int xerrno = errno;
194 
195       pr_log_pri(PR_LOG_NOTICE, "error opening conversion handle "
196         "from '%s' to '%s': %s", encoding, local_charset, strerror(xerrno));
197 
198       (void) iconv_close(encode_conv);
199       encode_conv = (iconv_t) -1;
200 
201       errno = xerrno;
202       return -1;
203     }
204   }
205 
206   set_supports_telnet_iac(encoding);
207   return 0;
208 # else
209   errno = ENOSYS;
210   return -1;
211 # endif /* HAVE_ICONV */
212 }
213 
/* Decodes inlen bytes of in from the configured encoding into the local
 * charset, returning a NUL-terminated copy allocated from pool p.
 *
 * On success, *outlen is set to the number of bytes placed in the returned
 * buffer, not counting the extra terminating NUL appended here.  When no
 * conversion is performed (the local charset and encoding are the same, the
 * decoding handle is not open, or iconv support is missing), the input
 * string is duplicated as-is and *outlen is its string length.
 *
 * Returns NULL with errno set to EINVAL if p, in, or outlen is NULL, and
 * NULL if the conversion itself fails; *outlen is not modified in those
 * cases.  NOTE(review): the converted text must fit in a fixed buffer of
 * PR_TUNABLE_PATH_MAX*2 bytes; presumably iconv(3) fails with E2BIG
 * otherwise -- confirm.
 */
char *pr_decode_str(pool *p, const char *in, size_t inlen, size_t *outlen) {
#ifdef HAVE_ICONV
  size_t inbuflen, outbuflen;
  char outbuf[PR_TUNABLE_PATH_MAX*2], *res = NULL;

  if (p == NULL ||
      in == NULL ||
      outlen == NULL) {
    errno = EINVAL;
    return NULL;
  }

  /* If the local charset matches the remote charset, then there's no point
   * in converting; the charsets are the same.  Indeed, on some libiconv
   * implementations, attempting to convert between the same charsets results
   * in a tightly spinning CPU (see Bug#3272).
   */
  if (local_charset != NULL &&
      encoding != NULL &&
      strcasecmp(local_charset, encoding) == 0) {
    /* Callers get a valid length on this success path, too. */
    *outlen = strlen(in);
    return pstrdup(p, in);
  }

  if (decode_conv == (iconv_t) -1) {
    pr_trace_msg(trace_channel, 1, "invalid decoding conversion handle, "
      "unable to decode string");
    *outlen = strlen(in);
    return pstrdup(p, in);
  }

  /* str_convert() takes the input as const data, so the caller's buffer can
   * be used directly; no scratch copy is needed.
   */
  inbuflen = inlen;
  outbuflen = sizeof(outbuf);

  if (str_convert(decode_conv, in, &inbuflen, outbuf, &outbuflen) < 0) {
    return NULL;
  }

  /* outbuflen now holds the unused space left in outbuf. */
  *outlen = sizeof(outbuf) - outbuflen;

  /* We allocate one byte more, for a terminating NUL. */
  res = pcalloc(p, *outlen + 1);
  memcpy(res, outbuf, *outlen);

  return res;
#else
  pr_trace_msg(trace_channel, 1,
    "missing iconv support, no %s decoding possible", encoding);

  if (in != NULL &&
      outlen != NULL) {
    *outlen = strlen(in);
  }

  return pstrdup(p, in);
#endif /* !HAVE_ICONV */
}
268 
/* Encodes inlen bytes of in from the local charset into the configured
 * encoding, returning a NUL-terminated copy allocated from pool p.
 *
 * On success, *outlen is set to the number of bytes placed in the returned
 * buffer, not counting the extra terminating NUL appended here.  When no
 * conversion is performed (the local charset and encoding are the same, the
 * encoding handle is not open, or iconv support is missing), the input
 * string is duplicated as-is and *outlen is its string length.
 *
 * Returns NULL with errno set to EINVAL if p, in, or outlen is NULL, and
 * NULL if the conversion itself fails; *outlen is not modified in those
 * cases.  NOTE(review): the converted text must fit in a fixed buffer of
 * PR_TUNABLE_PATH_MAX*2 bytes; presumably iconv(3) fails with E2BIG
 * otherwise -- confirm.
 */
char *pr_encode_str(pool *p, const char *in, size_t inlen, size_t *outlen) {
#ifdef HAVE_ICONV
  size_t inbuflen, outbuflen;
  char outbuf[PR_TUNABLE_PATH_MAX*2], *res = NULL;

  if (p == NULL ||
      in == NULL ||
      outlen == NULL) {
    errno = EINVAL;
    return NULL;
  }

  /* If the local charset matches the remote charset, then there's no point
   * in converting; the charsets are the same.  Indeed, on some libiconv
   * implementations, attempting to convert between the same charsets results
   * in a tightly spinning CPU (see Bug#3272).
   */
  if (local_charset != NULL &&
      encoding != NULL &&
      strcasecmp(local_charset, encoding) == 0) {
    /* Callers get a valid length on this success path, too. */
    *outlen = strlen(in);
    return pstrdup(p, in);
  }

  if (encode_conv == (iconv_t) -1) {
    pr_trace_msg(trace_channel, 1, "invalid encoding conversion handle, "
      "unable to encode string");
    *outlen = strlen(in);
    return pstrdup(p, in);
  }

  /* str_convert() takes the input as const data, so the caller's buffer can
   * be used directly; no scratch copy is needed.
   */
  inbuflen = inlen;
  outbuflen = sizeof(outbuf);

  if (str_convert(encode_conv, in, &inbuflen, outbuf, &outbuflen) < 0) {
    return NULL;
  }

  /* outbuflen now holds the unused space left in outbuf. */
  *outlen = sizeof(outbuf) - outbuflen;

  /* We allocate one byte more, for a terminating NUL. */
  res = pcalloc(p, *outlen + 1);
  memcpy(res, outbuf, *outlen);

  return res;
#else
  pr_trace_msg(trace_channel, 1,
    "missing iconv support, no %s encoding possible", encoding);

  if (in != NULL &&
      outlen != NULL) {
    *outlen = strlen(in);
  }

  return pstrdup(p, in);
#endif /* !HAVE_ICONV */
}
323 
/* Turns off runtime encoding: the conversion handles are released and the
 * configured encoding is cleared.  Does nothing when built without
 * <iconv.h>.
 */
void pr_encode_disable_encoding(void) {
#ifdef HAVE_ICONV_H
  const char *disabled = encoding;

  pr_trace_msg(trace_channel, 8, "%s encoding disabled", disabled);

  /* Release the handles before forgetting the encoding name; a failure to
   * close them is deliberately ignored here.
   */
  (void) encode_free();
  encoding = NULL;
#endif
}
331 
/* Enables runtime use of encoding using the specified character set (assuming
 * NLS is supported).  Note that "UTF8", "utf8", "utf-8", and "UTF-8" are
 * accepted "character set" designations.
 *
 * Asking for the encoding already in use (compared case-insensitively) is a
 * no-op that returns 0.  Otherwise the current conversion handles are
 * released and new ones are initialized for codeset.
 *
 * Returns the result of the initialization (0 on success).  On failure,
 * encoding is left disabled and errno describes the initialization error.
 * Returns -1 with errno set to EINVAL if codeset is NULL, or to ENOSYS when
 * built without <iconv.h>.
 */
int pr_encode_enable_encoding(const char *codeset) {
#ifdef HAVE_ICONV_H
  int res, xerrno;

  if (codeset == NULL) {
    errno = EINVAL;
    return -1;
  }

  if (encoding == NULL) {
    pr_trace_msg(trace_channel, 5, "attempting to enable %s encoding", codeset);

  } else if (strcasecmp(encoding, codeset) == 0) {
    pr_trace_msg(trace_channel, 5, "'%s' encoding already being used", codeset);
    return 0;

  } else {
    pr_trace_msg(trace_channel, 5,
      "attempting to switch encoding from %s to %s", encoding, codeset);
  }

  /* Drop the old handles, then try to bring up the new encoding. */
  (void) encode_free();
  encoding = pstrdup(permanent_pool, codeset);

  res = encode_init();
  if (res >= 0) {
    return res;
  }

  /* Preserve the initialization error across the trace call. */
  xerrno = errno;

  pr_trace_msg(trace_channel, 1,
    "failed to initialize encoding for %s, disabling encoding: %s", codeset,
    strerror(xerrno));

  encoding = NULL;
  errno = xerrno;

  return res;

#else
  errno = ENOSYS;
  return -1;
#endif /* !HAVE_ICONV_H */
}
381 
pr_encode_get_policy(void)382 unsigned long pr_encode_get_policy(void) {
383   return encoding_policy;
384 }
385 
pr_encode_set_policy(unsigned long policy)386 int pr_encode_set_policy(unsigned long policy) {
387   encoding_policy = policy;
388   return 0;
389 }
390 
pr_encode_get_local_charset(void)391 const char *pr_encode_get_local_charset(void) {
392   const char *charset = NULL;
393 
394 #ifdef HAVE_NL_LANGINFO
395   /* Look up the current charset.  If there's a problem, default to
396    * UCS-2.  Make sure we pick up the locale of the environment.
397    */
398   charset = nl_langinfo(CODESET);
399   if (charset == NULL ||
400       strlen(charset) == 0) {
401     charset = "UTF-8";
402     pr_trace_msg(trace_channel, 1,
403       "unable to determine locale, defaulting to 'UTF-8' for %s conversion",
404       encoding);
405 
406   } else {
407 
408     /* Workaround a stupid bug in many implementations where nl_langinfo()
409      * returns "646" to mean "US-ASCII".  The problem is that iconv_open(3)
410      * doesn't accept "646" as an acceptable encoding.
411      */
412     if (strncmp(charset, "646", 4) == 0) {
413       charset = "US-ASCII";
414     }
415 
416     pr_trace_msg(trace_channel, 1,
417       "converting %s to local character set '%s'", encoding, charset);
418     }
419 #else
420   charset = "UTF-8";
421   pr_trace_msg(trace_channel, 1,
422     "nl_langinfo(3) not supported, defaulting to using 'UTF-8' for "
423     "%s conversion", encoding);
424 #endif /* HAVE_NL_LANGINFO */
425 
426   return charset;
427 }
428 
/* Returns the name of the local charset currently in use, which may be NULL
 * if none has been set.  When built without <iconv.h>, returns NULL with
 * errno set to ENOSYS.
 */
const char *pr_encode_get_charset(void) {
#ifndef HAVE_ICONV_H
  errno = ENOSYS;
  return NULL;

#else
  const char *charset = local_charset;

  return charset;
#endif /* HAVE_ICONV_H */
}
438 
/* Returns the name of the encoding currently in use, or NULL if encoding is
 * disabled.  When built without <iconv.h>, returns NULL with errno set to
 * ENOSYS.
 */
const char *pr_encode_get_encoding(void) {
#ifndef HAVE_ICONV_H
  errno = ENOSYS;
  return NULL;

#else
  const char *current = encoding;

  return current;
#endif /* HAVE_ICONV_H */
}
448 
/* Sets both the local charset and the encoding, then re-initializes the
 * conversion handles for that pair.
 *
 * Both names are copied into permanent_pool, so the caller's strings need
 * not outlive this call.  Any currently open handles are released first.
 *
 * Returns the result of the initialization (0 on success).  On failure,
 * both the local charset and the encoding are cleared and errno describes
 * the initialization error.  Returns -1 with errno set to EINVAL if either
 * argument is NULL, or to ENOSYS when built without <iconv.h>.
 */
int pr_encode_set_charset_encoding(const char *charset, const char *codeset) {
#ifdef HAVE_ICONV_H
  int res, xerrno;

  if (charset == NULL ||
      codeset == NULL) {
    errno = EINVAL;
    return -1;
  }

  if (local_charset == NULL) {
    pr_trace_msg(trace_channel, 5, "attempting to use %s as local charset",
      charset);

  } else {
    pr_trace_msg(trace_channel, 5,
      "attempting to switch local charset from %s to %s", local_charset,
      charset);
  }

  if (encoding == NULL) {
    pr_trace_msg(trace_channel, 5, "attempting to use %s encoding", codeset);

  } else {
    pr_trace_msg(trace_channel, 5,
      "attempting to switch encoding from %s to %s", encoding, codeset);
  }

  /* Drop the old handles before swapping in the new names. */
  (void) encode_free();

  local_charset = pstrdup(permanent_pool, charset);
  encoding = pstrdup(permanent_pool, codeset);

  res = encode_init();
  if (res >= 0) {
    return res;
  }

  /* Preserve the initialization error across the trace call. */
  xerrno = errno;

  pr_trace_msg(trace_channel, 1,
    "failed to initialize encoding for local charset %s, encoding %s, "
    "disabling encoding", charset, codeset);

  local_charset = NULL;
  encoding = NULL;
  errno = xerrno;

  return res;

#else
  errno = ENOSYS;
  return -1;
#endif /* !HAVE_ICONV_H */
}
502 
pr_encode_is_utf8(const char * codeset)503 int pr_encode_is_utf8(const char *codeset) {
504   if (codeset == NULL) {
505     errno = EINVAL;
506     return -1;
507   }
508 
509   if (strncasecmp(codeset, "UTF8", 5) == 0 ||
510       strncasecmp(codeset, "UTF-8", 6) == 0) {
511     return TRUE;
512   }
513 
514   return FALSE;
515 }
516 
pr_encode_supports_telnet_iac(void)517 int pr_encode_supports_telnet_iac(void) {
518   return supports_telnet_iac;
519 }
520 
521 #endif /* PR_USE_NLS */
522