1 /*
2 * ProFTPD - FTP server daemon
3 * Copyright (c) 2006-2015 The ProFTPD Project team
4 *
5 * This program is free software; you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License as published by
7 * the Free Software Foundation; either version 2 of the License, or
8 * (at your option) any later version.
9 *
10 * This program is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU General Public License for more details.
14 *
15 * You should have received a copy of the GNU General Public License
16 * along with this program; if not, write to the Free Software
17 * Foundation, Inc., 51 Franklin Street, Suite 500, Boston, MA 02110-1335, USA.
18 *
19 * As a special exemption, The ProFTPD Project team and other respective
20 * copyright holders give permission to link this program with OpenSSL, and
21 * distribute the resulting executable, without including the source code for
22 * OpenSSL in the source distribution.
23 */
24
25 /* UTF8/charset encoding/decoding. */
26
27 #include "conf.h"
28
29 #ifdef PR_USE_NLS
30
31 #ifdef HAVE_ICONV_H
32 # include <iconv.h>
33 #endif
34
35 #ifdef HAVE_LANGINFO_H
36 # include <langinfo.h>
37 #endif
38
39 #ifdef HAVE_ICONV_H
40 static iconv_t decode_conv = (iconv_t) -1;
41 static iconv_t encode_conv = (iconv_t) -1;
42
43 static unsigned long encoding_policy = 0UL;
44 static const char *local_charset = NULL;
45 static const char *encoding = "UTF-8";
46 static int supports_telnet_iac = TRUE;
47
48 static const char *trace_channel = "encode";
49
str_convert(iconv_t conv,const char * inbuf,size_t * inbuflen,char * outbuf,size_t * outbuflen)50 static int str_convert(iconv_t conv, const char *inbuf, size_t *inbuflen,
51 char *outbuf, size_t *outbuflen) {
52 # ifdef HAVE_ICONV
53
54 /* Reset the state machine before each conversion. */
55 (void) iconv(conv, NULL, NULL, NULL, NULL);
56
57 while (*inbuflen > 0) {
58 size_t nconv;
59
60 pr_signals_handle();
61
62 /* Solaris/FreeBSD's iconv(3) takes a const char ** for the input buffer,
63 * whereas Linux/Mac OSX iconv(3) use char ** for the input buffer.
64 */
65 #if defined(LINUX) || defined(DARWIN6) || defined(DARWIN7) || \
66 defined(DARWIN8) || defined(DARWIN9) || defined(DARWIN10) || \
67 defined(DARWIN11) || defined(DARWIN12)
68
69 nconv = iconv(conv, (char **) &inbuf, inbuflen, &outbuf, outbuflen);
70 #else
71 nconv = iconv(conv, &inbuf, inbuflen, &outbuf, outbuflen);
72 #endif
73
74 if (nconv == (size_t) -1) {
75
76 /* Note: an errno of EILSEQ here can indicate badly encoded strings OR
77 * (more likely) that the source character set used in the iconv_open(3)
78 * call for this iconv_t descriptor does not accurately describe the
79 * character encoding of the given string. E.g. a filename may use
80 * the ISO8859-1 character set, but iconv_open(3) was called using
81 * US-ASCII.
82 */
83
84 return -1;
85 }
86
87 /* XXX We should let the loop condition work, rather than breaking out
88 * of the loop here.
89 */
90 break;
91 }
92
93 return 0;
94 # else
95 errno = ENOSYS;
96 return -1;
97 # endif /* HAVE_ICONV */
98 }
99 #endif /* !HAVE_ICONV_H */
100
101 #ifdef HAVE_ICONV
set_supports_telnet_iac(const char * codeset)102 static void set_supports_telnet_iac(const char *codeset) {
103
104 /* The full list of character sets which use 0xFF could be obtained from
105 * the libiconv sources; for now, this list should contain the most
106 * commonly used character sets.
107 */
108
109 if (strncasecmp(codeset, "CP1251", 7) == 0 ||
110 strncasecmp(codeset, "CP866", 6) == 0 ||
111 strncasecmp(codeset, "ISO-8859-1", 11) == 0 ||
112 strncasecmp(codeset, "KOI8-R", 7) == 0 ||
113 strncasecmp(codeset, "WINDOWS-1251", 13) == 0) {
114 supports_telnet_iac = FALSE;
115 return;
116 }
117
118 supports_telnet_iac = TRUE;
119 }
120 #endif /* !HAVE_ICONV */
121
# ifdef HAVE_ICONV
/* Closes the descriptor *conv if it is open and marks it as not open.  The
 * from/to names are only used for the trace message emitted when
 * iconv_close(3) fails.  Returns 0 on success (or when nothing was open),
 * -1 if iconv_close(3) failed.
 */
static int close_conv_handle(iconv_t *conv, const char *from,
    const char *to) {
  int status = 0;

  if (*conv == (iconv_t) -1) {
    return 0;
  }

  if (iconv_close(*conv) < 0) {
    pr_trace_msg(trace_channel, 1,
      "error closing conversion handle from '%s' to '%s': %s",
      from, to, strerror(errno));
    status = -1;
  }

  /* The descriptor is marked closed even if iconv_close(3) failed. */
  *conv = (iconv_t) -1;
  return status;
}
# endif /* HAVE_ICONV */

/* Closes both conversion descriptors.  Returns 0 on success, -1 if either
 * close failed.  When built without HAVE_ICONV, returns -1 with errno set
 * to ENOSYS.
 */
int encode_free(void) {
# ifdef HAVE_ICONV
  int res = 0;

  if (close_conv_handle(&encode_conv, local_charset, encoding) < 0) {
    res = -1;
  }

  if (close_conv_handle(&decode_conv, encoding, local_charset) < 0) {
    res = -1;
  }

  return res;
# else
  errno = ENOSYS;
  return -1;
# endif
}
155
encode_init(void)156 int encode_init(void) {
157
158 if (encoding == NULL) {
159 pr_trace_msg(trace_channel, 3, "no encoding configured");
160 return 0;
161 }
162
163 if (local_charset == NULL) {
164 local_charset = pr_encode_get_local_charset();
165 }
166
167 pr_log_debug(DEBUG10, "using '%s' as local charset for %s conversion",
168 local_charset, encoding);
169
170 # ifdef HAVE_ICONV
171
172 /* If the local charset matches the remote charset, then there's no point
173 * in converting; the charsets are the same. Indeed, on some libiconv
174 * implementations, attempting to convert between the same charsets results
175 * in a tightly spinning CPU, or worse (see Bug#3272).
176 */
177 if (strcasecmp(local_charset, encoding) != 0) {
178
179 /* Get the iconv handles. */
180 encode_conv = iconv_open(encoding, local_charset);
181 if (encode_conv == (iconv_t) -1) {
182 int xerrno = errno;
183
184 pr_log_pri(PR_LOG_NOTICE, "error opening conversion handle "
185 "from '%s' to '%s': %s", local_charset, encoding, strerror(xerrno));
186
187 errno = xerrno;
188 return -1;
189 }
190
191 decode_conv = iconv_open(local_charset, encoding);
192 if (decode_conv == (iconv_t) -1) {
193 int xerrno = errno;
194
195 pr_log_pri(PR_LOG_NOTICE, "error opening conversion handle "
196 "from '%s' to '%s': %s", encoding, local_charset, strerror(xerrno));
197
198 (void) iconv_close(encode_conv);
199 encode_conv = (iconv_t) -1;
200
201 errno = xerrno;
202 return -1;
203 }
204 }
205
206 set_supports_telnet_iac(encoding);
207 return 0;
208 # else
209 errno = ENOSYS;
210 return -1;
211 # endif /* HAVE_ICONV */
212 }
213
/* Decodes the inlen bytes at `in' from the configured encoding into the
 * local charset.
 *
 * Returns a NUL-terminated string allocated from pool p and stores its
 * length (excluding the NUL) in *outlen.  When no conversion applies (the
 * charsets match, the decoding descriptor is not open, or iconv support is
 * missing), a plain copy of `in' is returned and *outlen is the length of
 * that copy.  Returns NULL with errno set to EINVAL on NULL arguments, or
 * NULL when the conversion itself fails.
 */
char *pr_decode_str(pool *p, const char *in, size_t inlen, size_t *outlen) {
#ifdef HAVE_ICONV
  size_t inbuflen, outbuflen;
  char outbuf[PR_TUNABLE_PATH_MAX*2], *res = NULL;

  if (p == NULL ||
      in == NULL ||
      outlen == NULL) {
    errno = EINVAL;
    return NULL;
  }

  /* If the local charset matches the remote charset, then there's no point
   * in converting; the charsets are the same.  Indeed, on some libiconv
   * implementations, attempting to convert between the same charsets results
   * in a tightly spinning CPU (see Bug#3272).
   */
  if (local_charset != NULL &&
      encoding != NULL &&
      strcasecmp(local_charset, encoding) == 0) {
    /* Report the length of the copy; callers must never see *outlen unset. */
    *outlen = strlen(in);
    return pstrdup(p, in);
  }

  if (decode_conv == (iconv_t) -1) {
    pr_trace_msg(trace_channel, 1, "invalid decoding conversion handle, "
      "unable to decode string");
    *outlen = strlen(in);
    return pstrdup(p, in);
  }

  /* str_convert() takes a const input pointer and only advances its own
   * copy of that pointer, so `in' can be handed over without duplicating it.
   */
  inbuflen = inlen;
  outbuflen = sizeof(outbuf);

  if (str_convert(decode_conv, in, &inbuflen, outbuf, &outbuflen) < 0) {
    return NULL;
  }

  *outlen = sizeof(outbuf) - outbuflen;

  /* We allocate one byte more, for a terminating NUL (pcalloc zero-fills). */
  res = pcalloc(p, *outlen + 1);
  memcpy(res, outbuf, *outlen);

  return res;
#else
  pr_trace_msg(trace_channel, 1,
    "missing iconv support, no %s decoding possible", encoding);

  /* This path does not validate its arguments, so guard before writing. */
  if (in != NULL &&
      outlen != NULL) {
    *outlen = strlen(in);
  }

  return pstrdup(p, in);
#endif /* !HAVE_ICONV */
}
268
/* Encodes the inlen bytes at `in' from the local charset into the
 * configured encoding.
 *
 * Returns a NUL-terminated string allocated from pool p and stores its
 * length (excluding the NUL) in *outlen.  When no conversion applies (the
 * charsets match, the encoding descriptor is not open, or iconv support is
 * missing), a plain copy of `in' is returned and *outlen is the length of
 * that copy.  Returns NULL with errno set to EINVAL on NULL arguments, or
 * NULL when the conversion itself fails.
 */
char *pr_encode_str(pool *p, const char *in, size_t inlen, size_t *outlen) {
#ifdef HAVE_ICONV
  size_t inbuflen, outbuflen;
  char outbuf[PR_TUNABLE_PATH_MAX*2], *res;

  if (p == NULL ||
      in == NULL ||
      outlen == NULL) {
    errno = EINVAL;
    return NULL;
  }

  /* If the local charset matches the remote charset, then there's no point
   * in converting; the charsets are the same.  Indeed, on some libiconv
   * implementations, attempting to convert between the same charsets results
   * in a tightly spinning CPU (see Bug#3272).
   */
  if (local_charset != NULL &&
      encoding != NULL &&
      strcasecmp(local_charset, encoding) == 0) {
    /* Report the length of the copy; callers must never see *outlen unset. */
    *outlen = strlen(in);
    return pstrdup(p, in);
  }

  if (encode_conv == (iconv_t) -1) {
    pr_trace_msg(trace_channel, 1, "invalid encoding conversion handle, "
      "unable to encode string");
    *outlen = strlen(in);
    return pstrdup(p, in);
  }

  /* str_convert() takes a const input pointer and only advances its own
   * copy of that pointer, so `in' can be handed over without duplicating it.
   */
  inbuflen = inlen;
  outbuflen = sizeof(outbuf);

  if (str_convert(encode_conv, in, &inbuflen, outbuf, &outbuflen) < 0) {
    return NULL;
  }

  *outlen = sizeof(outbuf) - outbuflen;

  /* We allocate one byte more, for a terminating NUL (pcalloc zero-fills). */
  res = pcalloc(p, *outlen + 1);
  memcpy(res, outbuf, *outlen);

  return res;
#else
  pr_trace_msg(trace_channel, 1,
    "missing iconv support, no %s encoding possible", encoding);

  /* This path does not validate its arguments, so guard before writing. */
  if (in != NULL &&
      outlen != NULL) {
    *outlen = strlen(in);
  }

  return pstrdup(p, in);
#endif /* !HAVE_ICONV */
}
323
/* Disables encoding at runtime: closes the conversion descriptors and
 * clears the configured encoding.  Safe to call when encoding is already
 * disabled.  A no-op when built without HAVE_ICONV_H.
 */
void pr_encode_disable_encoding(void) {
#ifdef HAVE_ICONV_H
  /* `encoding' is NULL if encoding was disabled earlier (or a previous
   * enable attempt failed); never pass NULL to a %s conversion.
   */
  if (encoding != NULL) {
    pr_trace_msg(trace_channel, 8, "%s encoding disabled", encoding);

  } else {
    pr_trace_msg(trace_channel, 8, "encoding already disabled");
  }

  (void) encode_free();
  encoding = NULL;
#endif
}
331
/* Enables runtime use of encoding using the specified character set (assuming
 * NLS is supported).  Note that "UTF8", "utf8", "utf-8", and "UTF-8" are
 * accepted "character set" designations.
 *
 * Returns 0 if the codeset is already in use or was enabled successfully.
 * Returns -1 with errno set to EINVAL for a NULL codeset, to the errno from
 * encode_init() if initialization fails (encoding is then left disabled), or
 * to ENOSYS when built without HAVE_ICONV_H.
 */
int pr_encode_enable_encoding(const char *codeset) {
#ifndef HAVE_ICONV_H
  errno = ENOSYS;
  return -1;

#else
  int res, saved_errno;

  if (codeset == NULL) {
    errno = EINVAL;
    return -1;
  }

  if (encoding != NULL) {
    if (strcasecmp(encoding, codeset) == 0) {
      pr_trace_msg(trace_channel, 5, "'%s' encoding already being used",
        codeset);
      return 0;
    }

    pr_trace_msg(trace_channel, 5,
      "attempting to switch encoding from %s to %s", encoding, codeset);

  } else {
    pr_trace_msg(trace_channel, 5, "attempting to enable %s encoding",
      codeset);
  }

  /* Drop the old descriptors before re-initializing with the new codeset. */
  (void) encode_free();
  encoding = pstrdup(permanent_pool, codeset);

  res = encode_init();
  if (res >= 0) {
    return res;
  }

  saved_errno = errno;

  pr_trace_msg(trace_channel, 1,
    "failed to initialize encoding for %s, disabling encoding: %s", codeset,
    strerror(saved_errno));

  encoding = NULL;
  errno = saved_errno;

  return res;
#endif /* !HAVE_ICONV_H */
}
381
pr_encode_get_policy(void)382 unsigned long pr_encode_get_policy(void) {
383 return encoding_policy;
384 }
385
pr_encode_set_policy(unsigned long policy)386 int pr_encode_set_policy(unsigned long policy) {
387 encoding_policy = policy;
388 return 0;
389 }
390
pr_encode_get_local_charset(void)391 const char *pr_encode_get_local_charset(void) {
392 const char *charset = NULL;
393
394 #ifdef HAVE_NL_LANGINFO
395 /* Look up the current charset. If there's a problem, default to
396 * UCS-2. Make sure we pick up the locale of the environment.
397 */
398 charset = nl_langinfo(CODESET);
399 if (charset == NULL ||
400 strlen(charset) == 0) {
401 charset = "UTF-8";
402 pr_trace_msg(trace_channel, 1,
403 "unable to determine locale, defaulting to 'UTF-8' for %s conversion",
404 encoding);
405
406 } else {
407
408 /* Workaround a stupid bug in many implementations where nl_langinfo()
409 * returns "646" to mean "US-ASCII". The problem is that iconv_open(3)
410 * doesn't accept "646" as an acceptable encoding.
411 */
412 if (strncmp(charset, "646", 4) == 0) {
413 charset = "US-ASCII";
414 }
415
416 pr_trace_msg(trace_channel, 1,
417 "converting %s to local character set '%s'", encoding, charset);
418 }
419 #else
420 charset = "UTF-8";
421 pr_trace_msg(trace_channel, 1,
422 "nl_langinfo(3) not supported, defaulting to using 'UTF-8' for "
423 "%s conversion", encoding);
424 #endif /* HAVE_NL_LANGINFO */
425
426 return charset;
427 }
428
/* Returns the local charset currently in use (may be NULL if none is set).
 * When built without HAVE_ICONV_H, returns NULL with errno set to ENOSYS.
 */
const char *pr_encode_get_charset(void) {
#ifndef HAVE_ICONV_H
  errno = ENOSYS;
  return NULL;

#else
  return local_charset;
#endif /* !HAVE_ICONV_H */
}
438
/* Returns the encoding currently in use, or NULL if encoding is disabled.
 * When built without HAVE_ICONV_H, returns NULL with errno set to ENOSYS.
 */
const char *pr_encode_get_encoding(void) {
#ifndef HAVE_ICONV_H
  errno = ENOSYS;
  return NULL;

#else
  return encoding;
#endif /* !HAVE_ICONV_H */
}
448
/* Replaces both the local charset and the encoding, then re-initializes the
 * conversion descriptors for the new pair.
 *
 * Returns the result of encode_init(): 0 on success, -1 on failure.  On
 * failure both the local charset and the encoding are cleared (encoding is
 * disabled) and errno is preserved from encode_init().  Returns -1 with
 * errno set to EINVAL if either argument is NULL, or to ENOSYS when built
 * without HAVE_ICONV_H.
 */
int pr_encode_set_charset_encoding(const char *charset, const char *codeset) {
#ifdef HAVE_ICONV_H
  int res;

  if (charset == NULL ||
      codeset == NULL) {
    errno = EINVAL;
    return -1;
  }

  if (local_charset != NULL) {
    pr_trace_msg(trace_channel, 5,
      "attempting to switch local charset from %s to %s", local_charset,
      charset);

  } else {
    pr_trace_msg(trace_channel, 5, "attempting to use %s as local charset",
      charset);
  }

  if (encoding != NULL) {
    pr_trace_msg(trace_channel, 5,
      "attempting to switch encoding from %s to %s", encoding, codeset);

  } else {
    pr_trace_msg(trace_channel, 5, "attempting to use %s encoding", codeset);
  }

  /* Drop the old descriptors before installing the new charset pair. */
  (void) encode_free();

  local_charset = pstrdup(permanent_pool, charset);
  encoding = pstrdup(permanent_pool, codeset);

  res = encode_init();
  if (res < 0) {
    int xerrno = errno;

    /* Report the reason as well, matching pr_encode_enable_encoding(). */
    pr_trace_msg(trace_channel, 1,
      "failed to initialize encoding for local charset %s, encoding %s, "
      "disabling encoding: %s", charset, codeset, strerror(xerrno));
    local_charset = NULL;
    encoding = NULL;

    errno = xerrno;
  }

  return res;

#else
  errno = ENOSYS;
  return -1;
#endif /* !HAVE_ICONV_H */
}
502
pr_encode_is_utf8(const char * codeset)503 int pr_encode_is_utf8(const char *codeset) {
504 if (codeset == NULL) {
505 errno = EINVAL;
506 return -1;
507 }
508
509 if (strncasecmp(codeset, "UTF8", 5) == 0 ||
510 strncasecmp(codeset, "UTF-8", 6) == 0) {
511 return TRUE;
512 }
513
514 return FALSE;
515 }
516
pr_encode_supports_telnet_iac(void)517 int pr_encode_supports_telnet_iac(void) {
518 return supports_telnet_iac;
519 }
520
521 #endif /* PR_USE_NLS */
522