1 /* vi:ai:et:ts=8 sw=2
2 */
3 /*
4 * wzdftpd - a modular and cool ftp server
5 * Copyright (C) 2002-2004 Pierre Chifflier
6 *
7 * This program is free software; you can redistribute it and/or
8 * modify it under the terms of the GNU General Public License
9 * as published by the Free Software Foundation; either version 2
10 * of the License, or (at your option) any later version.
11 *
12 * This program is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 * GNU General Public License for more details.
16 *
17 * You should have received a copy of the GNU General Public License
18 * along with this program; if not, write to the Free Software
19 * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
20 *
21 * As a special exemption, Pierre Chifflier
22 * and other respective copyright holders give permission to link this program
23 * with OpenSSL, and distribute the resulting executable, without including
24 * the source code for OpenSSL in the source distribution.
25 */
26
27 #include "wzd_all.h"
28
29 #ifndef WZD_USE_PCH
30
31 #include <stdio.h>
32 #include <string.h>
33
34 #else /* WZD_USE_PCH */
35 #endif /*WZD_USE_PCH */
36
37 #ifdef HAVE_WCHAR_H
38 # include <wchar.h>
39 #endif
40
41 #ifdef HAVE_ICONV
42 # include <iconv.h>
43 #else
44 typedef void * iconv_t;
45 #endif
46
47 #if HAVE_LANGINFO_CODESET
48 # include <langinfo.h>
49 #endif
50
51 #ifdef WIN32
52 # include <winsock2.h>
53 #endif
54
55 #include "wzd_structs.h"
56
57 #include "wzd_log.h"
58 #include "wzd_utf8.h"
59
60 #include "wzd_debug.h"
61
62
/* Mode flag given as the second argument to dlopen() when the iconv
 * library is loaded at runtime (see _iconv_openlib).
 */
#ifdef BSD
#define DL_ARG DL_LAZY
#else
#define DL_ARG RTLD_NOW
#endif

/* String literal pasted in front of every symbol name passed to dlsym();
 * expands to nothing unless NEED_UNDERSCORE is defined.
 */
#ifdef NEED_UNDERSCORE
#define DL_PREFIX "_"
#else
#define DL_PREFIX
#endif

/* Codeset reported by charset_detect_local() in place of "ansi_x3.4-1968". */
#define DEFAULT_CODESET "ISO-8859-1"

/*typedef void * iconv_t;*/
/* Function-pointer types for the three iconv entry points used in this file. */
typedef size_t (*fn_iconv_t)(iconv_t, const char **, size_t *, char **, size_t *);
typedef iconv_t (*fn_iconv_open_t)(const char *, const char *);
typedef int (*fn_iconv_close_t)(iconv_t);

#ifdef WIN32
/* Handle returned by dlopen() for the iconv DLL; NULL while not loaded. */
static void * _iconv_lib_handle = NULL;
#endif
/* Resolved iconv entry points; all NULL until _iconv_openlib() binds them. */
static fn_iconv_t _iconv_fn_iconv = NULL;
static fn_iconv_open_t _iconv_fn_iconv_open = NULL;
static fn_iconv_close_t _iconv_fn_iconv_close = NULL;
88
89
/** \brief Bind the iconv entry points used by the charset conversion helpers.
 *
 * With HAVE_ICONV the three function pointers are set to the linked-in
 * iconv(), iconv_open() and iconv_close(). Without it, on WIN32,
 * libiconv-2.dll is loaded with dlopen() and the symbols are resolved with
 * dlsym(). If any of the three symbols cannot be resolved, the library is
 * closed again and every pointer is reset to NULL, so callers never see a
 * partially bound (or dangling) set of entry points.
 *
 * Does nothing unless HAVE_UTF8 is defined.
 */
static void _iconv_openlib(void)
{
#ifdef HAVE_UTF8

#ifdef HAVE_ICONV
  _iconv_fn_iconv = (fn_iconv_t)&iconv;
  _iconv_fn_iconv_open = (fn_iconv_open_t)&iconv_open;
  _iconv_fn_iconv_close = (fn_iconv_close_t)&iconv_close;
#else /* HAVE_ICONV */

#ifdef WIN32
  if (_iconv_lib_handle == NULL)
  {
    _iconv_lib_handle = dlopen("libiconv-2.dll", DL_ARG);
    if (_iconv_lib_handle == NULL) return;

    /** \bug I don't understand why Windows does not find 'libiconv' using
     * the name, I've checked with depends.exe: all API calls are good. Windows does
     * just not find it, except if I use the ordinal value, which is _very_ bad.
     * This clearly looks like a windows bug in GetProcAddress.
     * cd c:\HOMEDIR\wzdftpd\visual
     * c:\INSTALL\depends21_x86\depends.exe /pg:1 .\Debug\wzdftpd.exe -f wzd-win32.cfg
     */
    _iconv_fn_iconv = (fn_iconv_t)dlsym(_iconv_lib_handle, DL_PREFIX "libiconv");
    if (!_iconv_fn_iconv) /* try by ordinal */
      _iconv_fn_iconv = (fn_iconv_t)dlsym(_iconv_lib_handle, (char*)0x00000004);
    _iconv_fn_iconv_open = (fn_iconv_open_t)dlsym(_iconv_lib_handle, DL_PREFIX "libiconv_open");
    _iconv_fn_iconv_close = (fn_iconv_close_t)dlsym(_iconv_lib_handle, DL_PREFIX "libiconv_close");

    /* all three entry points are required; the original test checked
     * _iconv_fn_iconv twice and never looked at _iconv_fn_iconv_open */
    if ( !_iconv_fn_iconv || !_iconv_fn_iconv_open || !_iconv_fn_iconv_close )
    {
      dlclose(_iconv_lib_handle);
      _iconv_lib_handle = NULL;
      /* the addresses point into the library just unloaded: drop them */
      _iconv_fn_iconv = NULL;
      _iconv_fn_iconv_open = NULL;
      _iconv_fn_iconv_close = NULL;
    }
  }

#endif /* WIN32 */

#endif /* HAVE_ICONV */

#endif /* HAVE_UTF8 */
}
132
/** \brief Release the dynamically loaded iconv library, if any.
 *
 * Only has an effect on WIN32 and only when a library handle is held:
 * the handle is closed and the handle plus the three iconv function
 * pointers are reset to NULL.
 */
static void _iconv_closelib(void)
{
#ifdef WIN32
  if (!_iconv_lib_handle)
    return; /* nothing was loaded */

  dlclose(_iconv_lib_handle);
  _iconv_lib_handle = NULL;

  /* forget the entry points that lived in the library */
  _iconv_fn_iconv = NULL;
  _iconv_fn_iconv_open = NULL;
  _iconv_fn_iconv_close = NULL;
#endif /* WIN32 */
}
146
147
/* Name of the local charset; NULL until utf8_detect() stores one and
 * again after utf8_end() clears it. */
static const char *_local_charset = NULL;

/** \brief Accessor for the charset name stored by utf8_detect().
 *
 * \return the stored charset name, or NULL if none is set
 */
const char *local_charset(void)
{
  const char *current = _local_charset;

  return current;
}
154
155
/** \brief Guess the name of the charset used by the local system.
 *
 * Only active when HAVE_UTF8 is defined; otherwise NULL is returned.
 *
 * - non-WIN32 with HAVE_LANGINFO_CODESET: uses nl_langinfo(CODESET); the
 *   value "ansi_x3.4-1968" is replaced by DEFAULT_CODESET.
 * - non-WIN32 without it: reads LC_ALL, then LC_CTYPE, then LANG. If the
 *   value has the form language_COUNTRY.charset[@modifier], only the
 *   charset part is returned (copied to a static buffer); otherwise the
 *   raw value is returned unchanged.
 * - WIN32: returns "CP<n>" where n is the result of GetACP().
 *
 * \return a charset name (may point to static storage or to the
 *   environment), or NULL if nothing could be determined
 */
const char * charset_detect_local(void)
{
  const char * codeset = NULL;
#ifdef HAVE_UTF8

#if !(defined WIN32)

# if HAVE_LANGINFO_CODESET

  /* should be very common now */
  codeset = nl_langinfo (CODESET);
  if (codeset != NULL && strcasecmp(codeset,"ansi_x3.4-1968")==0)
    codeset = DEFAULT_CODESET;
  /* never hand NULL to a %s conversion */
  out_log(LEVEL_FLOOD,"nl_langinfo: %s\n",codeset ? codeset : "(null)");

# else

  static char env_codeset[64];
  const char * locale = NULL;
  const char * dot = NULL;
  size_t n = 0;

  /* on old systems, use getenv */
  locale = getenv("LC_ALL");
  if (locale == NULL || locale[0] == '\0')
  {
    locale = getenv("LC_CTYPE");
    if (locale == NULL || locale[0] == '\0')
      locale = getenv("LANG");
  }
  /* none of the variables may be set: never hand NULL to a %s conversion */
  out_log(LEVEL_FLOOD,"env: %s\n",locale ? locale : "(null)");

  /* The value looks like language_COUNTRY.charset[@modifier]; iconv only
   * understands the charset part (see `iconv --list`), so extract it.
   * Values without a '.' are passed through untouched.
   */
  codeset = locale;
  if (locale != NULL)
  {
    dot = strchr(locale, '.');
    if (dot != NULL)
    {
      n = strcspn(dot + 1, "@");
      if (n > 0 && n < sizeof(env_codeset))
      {
        memcpy(env_codeset, dot + 1, n);
        env_codeset[n] = '\0';
        codeset = env_codeset;
      }
    }
  }

# endif

#else /* !WIN32 */
  /* "CP" + at most 10 decimal digits + terminating NUL */
  static char buf[2 + 10 + 1];

  /* win32 has a function returning the locale's codepage as a number */
  sprintf (buf, "CP%u", GetACP());
  codeset = buf;

#endif /* !WIN32 */

#endif /* HAVE_UTF8 */
  return codeset;
}
204
/** \brief Convert a NUL-terminated string from a local charset to UTF-8.
 *
 * \param src            NUL-terminated input, encoded in \a local_charset
 * \param dst_utf8       output buffer
 * \param max_len        size of \a dst_utf8 in bytes, terminator included
 * \param local_charset  iconv name of the source encoding
 *
 * One byte of the output buffer is always reserved for the terminating
 * NUL, so on success \a dst_utf8 is a valid C string.
 *
 * \return 0 on success, -1 on error (iconv unavailable, invalid argument,
 *   unknown charset, conversion failure or output buffer too small),
 *   1 if built without HAVE_UTF8
 */
int local_charset_to_utf8(const char *src, char *dst_utf8, size_t max_len, const char *local_charset)
{
#ifdef HAVE_UTF8
  size_t nconv, size, avail;
  iconv_t cd;

  /* all three entry points are needed, iconv_open included */
  if ( !_iconv_fn_iconv || !_iconv_fn_iconv_open || !_iconv_fn_iconv_close ) return -1;
  if ( !src || !dst_utf8 || !local_charset || max_len == 0 ) return -1;

  cd = (*_iconv_fn_iconv_open)("UTF-8", local_charset);
  if (cd == (iconv_t)-1) {
    return -1;
  }

  size = strlen(src);
  avail = max_len - 1; /* keep room for the terminating NUL */

  /* conversion to multibyte; iconv advances src and dst_utf8 */
  nconv = (*_iconv_fn_iconv)(cd, &src, &size, &dst_utf8, &avail);
  (*_iconv_fn_iconv_close)(cd);
  if (nconv == (size_t)-1) {
    /* error during conversion, see errno */
    return -1;
  }

  /* terminate output string: UTF-8 is byte oriented, a single NUL byte
   * is the terminator (not a wchar_t) */
  *dst_utf8 = '\0';

  return 0;
#else /* HAVE_UTF8 */
  return 1;
#endif /* HAVE_UTF8 */
}
240
/** \brief Convert a NUL-terminated UTF-8 string to a local charset.
 *
 * \param src_utf8       NUL-terminated UTF-8 input
 * \param dst            output buffer
 * \param max_len        size of \a dst in bytes, terminator included
 * \param local_charset  iconv name of the target encoding
 *
 * One byte of the output buffer is always reserved for the terminating
 * NUL, so on success \a dst is a valid C string.
 *
 * \return 0 on success, -1 on error (iconv unavailable, invalid argument,
 *   unknown charset, conversion failure or output buffer too small),
 *   1 if built without HAVE_UTF8
 */
int utf8_to_local_charset(const char *src_utf8, char *dst, size_t max_len, const char *local_charset)
{
#ifdef HAVE_UTF8
  size_t nconv, size, avail;
  iconv_t cd;

  /* all three entry points are needed, iconv_open included */
  if ( !_iconv_fn_iconv || !_iconv_fn_iconv_open || !_iconv_fn_iconv_close ) return -1;
  if ( !src_utf8 || !dst || !local_charset || max_len == 0 ) return -1;

  cd = (*_iconv_fn_iconv_open)(local_charset, "UTF-8");
  if (cd == (iconv_t)-1) {
    return -1;
  }

  size = strlen(src_utf8);
  avail = max_len - 1; /* keep room for the terminating NUL */

  /* conversion from UTF-8; iconv advances src_utf8 and dst */
  nconv = (*_iconv_fn_iconv)(cd, &src_utf8, &size, &dst, &avail);
  if (nconv != (size_t)-1) {
    /* a NULL input asks iconv to emit the sequence that returns a
     * stateful target encoding to its initial shift state */
    nconv = (*_iconv_fn_iconv)(cd, NULL, NULL, &dst, &avail);
  }
  (*_iconv_fn_iconv_close)(cd);
  if (nconv == (size_t)-1) {
    /* error during conversion, see errno */
    return -1;
  }

  /* terminate output string */
  *dst = '\0';

  return 0;
#else /* HAVE_UTF8 */
  return 1;
#endif /* HAVE_UTF8 */
}
276
277
/** \brief Valid UTF-8 check
 *
 * Byte-level validator derived from the sample code in RFC2640.
 * Walks the first \a len bytes of \a buf and verifies that every lead
 * byte is followed by the right number of continuation bytes (10xxxxxx)
 * and that the encoding is not an overlong form. Lead bytes announcing
 * sequences of up to 6 bytes are accepted.
 *
 * \param buf bytes to check (need not be NUL-terminated)
 * \param len number of bytes to check
 *
 * \return 1 if input string is valid UTF-8, else 0
 */
int utf8_valid(const char *buf, size_t len)
{
  const unsigned char *cursor = (const unsigned char *)buf;
  const unsigned char *const stop = cursor + len;
  unsigned char range_mask = 0x00; /* bits of which one must be set in the 2nd byte */
  int pending = 0;                 /* continuation bytes still expected */

  for (; cursor != stop; cursor++)
  {
    const unsigned char octet = *cursor;

    if (pending != 0)
    {
      /* inside a multi-byte sequence: must be a continuation byte */
      if ((octet & 0xc0) != 0x80)
        return 0;

      if (range_mask != 0x00)
      {
        /* lead byte carried no payload bits: the 2nd byte must, or
         * the sequence is an overlong encoding */
        if ((octet & range_mask) == 0)
          return 0;
        range_mask = 0x00;
      }
      pending--;
      continue;
    }

    if ((octet & 0x80) == 0x00)
    {
      /* plain 1-byte character */
      continue;
    }

    if ((octet & 0xe0) == 0xc0)
    {
      /* 2-byte lead: 0xc0 and 0xc1 would be overlong */
      if ((octet & 0x1e) == 0)
        return 0;
      pending = 1;
    }
    else if ((octet & 0xf0) == 0xe0)
    {
      /* 3-byte lead */
      if ((octet & 0x0f) == 0)
        range_mask = 0x20;
      pending = 2;
    }
    else if ((octet & 0xf8) == 0xf0)
    {
      /* 4-byte lead */
      if ((octet & 0x07) == 0)
        range_mask = 0x30;
      pending = 3;
    }
    else if ((octet & 0xfc) == 0xf8)
    {
      /* 5-byte lead */
      if ((octet & 0x03) == 0)
        range_mask = 0x38;
      pending = 4;
    }
    else if ((octet & 0xfe) == 0xfc)
    {
      /* 6-byte lead */
      if ((octet & 0x01) == 0)
        range_mask = 0x3c;
      pending = 5;
    }
    else
    {
      /* stray continuation byte, 0xfe or 0xff */
      return 0;
    }
  }

  /* a truncated sequence at the end of the buffer is invalid */
  return pending == 0;
}
344
345
/** \brief Detect the local charset, bind iconv and flag UTF-8 capability.
 *
 * Stores the result of charset_detect_local() as the local charset and
 * calls _iconv_openlib(). CFG_OPT_UTF8_CAPABLE is set on \a config only
 * when a charset was found and all three iconv entry points (iconv,
 * iconv_open, iconv_close) are available; otherwise the option is cleared.
 *
 * \param config configuration whose CFG_OPT_UTF8_CAPABLE option is updated
 */
void utf8_detect(wzd_config_t * config)
{
  _local_charset = charset_detect_local();
  _iconv_openlib();

  /* the original test checked _iconv_fn_iconv twice and never looked at
   * _iconv_fn_iconv_open, which the conversion functions call first */
  if ( _local_charset && _iconv_fn_iconv && _iconv_fn_iconv_open && _iconv_fn_iconv_close )
  {
    out_log(LEVEL_INFO, "UTF-8 detected and enabled\n");
    CFG_SET_OPTION(config,CFG_OPT_UTF8_CAPABLE);
  } else {
    CFG_CLR_OPTION(config,CFG_OPT_UTF8_CAPABLE);
  }
}
359
/** \brief Shut down UTF-8 support.
 *
 * Forgets the stored local charset, calls _iconv_closelib(), clears
 * CFG_OPT_UTF8_CAPABLE on \a config and logs the change.
 *
 * \param config configuration whose CFG_OPT_UTF8_CAPABLE option is cleared
 */
void utf8_end(wzd_config_t * config)
{
  /* drop module state first */
  _local_charset = NULL;
  _iconv_closelib();

  /* then advertise that UTF-8 is no longer available */
  CFG_CLR_OPTION(config,CFG_OPT_UTF8_CAPABLE);
  out_log(LEVEL_INFO, "UTF-8 disabled\n");
}
367