1 /* vi:ai:et:ts=8 sw=2
2  */
3 /*
4  * wzdftpd - a modular and cool ftp server
5  * Copyright (C) 2002-2004  Pierre Chifflier
6  *
7  * This program is free software; you can redistribute it and/or
8  * modify it under the terms of the GNU General Public License
9  * as published by the Free Software Foundation; either version 2
10  * of the License, or (at your option) any later version.
11  *
12  * This program is distributed in the hope that it will be useful,
13  * but WITHOUT ANY WARRANTY; without even the implied warranty of
14  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15  * GNU General Public License for more details.
16  *
17  * You should have received a copy of the GNU General Public License
18  * along with this program; if not, write to the Free Software
19  * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.
20  *
21  * As a special exemption, Pierre Chifflier
22  * and other respective copyright holders give permission to link this program
23  * with OpenSSL, and distribute the resulting executable, without including
24  * the source code for OpenSSL in the source distribution.
25  */
26 
27 #include "wzd_all.h"
28 
29 #ifndef WZD_USE_PCH
30 
31 #include <stdio.h>
32 #include <string.h>
33 
34 #else /* WZD_USE_PCH */
35 #endif /*WZD_USE_PCH */
36 
37 #ifdef HAVE_WCHAR_H
38 # include <wchar.h>
39 #endif
40 
41 #ifdef HAVE_ICONV
42 # include <iconv.h>
43 #else
44 typedef void * iconv_t;
45 #endif
46 
47 #if HAVE_LANGINFO_CODESET
48 # include <langinfo.h>
49 #endif
50 
51 #ifdef WIN32
52 # include <winsock2.h>
53 #endif
54 
55 #include "wzd_structs.h"
56 
57 #include "wzd_log.h"
58 #include "wzd_utf8.h"
59 
60 #include "wzd_debug.h"
61 
62 
/* Mode flag handed to dlopen() when the iconv library is loaded at run time:
 * DL_LAZY when BSD is defined, RTLD_NOW otherwise.
 */
#ifdef BSD
#define	DL_ARG	DL_LAZY
#else
#define	DL_ARG	RTLD_NOW
#endif

/* String pasted in front of every symbol name given to dlsym(): "_" when
 * NEED_UNDERSCORE is defined, nothing otherwise.
 */
#ifdef NEED_UNDERSCORE
#define DL_PREFIX "_"
#else
#define DL_PREFIX
#endif

/* Codeset substituted when nl_langinfo() reports "ansi_x3.4-1968". */
#define DEFAULT_CODESET "ISO-8859-1"
76 
/* iconv_t itself comes from <iconv.h>, or from the fallback typedef near the
 * includes when HAVE_ICONV is not defined.
 */
/* Pointer types for the three iconv entry points, so they can be either bound
 * to the linked-in functions or resolved from a library at run time.
 */
typedef size_t (*fn_iconv_t)(iconv_t, const char **, size_t *, char **, size_t *);
typedef iconv_t (*fn_iconv_open_t)(const char *, const char *);
typedef int (*fn_iconv_close_t)(iconv_t);

#ifdef WIN32
/* Handle of the iconv library loaded with dlopen(); NULL while not loaded. */
static void * _iconv_lib_handle = NULL;
#endif
/* Entry points filled in by _iconv_openlib(); they start out NULL and are
 * tested before any conversion is attempted.
 */
static fn_iconv_t _iconv_fn_iconv = NULL;
static fn_iconv_open_t _iconv_fn_iconv_open = NULL;
static fn_iconv_close_t _iconv_fn_iconv_close = NULL;
88 
89 
/** \brief Bind the iconv entry points used by the conversion functions.
 *
 * When HAVE_ICONV is defined, the three function pointers are simply set to
 * the iconv functions linked into the program.
 *
 * Otherwise, on WIN32, libiconv-2.dll is loaded at run time (only if it is
 * not loaded yet) and the entry points are looked up in it. If the library
 * cannot be loaded the function returns without touching the pointers. If
 * any of the three symbols cannot be resolved, the library is unloaded again
 * and all three pointers are reset to NULL, so that callers never see a
 * partially initialized set or a pointer into an unloaded library.
 *
 * Does nothing when HAVE_UTF8 is not defined.
 */
static void _iconv_openlib(void)
{
#ifdef HAVE_UTF8

#ifdef HAVE_ICONV
  _iconv_fn_iconv = (fn_iconv_t)&iconv;
  _iconv_fn_iconv_open = (fn_iconv_open_t)&iconv_open;
  _iconv_fn_iconv_close = (fn_iconv_close_t)&iconv_close;
#else /* HAVE_ICONV */

#ifdef WIN32
  if (_iconv_lib_handle == NULL)
  {
    _iconv_lib_handle = dlopen("libiconv-2.dll", DL_ARG);
    if (_iconv_lib_handle == NULL) return;

    /** \bug Looking up 'libiconv' by name has been observed to fail on
     * windows even though the export is present (checked with depends.exe),
     * so the lookup falls back to the ordinal value. Relying on an ordinal
     * is fragile: it is tied to this particular build of the DLL.
     * To reproduce the check:
     * cd c:\HOMEDIR\wzdftpd\visual
     * c:\INSTALL\depends21_x86\depends.exe /pg:1 .\Debug\wzdftpd.exe -f wzd-win32.cfg
     */
    _iconv_fn_iconv =       (fn_iconv_t)dlsym(_iconv_lib_handle, DL_PREFIX "libiconv");
    if (!_iconv_fn_iconv) /* try by ordinal */
      _iconv_fn_iconv =     (fn_iconv_t)dlsym(_iconv_lib_handle, (char*)0x00000004);
    _iconv_fn_iconv_open =  (fn_iconv_open_t)dlsym(_iconv_lib_handle, DL_PREFIX "libiconv_open");
    _iconv_fn_iconv_close = (fn_iconv_close_t)dlsym(_iconv_lib_handle, DL_PREFIX "libiconv_close");

    /* all three entry points are required */
    if ( !_iconv_fn_iconv || !_iconv_fn_iconv_open || !_iconv_fn_iconv_close )
    {
      dlclose(_iconv_lib_handle);
      _iconv_lib_handle = NULL;
      /* do not keep pointers into the library that was just unloaded */
      _iconv_fn_iconv = NULL;
      _iconv_fn_iconv_open = NULL;
      _iconv_fn_iconv_close = NULL;
    }
  }

#endif /* WIN32 */

#endif /* HAVE_ICONV */

#endif /* HAVE_UTF8 */
}
132 
/** \brief Release the iconv library loaded at run time, if there is one.
 *
 * On WIN32, when a library handle is held, the library is unloaded and the
 * handle and the three iconv function pointers are reset to NULL.
 * In every other case the function does nothing.
 */
static void _iconv_closelib(void)
{
#ifdef WIN32
  /* nothing was loaded: nothing to release */
  if (_iconv_lib_handle == NULL)
    return;

  dlclose(_iconv_lib_handle);
  _iconv_lib_handle = NULL;

  /* the entry points lived in the library that was just unloaded */
  _iconv_fn_iconv_close = NULL;
  _iconv_fn_iconv_open = NULL;
  _iconv_fn_iconv = NULL;
#endif /* WIN32 */
}
146 
147 
/* Name of the local character set: assigned by utf8_detect(), cleared by
 * utf8_end(), NULL in between.
 */
static const char * _local_charset = NULL;

/** \brief Give read access to the stored local character set name.
 *
 * \return the stored name, or NULL when none is currently set
 */
const char * local_charset(void)
{
  const char * current = _local_charset;

  return current;
}
154 
155 
/** \brief Detect the name of the character set used by the local system.
 *
 * Nothing is detected (NULL is returned) when HAVE_UTF8 is not defined.
 *
 * On non-windows systems:
 *  - with HAVE_LANGINFO_CODESET, the name reported by nl_langinfo(CODESET)
 *    is used, "ansi_x3.4-1968" being replaced by DEFAULT_CODESET;
 *  - otherwise the first non-empty value of LC_ALL, LC_CTYPE, LANG is read
 *    (LANG is taken as is, even when empty). Such a value looks like
 *    language_COUNTRY.charset[@modifier]: only the charset part is returned
 *    when there is one, else the value is returned unchanged.
 * The result is copied into a static buffer, because the strings returned by
 * nl_langinfo() and getenv() are not guaranteed to stay valid for as long as
 * the caller keeps the pointer. A later call overwrites that buffer.
 *
 * On windows the name is "CP" followed by the number returned by GetACP(),
 * also stored in a static buffer.
 *
 * \return the character set name, or NULL if it could not be determined
 */
const char * charset_detect_local(void)
{
  const char * codeset = NULL;
#ifdef HAVE_UTF8

#if !(defined WIN32)
  static char codeset_buf[64];
  size_t len = 0;

# if HAVE_LANGINFO_CODESET

  /* should be very common now */
  codeset = nl_langinfo (CODESET);
  if (codeset != NULL && strcasecmp(codeset,"ansi_x3.4-1968")==0)
    codeset = DEFAULT_CODESET;
  /* never hand a NULL pointer to %s */
  out_log(LEVEL_FLOOD,"nl_langinfo: %s\n",codeset ? codeset : "(null)");
  if (codeset != NULL)
    len = strlen(codeset);

# else

  const char * locale = NULL;
  const char * dot = NULL;

  /* on old systems, use getenv */
  locale = getenv("LC_ALL");
  if (locale == NULL || locale[0] == '\0')
  {
    locale = getenv("LC_CTYPE");
    if (locale == NULL || locale[0] == '\0')
      locale = getenv("LANG");
  }
  /* never hand a NULL pointer to %s */
  out_log(LEVEL_FLOOD,"env: %s\n",locale ? locale : "(null)");

  if (locale != NULL)
  {
    /* default: the value as found, when it carries no charset part */
    codeset = locale;
    len = strlen(locale);

    /* translate language_COUNTRY.charset[@modifier] into something iconv
     * can understand (see `iconv --list`): keep what is between the '.'
     * and the optional '@'
     */
    dot = strchr(locale, '.');
    if (dot != NULL && dot[1] != '\0' && dot[1] != '@')
    {
      codeset = dot + 1;
      len = strcspn(codeset, "@");
    }
  }

# endif

  /* Keep a private copy. A name too long for the buffer is returned
   * uncopied (and, in the getenv case, with any trailing modifier).
   */
  if (codeset != NULL && len < sizeof(codeset_buf))
  {
    memcpy(codeset_buf, codeset, len);
    codeset_buf[len] = '\0';
    codeset = codeset_buf;
  }

#else /* WIN32 */
  static char buf[2 + 10 + 1];

  /* win32 has a function returning the locale's codepage as a number */
  sprintf (buf, "CP%u", GetACP());
  codeset = buf;

#endif /* !WIN32 */

#endif /* HAVE_UTF8 */
  return codeset;
}
204 
/** \brief Convert a string from a local character set to UTF-8.
 *
 * \param src NUL-terminated string encoded in \a local_charset
 * \param dst_utf8 buffer receiving the NUL-terminated UTF-8 result
 * \param max_len size of \a dst_utf8 in bytes, terminator included
 * \param local_charset iconv name of the encoding of \a src
 *
 * One byte of \a dst_utf8 is always kept for the terminating NUL, so a
 * result is either complete and terminated, or the call fails.
 *
 * \return 0 on success, -1 on error (iconv not available, invalid argument,
 * unknown character set, conversion failure or buffer too small; the content
 * of \a dst_utf8 is then unspecified), 1 if UTF-8 support is not compiled in
 */
int local_charset_to_utf8(const char *src, char *dst_utf8, size_t max_len, const char *local_charset)
{
#ifdef HAVE_UTF8
  size_t nconv, size, avail;
  iconv_t cd;

  /* the three entry points are needed: open, convert, close */
  if ( !_iconv_fn_iconv || !_iconv_fn_iconv_open || !_iconv_fn_iconv_close ) return -1;
  if ( !src || !dst_utf8 || !local_charset || max_len == 0 ) return -1;

  cd = (*_iconv_fn_iconv_open)("UTF-8", local_charset);
  if (cd == (iconv_t)-1) {
    return -1;
  }

  size = strlen(src);
  avail = max_len - 1; /* room for the terminating NUL is reserved */

  /* conversion to multibyte; iconv advances dst_utf8 past the bytes written */
  nconv = (*_iconv_fn_iconv)(cd, &src, &size, &dst_utf8, &avail);
  if (nconv == (size_t)-1) {
    /* error during conversion, see errno */
    (*_iconv_fn_iconv_close)(cd);
    return -1;
  }
  (*_iconv_fn_iconv_close)(cd);

  /* terminate output string: UTF-8 is a byte encoding, one NUL is enough */
  *dst_utf8 = '\0';

  return 0;
#else /* HAVE_UTF8 */
  return 1;
#endif /* HAVE_UTF8 */
}
240 
/** \brief Convert a string from UTF-8 to a local character set.
 *
 * \param src_utf8 NUL-terminated UTF-8 string
 * \param dst buffer receiving the NUL-terminated converted result
 * \param max_len size of \a dst in bytes, terminator included
 * \param local_charset iconv name of the encoding to produce
 *
 * One byte of \a dst is always kept for the terminating NUL, so a result is
 * either complete and terminated, or the call fails.
 *
 * \return 0 on success, -1 on error (iconv not available, invalid argument,
 * unknown character set, conversion failure or buffer too small; the content
 * of \a dst is then unspecified), 1 if UTF-8 support is not compiled in
 */
int utf8_to_local_charset(const char *src_utf8, char *dst, size_t max_len, const char *local_charset)
{
#ifdef HAVE_UTF8
  size_t nconv, size, avail;
  iconv_t cd;

  /* the three entry points are needed: open, convert, close */
  if ( !_iconv_fn_iconv || !_iconv_fn_iconv_open || !_iconv_fn_iconv_close ) return -1;
  if ( !src_utf8 || !dst || !local_charset || max_len == 0 ) return -1;

  cd = (*_iconv_fn_iconv_open)(local_charset, "UTF-8");
  if (cd == (iconv_t)-1) {
    return -1;
  }

  size = strlen(src_utf8);
  avail = max_len - 1; /* room for the terminating NUL is reserved */

  /* conversion from multibyte; iconv advances dst past the bytes written */
  nconv = (*_iconv_fn_iconv)(cd, &src_utf8, &size, &dst, &avail);
  if (nconv == (size_t)-1) {
    /* error during conversion, see errno */
    (*_iconv_fn_iconv_close)(cd);
    return -1;
  }
  (*_iconv_fn_iconv_close)(cd);

  /* terminate output string: the reserved byte is always available */
  *dst = '\0';

  return 0;
#else /* HAVE_UTF8 */
  return 1;
#endif /* HAVE_UTF8 */
}
276 
277 
/** \brief Valid UTF-8 check
 *
 * Algorithm taken from RFC2640: the buffer is scanned byte by byte, each
 * lead byte announcing how many continuation bytes (10xxxxxx) must follow.
 * Over-long encodings are refused: 2-byte sequences by looking at the lead
 * byte, longer ones by requiring some high bits in the first continuation
 * byte when the lead byte carries no payload bits.
 *
 * NOTE: the check follows the original UTF-8 definition, so 5 and 6 byte
 * sequences are accepted, and neither surrogates nor an upper bound on the
 * code point are checked.
 *
 * \param buf bytes to check (embedded NUL bytes count as valid 1-byte chars)
 * \param len number of bytes to check
 *
 * \return 1 if input string is valid UTF-8, else 0
 */
int utf8_valid(const char *buf, size_t len)
{
  const unsigned char *cursor = (const unsigned char *)buf;
  const unsigned char * const stop = cursor + len;
  unsigned char range_mask = 0x00; /* bits required in the next continuation byte */
  int pending = 0;                 /* continuation bytes still expected */

  for (; cursor != stop; cursor++)
  {
    const unsigned char byte = *cursor;

    if (pending > 0)
    {
      /* inside a sequence: only 10xxxxxx is acceptable */
      if ((byte & 0xc0) != 0x80)
        return 0;

      if (range_mask != 0x00)
      {
        /* first continuation byte of a possibly over-long sequence */
        if ((byte & range_mask) == 0)
          return 0;
        range_mask = 0x00;
      }

      pending--;
      continue;
    }

    /* start of a new character */
    if ((byte & 0x80) == 0x00)
    {
      continue;                   /* plain 1-byte character */
    }

    if ((byte & 0xe0) == 0xc0)    /* 2-byte lead */
    {
      if ((byte & 0x1e) == 0)     /* 0xc0 / 0xc1: over-long */
        return 0;
      pending = 1;
    }
    else if ((byte & 0xf0) == 0xe0) /* 3-byte lead */
    {
      if ((byte & 0x0f) == 0)
        range_mask = 0x20;
      pending = 2;
    }
    else if ((byte & 0xf8) == 0xf0) /* 4-byte lead */
    {
      if ((byte & 0x07) == 0)
        range_mask = 0x30;
      pending = 3;
    }
    else if ((byte & 0xfc) == 0xf8) /* 5-byte lead */
    {
      if ((byte & 0x03) == 0)
        range_mask = 0x38;
      pending = 4;
    }
    else if ((byte & 0xfe) == 0xfc) /* 6-byte lead */
    {
      if ((byte & 0x01) == 0)
        range_mask = 0x3c;
      pending = 5;
    }
    else
    {
      /* stray continuation byte, or 0xfe / 0xff */
      return 0;
    }
  }

  /* a sequence cut short by the end of the buffer is invalid */
  return pending == 0;
}
344 
345 
/** \brief Detect whether UTF-8 conversion can be offered and record it.
 *
 * Stores the local character set found by charset_detect_local(), binds the
 * iconv entry points, then sets CFG_OPT_UTF8_CAPABLE in \a config when a
 * local character set is known and all three iconv entry points (convert,
 * open and close) are available. The option is cleared otherwise.
 *
 * \param config configuration whose CFG_OPT_UTF8_CAPABLE option is updated
 */
void utf8_detect(wzd_config_t * config)
{
  _local_charset = charset_detect_local();
  _iconv_openlib();

  /* conversions call all three functions, so each of them must be bound */
  if ( _local_charset && _iconv_fn_iconv && _iconv_fn_iconv_open && _iconv_fn_iconv_close )
  {
    out_log(LEVEL_INFO, "UTF-8 detected and enabled\n");
    CFG_SET_OPTION(config,CFG_OPT_UTF8_CAPABLE);
  } else {
    CFG_CLR_OPTION(config,CFG_OPT_UTF8_CAPABLE);
  }
}
359 
/** \brief Turn UTF-8 support off.
 *
 * Releases the iconv library through _iconv_closelib(), forgets the stored
 * local character set, clears CFG_OPT_UTF8_CAPABLE in \a config and logs
 * the change.
 *
 * \param config configuration whose CFG_OPT_UTF8_CAPABLE option is cleared
 */
void utf8_end(wzd_config_t * config)
{
  /* these two steps are independent of each other */
  _iconv_closelib();
  _local_charset = NULL;

  CFG_CLR_OPTION(config,CFG_OPT_UTF8_CAPABLE);
  out_log(LEVEL_INFO, "UTF-8 disabled\n");
}
367