1 /* Copyright (C) Igor Vergeichik <iverg@mail.ru> 2001
2 * Copyright (C) Andrew Tridgell 2001
3 * Copyright (C) Simo Sorce 2001
4 * Copyright (C) Martin Pool 2003
5 *
6 * SPDX-License-Identifier: GPL-2.0-or-later
7 *
8 * This program is free software; you can redistribute it and/or
9 * modify it under the terms of the GNU General Public License
10 * as published by the Free Software Foundation; either version 2
11 * of the License, or (at your option) any later version.
12 *
13 * This program is distributed in the hope that it will be useful,
14 * but WITHOUT ANY WARRANTY; without even the implied warranty of
15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 * GNU General Public License for more details.
17 *
18 * You should have received a copy of the GNU General Public License
19 * along with this program; if not, write to the Free Software
20 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
21 */
22
23 /**
24 * @file charcnv.c
25 * @brief Unix SMB/CIFS implementation: Character set conversion Extensions
26 *
 * MODIFICATIONS: only the functions that are required for OpenVAS are
 * retained; all others have been removed. Modified by Preeti Subramanian
 * <spreeti@secpod.com>:
30 * 1. init_valid_table taken from samba/<source>/lib/util_unistr.c, using a
31 * dynamically created valid table only
32 * 2. valid_table taken from samba/<source>/lib/util_unistr.c
33 * 3. valid_table_use_unmap taken from samba/<source>/lib/util_unistr.c, BOOL is
34 * changed to bool
35 * 4. check_dos_char_slowly taken from samba/<source>/lib/util_unistr.c,
36 * smb_ucs2_t is changed to uint16
37 * 5. strlen_w taken from samba/<source>/lib/util_unistr.c, smb_ucs2_t is
38 * changed to uint16
39 * 6. strupper_m taken from samba/source/lib/util_str.c, and modified for
40 * OpenVAS
41 * 7. charset_name function changed for OpenVAS
42 * 8. in lazy_initialize_conv function, loading or generating the case handling
43 * tables removed
44 * 9. in init_iconv, init_doschar_table not required(removed)
45 */
#include "byteorder.h"
#include "iconv.h"
#include "proto.h"
#include "smb.h"

#include <gvm/base/logging.h>
#include <stdlib.h>
52
/* Fallback definitions, used only when the included headers have not
 * already provided a macro of the same name. */
#if !defined(SMB_STRDUP)
#define SMB_STRDUP(s) strdup (s)
#endif

#if !defined(uint8)
#define uint8 uint8_t
#endif

#if !defined(uint16)
#define uint16 uint16_t
#endif

/* Export marker; expands to nothing unless defined elsewhere. */
#if !defined(_PUBLIC_)
#define _PUBLIC_
#endif

#undef G_LOG_DOMAIN
/**
 * @brief GLib logging domain.
 */
#define G_LOG_DOMAIN "lib nasl"

/* Local boolean type and its two constants. */
typedef unsigned int bool;
#define False 0
#define True 1
79 static uint8 *valid_table_ntlmssp;
80 static bool valid_table_use_unmap_ntlmssp;
81 size_t
82 convert_string_ntlmssp (charset_t from, charset_t to, void const *src,
83 size_t srclen, void *dest, size_t destlen,
84 bool allow_badcharcnv);
85 static int
check_dos_char_slowly_ntlmssp(uint16 c)86 check_dos_char_slowly_ntlmssp (uint16 c)
87 {
88 char buf[10];
89 uint16_t c2 = 0;
90 int len1, len2;
91
92 len1 = convert_string_ntlmssp (CH_UTF16LE, CH_DOS, &c, 2, buf, sizeof (buf),
93 False);
94 if (len1 == 0)
95 {
96 return 0;
97 }
98 len2 = convert_string_ntlmssp (CH_DOS, CH_UTF16LE, buf, len1, &c2, 2, False);
99 if (len2 != 2)
100 {
101 return 0;
102 }
103 return (c == c2);
104 }
105
/**
 * @brief Character substituted for input that cannot be converted.
 *
 * The value is fixed; it could be made configurable if ever needed.
 *
 * @return Always the underscore character.
 */
char
lp_failed_convert_char_ntlmssp (void)
{
  static const char replacement = '_';

  return replacement;
}
113
114 /**
115 * @file
116 *
117 * @brief Character-set conversion routines built on our iconv.
118 *
119 * @note Samba's internal character set (at least in the 3.0 series)
120 * is always the same as the one for the Unix filesystem. It is
121 * <b>not</b> necessarily UTF-8 and may be different on machines that
122 * need i18n filenames to be compatible with Unix software. It does
123 * have to be a superset of ASCII. All multibyte sequences must start
124 * with a byte with the high bit set.
125 *
126 * @sa lib/iconv.c
127 */
128
129 static smb_iconv_t conv_handles_ntlmssp[NUM_CHARSETS][NUM_CHARSETS];
130 static bool
131 conv_silent_ntlmssp; /* Should we do a debug if the conversion fails ? */
132
133 void
init_valid_table_ntlmssp(void)134 init_valid_table_ntlmssp (void)
135 {
136 static int mapped_file;
137 int i;
138 const char *allowed = ".!#$%&'()_-@^`~";
139
140 if (mapped_file)
141 {
142 /* Can't unmap files, so stick with what we have */
143 return;
144 }
145
146 /* we're using a dynamically created valid_table.
147 * It might need to be regenerated if the code page changed.
148 * We know that we're not using a mapped file, so we can
149 * free() the old one. */
150
151 /* use free rather than unmap */
152 valid_table_use_unmap_ntlmssp = False;
153
154 valid_table_ntlmssp = (uint8 *) SMB_MALLOC (0x10000);
155 for (i = 0; i < 128; i++)
156 {
157 valid_table_ntlmssp[i] = isalnum (i) || strchr (allowed, i);
158 }
159
160 lazy_initialize_conv_ntlmssp ();
161
162 for (; i < 0x10000; i++)
163 {
164 uint16_t c;
165 SSVAL (&c, 0, i);
166 valid_table_ntlmssp[i] = check_dos_char_slowly_ntlmssp (c);
167 }
168 }
169
170 /*******************************************************************
171 * Count the number of characters in a uint16_t string.
172 * ********************************************************************/
173
174 size_t
strlen_w_ntlmssp(const uint16 * src)175 strlen_w_ntlmssp (const uint16 *src)
176 {
177 size_t len;
178 uint16 c;
179
180 for (len = 0; *(COPY_UCS2_CHAR (&c, src)); src++, len++)
181 {
182 ;
183 }
184
185 return len;
186 }
187
188 /**
189 * * Return the name of a charset to give to iconv().
190 * **/
191 static const char *
charset_name_ntlmssp(charset_t ch)192 charset_name_ntlmssp (charset_t ch)
193 {
194 const char *ret = NULL;
195
196 if (ch == CH_UTF16LE)
197 ret = "UTF-16LE";
198 else if (ch == CH_UTF16BE)
199 ret = "UTF-16BE";
200 else if (ch == CH_UTF8)
201 ret = "UTF8";
202
203 #if defined(HAVE_NL_LANGINFO) && defined(CODESET)
204 if (ret && !strcmp (ret, "LOCALE"))
205 {
206 const char *ln = NULL;
207
208 #ifdef HAVE_SETLOCALE
209 setlocale (LC_ALL, "");
210 #endif
211 ln = nl_langinfo (CODESET);
212 if (ln)
213 {
214 /* Check whether the charset name is supported
215 by iconv */
216 smb_iconv_t handle = smb_iconv_open_ntlmssp (ln, "UCS-2LE");
217 if (handle == (smb_iconv_t) -1)
218 {
219 ln = NULL;
220 }
221 else
222 {
223 smb_iconv_close_ntlmssp (handle);
224 }
225 }
226 ret = ln;
227 }
228 #endif
229
230 if (!ret || !*ret)
231 ret = "ASCII";
232 return ret;
233 }
234
235 void
lazy_initialize_conv_ntlmssp(void)236 lazy_initialize_conv_ntlmssp (void)
237 {
238 static int initialized = False;
239
240 if (!initialized)
241 {
242 initialized = True;
243 init_iconv_ntlmssp ();
244 }
245 }
246
247 /**
248 * Initialize iconv conversion descriptors.
249 *
250 * This is called the first time it is needed, and also called again
251 * every time the configuration is reloaded, because the charset or
252 * codepage might have changed.
253 **/
254 void
init_iconv_ntlmssp(void)255 init_iconv_ntlmssp (void)
256 {
257 int c1, c2;
258 bool did_reload = False;
259
260 /* so that charset_name() works we need to get the UNIX<->UCS2 going
261 first */
262 if (!conv_handles_ntlmssp[CH_UNIX][CH_UTF16LE])
263 conv_handles_ntlmssp[CH_UNIX][CH_UTF16LE] =
264 smb_iconv_open_ntlmssp (charset_name_ntlmssp (CH_UTF16LE), "ASCII");
265
266 if (!conv_handles_ntlmssp[CH_UTF16LE][CH_UNIX])
267 conv_handles_ntlmssp[CH_UTF16LE][CH_UNIX] =
268 smb_iconv_open_ntlmssp ("ASCII", charset_name_ntlmssp (CH_UTF16LE));
269
270 for (c1 = 0; c1 < NUM_CHARSETS; c1++)
271 {
272 for (c2 = 0; c2 < NUM_CHARSETS; c2++)
273 {
274 const char *n1 = charset_name_ntlmssp ((charset_t) c1);
275 const char *n2 = charset_name_ntlmssp ((charset_t) c2);
276 if (conv_handles_ntlmssp[c1][c2]
277 && strcmp (n1, conv_handles_ntlmssp[c1][c2]->from_name) == 0
278 && strcmp (n2, conv_handles_ntlmssp[c1][c2]->to_name) == 0)
279 continue;
280
281 did_reload = True;
282
283 if (conv_handles_ntlmssp[c1][c2])
284 smb_iconv_close_ntlmssp (conv_handles_ntlmssp[c1][c2]);
285
286 conv_handles_ntlmssp[c1][c2] = smb_iconv_open_ntlmssp (n2, n1);
287 if (conv_handles_ntlmssp[c1][c2] == (smb_iconv_t) -1)
288 {
289 if (c1 != CH_UTF16LE && c1 != CH_UTF16BE)
290 {
291 n1 = "ASCII";
292 }
293 if (c2 != CH_UTF16LE && c2 != CH_UTF16BE)
294 {
295 n2 = "ASCII";
296 }
297 conv_handles_ntlmssp[c1][c2] = smb_iconv_open_ntlmssp (n2, n1);
298 if (!conv_handles_ntlmssp[c1][c2])
299 {
300 g_message ("init_iconv_ntlmssp: conv_handle"
301 " initialization failed");
302 }
303 }
304 }
305 }
306
307 if (did_reload)
308 {
309 /* XXX: Does this really get called every time the dos
310 * codepage changes? */
311 /* XXX: Is the did_reload test too strict? */
312 conv_silent_ntlmssp = True;
313 init_valid_table_ntlmssp ();
314 conv_silent_ntlmssp = False;
315 }
316 }
317
318 /**
319 * Convert string from one encoding to another, making error checking etc
320 * Slow path version - uses (slow) iconv.
321 *
322 * @param src pointer to source string (multibyte or singlebyte)
323 * @param srclen length of the source string in bytes
324 * @param dest pointer to destination string (multibyte or singlebyte)
325 * @param destlen maximal length allowed for string
326 * @param allow_bad_conv determines if a "best effort" conversion is acceptable
327 *(never returns errors)
328 * @returns the number of bytes occupied in the destination
329 *
330 * Ensure the srclen contains the terminating zero.
331 *
332 **/
333
334 static size_t
convert_string_internal_ntlmssp(charset_t from,charset_t to,void const * src,size_t srclen,void * dest,size_t destlen,bool allow_bad_conv)335 convert_string_internal_ntlmssp (charset_t from, charset_t to, void const *src,
336 size_t srclen, void *dest, size_t destlen,
337 bool allow_bad_conv)
338 {
339 size_t i_len, o_len;
340 size_t retval;
341 const char *inbuf = (const char *) src;
342 char *outbuf = (char *) dest;
343 smb_iconv_t descriptor;
344
345 lazy_initialize_conv_ntlmssp ();
346
347 descriptor = conv_handles_ntlmssp[from][to];
348
349 if (srclen == (size_t) -1)
350 {
351 if (from == CH_UTF16LE || from == CH_UTF16BE)
352 {
353 srclen = (strlen_w_ntlmssp ((const uint16 *) src) + 1) * 2;
354 }
355 else
356 {
357 srclen = strlen ((const char *) src) + 1;
358 }
359 }
360
361 if (descriptor == (smb_iconv_t) -1 || descriptor == (smb_iconv_t) 0)
362 return (size_t) -1;
363
364 i_len = srclen;
365 o_len = destlen;
366
367 again:
368
369 retval = smb_iconv_ntlmssp (descriptor, &inbuf, &i_len, &outbuf, &o_len);
370 if (retval == (size_t) -1)
371 {
372 switch (errno)
373 {
374 case EINVAL:
375 /* Incomplete multibyte sequence */
376 if (!conv_silent_ntlmssp)
377 if (allow_bad_conv)
378 goto use_as_is;
379 return (size_t) -1;
380 case E2BIG:
381 /* No more room */
382 break;
383 case EILSEQ:
384 /* Illegal multibyte sequence */
385 if (allow_bad_conv)
386 goto use_as_is;
387
388 return (size_t) -1;
389 default:
390 /* unknown error */
391 return (size_t) -1;
392 }
393 }
394 return destlen - o_len;
395
396 use_as_is:
397
398 /*
399 * Conversion not supported. This is actually an error, but there are so
400 * many misconfigured iconv systems and smb.conf's out there we can't just
401 * fail. Do a very bad conversion instead.... JRA.
402 */
403
404 {
405 if (o_len == 0 || i_len == 0)
406 return destlen - o_len;
407
408 if (((from == CH_UTF16LE) || (from == CH_UTF16BE))
409 && ((to != CH_UTF16LE) && (to != CH_UTF16BE)))
410 {
411 /* Can't convert from utf16 any endian to multibyte.
412 Replace with the default fail char.
413 */
414 if (i_len < 2)
415 return destlen - o_len;
416 if (i_len >= 2)
417 {
418 *outbuf = lp_failed_convert_char_ntlmssp ();
419
420 outbuf++;
421 o_len--;
422
423 inbuf += 2;
424 i_len -= 2;
425 }
426
427 if (o_len == 0 || i_len == 0)
428 return destlen - o_len;
429
430 /* Keep trying with the next char... */
431 goto again;
432 }
433 else if (from != CH_UTF16LE && from != CH_UTF16BE && to == CH_UTF16LE)
434 {
435 /* Can't convert to UTF16LE - just widen by adding the
436 default fail char then zero.
437 */
438 if (o_len < 2)
439 return destlen - o_len;
440
441 outbuf[0] = lp_failed_convert_char_ntlmssp ();
442 outbuf[1] = '\0';
443
444 inbuf++;
445 i_len--;
446
447 outbuf += 2;
448 o_len -= 2;
449
450 if (o_len == 0 || i_len == 0)
451 return destlen - o_len;
452
453 /* Keep trying with the next char... */
454 goto again;
455 }
456 else if (from != CH_UTF16LE && from != CH_UTF16BE && to != CH_UTF16LE
457 && to != CH_UTF16BE)
458 {
459 /* Failed multibyte to multibyte. Just copy the default fail char and
460 try again. */
461 outbuf[0] = lp_failed_convert_char_ntlmssp ();
462
463 inbuf++;
464 i_len--;
465
466 outbuf++;
467 o_len--;
468
469 if (o_len == 0 || i_len == 0)
470 return destlen - o_len;
471
472 /* Keep trying with the next char... */
473 goto again;
474 }
475 else
476 {
477 /* Keep compiler happy.... */
478 return destlen - o_len;
479 }
480 }
481 }
482
483 /**
484 * Convert string from one encoding to another, making error checking etc
485 * Fast path version - handles ASCII first.
486 *
487 * @param src pointer to source string (multibyte or singlebyte)
488 * @param srclen length of the source string in bytes, or -1 for nul terminated.
489 * @param dest pointer to destination string (multibyte or singlebyte)
490 * @param destlen maximal length allowed for string - *NEVER* -1.
491 * @param allow_bad_conv determines if a "best effort" conversion is acceptable
492 *(never returns errors)
493 * @returns the number of bytes occupied in the destination
494 *
495 * Ensure the srclen contains the terminating zero.
496 *
497 * This function has been hand-tuned to provide a fast path.
498 * Don't change unless you really know what you are doing. JRA.
499 **/
500
501 size_t
convert_string_ntlmssp(charset_t from,charset_t to,void const * src,size_t srclen,void * dest,size_t destlen,bool allow_bad_conv)502 convert_string_ntlmssp (charset_t from, charset_t to, void const *src,
503 size_t srclen, void *dest, size_t destlen,
504 bool allow_bad_conv)
505 {
506 /*
507 * NB. We deliberately don't do a strlen here if srclen == -1.
508 * This is very expensive over millions of calls and is taken
509 * care of in the slow path in convert_string_internal. JRA.
510 */
511
512 if (srclen == 0)
513 return 0;
514
515 if (from != CH_UTF16LE && from != CH_UTF16BE && to != CH_UTF16LE
516 && to != CH_UTF16BE)
517 {
518 const unsigned char *p = (const unsigned char *) src;
519 unsigned char *q = (unsigned char *) dest;
520 size_t slen = srclen;
521 size_t dlen = destlen;
522 unsigned char lastp = '\0';
523 size_t retval = 0;
524
525 /* If all characters are ascii, fast path here. */
526 while (slen && dlen)
527 {
528 if ((lastp = *p) <= 0x7f)
529 {
530 *q++ = *p++;
531 if (slen != (size_t) -1)
532 {
533 slen--;
534 }
535 dlen--;
536 retval++;
537 if (!lastp)
538 break;
539 }
540 else
541 {
542 #ifdef BROKEN_UNICODE_COMPOSE_CHARACTERS
543 goto general_case;
544 #else
545 size_t ret = convert_string_internal_ntlmssp (
546 from, to, p, slen, q, dlen, allow_bad_conv);
547 if (ret == (size_t) -1)
548 {
549 return ret;
550 }
551 return retval + ret;
552 #endif
553 }
554 }
555 if (!dlen)
556 {
557 /* Even if we fast path we should note if we ran out of room. */
558 if (((slen != (size_t) -1) && slen)
559 || ((slen == (size_t) -1) && lastp))
560 {
561 errno = E2BIG;
562 }
563 }
564 return retval;
565 }
566 else if (from == CH_UTF16LE && to != CH_UTF16LE)
567 {
568 const unsigned char *p = (const unsigned char *) src;
569 unsigned char *q = (unsigned char *) dest;
570 size_t retval = 0;
571 size_t slen = srclen;
572 size_t dlen = destlen;
573 unsigned char lastp = '\0';
574
575 /* If all characters are ascii, fast path here. */
576 while (((slen == (size_t) -1) || (slen >= 2)) && dlen)
577 {
578 if (((lastp = *p) <= 0x7f) && (p[1] == 0))
579 {
580 *q++ = *p;
581 if (slen != (size_t) -1)
582 {
583 slen -= 2;
584 }
585 p += 2;
586 dlen--;
587 retval++;
588 if (!lastp)
589 break;
590 }
591 else
592 {
593 #ifdef BROKEN_UNICODE_COMPOSE_CHARACTERS
594 goto general_case;
595 #else
596 return retval
597 + convert_string_internal_ntlmssp (from, to, p, slen, q,
598 dlen, allow_bad_conv);
599 #endif
600 }
601 }
602 if (!dlen)
603 {
604 /* Even if we fast path we should note if we ran out of room. */
605 if (((slen != (size_t) -1) && slen)
606 || ((slen == (size_t) -1) && lastp))
607 {
608 errno = E2BIG;
609 }
610 }
611 return retval;
612 }
613 else if (from != CH_UTF16LE && from != CH_UTF16BE && to == CH_UTF16LE)
614 {
615 const unsigned char *p = (const unsigned char *) src;
616 unsigned char *q = (unsigned char *) dest;
617 size_t retval = 0;
618 size_t slen = srclen;
619 size_t dlen = destlen;
620 unsigned char lastp = '\0';
621
622 /* If all characters are ascii, fast path here. */
623 while (slen && (dlen >= 2))
624 {
625 if ((lastp = *p) <= 0x7F)
626 {
627 *q++ = *p++;
628 *q++ = '\0';
629 if (slen != (size_t) -1)
630 {
631 slen--;
632 }
633 dlen -= 2;
634 retval += 2;
635 if (!lastp)
636 break;
637 }
638 else
639 {
640 #ifdef BROKEN_UNICODE_COMPOSE_CHARACTERS
641 goto general_case;
642 #else
643 return retval
644 + convert_string_internal_ntlmssp (from, to, p, slen, q,
645 dlen, allow_bad_conv);
646 #endif
647 }
648 }
649 if (!dlen)
650 {
651 /* Even if we fast path we should note if we ran out of room. */
652 if (((slen != (size_t) -1) && slen)
653 || ((slen == (size_t) -1) && lastp))
654 {
655 errno = E2BIG;
656 }
657 }
658 return retval;
659 }
660
661 #ifdef BROKEN_UNICODE_COMPOSE_CHARACTERS
662 general_case:
663 #endif
664 return convert_string_internal_ntlmssp (from, to, src, srclen, dest, destlen,
665 allow_bad_conv);
666 }
667