1 /* Copyright (C) Igor Vergeichik <iverg@mail.ru> 2001
2  * Copyright (C) Andrew Tridgell 2001
3  * Copyright (C) Simo Sorce 2001
4  * Copyright (C) Martin Pool 2003
5  *
6  * SPDX-License-Identifier: GPL-2.0-or-later
7  *
8  * This program is free software; you can redistribute it and/or
9  * modify it under the terms of the GNU General Public License
10  * as published by the Free Software Foundation; either version 2
11  * of the License, or (at your option) any later version.
12  *
13  * This program is distributed in the hope that it will be useful,
14  * but WITHOUT ANY WARRANTY; without even the implied warranty of
15  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
16  * GNU General Public License for more details.
17  *
18  * You should have received a copy of the GNU General Public License
19  * along with this program; if not, write to the Free Software
20  * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
21  */
22 
23 /**
24  * @file charcnv.c
25  * @brief Unix SMB/CIFS implementation: Character set conversion Extensions
26  *
27  * MODIFICATIONS: only those functions that are required for OpenVAS are
28  * retained, others are removed Modified By Preeti Subramanian
29  * <spreeti@secpod.com>
30  * 1. init_valid_table taken from samba/<source>/lib/util_unistr.c, using a
31  * dynamically created valid table only
32  * 2. valid_table taken from samba/<source>/lib/util_unistr.c
33  * 3. valid_table_use_unmap taken from samba/<source>/lib/util_unistr.c, BOOL is
34  * changed to bool
35  * 4. check_dos_char_slowly taken from samba/<source>/lib/util_unistr.c,
36  * smb_ucs2_t is changed to uint16
37  * 5. strlen_w taken from samba/<source>/lib/util_unistr.c, smb_ucs2_t is
38  * changed to uint16
39  * 6. strupper_m taken from samba/source/lib/util_str.c, and modified for
40  * OpenVAS
41  * 7. charset_name function changed for OpenVAS
42  * 8. in lazy_initialize_conv function, loading or generating the case handling
43  * tables removed
44  * 9. in init_iconv, init_doschar_table not required(removed)
45  */
#include "byteorder.h"
#include "iconv.h"
#include "proto.h"
#include "smb.h"

#include <gvm/base/logging.h>
#include <stdlib.h>
52 
53 #ifndef SMB_STRDUP
54 #define SMB_STRDUP(s) strdup (s)
55 #endif
56 
57 #ifndef uint8
58 #define uint8 uint8_t
59 #endif
60 
61 #ifndef uint16
62 #define uint16 uint16_t
63 #endif
64 
65 #ifndef _PUBLIC_
66 #define _PUBLIC_
67 #endif
68 
69 #undef G_LOG_DOMAIN
70 /**
71  * @brief GLib logging domain.
72  */
73 #define G_LOG_DOMAIN "lib  nasl"
74 
/* File-local boolean type and its two constants. */
typedef unsigned int bool;
#define False 0
#define True 1

/* Table allocated and filled by init_valid_table_ntlmssp(): one byte for
 * each of the 0x10000 possible 16-bit values. */
static uint8 *valid_table_ntlmssp;
/* Set to False by init_valid_table_ntlmssp() ("use free rather than
 * unmap"). */
static bool valid_table_use_unmap_ntlmssp;
/* Forward declaration; the definition appears further down in this file. */
size_t
convert_string_ntlmssp (charset_t from, charset_t to, void const *src,
                        size_t srclen, void *dest, size_t destlen,
                        bool allow_badcharcnv);
85 static int
check_dos_char_slowly_ntlmssp(uint16 c)86 check_dos_char_slowly_ntlmssp (uint16 c)
87 {
88   char buf[10];
89   uint16_t c2 = 0;
90   int len1, len2;
91 
92   len1 = convert_string_ntlmssp (CH_UTF16LE, CH_DOS, &c, 2, buf, sizeof (buf),
93                                  False);
94   if (len1 == 0)
95     {
96       return 0;
97     }
98   len2 = convert_string_ntlmssp (CH_DOS, CH_UTF16LE, buf, len1, &c2, 2, False);
99   if (len2 != 2)
100     {
101       return 0;
102     }
103   return (c == c2);
104 }
105 
/**
 * @brief Character written in place of input that could not be converted.
 *
 * The value is fixed; it could be made configurable if ever required.
 *
 * @return Always the underscore character.
 */
char
lp_failed_convert_char_ntlmssp (void)
{
  const char replacement = '_';

  return replacement;
}
113 
114 /**
115  * @file
116  *
117  * @brief Character-set conversion routines built on our iconv.
118  *
119  * @note Samba's internal character set (at least in the 3.0 series)
120  * is always the same as the one for the Unix filesystem.  It is
121  * <b>not</b> necessarily UTF-8 and may be different on machines that
122  * need i18n filenames to be compatible with Unix software.  It does
123  * have to be a superset of ASCII.  All multibyte sequences must start
124  * with a byte with the high bit set.
125  *
126  * @sa lib/iconv.c
127  */
128 
/* Conversion descriptors, indexed as [from][to]; opened by
 * init_iconv_ntlmssp() and looked up by convert_string_internal_ntlmssp(). */
static smb_iconv_t conv_handles_ntlmssp[NUM_CHARSETS][NUM_CHARSETS];
/* Set to True by init_iconv_ntlmssp() around its call to
 * init_valid_table_ntlmssp() and reset to False afterwards. */
static bool
  conv_silent_ntlmssp; /* Should we do a debug if the conversion fails ? */
132 
133 void
init_valid_table_ntlmssp(void)134 init_valid_table_ntlmssp (void)
135 {
136   static int mapped_file;
137   int i;
138   const char *allowed = ".!#$%&'()_-@^`~";
139 
140   if (mapped_file)
141     {
142       /* Can't unmap files, so stick with what we have */
143       return;
144     }
145 
146   /* we're using a dynamically created valid_table.
147    * It might need to be regenerated if the code page changed.
148    * We know that we're not using a mapped file, so we can
149    * free() the old one. */
150 
151   /* use free rather than unmap */
152   valid_table_use_unmap_ntlmssp = False;
153 
154   valid_table_ntlmssp = (uint8 *) SMB_MALLOC (0x10000);
155   for (i = 0; i < 128; i++)
156     {
157       valid_table_ntlmssp[i] = isalnum (i) || strchr (allowed, i);
158     }
159 
160   lazy_initialize_conv_ntlmssp ();
161 
162   for (; i < 0x10000; i++)
163     {
164       uint16_t c;
165       SSVAL (&c, 0, i);
166       valid_table_ntlmssp[i] = check_dos_char_slowly_ntlmssp (c);
167     }
168 }
169 
170 /*******************************************************************
171  *  Count the number of characters in a uint16_t string.
172  *  ********************************************************************/
173 
174 size_t
strlen_w_ntlmssp(const uint16 * src)175 strlen_w_ntlmssp (const uint16 *src)
176 {
177   size_t len;
178   uint16 c;
179 
180   for (len = 0; *(COPY_UCS2_CHAR (&c, src)); src++, len++)
181     {
182       ;
183     }
184 
185   return len;
186 }
187 
188 /**
189  *  * Return the name of a charset to give to iconv().
190  *   **/
191 static const char *
charset_name_ntlmssp(charset_t ch)192 charset_name_ntlmssp (charset_t ch)
193 {
194   const char *ret = NULL;
195 
196   if (ch == CH_UTF16LE)
197     ret = "UTF-16LE";
198   else if (ch == CH_UTF16BE)
199     ret = "UTF-16BE";
200   else if (ch == CH_UTF8)
201     ret = "UTF8";
202 
203 #if defined(HAVE_NL_LANGINFO) && defined(CODESET)
204   if (ret && !strcmp (ret, "LOCALE"))
205     {
206       const char *ln = NULL;
207 
208 #ifdef HAVE_SETLOCALE
209       setlocale (LC_ALL, "");
210 #endif
211       ln = nl_langinfo (CODESET);
212       if (ln)
213         {
214           /* Check whether the charset name is supported
215           by iconv */
216           smb_iconv_t handle = smb_iconv_open_ntlmssp (ln, "UCS-2LE");
217           if (handle == (smb_iconv_t) -1)
218             {
219               ln = NULL;
220             }
221           else
222             {
223               smb_iconv_close_ntlmssp (handle);
224             }
225         }
226       ret = ln;
227     }
228 #endif
229 
230   if (!ret || !*ret)
231     ret = "ASCII";
232   return ret;
233 }
234 
235 void
lazy_initialize_conv_ntlmssp(void)236 lazy_initialize_conv_ntlmssp (void)
237 {
238   static int initialized = False;
239 
240   if (!initialized)
241     {
242       initialized = True;
243       init_iconv_ntlmssp ();
244     }
245 }
246 
247 /**
248  * Initialize iconv conversion descriptors.
249  *
250  * This is called the first time it is needed, and also called again
251  * every time the configuration is reloaded, because the charset or
252  * codepage might have changed.
253  **/
254 void
init_iconv_ntlmssp(void)255 init_iconv_ntlmssp (void)
256 {
257   int c1, c2;
258   bool did_reload = False;
259 
260   /* so that charset_name() works we need to get the UNIX<->UCS2 going
261    first */
262   if (!conv_handles_ntlmssp[CH_UNIX][CH_UTF16LE])
263     conv_handles_ntlmssp[CH_UNIX][CH_UTF16LE] =
264       smb_iconv_open_ntlmssp (charset_name_ntlmssp (CH_UTF16LE), "ASCII");
265 
266   if (!conv_handles_ntlmssp[CH_UTF16LE][CH_UNIX])
267     conv_handles_ntlmssp[CH_UTF16LE][CH_UNIX] =
268       smb_iconv_open_ntlmssp ("ASCII", charset_name_ntlmssp (CH_UTF16LE));
269 
270   for (c1 = 0; c1 < NUM_CHARSETS; c1++)
271     {
272       for (c2 = 0; c2 < NUM_CHARSETS; c2++)
273         {
274           const char *n1 = charset_name_ntlmssp ((charset_t) c1);
275           const char *n2 = charset_name_ntlmssp ((charset_t) c2);
276           if (conv_handles_ntlmssp[c1][c2]
277               && strcmp (n1, conv_handles_ntlmssp[c1][c2]->from_name) == 0
278               && strcmp (n2, conv_handles_ntlmssp[c1][c2]->to_name) == 0)
279             continue;
280 
281           did_reload = True;
282 
283           if (conv_handles_ntlmssp[c1][c2])
284             smb_iconv_close_ntlmssp (conv_handles_ntlmssp[c1][c2]);
285 
286           conv_handles_ntlmssp[c1][c2] = smb_iconv_open_ntlmssp (n2, n1);
287           if (conv_handles_ntlmssp[c1][c2] == (smb_iconv_t) -1)
288             {
289               if (c1 != CH_UTF16LE && c1 != CH_UTF16BE)
290                 {
291                   n1 = "ASCII";
292                 }
293               if (c2 != CH_UTF16LE && c2 != CH_UTF16BE)
294                 {
295                   n2 = "ASCII";
296                 }
297               conv_handles_ntlmssp[c1][c2] = smb_iconv_open_ntlmssp (n2, n1);
298               if (!conv_handles_ntlmssp[c1][c2])
299                 {
300                   g_message ("init_iconv_ntlmssp: conv_handle"
301                              " initialization failed");
302                 }
303             }
304         }
305     }
306 
307   if (did_reload)
308     {
309       /* XXX: Does this really get called every time the dos
310        * codepage changes? */
311       /* XXX: Is the did_reload test too strict? */
312       conv_silent_ntlmssp = True;
313       init_valid_table_ntlmssp ();
314       conv_silent_ntlmssp = False;
315     }
316 }
317 
318 /**
319  * Convert string from one encoding to another, making error checking etc
320  * Slow path version - uses (slow) iconv.
321  *
322  * @param src pointer to source string (multibyte or singlebyte)
323  * @param srclen length of the source string in bytes
324  * @param dest pointer to destination string (multibyte or singlebyte)
325  * @param destlen maximal length allowed for string
326  * @param allow_bad_conv determines if a "best effort" conversion is acceptable
327  *(never returns errors)
328  * @returns the number of bytes occupied in the destination
329  *
330  * Ensure the srclen contains the terminating zero.
331  *
332  **/
333 
334 static size_t
convert_string_internal_ntlmssp(charset_t from,charset_t to,void const * src,size_t srclen,void * dest,size_t destlen,bool allow_bad_conv)335 convert_string_internal_ntlmssp (charset_t from, charset_t to, void const *src,
336                                  size_t srclen, void *dest, size_t destlen,
337                                  bool allow_bad_conv)
338 {
339   size_t i_len, o_len;
340   size_t retval;
341   const char *inbuf = (const char *) src;
342   char *outbuf = (char *) dest;
343   smb_iconv_t descriptor;
344 
345   lazy_initialize_conv_ntlmssp ();
346 
347   descriptor = conv_handles_ntlmssp[from][to];
348 
349   if (srclen == (size_t) -1)
350     {
351       if (from == CH_UTF16LE || from == CH_UTF16BE)
352         {
353           srclen = (strlen_w_ntlmssp ((const uint16 *) src) + 1) * 2;
354         }
355       else
356         {
357           srclen = strlen ((const char *) src) + 1;
358         }
359     }
360 
361   if (descriptor == (smb_iconv_t) -1 || descriptor == (smb_iconv_t) 0)
362     return (size_t) -1;
363 
364   i_len = srclen;
365   o_len = destlen;
366 
367 again:
368 
369   retval = smb_iconv_ntlmssp (descriptor, &inbuf, &i_len, &outbuf, &o_len);
370   if (retval == (size_t) -1)
371     {
372       switch (errno)
373         {
374         case EINVAL:
375           /* Incomplete multibyte sequence */
376           if (!conv_silent_ntlmssp)
377             if (allow_bad_conv)
378               goto use_as_is;
379           return (size_t) -1;
380         case E2BIG:
381           /* No more room */
382           break;
383         case EILSEQ:
384           /* Illegal multibyte sequence */
385           if (allow_bad_conv)
386             goto use_as_is;
387 
388           return (size_t) -1;
389         default:
390           /* unknown error */
391           return (size_t) -1;
392         }
393     }
394   return destlen - o_len;
395 
396 use_as_is:
397 
398   /*
399    * Conversion not supported. This is actually an error, but there are so
400    * many misconfigured iconv systems and smb.conf's out there we can't just
401    * fail. Do a very bad conversion instead.... JRA.
402    */
403 
404   {
405     if (o_len == 0 || i_len == 0)
406       return destlen - o_len;
407 
408     if (((from == CH_UTF16LE) || (from == CH_UTF16BE))
409         && ((to != CH_UTF16LE) && (to != CH_UTF16BE)))
410       {
411         /* Can't convert from utf16 any endian to multibyte.
412            Replace with the default fail char.
413          */
414         if (i_len < 2)
415           return destlen - o_len;
416         if (i_len >= 2)
417           {
418             *outbuf = lp_failed_convert_char_ntlmssp ();
419 
420             outbuf++;
421             o_len--;
422 
423             inbuf += 2;
424             i_len -= 2;
425           }
426 
427         if (o_len == 0 || i_len == 0)
428           return destlen - o_len;
429 
430         /* Keep trying with the next char... */
431         goto again;
432       }
433     else if (from != CH_UTF16LE && from != CH_UTF16BE && to == CH_UTF16LE)
434       {
435         /* Can't convert to UTF16LE - just widen by adding the
436            default fail char then zero.
437          */
438         if (o_len < 2)
439           return destlen - o_len;
440 
441         outbuf[0] = lp_failed_convert_char_ntlmssp ();
442         outbuf[1] = '\0';
443 
444         inbuf++;
445         i_len--;
446 
447         outbuf += 2;
448         o_len -= 2;
449 
450         if (o_len == 0 || i_len == 0)
451           return destlen - o_len;
452 
453         /* Keep trying with the next char... */
454         goto again;
455       }
456     else if (from != CH_UTF16LE && from != CH_UTF16BE && to != CH_UTF16LE
457              && to != CH_UTF16BE)
458       {
459         /* Failed multibyte to multibyte. Just copy the default fail char and
460            try again. */
461         outbuf[0] = lp_failed_convert_char_ntlmssp ();
462 
463         inbuf++;
464         i_len--;
465 
466         outbuf++;
467         o_len--;
468 
469         if (o_len == 0 || i_len == 0)
470           return destlen - o_len;
471 
472         /* Keep trying with the next char... */
473         goto again;
474       }
475     else
476       {
477         /* Keep compiler happy.... */
478         return destlen - o_len;
479       }
480   }
481 }
482 
483 /**
484  * Convert string from one encoding to another, making error checking etc
485  * Fast path version - handles ASCII first.
486  *
487  * @param src pointer to source string (multibyte or singlebyte)
488  * @param srclen length of the source string in bytes, or -1 for nul terminated.
489  * @param dest pointer to destination string (multibyte or singlebyte)
490  * @param destlen maximal length allowed for string - *NEVER* -1.
491  * @param allow_bad_conv determines if a "best effort" conversion is acceptable
492  *(never returns errors)
493  * @returns the number of bytes occupied in the destination
494  *
495  * Ensure the srclen contains the terminating zero.
496  *
497  * This function has been hand-tuned to provide a fast path.
498  * Don't change unless you really know what you are doing. JRA.
499  **/
500 
501 size_t
convert_string_ntlmssp(charset_t from,charset_t to,void const * src,size_t srclen,void * dest,size_t destlen,bool allow_bad_conv)502 convert_string_ntlmssp (charset_t from, charset_t to, void const *src,
503                         size_t srclen, void *dest, size_t destlen,
504                         bool allow_bad_conv)
505 {
506   /*
507    * NB. We deliberately don't do a strlen here if srclen == -1.
508    * This is very expensive over millions of calls and is taken
509    * care of in the slow path in convert_string_internal. JRA.
510    */
511 
512   if (srclen == 0)
513     return 0;
514 
515   if (from != CH_UTF16LE && from != CH_UTF16BE && to != CH_UTF16LE
516       && to != CH_UTF16BE)
517     {
518       const unsigned char *p = (const unsigned char *) src;
519       unsigned char *q = (unsigned char *) dest;
520       size_t slen = srclen;
521       size_t dlen = destlen;
522       unsigned char lastp = '\0';
523       size_t retval = 0;
524 
525       /* If all characters are ascii, fast path here. */
526       while (slen && dlen)
527         {
528           if ((lastp = *p) <= 0x7f)
529             {
530               *q++ = *p++;
531               if (slen != (size_t) -1)
532                 {
533                   slen--;
534                 }
535               dlen--;
536               retval++;
537               if (!lastp)
538                 break;
539             }
540           else
541             {
542 #ifdef BROKEN_UNICODE_COMPOSE_CHARACTERS
543               goto general_case;
544 #else
545               size_t ret = convert_string_internal_ntlmssp (
546                 from, to, p, slen, q, dlen, allow_bad_conv);
547               if (ret == (size_t) -1)
548                 {
549                   return ret;
550                 }
551               return retval + ret;
552 #endif
553             }
554         }
555       if (!dlen)
556         {
557           /* Even if we fast path we should note if we ran out of room. */
558           if (((slen != (size_t) -1) && slen)
559               || ((slen == (size_t) -1) && lastp))
560             {
561               errno = E2BIG;
562             }
563         }
564       return retval;
565     }
566   else if (from == CH_UTF16LE && to != CH_UTF16LE)
567     {
568       const unsigned char *p = (const unsigned char *) src;
569       unsigned char *q = (unsigned char *) dest;
570       size_t retval = 0;
571       size_t slen = srclen;
572       size_t dlen = destlen;
573       unsigned char lastp = '\0';
574 
575       /* If all characters are ascii, fast path here. */
576       while (((slen == (size_t) -1) || (slen >= 2)) && dlen)
577         {
578           if (((lastp = *p) <= 0x7f) && (p[1] == 0))
579             {
580               *q++ = *p;
581               if (slen != (size_t) -1)
582                 {
583                   slen -= 2;
584                 }
585               p += 2;
586               dlen--;
587               retval++;
588               if (!lastp)
589                 break;
590             }
591           else
592             {
593 #ifdef BROKEN_UNICODE_COMPOSE_CHARACTERS
594               goto general_case;
595 #else
596               return retval
597                      + convert_string_internal_ntlmssp (from, to, p, slen, q,
598                                                         dlen, allow_bad_conv);
599 #endif
600             }
601         }
602       if (!dlen)
603         {
604           /* Even if we fast path we should note if we ran out of room. */
605           if (((slen != (size_t) -1) && slen)
606               || ((slen == (size_t) -1) && lastp))
607             {
608               errno = E2BIG;
609             }
610         }
611       return retval;
612     }
613   else if (from != CH_UTF16LE && from != CH_UTF16BE && to == CH_UTF16LE)
614     {
615       const unsigned char *p = (const unsigned char *) src;
616       unsigned char *q = (unsigned char *) dest;
617       size_t retval = 0;
618       size_t slen = srclen;
619       size_t dlen = destlen;
620       unsigned char lastp = '\0';
621 
622       /* If all characters are ascii, fast path here. */
623       while (slen && (dlen >= 2))
624         {
625           if ((lastp = *p) <= 0x7F)
626             {
627               *q++ = *p++;
628               *q++ = '\0';
629               if (slen != (size_t) -1)
630                 {
631                   slen--;
632                 }
633               dlen -= 2;
634               retval += 2;
635               if (!lastp)
636                 break;
637             }
638           else
639             {
640 #ifdef BROKEN_UNICODE_COMPOSE_CHARACTERS
641               goto general_case;
642 #else
643               return retval
644                      + convert_string_internal_ntlmssp (from, to, p, slen, q,
645                                                         dlen, allow_bad_conv);
646 #endif
647             }
648         }
649       if (!dlen)
650         {
651           /* Even if we fast path we should note if we ran out of room. */
652           if (((slen != (size_t) -1) && slen)
653               || ((slen == (size_t) -1) && lastp))
654             {
655               errno = E2BIG;
656             }
657         }
658       return retval;
659     }
660 
661 #ifdef BROKEN_UNICODE_COMPOSE_CHARACTERS
662 general_case:
663 #endif
664   return convert_string_internal_ntlmssp (from, to, src, srclen, dest, destlen,
665                                           allow_bad_conv);
666 }
667