xref: /reactos/sdk/tools/unicode/string.c (revision 8a978a17)
1 /*
2  * Unicode string manipulation functions
3  *
4  * Copyright 2000 Alexandre Julliard
5  *
6  * This library is free software; you can redistribute it and/or
7  * modify it under the terms of the GNU Lesser General Public
8  * License as published by the Free Software Foundation; either
9  * version 2.1 of the License, or (at your option) any later version.
10  *
11  * This library is distributed in the hope that it will be useful,
12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
14  * Lesser General Public License for more details.
15  *
16  * You should have received a copy of the GNU Lesser General Public
17  * License along with this library; if not, write to the Free Software
18  * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA
19  */
20 
21 #include <assert.h>
22 #include <errno.h>
23 #include <limits.h>
24 #include <stdio.h>
25 
26 #include "wine/unicode.h"
27 
28 #ifdef __REACTOS__
29 #define min(a,b) (((a) < (b)) ? (a) : (b))
30 #endif
31 
32 int strcmpiW( const WCHAR *str1, const WCHAR *str2 )
33 {
34     for (;;)
35     {
36         int ret = tolowerW(*str1) - tolowerW(*str2);
37         if (ret || !*str1) return ret;
38         str1++;
39         str2++;
40     }
41 }
42 
43 int strncmpiW( const WCHAR *str1, const WCHAR *str2, int n )
44 {
45     int ret = 0;
46     for ( ; n > 0; n--, str1++, str2++)
47         if ((ret = tolowerW(*str1) - tolowerW(*str2)) || !*str1) break;
48     return ret;
49 }
50 
51 int memicmpW( const WCHAR *str1, const WCHAR *str2, int n )
52 {
53     int ret = 0;
54     for ( ; n > 0; n--, str1++, str2++)
55         if ((ret = tolowerW(*str1) - tolowerW(*str2))) break;
56     return ret;
57 }
58 
59 WCHAR *strstrW( const WCHAR *str, const WCHAR *sub )
60 {
61     while (*str)
62     {
63         const WCHAR *p1 = str, *p2 = sub;
64         while (*p1 && *p2 && *p1 == *p2) { p1++; p2++; }
65         if (!*p2) return (WCHAR *)str;
66         str++;
67     }
68     return NULL;
69 }
70 
71 /* strtolW and strtoulW implementation based on the GNU C library code */
72 /* Copyright (C) 1991,92,94,95,96,97,98,99,2000,2001 Free Software Foundation, Inc. */
73 
74 long int strtolW( const WCHAR *nptr, WCHAR **endptr, int base )
75 {
76   int negative;
77   register unsigned long int cutoff;
78   register unsigned int cutlim;
79   register unsigned long int i;
80   register const WCHAR *s;
81   register WCHAR c;
82   const WCHAR *save, *end;
83   int overflow;
84 
85   if (base < 0 || base == 1 || base > 36) return 0;
86 
87   save = s = nptr;
88 
89   /* Skip white space.  */
90   while (isspaceW (*s))
91     ++s;
92   if (!*s) goto noconv;
93 
94   /* Check for a sign.  */
95   negative = 0;
96   if (*s == '-')
97     {
98       negative = 1;
99       ++s;
100     }
101   else if (*s == '+')
102     ++s;
103 
104   /* Recognize number prefix and if BASE is zero, figure it out ourselves.  */
105   if (*s == '0')
106     {
107       if ((base == 0 || base == 16) && toupperW(s[1]) == 'X')
108 	{
109 	  s += 2;
110 	  base = 16;
111 	}
112       else if (base == 0)
113 	base = 8;
114     }
115   else if (base == 0)
116     base = 10;
117 
118   /* Save the pointer so we can check later if anything happened.  */
119   save = s;
120   end = NULL;
121 
122   cutoff = ULONG_MAX / (unsigned long int) base;
123   cutlim = ULONG_MAX % (unsigned long int) base;
124 
125   overflow = 0;
126   i = 0;
127   c = *s;
128   for (;c != '\0'; c = *++s)
129   {
130       if (s == end)
131           break;
132       if (c >= '0' && c <= '9')
133           c -= '0';
134       else if (isalphaW (c))
135           c = toupperW (c) - 'A' + 10;
136       else
137           break;
138       if ((int) c >= base)
139           break;
140       /* Check for overflow.  */
141       if (i > cutoff || (i == cutoff && c > cutlim))
142           overflow = 1;
143       else
144       {
145           i *= (unsigned long int) base;
146           i += c;
147       }
148   }
149 
150   /* Check if anything actually happened.  */
151   if (s == save)
152     goto noconv;
153 
154   /* Store in ENDPTR the address of one character
155      past the last character we converted.  */
156   if (endptr != NULL)
157     *endptr = (WCHAR *)s;
158 
159   /* Check for a value that is within the range of
160      `unsigned LONG int', but outside the range of `LONG int'.  */
161   if (overflow == 0
162       && i > (negative
163 	      ? -((unsigned long int) (LONG_MIN + 1)) + 1
164 	      : (unsigned long int) LONG_MAX))
165     overflow = 1;
166 
167   if (overflow)
168     {
169       errno = ERANGE;
170       return negative ? LONG_MIN : LONG_MAX;
171     }
172 
173   /* Return the result of the appropriate sign.  */
174   return negative ? -i : i;
175 
176 noconv:
177   /* We must handle a special case here: the base is 0 or 16 and the
178      first two characters are '0' and 'x', but the rest are not
179      hexadecimal digits.  This is no error case.  We return 0 and
180      ENDPTR points to the `x`.  */
181   if (endptr != NULL)
182     {
183       if (save - nptr >= 2 && toupperW (save[-1]) == 'X'
184 	  && save[-2] == '0')
185 	*endptr = (WCHAR *)&save[-1];
186       else
187 	/*  There was no number to convert.  */
188 	*endptr = (WCHAR *)nptr;
189     }
190 
191   return 0L;
192 }
193 
194 
195 unsigned long int strtoulW( const WCHAR *nptr, WCHAR **endptr, int base )
196 {
197   int negative;
198   register unsigned long int cutoff;
199   register unsigned int cutlim;
200   register unsigned long int i;
201   register const WCHAR *s;
202   register WCHAR c;
203   const WCHAR *save, *end;
204   int overflow;
205 
206   if (base < 0 || base == 1 || base > 36) return 0;
207 
208   save = s = nptr;
209 
210   /* Skip white space.  */
211   while (isspaceW (*s))
212     ++s;
213   if (!*s) goto noconv;
214 
215   /* Check for a sign.  */
216   negative = 0;
217   if (*s == '-')
218     {
219       negative = 1;
220       ++s;
221     }
222   else if (*s == '+')
223     ++s;
224 
225   /* Recognize number prefix and if BASE is zero, figure it out ourselves.  */
226   if (*s == '0')
227     {
228       if ((base == 0 || base == 16) && toupperW(s[1]) == 'X')
229 	{
230 	  s += 2;
231 	  base = 16;
232 	}
233       else if (base == 0)
234 	base = 8;
235     }
236   else if (base == 0)
237     base = 10;
238 
239   /* Save the pointer so we can check later if anything happened.  */
240   save = s;
241   end = NULL;
242 
243   cutoff = ULONG_MAX / (unsigned long int) base;
244   cutlim = ULONG_MAX % (unsigned long int) base;
245 
246   overflow = 0;
247   i = 0;
248   c = *s;
249   for (;c != '\0'; c = *++s)
250   {
251       if (s == end)
252           break;
253       if (c >= '0' && c <= '9')
254           c -= '0';
255       else if (isalphaW (c))
256           c = toupperW (c) - 'A' + 10;
257       else
258           break;
259       if ((int) c >= base)
260           break;
261       /* Check for overflow.  */
262       if (i > cutoff || (i == cutoff && c > cutlim))
263           overflow = 1;
264       else
265       {
266           i *= (unsigned long int) base;
267           i += c;
268       }
269   }
270 
271   /* Check if anything actually happened.  */
272   if (s == save)
273     goto noconv;
274 
275   /* Store in ENDPTR the address of one character
276      past the last character we converted.  */
277   if (endptr != NULL)
278     *endptr = (WCHAR *)s;
279 
280   if (overflow)
281     {
282       errno = ERANGE;
283       return ULONG_MAX;
284     }
285 
286   /* Return the result of the appropriate sign.  */
287   return negative ? -i : i;
288 
289 noconv:
290   /* We must handle a special case here: the base is 0 or 16 and the
291      first two characters are '0' and 'x', but the rest are not
292      hexadecimal digits.  This is no error case.  We return 0 and
293      ENDPTR points to the `x`.  */
294   if (endptr != NULL)
295     {
296       if (save - nptr >= 2 && toupperW (save[-1]) == 'X'
297 	  && save[-2] == '0')
298 	*endptr = (WCHAR *)&save[-1];
299       else
300 	/*  There was no number to convert.  */
301 	*endptr = (WCHAR *)nptr;
302     }
303 
304   return 0L;
305 }
306 
307 
308 /* format a WCHAR string according to a printf format; helper for vsnprintfW */
309 static size_t format_string( WCHAR *buffer, size_t len, const char *format, const WCHAR *str, int str_len )
310 {
311     size_t count = 0;
312     int i, left_align = 0, width = 0, max = 0;
313 
314     assert( *format == '%' );
315     format++; /* skip '%' */
316 
317     while (*format == '0' || *format == '+' || *format == '-' || *format == ' ' || *format == '#')
318     {
319         if (*format == '-') left_align = 1;
320         format++;
321     }
322 
323     while (isdigit(*format)) width = width * 10 + *format++ - '0';
324 
325     if (str_len == -1) str_len = strlenW( str );
326     if (*format == '.')
327     {
328         format++;
329         while (isdigit(*format)) max = max * 10 + *format++ - '0';
330         if (max > str_len) max = str_len;
331     }
332     else max = str_len;
333 
334     if (*format == 'h' || *format == 'l') format++;
335 
336     assert( *format == 's' );
337 
338     if (!left_align && width > max)
339     {
340         for (i = 0; i < width - max; i++)
341         {
342             if (count++ < len)
343                 *buffer++ = ' ';
344         }
345     }
346 
347     if (count < len)
348         memcpy( buffer, str, min( max, len - count ) * sizeof(WCHAR) );
349     count += max;
350     buffer += max;
351 
352     if (left_align && width > max)
353     {
354         for (i = 0; i < width - max; i++)
355         {
356             if (count++ < len)
357                 *buffer++ = ' ';
358         }
359     }
360     return count;
361 }
362 
363 int vsnprintfW(WCHAR *str, size_t len, const WCHAR *format, va_list valist)
364 {
365     unsigned int written = 0;
366     const WCHAR *iter = format;
367     char bufa[512], fmtbufa[64], *fmta;
368 
369     while (*iter)
370     {
371         while (*iter && *iter != '%')
372         {
373             if (written++ < len)
374                 *str++ = *iter;
375             iter++;
376         }
377         if (*iter == '%')
378         {
379             if (iter[1] == '%')
380             {
381                 if (written++ < len)
382                     *str++ = '%'; /* "%%"->'%' */
383                 iter += 2;
384                 continue;
385             }
386 
387             fmta = fmtbufa;
388             *fmta++ = *iter++;
389             while (*iter == '0' ||
390                    *iter == '+' ||
391                    *iter == '-' ||
392                    *iter == ' ' ||
393                    *iter == '*' ||
394                    *iter == '#')
395             {
396                 if (*iter == '*')
397                 {
398                     char *buffiter = bufa;
399                     int fieldlen = va_arg(valist, int);
400                     sprintf(buffiter, "%d", fieldlen);
401                     while (*buffiter)
402                         *fmta++ = *buffiter++;
403                 }
404                 else
405                     *fmta++ = *iter;
406                 iter++;
407             }
408 
409             while (isdigit(*iter))
410                 *fmta++ = *iter++;
411 
412             if (*iter == '.')
413             {
414                 *fmta++ = *iter++;
415                 if (*iter == '*')
416                 {
417                     char *buffiter = bufa;
418                     int fieldlen = va_arg(valist, int);
419                     sprintf(buffiter, "%d", fieldlen);
420                     while (*buffiter)
421                         *fmta++ = *buffiter++;
422                     iter++;
423                 }
424                 else
425                     while (isdigit(*iter))
426                         *fmta++ = *iter++;
427             }
428             if (*iter == 'h' || *iter == 'l')
429                 *fmta++ = *iter++;
430 
431             switch (*iter)
432             {
433             case 's':
434             {
435                 static const WCHAR none[] = { '(','n','u','l','l',')',0 };
436                 const WCHAR *wstr = va_arg(valist, const WCHAR *);
437                 size_t remaining = written < len ? len - written : 0;
438                 size_t count;
439 
440                 *fmta++ = 's';
441                 *fmta = 0;
442                 count = format_string( str, remaining, fmtbufa, wstr ? wstr : none, -1 );
443                 str += min( count, remaining );
444                 written += count;
445                 iter++;
446                 break;
447             }
448 
449             case 'c':
450             {
451                 WCHAR wstr;
452                 size_t remaining = written < len ? len - written : 0;
453                 size_t count;
454 
455                 wstr = va_arg(valist, int);
456                 *fmta++ = 's';
457                 *fmta = 0;
458                 count = format_string( str, remaining, fmtbufa, &wstr, 1 );
459                 str += min( count, remaining );
460                 written += count;
461                 iter++;
462                 break;
463             }
464 
465             default:
466             {
467                 /* For non wc types, use system sprintf and append to wide char output */
468                 /* FIXME: for unrecognised types, should ignore % when printing */
469                 char *bufaiter = bufa;
470                 if (*iter == 'p')
471 #ifdef __REACTOS__
472                     sprintf(bufaiter, "%p", va_arg(valist, void*));
473 #else
474                     sprintf(bufaiter, "%0*lX", 2 * (int)sizeof(void*),
475                             (unsigned long)va_arg(valist, void *));
476 #endif
477                 else
478                 {
479                     *fmta++ = *iter;
480                     *fmta = '\0';
481                     if (*iter == 'a' || *iter == 'A' ||
482                         *iter == 'e' || *iter == 'E' ||
483                         *iter == 'f' || *iter == 'F' ||
484                         *iter == 'g' || *iter == 'G')
485                         sprintf(bufaiter, fmtbufa, va_arg(valist, double));
486                     else
487                     {
488                         /* FIXME: On 32 bit systems this doesn't handle int 64's. */
489                         sprintf(bufaiter, fmtbufa, va_arg(valist, void *));
490                     }
491                 }
492                 while (*bufaiter)
493                 {
494                     if (written++ < len)
495                         *str++ = *bufaiter;
496                     bufaiter++;
497                 }
498                 iter++;
499                 break;
500             }
501             }
502         }
503     }
504     if (len)
505     {
506         if (written >= len)
507             str--;
508         *str++ = 0;
509     }
510 
511     /* FIXME: POSIX [v]snprintf() returns the equivalent of written, not -1, on short buffer. */
512     return written < len ? (int)written : -1;
513 }
514 
515 int vsprintfW( WCHAR *str, const WCHAR *format, va_list valist )
516 {
517     return vsnprintfW( str, INT_MAX, format, valist );
518 }
519 
520 int snprintfW( WCHAR *str, size_t len, const WCHAR *format, ...)
521 {
522     int retval;
523     va_list valist;
524     va_start(valist, format);
525     retval = vsnprintfW(str, len, format, valist);
526     va_end(valist);
527     return retval;
528 }
529 
530 int sprintfW( WCHAR *str, const WCHAR *format, ...)
531 {
532     int retval;
533     va_list valist;
534     va_start(valist, format);
535     retval = vsnprintfW(str, INT_MAX, format, valist);
536     va_end(valist);
537     return retval;
538 }
539