xref: /reactos/sdk/tools/unicode/string.c (revision cc439606)
1 /*
2  * Unicode string manipulation functions
3  *
4  * Copyright 2000 Alexandre Julliard
5  *
6  * This library is free software; you can redistribute it and/or
7  * modify it under the terms of the GNU Lesser General Public
8  * License as published by the Free Software Foundation; either
9  * version 2.1 of the License, or (at your option) any later version.
10  *
11  * This library is distributed in the hope that it will be useful,
12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
14  * Lesser General Public License for more details.
15  *
16  * You should have received a copy of the GNU Lesser General Public
17  * License along with this library; if not, write to the Free Software
18  * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA
19  */
20 
21 #include <assert.h>
22 #include <errno.h>
23 #include <limits.h>
24 #include <stdio.h>
25 
26 #define WINE_UNICODE_INLINE  /* nothing */
27 #include "wine/unicode.h"
28 
29 #ifdef __REACTOS__
30 #define min(a,b) (((a) < (b)) ? (a) : (b))
31 #endif
32 
33 int strcmpiW( const WCHAR *str1, const WCHAR *str2 )
34 {
35     for (;;)
36     {
37         int ret = tolowerW(*str1) - tolowerW(*str2);
38         if (ret || !*str1) return ret;
39         str1++;
40         str2++;
41     }
42 }
43 
44 int strncmpiW( const WCHAR *str1, const WCHAR *str2, int n )
45 {
46     int ret = 0;
47     for ( ; n > 0; n--, str1++, str2++)
48         if ((ret = tolowerW(*str1) - tolowerW(*str2)) || !*str1) break;
49     return ret;
50 }
51 
52 int memicmpW( const WCHAR *str1, const WCHAR *str2, int n )
53 {
54     int ret = 0;
55     for ( ; n > 0; n--, str1++, str2++)
56         if ((ret = tolowerW(*str1) - tolowerW(*str2))) break;
57     return ret;
58 }
59 
60 WCHAR *strstrW( const WCHAR *str, const WCHAR *sub )
61 {
62     while (*str)
63     {
64         const WCHAR *p1 = str, *p2 = sub;
65         while (*p1 && *p2 && *p1 == *p2) { p1++; p2++; }
66         if (!*p2) return (WCHAR *)str;
67         str++;
68     }
69     return NULL;
70 }
71 
72 /* strtolW and strtoulW implementation based on the GNU C library code */
73 /* Copyright (C) 1991,92,94,95,96,97,98,99,2000,2001 Free Software Foundation, Inc. */
74 
75 long int strtolW( const WCHAR *nptr, WCHAR **endptr, int base )
76 {
77   int negative;
78   register unsigned long int cutoff;
79   register unsigned int cutlim;
80   register unsigned long int i;
81   register const WCHAR *s;
82   register WCHAR c;
83   const WCHAR *save, *end;
84   int overflow;
85 
86   if (base < 0 || base == 1 || base > 36) return 0;
87 
88   save = s = nptr;
89 
90   /* Skip white space.  */
91   while (isspaceW (*s))
92     ++s;
93   if (!*s) goto noconv;
94 
95   /* Check for a sign.  */
96   negative = 0;
97   if (*s == '-')
98     {
99       negative = 1;
100       ++s;
101     }
102   else if (*s == '+')
103     ++s;
104 
105   /* Recognize number prefix and if BASE is zero, figure it out ourselves.  */
106   if (*s == '0')
107     {
108       if ((base == 0 || base == 16) && toupperW(s[1]) == 'X')
109 	{
110 	  s += 2;
111 	  base = 16;
112 	}
113       else if (base == 0)
114 	base = 8;
115     }
116   else if (base == 0)
117     base = 10;
118 
119   /* Save the pointer so we can check later if anything happened.  */
120   save = s;
121   end = NULL;
122 
123   cutoff = ULONG_MAX / (unsigned long int) base;
124   cutlim = ULONG_MAX % (unsigned long int) base;
125 
126   overflow = 0;
127   i = 0;
128   c = *s;
129   for (;c != '\0'; c = *++s)
130   {
131       if (s == end)
132           break;
133       if (c >= '0' && c <= '9')
134           c -= '0';
135       else if (isalphaW (c))
136           c = toupperW (c) - 'A' + 10;
137       else
138           break;
139       if ((int) c >= base)
140           break;
141       /* Check for overflow.  */
142       if (i > cutoff || (i == cutoff && c > cutlim))
143           overflow = 1;
144       else
145       {
146           i *= (unsigned long int) base;
147           i += c;
148       }
149   }
150 
151   /* Check if anything actually happened.  */
152   if (s == save)
153     goto noconv;
154 
155   /* Store in ENDPTR the address of one character
156      past the last character we converted.  */
157   if (endptr != NULL)
158     *endptr = (WCHAR *)s;
159 
160   /* Check for a value that is within the range of
161      `unsigned LONG int', but outside the range of `LONG int'.  */
162   if (overflow == 0
163       && i > (negative
164 	      ? -((unsigned long int) (LONG_MIN + 1)) + 1
165 	      : (unsigned long int) LONG_MAX))
166     overflow = 1;
167 
168   if (overflow)
169     {
170       errno = ERANGE;
171       return negative ? LONG_MIN : LONG_MAX;
172     }
173 
174   /* Return the result of the appropriate sign.  */
175   return negative ? -i : i;
176 
177 noconv:
178   /* We must handle a special case here: the base is 0 or 16 and the
179      first two characters are '0' and 'x', but the rest are not
180      hexadecimal digits.  This is no error case.  We return 0 and
181      ENDPTR points to the `x`.  */
182   if (endptr != NULL)
183     {
184       if (save - nptr >= 2 && toupperW (save[-1]) == 'X'
185 	  && save[-2] == '0')
186 	*endptr = (WCHAR *)&save[-1];
187       else
188 	/*  There was no number to convert.  */
189 	*endptr = (WCHAR *)nptr;
190     }
191 
192   return 0L;
193 }
194 
195 
196 unsigned long int strtoulW( const WCHAR *nptr, WCHAR **endptr, int base )
197 {
198   int negative;
199   register unsigned long int cutoff;
200   register unsigned int cutlim;
201   register unsigned long int i;
202   register const WCHAR *s;
203   register WCHAR c;
204   const WCHAR *save, *end;
205   int overflow;
206 
207   if (base < 0 || base == 1 || base > 36) return 0;
208 
209   save = s = nptr;
210 
211   /* Skip white space.  */
212   while (isspaceW (*s))
213     ++s;
214   if (!*s) goto noconv;
215 
216   /* Check for a sign.  */
217   negative = 0;
218   if (*s == '-')
219     {
220       negative = 1;
221       ++s;
222     }
223   else if (*s == '+')
224     ++s;
225 
226   /* Recognize number prefix and if BASE is zero, figure it out ourselves.  */
227   if (*s == '0')
228     {
229       if ((base == 0 || base == 16) && toupperW(s[1]) == 'X')
230 	{
231 	  s += 2;
232 	  base = 16;
233 	}
234       else if (base == 0)
235 	base = 8;
236     }
237   else if (base == 0)
238     base = 10;
239 
240   /* Save the pointer so we can check later if anything happened.  */
241   save = s;
242   end = NULL;
243 
244   cutoff = ULONG_MAX / (unsigned long int) base;
245   cutlim = ULONG_MAX % (unsigned long int) base;
246 
247   overflow = 0;
248   i = 0;
249   c = *s;
250   for (;c != '\0'; c = *++s)
251   {
252       if (s == end)
253           break;
254       if (c >= '0' && c <= '9')
255           c -= '0';
256       else if (isalphaW (c))
257           c = toupperW (c) - 'A' + 10;
258       else
259           break;
260       if ((int) c >= base)
261           break;
262       /* Check for overflow.  */
263       if (i > cutoff || (i == cutoff && c > cutlim))
264           overflow = 1;
265       else
266       {
267           i *= (unsigned long int) base;
268           i += c;
269       }
270   }
271 
272   /* Check if anything actually happened.  */
273   if (s == save)
274     goto noconv;
275 
276   /* Store in ENDPTR the address of one character
277      past the last character we converted.  */
278   if (endptr != NULL)
279     *endptr = (WCHAR *)s;
280 
281   if (overflow)
282     {
283       errno = ERANGE;
284       return ULONG_MAX;
285     }
286 
287   /* Return the result of the appropriate sign.  */
288   return negative ? -i : i;
289 
290 noconv:
291   /* We must handle a special case here: the base is 0 or 16 and the
292      first two characters are '0' and 'x', but the rest are not
293      hexadecimal digits.  This is no error case.  We return 0 and
294      ENDPTR points to the `x`.  */
295   if (endptr != NULL)
296     {
297       if (save - nptr >= 2 && toupperW (save[-1]) == 'X'
298 	  && save[-2] == '0')
299 	*endptr = (WCHAR *)&save[-1];
300       else
301 	/*  There was no number to convert.  */
302 	*endptr = (WCHAR *)nptr;
303     }
304 
305   return 0L;
306 }
307 
308 
309 /* format a WCHAR string according to a printf format; helper for vsnprintfW */
310 static size_t format_string( WCHAR *buffer, size_t len, const char *format, const WCHAR *str, int str_len )
311 {
312     size_t count = 0;
313     int i, left_align = 0, width = 0, max = 0;
314 
315     assert( *format == '%' );
316     format++; /* skip '%' */
317 
318     while (*format == '0' || *format == '+' || *format == '-' || *format == ' ' || *format == '#')
319     {
320         if (*format == '-') left_align = 1;
321         format++;
322     }
323 
324     while (isdigit(*format)) width = width * 10 + *format++ - '0';
325 
326     if (str_len == -1) str_len = strlenW( str );
327     if (*format == '.')
328     {
329         format++;
330         while (isdigit(*format)) max = max * 10 + *format++ - '0';
331         if (max > str_len) max = str_len;
332     }
333     else max = str_len;
334 
335     if (*format == 'h' || *format == 'l') format++;
336 
337     assert( *format == 's' );
338 
339     if (!left_align && width > max)
340     {
341         for (i = 0; i < width - max; i++)
342         {
343             if (count++ < len)
344                 *buffer++ = ' ';
345         }
346     }
347 
348     if (count < len)
349         memcpy( buffer, str, min( max, len - count ) * sizeof(WCHAR) );
350     count += max;
351     buffer += max;
352 
353     if (left_align && width > max)
354     {
355         for (i = 0; i < width - max; i++)
356         {
357             if (count++ < len)
358                 *buffer++ = ' ';
359         }
360     }
361     return count;
362 }
363 
364 int vsnprintfW(WCHAR *str, size_t len, const WCHAR *format, va_list valist)
365 {
366     unsigned int written = 0;
367     const WCHAR *iter = format;
368     char bufa[512], fmtbufa[64], *fmta;
369 
370     while (*iter)
371     {
372         while (*iter && *iter != '%')
373         {
374             if (written++ < len)
375                 *str++ = *iter;
376             iter++;
377         }
378         if (*iter == '%')
379         {
380             if (iter[1] == '%')
381             {
382                 if (written++ < len)
383                     *str++ = '%'; /* "%%"->'%' */
384                 iter += 2;
385                 continue;
386             }
387 
388             fmta = fmtbufa;
389             *fmta++ = *iter++;
390             while (*iter == '0' ||
391                    *iter == '+' ||
392                    *iter == '-' ||
393                    *iter == ' ' ||
394                    *iter == '*' ||
395                    *iter == '#')
396             {
397                 if (*iter == '*')
398                 {
399                     char *buffiter = bufa;
400                     int fieldlen = va_arg(valist, int);
401                     sprintf(buffiter, "%d", fieldlen);
402                     while (*buffiter)
403                         *fmta++ = *buffiter++;
404                 }
405                 else
406                     *fmta++ = *iter;
407                 iter++;
408             }
409 
410             while (isdigit(*iter))
411                 *fmta++ = *iter++;
412 
413             if (*iter == '.')
414             {
415                 *fmta++ = *iter++;
416                 if (*iter == '*')
417                 {
418                     char *buffiter = bufa;
419                     int fieldlen = va_arg(valist, int);
420                     sprintf(buffiter, "%d", fieldlen);
421                     while (*buffiter)
422                         *fmta++ = *buffiter++;
423                     iter++;
424                 }
425                 else
426                     while (isdigit(*iter))
427                         *fmta++ = *iter++;
428             }
429             if (*iter == 'h' || *iter == 'l')
430                 *fmta++ = *iter++;
431 
432             switch (*iter)
433             {
434             case 's':
435             {
436                 static const WCHAR none[] = { '(','n','u','l','l',')',0 };
437                 const WCHAR *wstr = va_arg(valist, const WCHAR *);
438                 size_t remaining = written < len ? len - written : 0;
439                 size_t count;
440 
441                 *fmta++ = 's';
442                 *fmta = 0;
443                 count = format_string( str, remaining, fmtbufa, wstr ? wstr : none, -1 );
444                 str += min( count, remaining );
445                 written += count;
446                 iter++;
447                 break;
448             }
449 
450             case 'c':
451             {
452                 WCHAR wstr;
453                 size_t remaining = written < len ? len - written : 0;
454                 size_t count;
455 
456                 wstr = va_arg(valist, int);
457                 *fmta++ = 's';
458                 *fmta = 0;
459                 count = format_string( str, remaining, fmtbufa, &wstr, 1 );
460                 str += min( count, remaining );
461                 written += count;
462                 iter++;
463                 break;
464             }
465 
466             default:
467             {
468                 /* For non wc types, use system sprintf and append to wide char output */
469                 /* FIXME: for unrecognised types, should ignore % when printing */
470                 char *bufaiter = bufa;
471                 if (*iter == 'p')
472                     sprintf(bufaiter, "%0*lX", 2 * (int)sizeof(void*),
473                             (unsigned long)va_arg(valist, void *));
474                 else
475                 {
476                     *fmta++ = *iter;
477                     *fmta = '\0';
478                     if (*iter == 'a' || *iter == 'A' ||
479                         *iter == 'e' || *iter == 'E' ||
480                         *iter == 'f' || *iter == 'F' ||
481                         *iter == 'g' || *iter == 'G')
482                         sprintf(bufaiter, fmtbufa, va_arg(valist, double));
483                     else
484                     {
485                         /* FIXME: On 32 bit systems this doesn't handle int 64's. */
486                         sprintf(bufaiter, fmtbufa, va_arg(valist, void *));
487                     }
488                 }
489                 while (*bufaiter)
490                 {
491                     if (written++ < len)
492                         *str++ = *bufaiter;
493                     bufaiter++;
494                 }
495                 iter++;
496                 break;
497             }
498             }
499         }
500     }
501     if (len)
502     {
503         if (written >= len)
504             str--;
505         *str++ = 0;
506     }
507 
508     /* FIXME: POSIX [v]snprintf() returns the equivalent of written, not -1, on short buffer. */
509     return written < len ? (int)written : -1;
510 }
511 
512 int vsprintfW( WCHAR *str, const WCHAR *format, va_list valist )
513 {
514     return vsnprintfW( str, INT_MAX, format, valist );
515 }
516 
517 int snprintfW( WCHAR *str, size_t len, const WCHAR *format, ...)
518 {
519     int retval;
520     va_list valist;
521     va_start(valist, format);
522     retval = vsnprintfW(str, len, format, valist);
523     va_end(valist);
524     return retval;
525 }
526 
527 int sprintfW( WCHAR *str, const WCHAR *format, ...)
528 {
529     int retval;
530     va_list valist;
531     va_start(valist, format);
532     retval = vsnprintfW(str, INT_MAX, format, valist);
533     va_end(valist);
534     return retval;
535 }
536