1 /* sscanf function for S-Lang */
2 /* Copyright (c) 1999, 2001, 2002 John E. Davis
3  * This file is part of the S-Lang library.
4  *
5  * You may distribute under the terms of either the GNU General Public
6  * License or the Perl Artistic License.
7  */
8 
9 #include "slinclud.h"
10 #include <ctype.h>
11 #include <math.h>
12 #include <errno.h>
13 
14 #include "slang.h"
15 #include "_slang.h"
16 
/* Return a pointer to the first character of S for which isspace() is
 * false.  S must be NUL-terminated: the terminating NUL is not whitespace,
 * so the scan stops there at the latest.
 */
static char *skip_whitespace (char *s)
{
   /* The <ctype.h> classifiers are only defined for values representable
    * as unsigned char (or EOF), so a plain char that may be negative has
    * to be converted before it is tested.
    */
   while (isspace ((unsigned char) *s))
     s++;

   return s;
}
24 
/* Build a digit lookup table for the given BASE.
 *
 * After the call, MAP[c] holds the numeric value of character c if c is a
 * valid digit, and 0xFF otherwise.  BASE 8 accepts '0'-'7' and BASE 10
 * accepts '0'-'9'.  Any other BASE additionally accepts the letters
 * 'A'-'F' and 'a'-'f' with the values 10-15.
 */
static void init_map (unsigned char map[256], int base)
{
   static const char upper_hex[] = "ABCDEF";
   static const char lower_hex[] = "abcdef";
   unsigned int num_decimal;
   unsigned int i;

   /* Everything is "not a digit" until proven otherwise. */
   memset ((char *) map, 0xFF, 256);

   num_decimal = (base == 8) ? 8 : 10;
   for (i = 0; i < num_decimal; i++)
     map['0' + i] = (unsigned char) i;

   if ((base == 8) || (base == 10))
     return;

   for (i = 0; i < 6; i++)
     {
        unsigned char value = (unsigned char) (10 + i);

        map[(unsigned char) upper_hex[i]] = value;
        map[(unsigned char) lower_hex[i]] = value;
     }
}
42 
/* Consume an optional leading '+' or '-' from the text [S, SMAX).
 *
 * *SIGN is set to -1 when a '-' is consumed and to 1 otherwise.  A sign
 * character is consumed only when at least one more character follows it
 * inside the range, so a sign standing alone at the end is left untouched.
 * Returns the position at which the caller should continue scanning.
 */
static char *get_sign (char *s, char *smax, int *sign)
{
   *sign = 1;

   if (smax <= s + 1)
     return s;

   if (*s == '-')
     *sign = -1;
   else if (*s != '+')
     return s;

   return s + 1;
}
57 
58 
/* Parse an optionally signed integer from the text [*SP, SMAX).
 *
 * MAP translates a character into its digit value, or 0xFF when the
 * character is not a digit of BASE (see init_map).
 *
 * Returns 1 and stores the result in *NP when at least one digit was read,
 * otherwise returns 0 and leaves *NP untouched.  In both cases *SP is moved
 * to the point where scanning stopped; on failure that is just past any
 * sign that get_sign consumed.
 *
 * The value is accumulated in unsigned arithmetic so that inputs too large
 * for a long wrap around in a well-defined way instead of overflowing a
 * signed type.  parse_ulong depends on that wraparound to read values
 * above LONG_MAX.
 */
static int parse_long (char **sp, char *smax, long *np,
                       long base, unsigned char map[256])
{
   char *s, *first_digit;
   unsigned long acc;
   int sign;

   s = first_digit = get_sign (*sp, smax, &sign);

   acc = 0;
   while (s < smax)
     {
        unsigned char value = map [(unsigned char) *s];

        if (value == 0xFF)
          break;

        acc = (unsigned long) base * acc + value;
        s++;
     }

   *sp = s;
   if (s == first_digit)
     return 0;

   /* Negate while still unsigned; this also covers the most negative value. */
   if (sign < 0)
     acc = 0UL - acc;

   *np = (long) acc;

   return 1;
}
89 
90 
/* Same contract as parse_long, but the parsed value is narrowed to int
 * before it is stored in *NP.  *NP is written only when parse_long
 * reports success (1); the status from parse_long is returned unchanged.
 */
static int parse_int (char **sp, char *smax, int *np,
                      long base, unsigned char map[256])
{
   long value;
   int status = parse_long (sp, smax, &value, base, map);

   if (status != 1)
     return status;

   *np = (int) value;
   return 1;
}
101 
/* Same contract as parse_long, but the parsed value is narrowed to short
 * before it is stored in *NP.  *NP is written only when parse_long
 * reports success (1); the status from parse_long is returned unchanged.
 */
static int parse_short (char **sp, char *smax, short *np,
                        long base, unsigned char map[256])
{
   long value;
   int status = parse_long (sp, smax, &value, base, map);

   if (status != 1)
     return status;

   *np = (short) value;
   return 1;
}
112 
/* Unsigned variant of parse_long: the text is parsed by parse_long and the
 * resulting long is converted to unsigned long.  *NP is written only when
 * parse_long reports success (1); its status is returned unchanged.
 */
static int parse_ulong (char **sp, char *smax, unsigned long *np,
                        long base, unsigned char map[256])
{
   long value;
   int status = parse_long (sp, smax, &value, base, map);

   if (status == 1)
     *np = (unsigned long) value;

   return status;
}
118 
/* Unsigned variant of parse_int: the text is parsed by parse_int and the
 * resulting int is converted to unsigned int.  *NP is written only when
 * parse_int reports success (1); its status is returned unchanged.
 */
static int parse_uint (char **sp, char *smax, unsigned int *np,
                       long base, unsigned char map[256])
{
   int value;
   int status = parse_int (sp, smax, &value, base, map);

   if (status == 1)
     *np = (unsigned int) value;

   return status;
}
124 
/* Unsigned variant of parse_short: the text is parsed by parse_short and
 * the resulting short is converted to unsigned short.  *NP is written only
 * when parse_short reports success (1); its status is returned unchanged.
 */
static int parse_ushort (char **sp, char *smax, unsigned short *np,
                         long base, unsigned char map[256])
{
   short value;
   int status = parse_short (sp, smax, &value, base, map);

   if (status == 1)
     *np = (unsigned short) value;

   return status;
}
130 
131 #if SLANG_HAS_FLOAT
/*
 * In an ideal world, strtod would be the correct function to use.  However,
 * relying on it may cause problems because some systems do not support it,
 * and some that do get it wrong.  So, I will handle the parsing of the
 * string and let atof or strtod handle the arithmetic.
 */
parse_double(char ** sp,char * smax,double * d)138 static int parse_double (char **sp, char *smax, double *d)
139 {
140    char *s, *s0;
141    int sign;
142    int expon;
143    unsigned char map[256];
144    char buf[128];
145    int has_leading_zeros;
146    char *start_pos, *sign_pos;
147    char *b, *bmax;
148 
149    start_pos = *sp;
150    s = get_sign (start_pos, smax, &sign);
151    if (s >= smax)
152      {
153 	errno = _SLerrno_errno = EINVAL;
154 	return 0;
155      }
156 
157    /* Prepare the buffer that will be passed to strtod */
158    /* Allow the exponent to be 5 significant digits: E+xxxxx\0 */
159    bmax = buf + (sizeof (buf) - 8);
160    buf[0] = '0'; buf[1] = '.';
161    b = buf + 2;
162 
163    init_map (map, 10);
164 
165    /* Skip leading 0s */
166    s0 = s;
167    while ((s < smax) && (*s == '0'))
168      s++;
169    has_leading_zeros = (s != s0);
170 
171    expon = 0;
172    while (s < smax)
173      {
174 	unsigned char value = map [(unsigned char) *s];
175 
176 	if (value == 0xFF)
177 	  break;
178 
179 	if (b < bmax)
180 	  *b++ = *s;
181 
182 	expon++;
183 	s++;
184      }
185 
186    if ((s < smax) && (*s == '.'))
187      {
188 	s++;
189 	if (b == buf + 2)	       /* nothing added yet */
190 	  {
191 	     while ((s < smax) && (*s == '0'))
192 	       {
193 		  expon--;
194 		  s++;
195 	       }
196 	  }
197 
198 	while (s < smax)
199 	  {
200 	     unsigned char value = map [(unsigned char) *s];
201 
202 	     if (value == 0xFF)
203 	       break;
204 
205 	     if (b < bmax)
206 	       *b++ = *s;
207 	     s++;
208 	  }
209      }
210 
211    if ((b == buf + 2)
212        && (has_leading_zeros == 0))
213      {
214 	*sp = start_pos;
215 	errno = EINVAL;
216 	return 0;
217      }
218 
219    if ((s + 1 < smax) && ((*s == 'E') || (*s == 'e')))
220      {
221 	int e;
222 	int esign;
223 
224 	s0 = s;
225 	s = get_sign (s + 1, smax, &esign);
226 	sign_pos = s;
227 	e = 0;
228 	while (s < smax)
229 	  {
230 	     unsigned char value = map [(unsigned char) *s];
231 	     if (value == 0xFF)
232 	       break;
233 	     if (e < 25000)	       /* avoid overflow if 16 bit */
234 	       e = 10 * e + value;
235 	     s++;
236 	  }
237 #ifdef ERANGE
238 	if (e >= 25000)
239 	  errno = ERANGE;
240 #endif
241 	if (s == sign_pos)
242 	  s = s0;		       /* ...E-X */
243 	else
244 	  {
245 	     e = esign * e;
246 	     expon += e;
247 	  }
248      }
249 
250    if (expon != 0)
251      sprintf (b, "e%d", expon);
252    else
253      *b = 0;
254 
255    *sp = s;
256 #if HAVE_STRTOD
257    *d = sign * strtod (buf, NULL);
258 #else
259    *d = sign * atof (buf);
260 #endif
261    return 1;
262 }
263 
/* Single precision front end to parse_double.  Returns 1 and stores the
 * value, narrowed to float, in *D when parse_double returns 1; returns 0
 * otherwise, leaving *D untouched.
 */
static int parse_float (char **sp, char *smax, float *d)
{
   double value;

   if (1 != parse_double (sp, smax, &value))
     return 0;

   *d = (float) value;
   return 1;
}
274 #endif				       /* SLANG_HAS_FLOAT */
275 
/* Extract the run of non-whitespace characters starting at *SP and ending
 * before SMAX.
 *
 * The run is handed to SLang_create_nslstring and the result is stored in
 * *STR.  Returns 1 with *SP moved to the end of the run, or -1 (leaving
 * *SP untouched) when SLang_create_nslstring returns NULL.  The run may be
 * empty.
 */
static int parse_string (char **sp, char *smax, char **str)
{
   char *start, *end;

   start = end = *sp;

   /* isspace is only defined for unsigned char values, hence the cast. */
   while ((end < smax) && (0 == isspace ((unsigned char) *end)))
     end++;

   if (NULL == (*str = SLang_create_nslstring (start, (unsigned int) (end - start))))
     return -1;

   *sp = end;
   return 1;
}
293 
/* Take everything from *SP up to SMAX, whitespace included.
 *
 * The text is handed to SLang_create_nslstring and the result is stored in
 * *STR.  Returns 1 with *SP set to SMAX, or -1 (leaving *SP untouched)
 * when SLang_create_nslstring returns NULL.
 */
static int parse_bstring (char **sp, char *smax, char **str)
{
   char *start = *sp;
   unsigned int len = (unsigned int) (smax - start);

   *str = SLang_create_nslstring (start, len);
   if (*str == NULL)
     return -1;

   *sp = smax;
   return 1;
}
305 
parse_range(char ** sp,char * smax,char ** fp,char ** str)306 static int parse_range (char **sp, char *smax, char **fp, char **str)
307 {
308    char *s, *s0;
309    char *range;
310    char *f;
311    unsigned char map[256];
312    unsigned char reverse;
313 
314    /* How can one represent a range with just '^'?  The naive answer is
315     * is [^].  However, this may be interpreted as meaning any character
316     * but ']' and others.  Let's assume that the user will not use a range
317     * to match '^'.
318     */
319    f = *fp;
320    /* f is a pointer to (one char after) [...]. */
321    if (*f == '^')
322      {
323 	f++;
324 	reverse = 1;
325      }
326    else reverse = 0;
327 
328    s0 = f;
329    if (*f == ']')
330      f++;
331 
332    while (1)
333      {
334 	char ch = *f;
335 
336 	if (ch == 0)
337 	  {
338 	     SLang_verror (SL_INVALID_PARM, "Unexpected end of range in format");
339 	     return -1;
340 	  }
341 	if (ch == ']')
342 	  break;
343 	f++;
344      }
345    if (NULL == (range = SLmake_nstring (s0, (unsigned int) (f - s0))))
346      return -1;
347    *fp = f + 1;			       /* skip ] */
348 
349    SLmake_lut (map, (unsigned char *) range, reverse);
350    SLfree (range);
351 
352    s0 = s = *sp;
353    while ((s < smax) && map [(unsigned char) *s])
354      s++;
355 
356    if (NULL == (*str = SLang_create_nslstring (s0, (unsigned int) (s - s0))))
357      return -1;
358 
359    *sp = s;
360    return 1;
361 }
362 
363 
_SLang_sscanf(void)364 int _SLang_sscanf (void)
365 {
366    int num;
367    unsigned int num_refs;
368    char *format;
369    char *input_string, *input_string_max;
370    char *f, *s;
371    unsigned char map8[256], map10[256], map16[256];
372 
373    if (SLang_Num_Function_Args < 2)
374      {
375 	SLang_verror (SL_INVALID_PARM, "Int_Type sscanf (str, format, ...)");
376 	return -1;
377      }
378 
379    num_refs = (unsigned int) SLang_Num_Function_Args;
380    if (-1 == SLreverse_stack (num_refs))
381      return -1;
382    num_refs -= 2;
383 
384    if (-1 == SLang_pop_slstring (&input_string))
385      return -1;
386 
387    if (-1 == SLang_pop_slstring (&format))
388      {
389 	SLang_free_slstring (input_string);
390 	return -1;
391      }
392 
393    f = format;
394    s = input_string;
395    input_string_max = input_string + strlen (input_string);
396 
397    init_map (map8, 8);
398    init_map (map10, 10);
399    init_map (map16, 16);
400 
401    num = 0;
402 
403    while (num_refs != 0)
404      {
405 	SLang_Object_Type obj;
406 	SLang_Ref_Type *ref;
407 	char *smax;
408 	unsigned char *map;
409 	int base;
410 	int no_assign;
411 	int is_short;
412 	int is_long;
413 	int status;
414 	char chf;
415 	unsigned int width;
416 	int has_width;
417 
418 	chf = *f++;
419 
420 	if (chf == 0)
421 	  {
422 	     /* Hmmm....  what is the most useful thing to do?? */
423 #if 1
424 	     break;
425 #else
426 	     SLang_verror (SL_INVALID_PARM, "sscanf: format not big enough for output list");
427 	     goto return_error;
428 #endif
429 	  }
430 
431 	if (isspace (chf))
432 	  {
433 	     s = skip_whitespace (s);
434 	     continue;
435 	  }
436 
437 	if ((chf != '%')
438 	    || ((chf = *f++) == '%'))
439 	  {
440 	     if (*s != chf)
441 	       break;
442 	     s++;
443 	     continue;
444 	  }
445 
446 	no_assign = 0;
447 	is_short = 0;
448 	is_long = 0;
449 	width = 0;
450 	smax = input_string_max;
451 
452 	/* Look for the flag character */
453 	if (chf == '*')
454 	  {
455 	     no_assign = 1;
456 	     chf = *f++;
457 	  }
458 
459 	/* Width */
460 	has_width = isdigit (chf);
461 	if (has_width)
462 	  {
463 	     f--;
464 	     (void) parse_uint (&f, f + strlen(f), &width, 10, map10);
465 	     chf = *f++;
466 	  }
467 
468 	/* Now the type modifier */
469 	switch (chf)
470 	  {
471 	   case 'h':
472 	     is_short = 1;
473 	     chf = *f++;
474 	     break;
475 
476 	   case 'L':		       /* not implemented */
477 	   case 'l':
478 	     is_long = 1;
479 	     chf = *f++;
480 	     break;
481 	  }
482 
483 	status = -1;
484 
485 	if ((chf != 'c') && (chf != '['))
486 	  s = skip_whitespace (s);
487 
488 	if (has_width)
489 	  {
490 	     if (width > (unsigned int) (input_string_max - s))
491 	       width = (unsigned int) (input_string_max - s);
492 	     smax = s + width;
493 	  }
494 
495 	/* Now the format descriptor */
496 
497 	map = map10;
498 	base = 10;
499 
500 	try_again:		       /* used by i, x, and o, conversions */
501 	switch (chf)
502 	  {
503 	   case 0:
504 	     SLang_verror (SL_INVALID_PARM, "sscanf: Unexpected end of format");
505 	     goto return_error;
506 	   case 'D':
507 	     is_long = 1;
508 	   case 'd':
509 	     if (is_short)
510 	       {
511 		  obj.data_type = SLANG_SHORT_TYPE;
512 		  status = parse_short (&s, smax, &obj.v.short_val, base, map);
513 	       }
514 	     else if (is_long)
515 	       {
516 		  obj.data_type = SLANG_LONG_TYPE;
517 		  status = parse_long (&s, smax, &obj.v.long_val, base, map);
518 	       }
519 	     else
520 	       {
521 		  obj.data_type = SLANG_INT_TYPE;
522 		  status = parse_int (&s, smax, &obj.v.int_val, base, map);
523 	       }
524 	     break;
525 
526 
527 	   case 'U':
528 	     is_long = 1;
529 	   case 'u':
530 	     if (is_short)
531 	       {
532 		  obj.data_type = SLANG_USHORT_TYPE;
533 		  status = parse_ushort (&s, smax, &obj.v.ushort_val, base, map);
534 	       }
535 	     else if (is_long)
536 	       {
537 		  obj.data_type = SLANG_ULONG_TYPE;
538 		  status = parse_ulong (&s, smax, &obj.v.ulong_val, base, map);
539 	       }
540 	     else
541 	       {
542 		  obj.data_type = SLANG_INT_TYPE;
543 		  status = parse_uint (&s, smax, &obj.v.uint_val, base, map);
544 	       }
545 	     break;
546 
547 	   case 'I':
548 	     is_long = 1;
549 	   case 'i':
550 	     if ((s + 1 >= smax)
551 		 || (*s != 0))
552 	       chf = 'd';
553 	     else if (((s[1] == 'x') || (s[1] == 'X'))
554 		      && (s + 2 < smax))
555 	       {
556 		  s += 2;
557 		  chf = 'x';
558 	       }
559 	     else chf = 'o';
560 	     goto try_again;
561 
562 	   case 'O':
563 	     is_long = 1;
564 	   case 'o':
565 	     map = map8;
566 	     base = 8;
567 	     chf = 'd';
568 	     goto try_again;
569 
570 	   case 'X':
571 	     is_long = 1;
572 	   case 'x':
573 	     base = 16;
574 	     map = map16;
575 	     chf = 'd';
576 	     goto try_again;
577 
578 	   case 'E':
579 	   case 'F':
580 	     is_long = 1;
581 	   case 'e':
582 	   case 'f':
583 	   case 'g':
584 #if SLANG_HAS_FLOAT
585 	     if (is_long)
586 	       {
587 		  obj.data_type = SLANG_DOUBLE_TYPE;
588 		  status = parse_double (&s, smax, &obj.v.double_val);
589 	       }
590 	     else
591 	       {
592 		  obj.data_type = SLANG_FLOAT_TYPE;
593 		  status = parse_float (&s, smax, &obj.v.float_val);
594 	       }
595 #else
596 	     SLang_verror (SL_NOT_IMPLEMENTED,
597 			   "This version of the S-Lang does not support floating point");
598 	     status = -1;
599 #endif
600 	     break;
601 
602 	   case 's':
603 	     obj.data_type = SLANG_STRING_TYPE;
604 	     status = parse_string (&s, smax, &obj.v.s_val);
605 	     break;
606 
607 	   case 'c':
608 	     if (has_width == 0)
609 	       {
610 		  obj.data_type = SLANG_UCHAR_TYPE;
611 		  obj.v.uchar_val = *s++;
612 		  status = 1;
613 		  break;
614 	       }
615 	     obj.data_type = SLANG_STRING_TYPE;
616 	     status = parse_bstring (&s, smax, &obj.v.s_val);
617 	     break;
618 
619 	   case '[':
620 	     obj.data_type = SLANG_STRING_TYPE;
621 	     status = parse_range (&s, smax, &f, &obj.v.s_val);
622 	     break;
623 
624 	   case 'n':
625 	     obj.data_type = SLANG_UINT_TYPE;
626 	     obj.v.uint_val = (unsigned int) (s - input_string);
627 	     status = 1;
628 	     break;
629 
630 	   default:
631 	     status = -1;
632 	     SLang_verror (SL_NOT_IMPLEMENTED, "format specifier '%c' is not supported", chf);
633 	     break;
634 	  }
635 
636 	if (status == 0)
637 	  break;
638 
639 	if (status == -1)
640 	  goto return_error;
641 
642 	if (no_assign)
643 	  {
644 	     SLang_free_object (&obj);
645 	     continue;
646 	  }
647 
648 	if (-1 == SLang_pop_ref (&ref))
649 	  {
650 	     SLang_free_object (&obj);
651 	     goto return_error;
652 	  }
653 
654 	if (-1 == SLang_push (&obj))
655 	  {
656 	     SLang_free_object (&obj);
657 	     SLang_free_ref (ref);
658 	     goto return_error;
659 	  }
660 
661 	if (-1 == _SLang_deref_assign (ref))
662 	  {
663 	     SLang_free_ref (ref);
664 	     goto return_error;
665 	  }
666 	SLang_free_ref (ref);
667 
668 	num++;
669 	num_refs--;
670      }
671 
672    if (-1 == SLdo_pop_n (num_refs))
673      goto return_error;
674 
675    SLang_free_slstring (format);
676    SLang_free_slstring (input_string);
677    return num;
678 
679    return_error:
680    /* NULLS ok */
681    SLang_free_slstring (format);
682    SLang_free_slstring (input_string);
683    return -1;
684 }
685 
686 
687 # if SLANG_HAS_FLOAT
688 
689 #ifndef HAVE_STDLIB_H
690 /* Oh dear.  Where is the prototype for atof?  If not in stdlib, then
691  * I do not know where.  Not in math.h on some systems either.
692  */
693 extern double atof ();
694 #endif
695 
_SLang_atof(char * s)696 double _SLang_atof (char *s)
697 {
698    double x;
699 
700    s = skip_whitespace (s);
701    errno = 0;
702 
703    if (1 != parse_double (&s, s + strlen (s), &x))
704      {
705 	if ((0 == strcmp ("NaN", s))
706 	    || (0 == strcmp ("-Inf", s))
707 	    || (0 == strcmp ("Inf", s)))
708 	  return atof (s);	       /* let this deal with it */
709 #ifdef EINVAL
710 	errno = _SLerrno_errno = EINVAL;
711 #endif
712 	return 0.0;
713      }
714    if (errno)
715      _SLerrno_errno = errno;
716    return x;
717 }
718 #endif
719