1 /***********************************************************************
2  @C-file{
3     author              = "Nelson H. F. Beebe",
4     version             = "2.00",
5     date                = "10 December 2000",
6     time                = "07:53:44 MST",
7     filename            = "awklib.c",
8     address             = "Center for Scientific Computing
9 			   University of Utah
10 			   Department of Mathematics, 322 INSCC
11 			   155 S 1400 E RM 233
12 			   Salt Lake City, UT 84112-0090
13 			   USA",
14     telephone           = "+1 801 581 5254",
15     FAX                 = "+1 801 585 1640, +1 801 581 4148",
16     URL                 = "http://www.math.utah.edu/~beebe",
17     checksum            = "42645 1073 3121 29602",
18     email               = "beebe@math.utah.edu, beebe@acm.org,
19 			   beebe@computer.org, beebe@ieee.org
20 			   (Internet)",
21     codetable           = "ISO/ASCII",
22     keywords            = "awk compatibility library",
23     supported           = "yes",
24     docstring           = "This file defines a subset of the awk library
25 			   functions for use by C programs, to assist in
26 			   the manual translation of awk code to C.
27 
28 			   Provided that the caller is careful to
29 			   eventually invoke free() on every
30 			   newly-allocated string returned by any of
31 			   these primitives, awk_free_table() on
32 			   tables returned by awk_split(), and
33 			   awk_close_infile() on every open input
34 			   file, this library is designed to be
35 			   strictly free of memory leaks.
36 
37 			   The checksum field above contains a CRC-16
38 			   checksum as the first value, followed by the
39 			   equivalent of the standard UNIX wc (word
40 			   count) utility output of lines, words, and
41 			   characters.  This is produced by Robert
42 			   Solovay's checksum utility.",
43  }
44 ***********************************************************************/
45 
46 #include "awklib.h"
47 #include "regexp/regexp.h"
48 
49 static int _c;
50 #define awk_is_white(c) (_c = (c), ((_c == ' ') || (_c == '\t') || (_c == '\n')))
51 
52 #define FREE(p) (void)free((void*)(p))
53 
54 const char *FILENAME = (const char *)NULL;
55 const char *FS = (const char *)NULL;
56 const char *RS = (const char *)NULL;
57 
58 size_t FNR = 0;
59 awk_int_t RLENGTH = -1L;
60 awk_int_t RSTART = 0L;
61 
62 static int _awk_lib_initialized = 0;
63 
64 #define ENSURE_INITIALIZATION()	do { if (!_awk_lib_initialized) awk_initialize(); } while (0)
65 
/* Building blocks shared by the special-value patterns below. */
#define OPTIONAL_SIGN_PATTERN       "[-+]?"
#define OPTIONAL_WHITESPACE_PATTERN "[ \t\n\r\f\v]*"
#define EXPONENT_PATTERN            "[DdEeQq]" OPTIONAL_SIGN_PATTERN "[0-9]+"

/* NaN: an optionally signed "nan" in any letter case, with an optional
   Q or S before or after it, or a field of question marks, a period,
   and question marks or zeros, with or without an exponent.
   Surrounding whitespace is allowed. */
static const char *_awk_NaN_regexp =
    "^" OPTIONAL_WHITESPACE_PATTERN OPTIONAL_SIGN_PATTERN "("
    "[QqSs]?[Nn][Aa][Nn][QqSs]?"
    "|" "[?]+[.][?0]+" EXPONENT_PATTERN
    "|" "[?]+[.][?0]+"
    ")" OPTIONAL_WHITESPACE_PATTERN "$";

/* Negative infinity: "-inf" or "-infinity" in any letter case, or a
   field of minus signs, a period, a minus sign, and zeros, with or
   without an exponent.  Surrounding whitespace is allowed. */
static const char *_awk_negative_infinity_regexp =
    "^" OPTIONAL_WHITESPACE_PATTERN "("
    "-[Ii][Nn][Ff]"
    "|" "-[Ii][Nn][Ff][Ii][Nn][Ii][Tt][Yy]"
    "|" "-+[.]-0+" EXPONENT_PATTERN
    "|" "-+[.]-0+"
    ")" OPTIONAL_WHITESPACE_PATTERN "$";

/* Positive infinity: "inf" or "infinity" in any letter case with an
   optional plus sign, or a field of plus signs, a period, a plus sign,
   and zeros, with or without an exponent.  Surrounding whitespace is
   allowed. */
static const char *_awk_positive_infinity_regexp =
    "^" OPTIONAL_WHITESPACE_PATTERN "("
    "[+]?[Ii][Nn][Ff]"
    "|" "[+]?[Ii][Nn][Ff][Ii][Nn][Ii][Tt][Yy]"
    "|" "[+]+[.][+]0+" EXPONENT_PATTERN
    "|" "[+]+[.][+]0+"
    ")" OPTIONAL_WHITESPACE_PATTERN "$";
107 
108 static const char *CNULL = (const char *)NULL;
109 static FILE *FNULL = (FILE *) NULL;
110 
111 static const char *_awk_concat(const char *s1, const char *s2,
112 			       const char *s3);
113 static void _awk_error(const char *s);
114 static FILE *_awk_file_to_fp(const char *filename);
115 static const char *_awk_find_char(const char *s, int c);
116 static int _awk_gsub(regexp * compiled_regexp, const char *replacement,
117 		     char **target);
118 static int _awk_match(const char *source, regexp * compiled_regexp);
119 static FILE *_awk_open_infile(const char *filename);
120 static int _awk_regmatch(const char *s, const char *regexp_pattern);
121 static const char *_awk_skip_nonwhite(const char *s);
122 static const char *_awk_skip_white(const char *s);
123 static size_t _awk_split_complex(const char *s, const char ***parts,
124 			      const char *field_separator);
125 static size_t _awk_split_regexp(const char *s, const char ***parts,
126 			     const char *field_separator);
127 static size_t _awk_split_simple(const char *s, const char ***parts);
128 static void regfree(regexp *s);
129 
/* One slot of the open-input-file table: the name the file was opened
   under, and the stream that reads it.  A NULL name marks a free slot. */
typedef struct
{
    const char *name;
    FILE *fp;
} filetable_t;

/* Capacity of FileTable[]; may be overridden at compile time. */
#ifndef MAXOPENFILES
#define MAXOPENFILES 2
#endif

/* Growth increment used when tables and line buffers are enlarged. */
#define CHUNKSIZE 256

static filetable_t FileTable[MAXOPENFILES];
144 
145 
static const char *
_awk_concat(const char *s1, const char *s2, const char *s3)
{
    /*******************************************************************
      Build S1 followed by S2 followed by S3 in freshly-allocated
      storage and return it.  The caller owns the result.

      Allocation is done by awk_padstr(), which terminates the program
      if memory cannot be obtained.
    *******************************************************************/

    const size_t head = strlen(s1);
    const size_t middle = strlen(s2);
    char *joined = awk_padstr(s1, middle + strlen(s3));

    /* S1 is already in place; append the other two pieces behind it */
    (void)strcpy(joined + head, s2);
    (void)strcpy(joined + head + middle, s3);

    return ((const char *)joined);
}
170 
171 
static void
_awk_error(const char *s)
{
    /*******************************************************************
      Print the message S on stderr, prefixed by "FATAL ERROR: ", and
      end the program with a failure status.  This function does not
      return.
    *******************************************************************/

    (void)fprintf(stderr, "FATAL ERROR: %s\n", s);

    exit(EXIT_FAILURE);
}
182 
183 
184 static FILE *
_awk_file_to_fp(const char * filename)185 _awk_file_to_fp(const char *filename)
186 {
187     /*******************************************************************
188       Return the file pointer associated with FILENAME, or NULL if
189       none.
190     *******************************************************************/
191 
192     size_t k;
193 
194     for (k = 0; k < MAXOPENFILES; ++k)
195     {
196 	if ((FileTable[k].name != CNULL)
197 	    && (strcmp(FileTable[k].name, filename) == 0))
198 	    return (FileTable[k].fp);
199     }
200 
201     return (FNULL);
202 }
203 
204 
static const char *
_awk_find_char(const char *s, int c)
{
    /*******************************************************************
      Scan S from its start and return a pointer to the first character
      equal to C, or to the terminating NUL if C does not occur.
    *******************************************************************/

    const char *p;

    for (p = s; *p != '\0'; ++p)
    {
        if ((int)(*p) == c)
            break;
    }

    return (p);
}
218 
219 
220 static int
_awk_gsub(regexp * compiled_regexp,const char * replacement,char ** target)221 _awk_gsub(regexp * compiled_regexp, const char *replacement, char **target)
222 {
223     /*******************************************************************
224       Substitute all leftmost longest substrings in target matching
225       COMPILED_REGEXP with REPLACEMENT, updating TARGET to point to
226       the new string.  Any storage previously pointed to by TARGET is
227       freed, and thus, must have originally been allocated by malloc().
228 
229       Return the number of substitions made.
230     *******************************************************************/
231 
232     int nsub;
233 
234     nsub = 0;
235 
236     if (_awk_match(*target, compiled_regexp))
237     {
238 	const char *first;
239 	char *rest;
240 
241 	first = awk_substr(*target, 1, RSTART - 1);
242 	rest = (char *)awk_substr(*target, RSTART + RLENGTH, LONG_MAX);
243 	nsub += _awk_gsub(compiled_regexp, replacement, &rest);
244 	awk_free_string(*target);
245 	*target = (char *)_awk_concat(first, replacement, rest);
246 	awk_free_string(rest);
247 	awk_free_string(first);
248     }
249 
250     return (nsub);
251 }
252 
253 
254 static int
_awk_match(const char * source,regexp * compiled_regexp)255 _awk_match(const char *source, regexp * compiled_regexp)
256 {
257     /*******************************************************************
258       Search SOURCE for the longest leftmost substring matched by
259       COMPILED_REGEXP.
260 
261       If a match is found, set the global variable RSTART to the index,
262       and RLENGTH to the length of the matched substring.
263 
264       If there is no match, then set RSTART to 0, and RLENGTH to -1.
265 
266       Return RSTART (one, if it starts at the beginning of source).
267     *******************************************************************/
268 
269     if (regexec(compiled_regexp, source))
270     {
271 	RSTART = 1 + (size_t) (compiled_regexp->startp[0] - source);
272 	RLENGTH =
273 	    (awk_int_t) (compiled_regexp->endp[0] -
274 			 compiled_regexp->startp[0]);
275     }
276     else
277     {
278 	RSTART = 0;
279 	RLENGTH = -1;
280     }
281     return (RSTART);
282 }
283 
284 
285 static FILE *
_awk_open_infile(const char * filename)286 _awk_open_infile(const char *filename)
287 {
288     /*******************************************************************
289       Open the input file FILENAME, creating an entry for it in
290       FileTable[], and return a FILE* pointer for it.  That pointer
291       may be NULL, if the file could not be opened, or too many
292       files are already open.
293     *******************************************************************/
294 
295     size_t k;
296     FILE *fp;
297 
298     if (strcmp(filename,"-") == 0)
299 	fp = stdin;
300     else if (strcmp(filename,"/dev/stdin") == 0)
301 	fp = stdin;
302     else if (strcmp(filename,"/dev/fd/0") == 0)
303 	fp = stdin;
304     else
305 	fp = fopen(filename, "r");
306     if (fp != FNULL)
307     {
308 	for (k = 0; k < MAXOPENFILES; ++k)
309 	{
310 	    if (FileTable[k].name == CNULL)
311 	    {
312 		FileTable[k].name = awk_dupstr(filename);
313 		FileTable[k].fp = fp;
314 		break;
315 	    }
316 	}
317 	if (k == MAXOPENFILES)
318 	{			/* too many open input files */
319 	    (void)fclose(fp);
320 	    fp = (FILE *) NULL;
321 	}
322     }
323     return (fp);
324 }
325 
326 
327 static int
_awk_regmatch(const char * s,const char * regexp_pattern)328 _awk_regmatch(const char *s, const char *regexp_pattern)
329 {
330     /*******************************************************************
331       Return 1 if S matches regular expression REGEXP, and 0 otherwise.
332     *******************************************************************/
333 
334     int result;
335     regexp *compiled_regexp;
336 
337     compiled_regexp = regcomp(regexp_pattern);
338     result = regexec(compiled_regexp, s);
339     regfree(compiled_regexp);
340 
341     return (result ? 1 : 0);
342 }
343 
344 
static const char *
_awk_skip_nonwhite(const char *s)
{
    /*******************************************************************
      Advance over the run of non-whitespace characters at the start of
      S, and return a pointer to the whitespace character (as defined
      by awk_is_white()) or terminating NUL that ends the run.
    *******************************************************************/

    const char *p;

    for (p = s; *p != '\0'; ++p)
    {
        if (awk_is_white(*p))
            break;
    }

    return (p);
}
357 
358 
static const char *
_awk_skip_white(const char *s)
{
    /*******************************************************************
      Advance over the run of whitespace characters (as defined by
      awk_is_white()) at the start of S, and return a pointer to the
      character that ends the run, which may be the terminating NUL.
    *******************************************************************/

    const char *p = s;

    while (awk_is_white(*p))
        p++;

    return (p);
}
371 
372 
373 static size_t
_awk_split_complex(const char * s,const char *** parts,const char * field_separator)374 _awk_split_complex(const char *s, const char ***parts,
375 		   const char *field_separator)
376 {
377     /*******************************************************************
378       Split the string S into tokens delimited by the single character
379       in FIELD_SEPARATOR.  Return them in the newly-allocated array
380       PARTS[], and return the number of tokens stored in PARTS[].
381 
382       PARTS[] should ultimately freed by a call to awk_free_table().
383     *******************************************************************/
384 
385     size_t n_parts;
386     const char *t;
387     awk_table_t the_table;
388 
389     awk_new_table(&the_table);
390     n_parts = 0;
391     t = _awk_find_char(s, field_separator[0]);
392 
393     while (t >= s)
394     {
395 	awk_add_element(&the_table, ++n_parts,
396 			awk_substr(s, 1, (awk_int_t) (t - s)));
397 	if (*t == '\0')
398 	    break;
399 	s = t + 1;
400 	t = _awk_find_char(s, field_separator[0]);
401     }
402     *parts = the_table.table;
403 
404     return (n_parts);
405 }
406 
407 
408 static size_t
_awk_split_regexp(const char * s,const char *** parts,const char * field_separator)409 _awk_split_regexp(const char *s, const char ***parts,
410 		  const char *field_separator)
411 {
412     /*******************************************************************
413       Split the string S into tokens delimited by the regular expression
414       in FIELD_SEPARATOR.  Return them in the newly-allocated array
415       PARTS[], and return the number of tokens stored in PARTS[].
416 
417       PARTS[] should ultimately freed by a call to awk_free_table().
418     *******************************************************************/
419 
420     size_t n_parts;
421     awk_table_t the_table;
422     regexp *compiled_regexp;
423 
424     awk_new_table(&the_table);
425     n_parts = 0;
426 
427     compiled_regexp = regcomp(field_separator);
428     while (_awk_match(s, compiled_regexp))
429     {
430 	awk_add_element(&the_table, ++n_parts, awk_substr(s, 1, RSTART - 1));
431 	s = &s[-1 + RSTART + RLENGTH];
432     }
433     regfree(compiled_regexp);
434 
435     awk_add_element(&the_table, ++n_parts, awk_dupstr(s));
436     *parts = the_table.table;
437 
438     return (n_parts);
439 }
440 
441 
442 static size_t
_awk_split_simple(const char * s,const char *** parts)443 _awk_split_simple(const char *s, const char ***parts)
444 {
445     /*******************************************************************
446       Split the string S into tokens delimited by whitespace, ignoring
447       leading and trailing space. Return them in the newly-allocated
448       array PARTS[], and return the number of tokens stored in PARTS[].
449 
450       PARTS[] should ultimately freed by a call to awk_free_table().
451     *******************************************************************/
452 
453     size_t n_parts;
454     const char *t;
455     awk_table_t the_table;
456 
457     awk_new_table(&the_table);
458     n_parts = 0;
459     s = _awk_skip_white(s);
460     t = _awk_skip_nonwhite(s);
461     while (t > s)
462     {
463 	awk_add_element(&the_table, ++n_parts,
464 			awk_substr(s, 1, (awk_int_t) (t - s)));
465 	s = _awk_skip_white(t);
466 	t = _awk_skip_nonwhite(s);
467     }
468     *parts = the_table.table;
469 
470     return (n_parts);
471 }
472 
473 
474 void
awk_add_element(awk_table_t * the_table,size_t the_index,const char * the_value)475 awk_add_element(awk_table_t * the_table, size_t the_index,
476 		const char *the_value)
477 {
478     /*******************************************************************
479       Set THE_TABLE[THE_INDEX] = THE_VALUE, growing the table as needed.
480       THE_INDEX counts from 1, and THE_TABLE[0] is allocated, but
481       unused.  THE_VALUE is not duplicated to ensure a unique string:
482       that is the caller's responsibility.
483     *******************************************************************/
484 
485     ENSURE_INITIALIZATION();
486 
487     if (the_table->table == (const char **)NULL)
488     {
489 	the_table->size = the_index + 1 + CHUNKSIZE;
490 	the_table->table =
491 	    (const char **)malloc(sizeof(const char *) * the_table->size);
492     }
493     else if (the_table->size <= the_index)
494     {
495 	the_table->size = the_index + 1 + CHUNKSIZE;
496 	the_table->table =
497 	    (const char **)realloc(the_table->table,
498 				   sizeof(const char *) * the_table->size);
499     }
500     if (the_table->table == (const char **)NULL)
501 	_awk_error("awk_add_element(): out of memory");
502     the_table->table[the_index] = the_value;
503 }
504 
505 
506 void
awk_close_infile(const char * filename)507 awk_close_infile(const char *filename)
508 {
509     /*******************************************************************
510       Close the file pointer associated with FILENAME, and clear its
511       FileTable[] entry.
512     *******************************************************************/
513 
514     size_t k;
515 
516     ENSURE_INITIALIZATION();
517 
518     for (k = 0; k < MAXOPENFILES; ++k)
519     {
520 	if ((FileTable[k].name != CNULL)
521 	    && (strcmp(FileTable[k].name, filename) == 0))
522 	{
523 	    (void)fclose(FileTable[k].fp);
524 	    awk_free_string(FileTable[k].name);
525 	    FileTable[k].name = CNULL;
526 	    FileTable[k].fp = FNULL;
527 	    return;
528 	}
529     }
530 }
531 
532 
const char *
awk_dupstr(const char *s)
{
    /*******************************************************************
      Copy S into freshly-allocated storage and return the copy, which
      the caller owns.

      The copy is made by awk_padstr(), which terminates the program if
      memory cannot be allocated.
    *******************************************************************/

    char *copy;

    ENSURE_INITIALIZATION();

    copy = awk_padstr(s, 0);

    return ((const char *)copy);
}
546 
547 
548 void
awk_free_string(const char * s)549 awk_free_string(const char *s)
550 {
551     /*******************************************************************
552       Free the string s, which should have been dynamically allocated
553       by one of the public awk_xxx() functions.
554     *******************************************************************/
555 
556     ENSURE_INITIALIZATION();
557 
558     if (s != CNULL)
559 	FREE(s);
560 }
561 
562 
void
awk_free_table(const char **table, size_t n)
{
    /*******************************************************************
      Release elements 1 through N of TABLE[], and then TABLE[] itself.
      Element 0 is never touched.  A NULL table is accepted and ignored.

      Behavior is unpredictable if TABLE[] and its elements were not
      allocated by malloc().
    *******************************************************************/

    size_t k;

    ENSURE_INITIALIZATION();

    if (table == (const char **)NULL)
        return;

    for (k = n; k > 0; --k)             /* elements n down to 1 */
        awk_free_string(table[k]);

    awk_free_string((const char *)table);
}
583 
584 
585 int
awk_getline(const char * infile,const char ** line)586 awk_getline(const char *infile, const char **line)
587 {
588     /*******************************************************************
589       Get the next input line from INFILE, and return a
590       freshly-allocated string, LINE, containing that line (excluding
591       any final newline).  The caller should call free() when the line
592       is no longer needed.  The return value is -1 (error), 0
593       (end-of-file), or +1 (line found).
594     *******************************************************************/
595 
596     static char *buffer = (char *)NULL;
597     int c;
598     FILE *fp;
599     size_t len_buffer = 0;
600     size_t max_buffer = 0;
601 
602     ENSURE_INITIALIZATION();
603 
604     fp = _awk_file_to_fp(infile);
605     if (fp == (FILE *) NULL)
606 	fp = _awk_open_infile(infile);
607     if (fp == (FILE *) NULL)
608 	return (-1);
609     else if (feof(fp))
610 	return (0);
611     else
612     {
613 	while (((c = fgetc(fp)) != EOF) && (c != '\n'))
614 	{
615 	    if (len_buffer >= max_buffer)
616 	    {
617 		max_buffer += CHUNKSIZE;
618 		buffer = (buffer == (char*)NULL) ? (char*)malloc(max_buffer) :
619 		    (char *)realloc(buffer, max_buffer);
620 		if (buffer == (char *)NULL)
621 		    _awk_error("getline(): out of memory");
622 	    }
623 	    buffer[len_buffer++] = c;
624 	}
625 	if (buffer != (char*)NULL)
626 	    buffer[len_buffer] = '\0';
627 	if ((c == EOF) && (len_buffer == 0))
628 	{
629 	    if (buffer != (char *)NULL)
630 		FREE(buffer);
631 	    buffer = (char *)NULL;
632 	    len_buffer = 0;
633 	    max_buffer = 0;
634 	    return (0);		/* no more data in file */
635 	}
636 	else
637 	{
638 	    *line = awk_dupstr(buffer);
639 	    len_buffer = 0;
640 	    return (1);
641 	}
642     }
643 }
644 
645 
646 int
awk_gsub(const char * regular_expression,const char * replacement,char ** target)647 awk_gsub(const char *regular_expression, const char *replacement,
648 	 char **target)
649 {
650     /*******************************************************************
651       Substitute all leftmost longest substrings in target matching
652       REGULAR_EXPRESSION with REPLACEMENT, updating TARGET to point to
653       the new string.  Any storage previously pointed to by TARGET is
654       freed, and thus, must have originally been allocated by malloc().
655 
656       Return the number of substitions made.
657     *******************************************************************/
658 
659     int nsub;
660     regexp *compiled_regexp;
661 
662     ENSURE_INITIALIZATION();
663 
664     compiled_regexp = regcomp(regular_expression);
665     nsub = _awk_gsub(compiled_regexp, replacement, target);
666     regfree(compiled_regexp);
667 
668     return (nsub);
669 }
670 
671 
672 void
awk_initialize(void)673 awk_initialize(void)
674 {
675     /*******************************************************************
676       Initialize the awk compatibility library.  This function MUST be
677       called before any of the other awk_xxx() functions are called;
678       failure to do so will produce unpredictable results.
679 
680       On completion of use of the library, call awk_terminate() to
681       clean up.
682 
683       Calls to the pair awk_initialize() and awk_terminate() must
684       bracket all calls to other members of this library.
685     *******************************************************************/
686 
687     FS = " ";
688     RS = "\n";
689     RLENGTH = -1;
690     RSTART = 0;
691     _awk_lib_initialized = 1;
692 }
693 
694 
695 int
awk_is_NaN(const char * s)696 awk_is_NaN(const char *s)
697 {
698     /*******************************************************************
699       Return 1 if S matches a pattern for a NaN (Not-a-Number), and 0
700       otherwise.
701     *******************************************************************/
702 
703     ENSURE_INITIALIZATION();
704 
705     return (_awk_regmatch(s,_awk_NaN_regexp));
706 }
707 
708 
709 int
awk_is_negative_infinity(const char * s)710 awk_is_negative_infinity(const char *s)
711 {
712     /*******************************************************************
713       Return 1 if S matches a pattern for a negative infinity, and 0
714       otherwise.
715     *******************************************************************/
716 
717     ENSURE_INITIALIZATION();
718 
719     return (_awk_regmatch(s,_awk_negative_infinity_regexp));
720 }
721 
722 
723 int
awk_is_positive_infinity(const char * s)724 awk_is_positive_infinity(const char *s)
725 {
726     /*******************************************************************
727       Return 1 if S matches a pattern for a positive infinity, and 0
728       otherwise.
729     *******************************************************************/
730 
731     ENSURE_INITIALIZATION();
732 
733     return (_awk_regmatch(s,_awk_positive_infinity_regexp));
734 }
735 
736 
const char *
awk_long_to_string(long n)
{
    /*******************************************************************
      Format N in decimal and return the text as a newly-allocated
      string, which the caller owns.
    *******************************************************************/

    char digits[40 + 1];        /* long enough for 128-bit integer */
    const char *text;

    ENSURE_INITIALIZATION();

    (void)sprintf(digits, "%ld", n);
    text = awk_dupstr(digits);

    return (text);
}
752 
753 
754 int
awk_match(const char * source,const char * regular_expression)755 awk_match(const char *source, const char *regular_expression)
756 {
757     /*******************************************************************
758       Search SOURCE for the longest leftmost substring matched by
759       REGULAR_EXPRESSION.
760 
761       If a match is found, set the global variable RSTART to the index,
762       and RLENGTH to the length of the matched substring.
763 
764       If there is no match, then set RSTART to 0, and RLENGTH to -1.
765 
766       Return RSTART (one, if it starts at the beginning of source).
767     *******************************************************************/
768 
769     regexp *compiled_regexp;
770 
771     ENSURE_INITIALIZATION();
772 
773     compiled_regexp = regcomp(regular_expression);
774 
775     (void)_awk_match(source, compiled_regexp);
776 
777     regfree(compiled_regexp);
778 
779     return (RSTART);
780 }
781 
782 
783 void
awk_new_table(awk_table_t * the_table)784 awk_new_table(awk_table_t * the_table)
785 {
786     /*******************************************************************
787       Initialize an awk table to empty.
788     *******************************************************************/
789 
790     ENSURE_INITIALIZATION();
791 
792     the_table->table = (const char **)NULL;
793     the_table->size = 0;
794 }
795 
796 
char *
awk_padstr(const char *s, size_t extra)
{
    /*******************************************************************
      Return a newly-allocated copy of S whose storage has room for
      EXTRA more characters beyond the copy and its terminating NUL.
      A NULL S is treated like an empty string.  The contents of the
      extra slots are not set.

      Terminate with a fatal error if memory cannot be allocated.
    *******************************************************************/

    size_t length = 0;
    char *copy;

    ENSURE_INITIALIZATION();

    if (s != (char *)NULL)
        length = strlen(s);

    copy = (char *)malloc(length + 1 + extra);
    if (copy == (char *)NULL)
        _awk_error("awk_padstr(): out of memory");

    if (s != (char *)NULL)
        strcpy(copy, s);
    else
        copy[0] = '\0';

    return (copy);
}
823 
824 
825 size_t
awk_split(const char * s,const char *** parts,const char * field_separator)826 awk_split(const char *s, const char ***parts, const char *field_separator)
827 {
828     /*******************************************************************
829       Split the string S into tokens delimited by separators defined by
830       FIELD_SEPARATOR, returning them in the newly-allocated array
831       PARTS[], and return the number of tokens stored in PARTS[].
832 
833       If FIELD_SEPARATOR is NULL, use the current value of the global
834       variable FS in its place.
835 
836       When FIELD_SEPARATOR is " " (a single blank), then leading and
837       trailing whitespace is ignored, and the tokens are separated by
838       runs of whitespace (blanks, tabs, or newlines).
839 
840       This quote from Arnold Robbins ``Effective AWK Programming''
841       (also available as the online gawk manual) is significant:
842 
843 	``Normally, fields are separated by whitespace sequences (spaces,
844 	tabs and newlines), not by single spaces: two spaces in a row do
845 	not delimit an empty field.  The default value of the field
846 	separator `FS' is a string containing a single space, `" "'.  If
847 	this value were interpreted in the usual way, each space
848 	character would separate fields, so two spaces in a row would
849 	make an empty field between them.  The reason this does not
850 	happen is that a single space as the value of `FS' is a special
851 	case: it is taken to specify the default manner of delimiting
852 	fields.
853 
854 	If `FS' is any other single character, such as `","', then each
855 	occurrence of that character separates two fields.  Two
856 	consecutive occurrences delimit an empty field.  If the
857 	character occurs at the beginning or the end of the line, that
858 	too delimits an empty field.  The space character is the only
859 	single character which does not follow these rules.''
860 
861       When FIELD_SEPARATOR is longer than a single character, it is
862       taken to be a regular expression.
863 
864       PARTS[] should ultimately freed by a call to awk_free_table().
865     *******************************************************************/
866 
867     ENSURE_INITIALIZATION();
868 
869     if (field_separator == CNULL)
870 	field_separator = FS;
871 
872     if (strcmp(field_separator, " ") == 0)
873 	return (_awk_split_simple(s, parts));
874     else if (strlen(field_separator) == 1)
875 	return (_awk_split_complex(s, parts, field_separator));
876     else
877 	return (_awk_split_regexp(s, parts, field_separator));
878 }
879 
880 
881 int
awk_sub(const char * regular_expression,const char * replacement,char ** target)882 awk_sub(const char *regular_expression, const char *replacement,
883 	char **target)
884 {
885     /*******************************************************************
886       Substitute the leftmost longest substring in TARGET matching
887       REGULAR_EXPRESSION with replacement, updating TARGET to point to
888       the new string.  Any storage previously pointed to by TARGET is
889       freed, and thus, must have originally been allocated by malloc().
890 
891       Return the number of substitions made.
892     *******************************************************************/
893 
894     int nsub;
895 
896     ENSURE_INITIALIZATION();
897 
898     nsub = 0;
899     if (awk_match(*target, regular_expression))
900     {
901 	const char *first;
902 	const char *rest;
903 
904 	first = awk_substr(*target, 1, RSTART - 1);
905 	rest = awk_substr(*target, RSTART + RLENGTH, LONG_MAX);
906 	awk_free_string(*target);
907 	*target = (char *)_awk_concat(first, replacement, rest);
908 	awk_free_string(first);
909 	awk_free_string(rest);
910 	nsub++;
911     }
912 
913     return (nsub);
914 }
915 
916 
917 const char *
awk_substr(const char * source,awk_int_t start,awk_int_t length)918 awk_substr(const char *source, awk_int_t start, awk_int_t length)
919 {
920     /*******************************************************************
921       Return a newly-allocated string containing a substring of SOURCE,
922       beginning at START (indexes begin at 1), of LENGTH characters, or
923       fewer if the string ends early.
924 
925       Design note: because of the common awk idiom,
926       substr(s,1,RSTART-1), for the string prefixing the current match,
927       it is necessary that the LENGTH argument be a signed integer,
928       rather than the usual unsigned integer type size_t used in the
929       strxxx() family.
930     *******************************************************************/
931 
932     size_t n;
933     char *t;
934 
935     ENSURE_INITIALIZATION();
936 
937     n = strlen(&source[start - 1]);
938 
939     if (length <= 0)
940 	n = 0;
941     else if ((size_t) length < n)
942 	n = (size_t) length;
943 
944     t = awk_padstr("", n);
945     if (n > 0)
946 	(void)strncpy(t, &source[start - 1], n);
947     t[n] = '\0';
948 
949     return ((const char *)t);
950 }
951 
952 
double
awk_string_to_double(const char *s)
{
    /*******************************************************************
      Return the value that strtod() produces for S.
    *******************************************************************/

    double value;

    ENSURE_INITIALIZATION();

    value = strtod(s, (char **)NULL);

    return (value);
}
960 
961 
long
awk_string_to_long(const char *s)
{
    /*******************************************************************
      Return the value that strtol() produces for S in base 10.
    *******************************************************************/

    long value;

    ENSURE_INITIALIZATION();

    value = strtol(s, (char **)NULL, 10);

    return (value);
}
969 
970 
unsigned long
awk_string_to_unsigned_long(const char *s)
{
    /*******************************************************************
      Return the value that strtoul() produces for S in base 10.
    *******************************************************************/

    unsigned long value;

    ENSURE_INITIALIZATION();

    value = strtoul(s, (char **)NULL, 10);

    return (value);
}
978 
979 
980 void
awk_terminate(void)981 awk_terminate(void)
982 {
983     /*******************************************************************
984       Terminate use of the awk compatibility library.  This function
985       MUST be called if memory leaks are to be avoided, and it MUST not
986       be called if awk_initialize() has not be called.
987 
988       Calls to the pair awk_initialize() and awk_terminate() must
989       bracket all calls to other members of this library.
990     *******************************************************************/
991 
992     ENSURE_INITIALIZATION();
993 
994     _awk_lib_initialized = 0;
995 }
996 
997 
const char *
awk_tolower(const char *s)
{
    /*******************************************************************
      Return a newly-allocated copy of S converted to lowercase.  The
      caller owns the copy.

      Each character is converted to unsigned char before it is handed
      to the <ctype.h> functions, whose behavior is undefined for
      negative arguments other than EOF.
    *******************************************************************/

    char *copy;
    char *u;

    ENSURE_INITIALIZATION();

    copy = (char *)awk_dupstr(s);
    for (u = copy; *u; ++u)
    {
        if (isupper((unsigned char)*u))
            *u = (char)tolower((unsigned char)*u);
    }

    return ((const char *)copy);
}
1019 
1020 
const char *
awk_toupper(const char *s)
{
    /*******************************************************************
      Return a newly-allocated copy of S converted to uppercase.  The
      caller owns the copy.

      Each character is converted to unsigned char before it is handed
      to the <ctype.h> functions, whose behavior is undefined for
      negative arguments other than EOF.
    *******************************************************************/

    char *copy;
    char *u;

    ENSURE_INITIALIZATION();

    copy = (char *)awk_dupstr(s);
    for (u = copy; *u; ++u)
    {
        if (islower((unsigned char)*u))
            *u = (char)toupper((unsigned char)*u);
    }

    return ((const char *)copy);
}
1042 
1043 
const char *
awk_unsigned_long_to_string(unsigned long n)
{
    /*******************************************************************
      Format N in decimal and return the text as a newly-allocated
      string, which the caller owns.
    *******************************************************************/

    char digits[40 + 1];        /* long enough for 128-bit integer */
    const char *text;

    ENSURE_INITIALIZATION();

    (void)sprintf(digits, "%lu", n);
    text = awk_dupstr(digits);

    return (text);
}
1059 
1060 
1061 static void
regfree(regexp * s)1062 regfree(regexp *s)
1063 {
1064     /*******************************************************************
1065       Free the compiled regular expression, S, which should have been
1066       returned from an earlier call to regcomp().
1067 
1068       This function should logically be part of the regexp() package,
1069       but is sadly missing from it.
1070     *******************************************************************/
1071 
1072     FREE(s);
1073 }
1074