1 /***********************************************************************
2 @C-file{
3 author = "Nelson H. F. Beebe",
4 version = "2.00",
5 date = "10 December 2000",
6 time = "07:53:44 MST",
7 filename = "awklib.c",
8 address = "Center for Scientific Computing
9 University of Utah
10 Department of Mathematics, 322 INSCC
11 155 S 1400 E RM 233
12 Salt Lake City, UT 84112-0090
13 USA",
14 telephone = "+1 801 581 5254",
15 FAX = "+1 801 585 1640, +1 801 581 4148",
16 URL = "http://www.math.utah.edu/~beebe",
17 checksum = "42645 1073 3121 29602",
18 email = "beebe@math.utah.edu, beebe@acm.org,
19 beebe@computer.org, beebe@ieee.org
20 (Internet)",
21 codetable = "ISO/ASCII",
22 keywords = "awk compatibility library",
23 supported = "yes",
24 docstring = "This file defines a subset of the awk library
25 functions for use by C programs, to assist in
26 the manual translation of awk code to C.
27
28 Provided that the caller is careful to
29 eventually invoke free() on every
30 newly-allocated string returned by any of
31 these primitives, awk_free_table() on
32 tables returned by awk_split(), and
33 awk_close_infile() on every open input
34 file, this library is designed to be
35 strictly free of memory leaks.
36
37 The checksum field above contains a CRC-16
38 checksum as the first value, followed by the
39 equivalent of the standard UNIX wc (word
40 count) utility output of lines, words, and
41 characters. This is produced by Robert
42 Solovay's checksum utility.",
43 }
44 ***********************************************************************/
45
46 #include "awklib.h"
47 #include "regexp/regexp.h"
48
/* Scratch variable written by awk_is_white(), so that the macro can
   evaluate its argument exactly once. */
static int _c;
/* Nonzero if c is a blank, a tab, or a newline.  As a side effect, the
   value of c is stored in the file-scope variable _c. */
#define awk_is_white(c) (_c = (c), ((_c == ' ') || (_c == '\t') || (_c == '\n')))

/* Release p through free(); the cast to void* also discards any const
   qualifier on the pointer. */
#define FREE(p) (void)free((void*)(p))

/* awk-style global variables.  FS and RS receive their default values
   (" " and "\n") in awk_initialize().  FILENAME is defined here, but it
   is not assigned anywhere in the visible part of this file. */
const char *FILENAME = (const char *)NULL;
const char *FS = (const char *)NULL;
const char *RS = (const char *)NULL;

/* FNR is defined here, but it is not updated anywhere in the visible
   part of this file.  RSTART (1-based index) and RLENGTH describe the
   most recent match; 0 and -1 mean "no match". */
size_t FNR = 0;
awk_int_t RLENGTH = -1L;
awk_int_t RSTART = 0L;

/* Set to 1 by awk_initialize() and back to 0 by awk_terminate(). */
static int _awk_lib_initialized = 0;

/* Run awk_initialize() on demand, if it has not been run yet. */
#define ENSURE_INITIALIZATION() do { if (!_awk_lib_initialized) awk_initialize(); } while (0)
65
/* Building blocks for the regular expressions below; adjacent string
   literals are pasted together by the compiler. */

/* At most one leading sign character. */
#define OPTIONAL_SIGN_PATTERN "[-+]?"

/* Any run (possibly empty) of blank, tab, newline, carriage return,
   formfeed, or vertical tab. */
#define OPTIONAL_WHITESPACE_PATTERN "[ \t\n\r\f\v]*"

/* Exponent letter (D, E, or Q in either case), optional sign, digits. */
#define EXPONENT_PATTERN "[DdEeQq]" OPTIONAL_SIGN_PATTERN "[0-9]+"

/* Whole-string pattern used by awk_is_NaN(): an optionally signed
   "NaN" in any letter case with an optional Q/S prefix or suffix
   letter, or a field made of question marks, a period, and question
   marks or zeros, with or without an exponent.
   NOTE(review): the question-mark forms presumably correspond to what
   some run-time libraries print for a NaN -- confirm. */
static const char *_awk_NaN_regexp =
    "^" OPTIONAL_WHITESPACE_PATTERN OPTIONAL_SIGN_PATTERN
    "("
    "[QqSs]?[Nn][Aa][Nn][QqSs]?"
    "|"
    "[?]+[.][?0]+" EXPONENT_PATTERN
    "|"
    "[?]+[.][?0]+"
    ")" OPTIONAL_WHITESPACE_PATTERN "$" ;

/* Whole-string pattern used by awk_is_negative_infinity(): "-Inf" or
   "-Infinity" in any letter case, or a field of minus signs, a period,
   a minus sign, and zeros, with or without an exponent. */
static const char *_awk_negative_infinity_regexp =
    "^" OPTIONAL_WHITESPACE_PATTERN
    "("
    "-[Ii][Nn][Ff]"
    "|"
    "-[Ii][Nn][Ff][Ii][Nn][Ii][Tt][Yy]"
    "|"
    "-+[.]-0+" EXPONENT_PATTERN
    "|"
    "-+[.]-0+"
    ")"
    OPTIONAL_WHITESPACE_PATTERN "$" ;

/* Whole-string pattern used by awk_is_positive_infinity(): "Inf" or
   "Infinity" in any letter case with an optional plus sign, or a field
   of plus signs, a period, a plus sign, and zeros, with or without an
   exponent. */
static const char *_awk_positive_infinity_regexp =
    "^" OPTIONAL_WHITESPACE_PATTERN
    "("
    "[+]?[Ii][Nn][Ff]"
    "|"
    "[+]?[Ii][Nn][Ff][Ii][Nn][Ii][Tt][Yy]"
    "|"
    "[+]+[.][+]0+" EXPONENT_PATTERN
    "|"
    "[+]+[.][+]0+"
    ")"
    OPTIONAL_WHITESPACE_PATTERN "$" ;
107
/* Typed null pointers used for comparisons and assignments below. */
static const char *CNULL = (const char *)NULL;
static FILE *FNULL = (FILE *) NULL;

/* Forward declarations of the file-private helpers defined below. */
static const char *_awk_concat(const char *s1, const char *s2,
                               const char *s3);
static void _awk_error(const char *s);
static FILE *_awk_file_to_fp(const char *filename);
static const char *_awk_find_char(const char *s, int c);
static int _awk_gsub(regexp * compiled_regexp, const char *replacement,
                     char **target);
static int _awk_match(const char *source, regexp * compiled_regexp);
static FILE *_awk_open_infile(const char *filename);
static int _awk_regmatch(const char *s, const char *regexp_pattern);
static const char *_awk_skip_nonwhite(const char *s);
static const char *_awk_skip_white(const char *s);
static size_t _awk_split_complex(const char *s, const char ***parts,
                                 const char *field_separator);
static size_t _awk_split_regexp(const char *s, const char ***parts,
                                const char *field_separator);
static size_t _awk_split_simple(const char *s, const char ***parts);
static void regfree(regexp *s);
129
/* One open input file: the name it was opened under (a private copy
   made with awk_dupstr(); NULL marks an unused slot) and its stream. */
typedef struct
{
    const char *name;
    FILE *fp;
}
filetable_t;

/* Maximum number of simultaneously open input files; may be overridden
   at compile time. */
#if !defined(MAXOPENFILES)
#define MAXOPENFILES 2
#endif

/* Growth increment used by awk_add_element() (table slots) and by
   awk_getline() (buffer bytes). */
#define CHUNKSIZE 256

/* Table of currently open input files, searched linearly by name. */
static filetable_t FileTable[MAXOPENFILES];
144
145
static const char *
_awk_concat(const char *s1, const char *s2, const char *s3)
{
    /*******************************************************************
       Join S1, S2, and S3, in that order, into one freshly-allocated
       string and return it.  The caller owns the result.

       Allocation is delegated to awk_padstr(), which terminates the
       program with a fatal error when memory is exhausted.
    *******************************************************************/

    const size_t n1 = strlen(s1);
    const size_t n2 = strlen(s2);
    const size_t n3 = strlen(s3);
    char *joined;
    char *tail;

    /* copy of S1 with room reserved for S2 and S3 */
    joined = awk_padstr(s1, n2 + n3);

    tail = joined + n1;
    (void)strcpy(tail, s2);
    (void)strcpy(tail + n2, s3);

    return ((const char *)joined);
}
170
171
static void
_awk_error(const char *s)
{
    /*******************************************************************
       Print the message S on stderr, prefixed by "FATAL ERROR: ", and
       then end the program with a failure exit status.  This function
       does not return.
    *******************************************************************/

    (void)fprintf(stderr, "FATAL ERROR: %s\n", s);

    exit(EXIT_FAILURE);
}
182
183
184 static FILE *
_awk_file_to_fp(const char * filename)185 _awk_file_to_fp(const char *filename)
186 {
187 /*******************************************************************
188 Return the file pointer associated with FILENAME, or NULL if
189 none.
190 *******************************************************************/
191
192 size_t k;
193
194 for (k = 0; k < MAXOPENFILES; ++k)
195 {
196 if ((FileTable[k].name != CNULL)
197 && (strcmp(FileTable[k].name, filename) == 0))
198 return (FileTable[k].fp);
199 }
200
201 return (FNULL);
202 }
203
204
static const char *
_awk_find_char(const char *s, int c)
{
    /*******************************************************************
       Scan S from its start and return a pointer to the first
       character equal to C.  If C does not occur, the returned pointer
       addresses the terminating NUL of S.
    *******************************************************************/

    for (; *s != '\0'; ++s)
    {
        if ((int)(*s) == c)
            break;
    }

    return (s);
}
218
219
220 static int
_awk_gsub(regexp * compiled_regexp,const char * replacement,char ** target)221 _awk_gsub(regexp * compiled_regexp, const char *replacement, char **target)
222 {
223 /*******************************************************************
224 Substitute all leftmost longest substrings in target matching
225 COMPILED_REGEXP with REPLACEMENT, updating TARGET to point to
226 the new string. Any storage previously pointed to by TARGET is
227 freed, and thus, must have originally been allocated by malloc().
228
229 Return the number of substitions made.
230 *******************************************************************/
231
232 int nsub;
233
234 nsub = 0;
235
236 if (_awk_match(*target, compiled_regexp))
237 {
238 const char *first;
239 char *rest;
240
241 first = awk_substr(*target, 1, RSTART - 1);
242 rest = (char *)awk_substr(*target, RSTART + RLENGTH, LONG_MAX);
243 nsub += _awk_gsub(compiled_regexp, replacement, &rest);
244 awk_free_string(*target);
245 *target = (char *)_awk_concat(first, replacement, rest);
246 awk_free_string(rest);
247 awk_free_string(first);
248 }
249
250 return (nsub);
251 }
252
253
254 static int
_awk_match(const char * source,regexp * compiled_regexp)255 _awk_match(const char *source, regexp * compiled_regexp)
256 {
257 /*******************************************************************
258 Search SOURCE for the longest leftmost substring matched by
259 COMPILED_REGEXP.
260
261 If a match is found, set the global variable RSTART to the index,
262 and RLENGTH to the length of the matched substring.
263
264 If there is no match, then set RSTART to 0, and RLENGTH to -1.
265
266 Return RSTART (one, if it starts at the beginning of source).
267 *******************************************************************/
268
269 if (regexec(compiled_regexp, source))
270 {
271 RSTART = 1 + (size_t) (compiled_regexp->startp[0] - source);
272 RLENGTH =
273 (awk_int_t) (compiled_regexp->endp[0] -
274 compiled_regexp->startp[0]);
275 }
276 else
277 {
278 RSTART = 0;
279 RLENGTH = -1;
280 }
281 return (RSTART);
282 }
283
284
285 static FILE *
_awk_open_infile(const char * filename)286 _awk_open_infile(const char *filename)
287 {
288 /*******************************************************************
289 Open the input file FILENAME, creating an entry for it in
290 FileTable[], and return a FILE* pointer for it. That pointer
291 may be NULL, if the file could not be opened, or too many
292 files are already open.
293 *******************************************************************/
294
295 size_t k;
296 FILE *fp;
297
298 if (strcmp(filename,"-") == 0)
299 fp = stdin;
300 else if (strcmp(filename,"/dev/stdin") == 0)
301 fp = stdin;
302 else if (strcmp(filename,"/dev/fd/0") == 0)
303 fp = stdin;
304 else
305 fp = fopen(filename, "r");
306 if (fp != FNULL)
307 {
308 for (k = 0; k < MAXOPENFILES; ++k)
309 {
310 if (FileTable[k].name == CNULL)
311 {
312 FileTable[k].name = awk_dupstr(filename);
313 FileTable[k].fp = fp;
314 break;
315 }
316 }
317 if (k == MAXOPENFILES)
318 { /* too many open input files */
319 (void)fclose(fp);
320 fp = (FILE *) NULL;
321 }
322 }
323 return (fp);
324 }
325
326
327 static int
_awk_regmatch(const char * s,const char * regexp_pattern)328 _awk_regmatch(const char *s, const char *regexp_pattern)
329 {
330 /*******************************************************************
331 Return 1 if S matches regular expression REGEXP, and 0 otherwise.
332 *******************************************************************/
333
334 int result;
335 regexp *compiled_regexp;
336
337 compiled_regexp = regcomp(regexp_pattern);
338 result = regexec(compiled_regexp, s);
339 regfree(compiled_regexp);
340
341 return (result ? 1 : 0);
342 }
343
344
static const char *
_awk_skip_nonwhite(const char *s)
{
    /*******************************************************************
       Advance over S until a whitespace character (as defined by
       awk_is_white()) or the terminating NUL is reached, and return a
       pointer to that position.
    *******************************************************************/

    for (;;)
    {
        if ((*s == '\0') || awk_is_white(*s))
            break;
        ++s;
    }

    return (s);
}
357
358
static const char *
_awk_skip_white(const char *s)
{
    /*******************************************************************
       Advance over any leading whitespace (as defined by
       awk_is_white()) in S, and return a pointer to the first
       character that is not whitespace.  That character may be the
       terminating NUL.
    *******************************************************************/

    for (; awk_is_white(*s); ++s)
        continue;

    return (s);
}
371
372
373 static size_t
_awk_split_complex(const char * s,const char *** parts,const char * field_separator)374 _awk_split_complex(const char *s, const char ***parts,
375 const char *field_separator)
376 {
377 /*******************************************************************
378 Split the string S into tokens delimited by the single character
379 in FIELD_SEPARATOR. Return them in the newly-allocated array
380 PARTS[], and return the number of tokens stored in PARTS[].
381
382 PARTS[] should ultimately freed by a call to awk_free_table().
383 *******************************************************************/
384
385 size_t n_parts;
386 const char *t;
387 awk_table_t the_table;
388
389 awk_new_table(&the_table);
390 n_parts = 0;
391 t = _awk_find_char(s, field_separator[0]);
392
393 while (t >= s)
394 {
395 awk_add_element(&the_table, ++n_parts,
396 awk_substr(s, 1, (awk_int_t) (t - s)));
397 if (*t == '\0')
398 break;
399 s = t + 1;
400 t = _awk_find_char(s, field_separator[0]);
401 }
402 *parts = the_table.table;
403
404 return (n_parts);
405 }
406
407
408 static size_t
_awk_split_regexp(const char * s,const char *** parts,const char * field_separator)409 _awk_split_regexp(const char *s, const char ***parts,
410 const char *field_separator)
411 {
412 /*******************************************************************
413 Split the string S into tokens delimited by the regular expression
414 in FIELD_SEPARATOR. Return them in the newly-allocated array
415 PARTS[], and return the number of tokens stored in PARTS[].
416
417 PARTS[] should ultimately freed by a call to awk_free_table().
418 *******************************************************************/
419
420 size_t n_parts;
421 awk_table_t the_table;
422 regexp *compiled_regexp;
423
424 awk_new_table(&the_table);
425 n_parts = 0;
426
427 compiled_regexp = regcomp(field_separator);
428 while (_awk_match(s, compiled_regexp))
429 {
430 awk_add_element(&the_table, ++n_parts, awk_substr(s, 1, RSTART - 1));
431 s = &s[-1 + RSTART + RLENGTH];
432 }
433 regfree(compiled_regexp);
434
435 awk_add_element(&the_table, ++n_parts, awk_dupstr(s));
436 *parts = the_table.table;
437
438 return (n_parts);
439 }
440
441
442 static size_t
_awk_split_simple(const char * s,const char *** parts)443 _awk_split_simple(const char *s, const char ***parts)
444 {
445 /*******************************************************************
446 Split the string S into tokens delimited by whitespace, ignoring
447 leading and trailing space. Return them in the newly-allocated
448 array PARTS[], and return the number of tokens stored in PARTS[].
449
450 PARTS[] should ultimately freed by a call to awk_free_table().
451 *******************************************************************/
452
453 size_t n_parts;
454 const char *t;
455 awk_table_t the_table;
456
457 awk_new_table(&the_table);
458 n_parts = 0;
459 s = _awk_skip_white(s);
460 t = _awk_skip_nonwhite(s);
461 while (t > s)
462 {
463 awk_add_element(&the_table, ++n_parts,
464 awk_substr(s, 1, (awk_int_t) (t - s)));
465 s = _awk_skip_white(t);
466 t = _awk_skip_nonwhite(s);
467 }
468 *parts = the_table.table;
469
470 return (n_parts);
471 }
472
473
474 void
awk_add_element(awk_table_t * the_table,size_t the_index,const char * the_value)475 awk_add_element(awk_table_t * the_table, size_t the_index,
476 const char *the_value)
477 {
478 /*******************************************************************
479 Set THE_TABLE[THE_INDEX] = THE_VALUE, growing the table as needed.
480 THE_INDEX counts from 1, and THE_TABLE[0] is allocated, but
481 unused. THE_VALUE is not duplicated to ensure a unique string:
482 that is the caller's responsibility.
483 *******************************************************************/
484
485 ENSURE_INITIALIZATION();
486
487 if (the_table->table == (const char **)NULL)
488 {
489 the_table->size = the_index + 1 + CHUNKSIZE;
490 the_table->table =
491 (const char **)malloc(sizeof(const char *) * the_table->size);
492 }
493 else if (the_table->size <= the_index)
494 {
495 the_table->size = the_index + 1 + CHUNKSIZE;
496 the_table->table =
497 (const char **)realloc(the_table->table,
498 sizeof(const char *) * the_table->size);
499 }
500 if (the_table->table == (const char **)NULL)
501 _awk_error("awk_add_element(): out of memory");
502 the_table->table[the_index] = the_value;
503 }
504
505
506 void
awk_close_infile(const char * filename)507 awk_close_infile(const char *filename)
508 {
509 /*******************************************************************
510 Close the file pointer associated with FILENAME, and clear its
511 FileTable[] entry.
512 *******************************************************************/
513
514 size_t k;
515
516 ENSURE_INITIALIZATION();
517
518 for (k = 0; k < MAXOPENFILES; ++k)
519 {
520 if ((FileTable[k].name != CNULL)
521 && (strcmp(FileTable[k].name, filename) == 0))
522 {
523 (void)fclose(FileTable[k].fp);
524 awk_free_string(FileTable[k].name);
525 FileTable[k].name = CNULL;
526 FileTable[k].fp = FNULL;
527 return;
528 }
529 }
530 }
531
532
const char *
awk_dupstr(const char *s)
{
    /*******************************************************************
       Return a copy of S in newly-allocated storage, which the caller
       owns.  The copy is made by awk_padstr() with no extra padding,
       and that function terminates the program with a fatal error if
       memory cannot be allocated.
    *******************************************************************/

    char *copy;

    ENSURE_INITIALIZATION();

    copy = awk_padstr(s, 0);

    return ((const char *)copy);
}
546
547
548 void
awk_free_string(const char * s)549 awk_free_string(const char *s)
550 {
551 /*******************************************************************
552 Free the string s, which should have been dynamically allocated
553 by one of the public awk_xxx() functions.
554 *******************************************************************/
555
556 ENSURE_INITIALIZATION();
557
558 if (s != CNULL)
559 FREE(s);
560 }
561
562
void
awk_free_table(const char **table, size_t n)
{
    /*******************************************************************
       Release the strings in slots N, N-1, ..., 1 of TABLE[], and then
       TABLE[] itself.  Slot 0 is never used, and is therefore never
       freed.  A NULL table is silently ignored.

       Both the table and its elements must have come from malloc();
       otherwise, the behavior is unpredictable.
    *******************************************************************/

    ENSURE_INITIALIZATION();

    if (table == (const char **)NULL)
        return;

    for (; n > 0; --n)
        awk_free_string(table[n]);

    awk_free_string((const char *)table);
}
583
584
585 int
awk_getline(const char * infile,const char ** line)586 awk_getline(const char *infile, const char **line)
587 {
588 /*******************************************************************
589 Get the next input line from INFILE, and return a
590 freshly-allocated string, LINE, containing that line (excluding
591 any final newline). The caller should call free() when the line
592 is no longer needed. The return value is -1 (error), 0
593 (end-of-file), or +1 (line found).
594 *******************************************************************/
595
596 static char *buffer = (char *)NULL;
597 int c;
598 FILE *fp;
599 size_t len_buffer = 0;
600 size_t max_buffer = 0;
601
602 ENSURE_INITIALIZATION();
603
604 fp = _awk_file_to_fp(infile);
605 if (fp == (FILE *) NULL)
606 fp = _awk_open_infile(infile);
607 if (fp == (FILE *) NULL)
608 return (-1);
609 else if (feof(fp))
610 return (0);
611 else
612 {
613 while (((c = fgetc(fp)) != EOF) && (c != '\n'))
614 {
615 if (len_buffer >= max_buffer)
616 {
617 max_buffer += CHUNKSIZE;
618 buffer = (buffer == (char*)NULL) ? (char*)malloc(max_buffer) :
619 (char *)realloc(buffer, max_buffer);
620 if (buffer == (char *)NULL)
621 _awk_error("getline(): out of memory");
622 }
623 buffer[len_buffer++] = c;
624 }
625 if (buffer != (char*)NULL)
626 buffer[len_buffer] = '\0';
627 if ((c == EOF) && (len_buffer == 0))
628 {
629 if (buffer != (char *)NULL)
630 FREE(buffer);
631 buffer = (char *)NULL;
632 len_buffer = 0;
633 max_buffer = 0;
634 return (0); /* no more data in file */
635 }
636 else
637 {
638 *line = awk_dupstr(buffer);
639 len_buffer = 0;
640 return (1);
641 }
642 }
643 }
644
645
646 int
awk_gsub(const char * regular_expression,const char * replacement,char ** target)647 awk_gsub(const char *regular_expression, const char *replacement,
648 char **target)
649 {
650 /*******************************************************************
651 Substitute all leftmost longest substrings in target matching
652 REGULAR_EXPRESSION with REPLACEMENT, updating TARGET to point to
653 the new string. Any storage previously pointed to by TARGET is
654 freed, and thus, must have originally been allocated by malloc().
655
656 Return the number of substitions made.
657 *******************************************************************/
658
659 int nsub;
660 regexp *compiled_regexp;
661
662 ENSURE_INITIALIZATION();
663
664 compiled_regexp = regcomp(regular_expression);
665 nsub = _awk_gsub(compiled_regexp, replacement, target);
666 regfree(compiled_regexp);
667
668 return (nsub);
669 }
670
671
672 void
awk_initialize(void)673 awk_initialize(void)
674 {
675 /*******************************************************************
676 Initialize the awk compatibility library. This function MUST be
677 called before any of the other awk_xxx() functions are called;
678 failure to do so will produce unpredictable results.
679
680 On completion of use of the library, call awk_terminate() to
681 clean up.
682
683 Calls to the pair awk_initialize() and awk_terminate() must
684 bracket all calls to other members of this library.
685 *******************************************************************/
686
687 FS = " ";
688 RS = "\n";
689 RLENGTH = -1;
690 RSTART = 0;
691 _awk_lib_initialized = 1;
692 }
693
694
695 int
awk_is_NaN(const char * s)696 awk_is_NaN(const char *s)
697 {
698 /*******************************************************************
699 Return 1 if S matches a pattern for a NaN (Not-a-Number), and 0
700 otherwise.
701 *******************************************************************/
702
703 ENSURE_INITIALIZATION();
704
705 return (_awk_regmatch(s,_awk_NaN_regexp));
706 }
707
708
709 int
awk_is_negative_infinity(const char * s)710 awk_is_negative_infinity(const char *s)
711 {
712 /*******************************************************************
713 Return 1 if S matches a pattern for a negative infinity, and 0
714 otherwise.
715 *******************************************************************/
716
717 ENSURE_INITIALIZATION();
718
719 return (_awk_regmatch(s,_awk_negative_infinity_regexp));
720 }
721
722
723 int
awk_is_positive_infinity(const char * s)724 awk_is_positive_infinity(const char *s)
725 {
726 /*******************************************************************
727 Return 1 if S matches a pattern for a positive infinity, and 0
728 otherwise.
729 *******************************************************************/
730
731 ENSURE_INITIALIZATION();
732
733 return (_awk_regmatch(s,_awk_positive_infinity_regexp));
734 }
735
736
const char *
awk_long_to_string(long n)
{
    /*******************************************************************
       Format N in decimal, and return the digits as a newly-allocated
       string, which the caller owns.
    *******************************************************************/

    char digits[40 + 1];                /* long enough for 128-bit integer */
    const char *result;

    ENSURE_INITIALIZATION();

    (void)sprintf(digits, "%ld", n);
    result = awk_dupstr(digits);

    return (result);
}
752
753
754 int
awk_match(const char * source,const char * regular_expression)755 awk_match(const char *source, const char *regular_expression)
756 {
757 /*******************************************************************
758 Search SOURCE for the longest leftmost substring matched by
759 REGULAR_EXPRESSION.
760
761 If a match is found, set the global variable RSTART to the index,
762 and RLENGTH to the length of the matched substring.
763
764 If there is no match, then set RSTART to 0, and RLENGTH to -1.
765
766 Return RSTART (one, if it starts at the beginning of source).
767 *******************************************************************/
768
769 regexp *compiled_regexp;
770
771 ENSURE_INITIALIZATION();
772
773 compiled_regexp = regcomp(regular_expression);
774
775 (void)_awk_match(source, compiled_regexp);
776
777 regfree(compiled_regexp);
778
779 return (RSTART);
780 }
781
782
783 void
awk_new_table(awk_table_t * the_table)784 awk_new_table(awk_table_t * the_table)
785 {
786 /*******************************************************************
787 Initialize an awk table to empty.
788 *******************************************************************/
789
790 ENSURE_INITIALIZATION();
791
792 the_table->table = (const char **)NULL;
793 the_table->size = 0;
794 }
795
796
char *
awk_padstr(const char *s, size_t extra)
{
    /*******************************************************************
       Return a newly-allocated copy of S whose storage has EXTRA
       additional bytes beyond the copy and its terminating NUL.  A
       NULL S is treated like an empty string.

       The extra bytes are not initialized.

       Terminate with a fatal error if memory cannot be allocated.
    *******************************************************************/

    const int have_source = (s != (char *)NULL);
    size_t length;
    char *copy;

    ENSURE_INITIALIZATION();

    length = have_source ? strlen(s) : 0;

    copy = (char *)malloc(length + 1 + extra);
    if (copy == (char *)NULL)
        _awk_error("awk_padstr(): out of memory");

    if (have_source)
        strcpy(copy, s);
    else
        copy[0] = '\0';

    return (copy);
}
823
824
825 size_t
awk_split(const char * s,const char *** parts,const char * field_separator)826 awk_split(const char *s, const char ***parts, const char *field_separator)
827 {
828 /*******************************************************************
829 Split the string S into tokens delimited by separators defined by
830 FIELD_SEPARATOR, returning them in the newly-allocated array
831 PARTS[], and return the number of tokens stored in PARTS[].
832
833 If FIELD_SEPARATOR is NULL, use the current value of the global
834 variable FS in its place.
835
836 When FIELD_SEPARATOR is " " (a single blank), then leading and
837 trailing whitespace is ignored, and the tokens are separated by
838 runs of whitespace (blanks, tabs, or newlines).
839
840 This quote from Arnold Robbins ``Effective AWK Programming''
841 (also available as the online gawk manual) is significant:
842
843 ``Normally, fields are separated by whitespace sequences (spaces,
844 tabs and newlines), not by single spaces: two spaces in a row do
845 not delimit an empty field. The default value of the field
846 separator `FS' is a string containing a single space, `" "'. If
847 this value were interpreted in the usual way, each space
848 character would separate fields, so two spaces in a row would
849 make an empty field between them. The reason this does not
850 happen is that a single space as the value of `FS' is a special
851 case: it is taken to specify the default manner of delimiting
852 fields.
853
854 If `FS' is any other single character, such as `","', then each
855 occurrence of that character separates two fields. Two
856 consecutive occurrences delimit an empty field. If the
857 character occurs at the beginning or the end of the line, that
858 too delimits an empty field. The space character is the only
859 single character which does not follow these rules.''
860
861 When FIELD_SEPARATOR is longer than a single character, it is
862 taken to be a regular expression.
863
864 PARTS[] should ultimately freed by a call to awk_free_table().
865 *******************************************************************/
866
867 ENSURE_INITIALIZATION();
868
869 if (field_separator == CNULL)
870 field_separator = FS;
871
872 if (strcmp(field_separator, " ") == 0)
873 return (_awk_split_simple(s, parts));
874 else if (strlen(field_separator) == 1)
875 return (_awk_split_complex(s, parts, field_separator));
876 else
877 return (_awk_split_regexp(s, parts, field_separator));
878 }
879
880
881 int
awk_sub(const char * regular_expression,const char * replacement,char ** target)882 awk_sub(const char *regular_expression, const char *replacement,
883 char **target)
884 {
885 /*******************************************************************
886 Substitute the leftmost longest substring in TARGET matching
887 REGULAR_EXPRESSION with replacement, updating TARGET to point to
888 the new string. Any storage previously pointed to by TARGET is
889 freed, and thus, must have originally been allocated by malloc().
890
891 Return the number of substitions made.
892 *******************************************************************/
893
894 int nsub;
895
896 ENSURE_INITIALIZATION();
897
898 nsub = 0;
899 if (awk_match(*target, regular_expression))
900 {
901 const char *first;
902 const char *rest;
903
904 first = awk_substr(*target, 1, RSTART - 1);
905 rest = awk_substr(*target, RSTART + RLENGTH, LONG_MAX);
906 awk_free_string(*target);
907 *target = (char *)_awk_concat(first, replacement, rest);
908 awk_free_string(first);
909 awk_free_string(rest);
910 nsub++;
911 }
912
913 return (nsub);
914 }
915
916
917 const char *
awk_substr(const char * source,awk_int_t start,awk_int_t length)918 awk_substr(const char *source, awk_int_t start, awk_int_t length)
919 {
920 /*******************************************************************
921 Return a newly-allocated string containing a substring of SOURCE,
922 beginning at START (indexes begin at 1), of LENGTH characters, or
923 fewer if the string ends early.
924
925 Design note: because of the common awk idiom,
926 substr(s,1,RSTART-1), for the string prefixing the current match,
927 it is necessary that the LENGTH argument be a signed integer,
928 rather than the usual unsigned integer type size_t used in the
929 strxxx() family.
930 *******************************************************************/
931
932 size_t n;
933 char *t;
934
935 ENSURE_INITIALIZATION();
936
937 n = strlen(&source[start - 1]);
938
939 if (length <= 0)
940 n = 0;
941 else if ((size_t) length < n)
942 n = (size_t) length;
943
944 t = awk_padstr("", n);
945 if (n > 0)
946 (void)strncpy(t, &source[start - 1], n);
947 t[n] = '\0';
948
949 return ((const char *)t);
950 }
951
952
double
awk_string_to_double(const char *s)
{
    /*******************************************************************
       Convert the leading part of S to a double with strtod(), and
       return the result.  The position where the conversion stopped
       is not reported.
    *******************************************************************/

    double value;

    ENSURE_INITIALIZATION();

    value = strtod(s, (char **)NULL);

    return (value);
}
960
961
long
awk_string_to_long(const char *s)
{
    /*******************************************************************
       Convert the leading part of S, read as a decimal number, to a
       long with strtol(), and return the result.  The position where
       the conversion stopped is not reported.
    *******************************************************************/

    long value;

    ENSURE_INITIALIZATION();

    value = strtol(s, (char **)NULL, 10);

    return (value);
}
969
970
unsigned long
awk_string_to_unsigned_long(const char *s)
{
    /*******************************************************************
       Convert the leading part of S, read as a decimal number, to an
       unsigned long with strtoul(), and return the result.  The
       position where the conversion stopped is not reported.
    *******************************************************************/

    unsigned long value;

    ENSURE_INITIALIZATION();

    value = strtoul(s, (char **)NULL, 10);

    return (value);
}
978
979
980 void
awk_terminate(void)981 awk_terminate(void)
982 {
983 /*******************************************************************
984 Terminate use of the awk compatibility library. This function
985 MUST be called if memory leaks are to be avoided, and it MUST not
986 be called if awk_initialize() has not be called.
987
988 Calls to the pair awk_initialize() and awk_terminate() must
989 bracket all calls to other members of this library.
990 *******************************************************************/
991
992 ENSURE_INITIALIZATION();
993
994 _awk_lib_initialized = 0;
995 }
996
997
const char *
awk_tolower(const char *s)
{
    /*******************************************************************
       Return a newly-allocated copy of S in which every uppercase
       letter has been converted to lowercase.  The caller owns the
       result.

       Each character is converted to unsigned char before it is
       handed to the <ctype.h> functions: their behavior is undefined
       for negative arguments other than EOF, and plain char may be a
       signed type.
    *******************************************************************/

    char *t;
    char *u;

    ENSURE_INITIALIZATION();

    t = (char *)awk_dupstr(s);
    for (u = t; *u != '\0'; ++u)
    {
        const unsigned char ch = (unsigned char)(*u);

        if (isupper(ch))
            *u = (char)tolower(ch);
    }

    return ((const char *)t);
}
1019
1020
const char *
awk_toupper(const char *s)
{
    /*******************************************************************
       Return a newly-allocated copy of S in which every lowercase
       letter has been converted to uppercase.  The caller owns the
       result.

       Each character is converted to unsigned char before it is
       handed to the <ctype.h> functions: their behavior is undefined
       for negative arguments other than EOF, and plain char may be a
       signed type.
    *******************************************************************/

    char *t;
    char *u;

    ENSURE_INITIALIZATION();

    t = (char *)awk_dupstr(s);
    for (u = t; *u != '\0'; ++u)
    {
        const unsigned char ch = (unsigned char)(*u);

        if (islower(ch))
            *u = (char)toupper(ch);
    }

    return ((const char *)t);
}
1042
1043
const char *
awk_unsigned_long_to_string(unsigned long n)
{
    /*******************************************************************
       Format N in decimal, and return the digits as a newly-allocated
       string, which the caller owns.
    *******************************************************************/

    char digits[40 + 1];                /* long enough for 128-bit integer */
    const char *result;

    ENSURE_INITIALIZATION();

    (void)sprintf(digits, "%lu", n);
    result = awk_dupstr(digits);

    return (result);
}
1059
1060
1061 static void
regfree(regexp * s)1062 regfree(regexp *s)
1063 {
1064 /*******************************************************************
1065 Free the compiled regular expression, S, which should have been
1066 returned from an earlier call to regcomp().
1067
1068 This function should logically be part of the regexp() package,
1069 but is sadly missing from it.
1070 *******************************************************************/
1071
1072 FREE(s);
1073 }
1074