1 /********************************************
2 field.c
3 copyright 2008-2014,2016 Thomas E. Dickey
4 copyright 1991-1995,2014 Michael D. Brennan
5
6 This is a source file for mawk, an implementation of
7 the AWK programming language.
8
9 Mawk is distributed without warranty under the terms of
10 the GNU General Public License, version 2, 1991.
11 ********************************************/
12
13 /*
14 * $MawkId: field.c,v 1.35 2016/11/21 02:15:47 tom Exp $
15 */
16
17 /* field.c */
18
19 #include "mawk.h"
20 #include "split.h"
21 #include "field.h"
22 #include "init.h"
23 #include "memory.h"
24 #include "scan.h"
25 #include "bi_vars.h"
26 #include "repl.h"
27 #include "regexp.h"
28
/* The first bank of fields (FBANK_SZ cells, starting with $0) followed by
   the pseudo fields (NF, RS, FS, OFMT, CONVFMT, ...).
   Most programs only need these. */
CELL field[FBANK_SZ + NUM_PFIELDS];
/* Vector of pointers to field banks; fbankv[0] is set to field[] during
   initialization, further banks are allocated on demand. */
CELL **fbankv;

/* fbankv grows in chunks of this many bank pointers */
#define FBANKV_CHUNK_SIZE 1024
/* number of FBANKV_CHUNK_SIZE-sized chunks currently allocated for fbankv */
static size_t fbankv_num_chunks;
38
39 /* make fbankv big enough to hold field CELL $i
40 is called with i==0 during initialization
41
42 This does not create field CELL $i, it just
43 makes fbankv big enough to hold the fbank that will hold $i
44 */
45 static void
allocate_fbankv(int i)46 allocate_fbankv(int i)
47 {
48 if (i == 0) {
49 size_t sz = FBANKV_CHUNK_SIZE * sizeof(CELL *);
50 fbankv_num_chunks = 1;
51 fbankv = (CELL **) zmalloc(sz);
52 memset(fbankv, 0, sz);
53 fbankv[0] = field;
54 } else {
55 size_t u = (size_t) i + 1;
56 size_t chunks = (u / (FBANK_SZ * FBANKV_CHUNK_SIZE)) + 1;
57 if (chunks > fbankv_num_chunks) {
58 size_t old_size = fbankv_num_chunks * FBANKV_CHUNK_SIZE;
59 size_t new_size = chunks * FBANKV_CHUNK_SIZE;
60 fbankv = zrealloc(fbankv, old_size * sizeof(CELL *),
61 new_size * sizeof(CELL *));
62
63 memset(&fbankv[old_size], 0, (new_size - old_size) * sizeof(CELL *));
64 fbankv_num_chunks = chunks;
65 }
66 }
67 }
68
/* Index of the highest field CELL that has been created, i.e. $max_field
   exists.  It starts at the last cell of the static bank and grows by
   FBANK_SZ each time another bank is allocated.
*/
static int max_field = FBANK_SZ - 1;
73
/* The fields $0, $1, ... $max_field are always valid; the
   value of nf (below) does not affect the validity of the
   allocated fields.  When a new $0 is read, nf is set to -1
   to indicate that $0 has not been split; the cells field[1],
   field[2], ... (actually fbankv[i / FBANK_SZ][i % FBANK_SZ])
   are left unchanged.

   So any time a field is assigned or changed, it has to
   be cell_destroyed first, and this is the only way a field gets
   cell_destroyed.
*/
84
/* rebuilds $0 from $1..$nf; defined further down in this file */
static void build_field0(void);

/* A description of how to split input into records, derived from RS.
   Whenever RS is assigned, rs_shadow is recomputed.
   The initial value matches the default RS of a single newline. */
SEPARATOR rs_shadow =
{
    SEP_CHAR, '\n', NULL
};
/* A splitting CELL version of FS; the initial C_SPACE value matches
   the default FS of a single space. */
CELL fs_shadow =
{
    C_SPACE, 0, 0, 0.0
};
/* nf holds the true value of NF.  If nf < 0, then NF has not been
   computed, i.e., $0 has not been split.
*/
int nf;
102
103 static void
set_rs_shadow(void)104 set_rs_shadow(void)
105 {
106 CELL c;
107 STRING *sval;
108 char *s;
109 SLen len;
110
111 if (posix_space_flag && mawk_state == EXECUTION)
112 scan_code['\n'] = SC_UNEXPECTED;
113
114 if (rs_shadow.type == SEP_STR) {
115 free_STRING((STRING *) rs_shadow.ptr);
116 }
117
118 cast_for_split(cellcpy(&c, RS));
119 switch (c.type) {
120 case C_RE:
121 if ((s = is_string_split(c.ptr, &len))) {
122 if (len == 1) {
123 rs_shadow.type = SEP_CHAR;
124 rs_shadow.c = s[0];
125 } else {
126 rs_shadow.type = SEP_STR;
127 rs_shadow.ptr = (PTR) new_STRING(s);
128 }
129 } else {
130 rs_shadow.type = SEP_RE;
131 rs_shadow.ptr = c.ptr;
132 }
133 break;
134
135 case C_SPACE:
136 rs_shadow.type = SEP_CHAR;
137 rs_shadow.c = ' ';
138 break;
139
140 case C_SNULL: /* RS becomes one or more blank lines */
141 if (mawk_state == EXECUTION)
142 scan_code['\n'] = SC_SPACE;
143 rs_shadow.type = SEP_MLR;
144 sval = new_STRING("\n\n+");
145 rs_shadow.ptr = re_compile(sval);
146 free_STRING(sval);
147 break;
148
149 case C_STRING:
150 /*
151 * Check for special case where we retained the cell as a string,
152 * bypassing regular-expression compiling.
153 */
154 if (string(&c)->len == 1) {
155 rs_shadow.type = SEP_CHAR;
156 rs_shadow.c = string(&c)->str[0];
157 break;
158 }
159 /* FALLTHRU */
160 default:
161 bozo("bad cell in set_rs_shadow");
162 }
163 }
164
165 static void
load_pfield(const char * name,CELL * cp)166 load_pfield(const char *name, CELL *cp)
167 {
168 SYMTAB *stp;
169
170 stp = insert(name);
171 stp->type = ST_FIELD;
172 stp->stval.cp = cp;
173 }
174
175 /* initialize $0 and the pseudo fields */
176 void
field_init(void)177 field_init(void)
178 {
179 allocate_fbankv(0);
180
181 field[0].type = C_STRING;
182 field[0].ptr = (PTR) & null_str;
183 null_str.ref_cnt++;
184
185 load_pfield("NF", NF);
186 NF->type = C_DOUBLE;
187 NF->dval = 0.0;
188
189 load_pfield("RS", RS);
190 RS->type = C_STRING;
191 RS->ptr = (PTR) new_STRING("\n");
192 /* rs_shadow already set */
193
194 load_pfield("FS", FS);
195 FS->type = C_STRING;
196 FS->ptr = (PTR) new_STRING(" ");
197 /* fs_shadow is already set */
198
199 load_pfield("OFMT", OFMT);
200 OFMT->type = C_STRING;
201 OFMT->ptr = (PTR) new_STRING("%.6g");
202
203 load_pfield("CONVFMT", CONVFMT);
204 CONVFMT->type = C_STRING;
205 CONVFMT->ptr = OFMT->ptr;
206 string(OFMT)->ref_cnt++;
207 }
208
209 void
set_field0(char * s,size_t len)210 set_field0(char *s, size_t len)
211 {
212 cell_destroy(&field[0]);
213 nf = -1;
214
215 if (len) {
216 field[0].type = C_MBSTRN;
217 field[0].ptr = (PTR) new_STRING0(len);
218 memcpy(string(&field[0])->str, s, len);
219 } else {
220 field[0].type = C_STRING;
221 field[0].ptr = (PTR) & null_str;
222 null_str.ref_cnt++;
223 }
224 }
225
226 /* split field[0] into $1, $2 ... and set NF
227 *
228 * Note the current values are valid CELLS and
229 * have to be destroyed when the new values are loaded.
230 */
231
232 void
split_field0(void)233 split_field0(void)
234 {
235 CELL *cp0;
236 size_t cnt = 0;
237 CELL hold0; /* copy field[0] here if not string */
238
239 if (field[0].type < C_STRING) {
240 cast1_to_s(cellcpy(&hold0, field + 0));
241 cp0 = &hold0;
242 } else {
243 cp0 = &field[0];
244 }
245
246 if (string(cp0)->len > 0) {
247 switch (fs_shadow.type) {
248 case C_SNULL: /* FS == "" */
249 cnt = null_split(string(cp0)->str, string(cp0)->len);
250 break;
251
252 case C_SPACE:
253 cnt = space_split(string(cp0)->str, string(cp0)->len);
254 break;
255
256 default:
257 cnt = re_split(string(cp0)->str, string(cp0)->len, fs_shadow.ptr);
258 break;
259 }
260
261 }
262 /* the above xxx_split() function put the fields in an anonyous
263 * buffer that will be pulled into the fields with a transer call */
264
265 /* we are done with cp0 */
266 if (cp0 == &hold0)
267 free_STRING(string(cp0));
268
269 nf = (int) cnt;
270
271 cell_destroy(NF);
272 NF->type = C_DOUBLE;
273 NF->dval = (double) nf;
274
275 if (nf > max_field)
276 slow_field_ptr(nf);
277 /* fields 1 .. nf are created and valid */
278
279 /* retrieves the result of xxx_split() */
280 if (cnt > 0) {
281 transfer_to_fields(cnt);
282 }
283 }
284
285 static void
invalid_format(CELL * fp)286 invalid_format(CELL *fp)
287 {
288 const char *what = (fp == CONVFMT) ? "CONVFMT" : "OFMT";
289 const char *format = string(fp)->str;
290 rt_error("illegal format assigned to %s: %s", what, format);
291 }
292
293 /*
294 * We expect only one field, using the same format choices as in do_printf().
295 */
296 static int
valid_format(CELL * fp)297 valid_format(CELL *fp)
298 {
299 int result = 1;
300 char *format = string(fp)->str;
301 char *q = format;
302 int args = 0;
303
304 while (*q != '\0') {
305 if (*q++ == '%') {
306 int l_flag = 0;
307 int h_flag = 0;
308
309 if (++args > 1)
310 invalid_format(fp);
311
312 while (*q == '-' || *q == '+' || *q == ' ' ||
313 *q == '#' || *q == '0' || *q == '\'') {
314 q++;
315 }
316 if (*q == '*') {
317 invalid_format(fp);
318 } else {
319 while (scan_code[*(unsigned char *) q] == SC_DIGIT) {
320 q++;
321 }
322 }
323 if (*q == '.') { /* have precision */
324 q++;
325 if (*q == '*') {
326 invalid_format(fp);
327 } else {
328 while (scan_code[*(unsigned char *) q] == SC_DIGIT) {
329 q++;
330 }
331 }
332 }
333 if (*q == 'l') {
334 q++;
335 } else if (*q == 'h') {
336 q++;
337 }
338 switch (*q++) {
339 case 's':
340 if (l_flag + h_flag)
341 invalid_format(fp);
342 break;
343 case 'c':
344 if (l_flag + h_flag)
345 invalid_format(fp);
346 break;
347 case 'd':
348 case 'i':
349 break;
350 case 'o':
351 case 'x':
352 case 'X':
353 case 'u':
354 break;
355 case 'e':
356 case 'g':
357 case 'f':
358 case 'E':
359 case 'G':
360 if (h_flag + l_flag)
361 invalid_format(fp);
362 break;
363 default:
364 invalid_format(fp);
365 }
366 }
367 }
368 return result;
369 }
370
371 /*
372 assign CELL *cp to field or pseudo field
373 and take care of all side effects
374 */
375
376 void
field_assign(CELL * fp,CELL * cp)377 field_assign(CELL *fp, CELL *cp)
378 {
379 CELL c;
380 int i, j;
381
382 /* the most common case first */
383 if (fp == field) {
384 cell_destroy(field);
385 cellcpy(fp, cp);
386 nf = -1;
387 return;
388 }
389
390 /* its not important to do any of this fast */
391
392 if (nf < 0)
393 split_field0();
394
395 switch (i = (int) (fp - field)) {
396
397 case NF_field:
398
399 cell_destroy(NF);
400 cellcpy(NF, cellcpy(&c, cp));
401 if (c.type != C_DOUBLE)
402 cast1_to_d(&c);
403
404 if ((j = d_to_i(c.dval)) < 0)
405 rt_error("negative value assigned to NF");
406
407 if (j > nf)
408 for (i = nf + 1; i <= j; i++) {
409 cp = field_ptr(i);
410 cell_destroy(cp);
411 cp->type = C_STRING;
412 cp->ptr = (PTR) & null_str;
413 null_str.ref_cnt++;
414 }
415
416 nf = j;
417 build_field0();
418 break;
419
420 case RS_field:
421 cell_destroy(RS);
422 cellcpy(RS, cp);
423 set_rs_shadow();
424 break;
425
426 case FS_field:
427 cell_destroy(FS);
428 cast_for_split(cellcpy(&fs_shadow, cellcpy(FS, cp)));
429 break;
430
431 case OFMT_field:
432 case CONVFMT_field:
433 /* If the user does something stupid with OFMT or CONVFMT,
434 we could crash.
435 We'll make an attempt to protect ourselves here. This is
436 why OFMT and CONVFMT are pseudo fields.
437
438 The ptrs of OFMT and CONVFMT always have a valid STRING,
439 even if assigned a DOUBLE or NOINIT
440 */
441
442 free_STRING(string(fp));
443 cellcpy(fp, cp);
444 if (fp->type < C_STRING) { /* !! */
445 fp->ptr = (PTR) new_STRING("%.6g");
446 } else if (valid_format(fp)) {
447 if (fp == CONVFMT) {
448 /* It's a string, but if it's really goofy and CONVFMT,
449 it could still damage us. Test it .
450 */
451 char xbuff[512];
452
453 xbuff[256] = 0;
454 sprintf(xbuff, string(fp)->str, 3.1459);
455 if (xbuff[256])
456 rt_error("CONVFMT assigned unusable value");
457 }
458 }
459 break;
460
461 #ifdef MSDOS
462 lm_dos_label:
463 #endif
464
465 default: /* $1 or $2 or ... */
466
467 cell_destroy(fp);
468 cellcpy(fp, cp);
469
470 if (i < 0 || i >= FBANK_SZ) {
471 /* field assigned to was not in field[0..FBANK_SZ-1]
472 * or a pseudo field, so compute actual field index
473 */
474 i = field_addr_to_index(fp);
475 }
476
477 if (i > nf) {
478 for (j = nf + 1; j < i; j++) {
479 cp = field_ptr(j);
480 cell_destroy(cp);
481 cp->type = C_STRING;
482 cp->ptr = (PTR) & null_str;
483 null_str.ref_cnt++;
484 }
485 nf = i;
486 cell_destroy(NF);
487 NF->type = C_DOUBLE;
488 NF->dval = (double) i;
489 }
490
491 build_field0();
492
493 }
494 }
495
496 /* construct field[0] from the other fields */
497
498 static void
build_field0(void)499 build_field0(void)
500 {
501
502 #ifdef DEBUG
503 if (nf < 0)
504 bozo("nf <0 in build_field0");
505 #endif
506
507 cell_destroy(field + 0);
508
509 if (nf == 0) {
510 field[0].type = C_STRING;
511 field[0].ptr = (PTR) & null_str;
512 null_str.ref_cnt++;
513 } else if (nf == 1) {
514 cellcpy(field, field + 1);
515 } else {
516 CELL c;
517 STRING *ofs, *tail;
518 size_t len;
519 register CELL *cp;
520 register char *p, *q;
521 int cnt;
522 CELL **fbp, *cp_limit;
523
524 cast1_to_s(cellcpy(&c, OFS));
525 ofs = (STRING *) c.ptr;
526 cast1_to_s(cellcpy(&c, field_ptr(nf)));
527 tail = (STRING *) c.ptr;
528 cnt = nf - 1;
529
530 len = ((size_t) cnt) * ofs->len + tail->len;
531
532 fbp = fbankv;
533 cp_limit = field + FBANK_SZ;
534 cp = field + 1;
535
536 while (cnt-- > 0) {
537 if (cp->type < C_STRING) { /* use the string field temporarily */
538 if (cp->type == C_NOINIT) {
539 cp->ptr = (PTR) & null_str;
540 null_str.ref_cnt++;
541 } else { /* its a double */
542 Int ival;
543 char xbuff[260];
544
545 ival = d_to_I(cp->dval);
546 if (ival == cp->dval)
547 sprintf(xbuff, INT_FMT, ival);
548 else
549 sprintf(xbuff, string(CONVFMT)->str, cp->dval);
550
551 cp->ptr = (PTR) new_STRING(xbuff);
552 }
553 }
554
555 len += string(cp)->len;
556
557 if (++cp == cp_limit) {
558 cp = *++fbp;
559 cp_limit = cp + FBANK_SZ;
560 }
561
562 }
563
564 field[0].type = C_STRING;
565 field[0].ptr = (PTR) new_STRING0(len);
566
567 p = string(field)->str;
568
569 /* walk it again , putting things together */
570 cnt = nf - 1;
571 fbp = fbankv;
572 cp = field + 1;
573 cp_limit = field + FBANK_SZ;
574 while (cnt-- > 0) {
575 memcpy(p, string(cp)->str, string(cp)->len);
576 p += string(cp)->len;
577 /* if not really string, free temp use of ptr */
578 if (cp->type < C_STRING) {
579 free_STRING(string(cp));
580 }
581 if (++cp == cp_limit) {
582 cp = *++fbp;
583 cp_limit = cp + FBANK_SZ;
584 }
585 /* add the separator */
586 q = ofs->str;
587 while (*q)
588 *p++ = *q++;
589 }
590 /* tack tail on the end */
591 memcpy(p, tail->str, tail->len);
592
593 /* cleanup */
594 if (tail == ofs) {
595 free_STRING(tail);
596 } else {
597 free_STRING(tail);
598 free_STRING(ofs);
599 }
600 }
601 }
602
603 /* We are assigning to a CELL and we aren't sure if its
604 a field
605 */
606 void
slow_cell_assign(CELL * target,CELL * source)607 slow_cell_assign(CELL *target, CELL *source)
608 {
609 if (field <= target && target <= LAST_PFIELD) {
610 field_assign(target, source);
611 } else {
612 size_t i;
613 for (i = 1; i < fbankv_num_chunks * FBANKV_CHUNK_SIZE; i++) {
614 CELL *bank_start = fbankv[i];
615 CELL *bank_end = bank_start + FBANK_SZ;
616
617 if (bank_start == 0)
618 break;
619
620 if (bank_start <= target && target < bank_end) {
621 /* it is a field */
622 field_assign(target, source);
623 return;
624 }
625 }
626 /* its not a field */
627 cell_destroy(target);
628 cellcpy(target, source);
629 }
630 }
631
632 int
field_addr_to_index(CELL * cp)633 field_addr_to_index(CELL *cp)
634 {
635 CELL **p = fbankv;
636
637 while (cp < *p || cp >= *p + FBANK_SZ)
638 p++;
639
640 return (int) (((p - fbankv) << FB_SHIFT) + (cp - *p));
641 }
642
643 /*------- more than 1 fbank needed ------------*/
644
645 /*
646 compute the address of a field $i
647
648 if CELL $i doesn't exist, because it is bigger than max_field,
649 then it gets created and max_field grows.
650 */
651
652 CELL *
slow_field_ptr(int i)653 slow_field_ptr(int i)
654 {
655
656 if (i > max_field) { /* need to allocate more field memory */
657 int j;
658 allocate_fbankv(i);
659
660 j = (max_field >> FB_SHIFT) + 1;
661
662 assert(j > 0 && fbankv[j - 1] != 0 && fbankv[j] == 0);
663
664 do {
665 fbankv[j] = (CELL *) zmalloc(sizeof(CELL) * FBANK_SZ);
666 memset(fbankv[j], 0, sizeof(CELL) * FBANK_SZ);
667 j++;
668 max_field += FBANK_SZ;
669 }
670 while (i > max_field);
671 }
672
673 return &fbankv[i >> FB_SHIFT][i & (FBANK_SZ - 1)];
674 }
675
676 #if USE_BINMODE
677
678 /* read current value of BINMODE */
679 int
binmode(void)680 binmode(void)
681 {
682 CELL c;
683
684 cast1_to_d(cellcpy(&c, BINMODE));
685 return d_to_i(c.dval);
686 }
687
688 /* set BINMODE and RS and ORS
689 from environment or -W binmode= */
690
691 void
set_binmode(int x)692 set_binmode(int x)
693 {
694 CELL c;
695 int change = ((x & 4) == 0);
696
697 /* set RS */
698 c.type = C_STRING;
699 c.ptr = (PTR) new_STRING((change && (x & 1)) ? "\r\n" : "\n");
700 field_assign(RS, &c);
701 free_STRING(string(&c));
702
703 /* set ORS */
704 cell_destroy(ORS);
705 ORS->type = C_STRING;
706 ORS->ptr = (PTR) new_STRING((change && (x & 2)) ? "\r\n" : "\n");
707
708 cell_destroy(BINMODE);
709 BINMODE->type = C_DOUBLE;
710 BINMODE->dval = (double) x;
711 }
712
713 #endif /* USE_BINMODE */
714
715 #ifdef NO_LEAKS
716
717 static void
fbank_free(CELL * const fb)718 fbank_free(CELL *const fb)
719 {
720 CELL *end = fb + FBANK_SZ;
721 CELL *cp;
722 for (cp = fb; cp < end; cp++) {
723 cell_destroy(cp);
724 }
725 zfree(fb, FBANK_SZ * sizeof(CELL));
726 }
727
728 static void
fbankv_free(void)729 fbankv_free(void)
730 {
731 unsigned i = 1;
732 const size_t cnt = FBANKV_CHUNK_SIZE * fbankv_num_chunks;
733 while (i < cnt && fbankv[i] != 0) {
734 fbank_free(fbankv[i]);
735 i++;
736 }
737 for (; i < cnt; i++) {
738 if (fbankv[i] != 0) {
739 bozo("unexpected pointer in fbankv[]");
740 }
741 }
742 zfree(fbankv, cnt * sizeof(CELL *));
743 }
744
745 void
field_leaks(void)746 field_leaks(void)
747 {
748 int n;
749
750 /* everything in field[] */
751 for (n = 0; n < FBANK_SZ + NUM_PFIELDS; n++) {
752 cell_destroy(&field[n]);
753 }
754 /* fbankv[0] == field
755 this call does all the rest of the fields
756 */
757 fbankv_free();
758
759 switch (fs_shadow.type) {
760 case C_RE:
761 re_destroy(fs_shadow.ptr);
762 break;
763 case C_STRING:
764 case C_STRNUM:
765 case C_MBSTRN:
766 cell_destroy(&fs_shadow);
767 break;
768 default:
769 break;
770 }
771
772 switch (rs_shadow.type) {
773 case SEP_STR:
774 free_STRING(((STRING *) (&rs_shadow.ptr)));
775 break;
776 case SEP_RE:
777 re_destroy(rs_shadow.ptr);
778 break;
779 }
780 }
781 #endif
782