1 /********************************************
2 field.c
3 copyright 2008-2014,2016 Thomas E. Dickey
4 copyright 1991-1995,2014 Michael D. Brennan
5 
6 This is a source file for mawk, an implementation of
7 the AWK programming language.
8 
9 Mawk is distributed without warranty under the terms of
10 the GNU General Public License, version 2, 1991.
11 ********************************************/
12 
13 /*
14  * $MawkId: field.c,v 1.35 2016/11/21 02:15:47 tom Exp $
15  */
16 
17 /* field.c */
18 
19 #include "mawk.h"
20 #include "split.h"
21 #include "field.h"
22 #include "init.h"
23 #include "memory.h"
24 #include "scan.h"
25 #include "bi_vars.h"
26 #include "repl.h"
27 #include "regexp.h"
28 
29 /* initial fields and pseudo fields,
30     most programs only need these */
31 CELL field[FBANK_SZ + NUM_PFIELDS];
32 /* hold banks of more fields if needed */
33 CELL **fbankv;
34 
35 /* fbankv grows in chunks */
36 #define FBANKV_CHUNK_SIZE    1024
37 static size_t fbankv_num_chunks;
38 
39 /* make fbankv big enough to hold field CELL $i
40    is called with i==0 during initialization
41 
42    This does not create field CELL $i, it just
43    makes fbankv big enough to hold the fbank that will hold $i
44 */
45 static void
allocate_fbankv(int i)46 allocate_fbankv(int i)
47 {
48     if (i == 0) {
49 	size_t sz = FBANKV_CHUNK_SIZE * sizeof(CELL *);
50 	fbankv_num_chunks = 1;
51 	fbankv = (CELL **) zmalloc(sz);
52 	memset(fbankv, 0, sz);
53 	fbankv[0] = field;
54     } else {
55 	size_t u = (size_t) i + 1;
56 	size_t chunks = (u / (FBANK_SZ * FBANKV_CHUNK_SIZE)) + 1;
57 	if (chunks > fbankv_num_chunks) {
58 	    size_t old_size = fbankv_num_chunks * FBANKV_CHUNK_SIZE;
59 	    size_t new_size = chunks * FBANKV_CHUNK_SIZE;
60 	    fbankv = zrealloc(fbankv, old_size * sizeof(CELL *),
61 			      new_size * sizeof(CELL *));
62 
63 	    memset(&fbankv[old_size], 0, (new_size - old_size) * sizeof(CELL *));
64 	    fbankv_num_chunks = chunks;
65 	}
66     }
67 }
68 
69 /* max_field created i.e. $max_field exists
70    as new fields are created max_field grows
71 */
72 static int max_field = FBANK_SZ - 1;
73 
/*  The fields $0, $1, ... $max_field are always valid; the
    value of nf (below) does not affect the validity of the
    allocated fields.  When a new $0 is read, nf is set to -1
    to indicate that $0 has not been split, and the cells of
    $1, $2, ... (the cell of $i is
    fbankv[i >> FB_SHIFT][i & (FBANK_SZ - 1)]) are left unchanged.

    So any time a field is assigned or changed, its old value has
    to be cell_destroyed first, and this is the only way a field
    gets cell_destroyed.
*/
84 
85 static void build_field0(void);
86 
87 /* a description of how to split based on RS.
88    If RS is changed, so is rs_shadow */
89 SEPARATOR rs_shadow =
90 {
91     SEP_CHAR, '\n', NULL
92 };
93 /* a splitting CELL version of FS */
94 CELL fs_shadow =
95 {
96     C_SPACE, 0, 0, 0.0
97 };
98 int nf;
99  /* nf holds the true value of NF.  If nf < 0 , then
100     NF has not been computed, i.e., $0 has not been split
101   */
102 
103 static void
set_rs_shadow(void)104 set_rs_shadow(void)
105 {
106     CELL c;
107     STRING *sval;
108     char *s;
109     SLen len;
110 
111     if (posix_space_flag && mawk_state == EXECUTION)
112 	scan_code['\n'] = SC_UNEXPECTED;
113 
114     if (rs_shadow.type == SEP_STR) {
115 	free_STRING((STRING *) rs_shadow.ptr);
116     }
117 
118     cast_for_split(cellcpy(&c, RS));
119     switch (c.type) {
120     case C_RE:
121 	if ((s = is_string_split(c.ptr, &len))) {
122 	    if (len == 1) {
123 		rs_shadow.type = SEP_CHAR;
124 		rs_shadow.c = s[0];
125 	    } else {
126 		rs_shadow.type = SEP_STR;
127 		rs_shadow.ptr = (PTR) new_STRING(s);
128 	    }
129 	} else {
130 	    rs_shadow.type = SEP_RE;
131 	    rs_shadow.ptr = c.ptr;
132 	}
133 	break;
134 
135     case C_SPACE:
136 	rs_shadow.type = SEP_CHAR;
137 	rs_shadow.c = ' ';
138 	break;
139 
140     case C_SNULL:		/* RS becomes one or more blank lines */
141 	if (mawk_state == EXECUTION)
142 	    scan_code['\n'] = SC_SPACE;
143 	rs_shadow.type = SEP_MLR;
144 	sval = new_STRING("\n\n+");
145 	rs_shadow.ptr = re_compile(sval);
146 	free_STRING(sval);
147 	break;
148 
149     case C_STRING:
150 	/*
151 	 * Check for special case where we retained the cell as a string,
152 	 * bypassing regular-expression compiling.
153 	 */
154 	if (string(&c)->len == 1) {
155 	    rs_shadow.type = SEP_CHAR;
156 	    rs_shadow.c = string(&c)->str[0];
157 	    break;
158 	}
159 	/* FALLTHRU */
160     default:
161 	bozo("bad cell in set_rs_shadow");
162     }
163 }
164 
165 static void
load_pfield(const char * name,CELL * cp)166 load_pfield(const char *name, CELL *cp)
167 {
168     SYMTAB *stp;
169 
170     stp = insert(name);
171     stp->type = ST_FIELD;
172     stp->stval.cp = cp;
173 }
174 
175 /* initialize $0 and the pseudo fields */
176 void
field_init(void)177 field_init(void)
178 {
179     allocate_fbankv(0);
180 
181     field[0].type = C_STRING;
182     field[0].ptr = (PTR) & null_str;
183     null_str.ref_cnt++;
184 
185     load_pfield("NF", NF);
186     NF->type = C_DOUBLE;
187     NF->dval = 0.0;
188 
189     load_pfield("RS", RS);
190     RS->type = C_STRING;
191     RS->ptr = (PTR) new_STRING("\n");
192     /* rs_shadow already set */
193 
194     load_pfield("FS", FS);
195     FS->type = C_STRING;
196     FS->ptr = (PTR) new_STRING(" ");
197     /* fs_shadow is already set */
198 
199     load_pfield("OFMT", OFMT);
200     OFMT->type = C_STRING;
201     OFMT->ptr = (PTR) new_STRING("%.6g");
202 
203     load_pfield("CONVFMT", CONVFMT);
204     CONVFMT->type = C_STRING;
205     CONVFMT->ptr = OFMT->ptr;
206     string(OFMT)->ref_cnt++;
207 }
208 
209 void
set_field0(char * s,size_t len)210 set_field0(char *s, size_t len)
211 {
212     cell_destroy(&field[0]);
213     nf = -1;
214 
215     if (len) {
216 	field[0].type = C_MBSTRN;
217 	field[0].ptr = (PTR) new_STRING0(len);
218 	memcpy(string(&field[0])->str, s, len);
219     } else {
220 	field[0].type = C_STRING;
221 	field[0].ptr = (PTR) & null_str;
222 	null_str.ref_cnt++;
223     }
224 }
225 
226 /* split field[0] into $1, $2 ... and set NF
227  *
228  * Note the current values are valid CELLS and
229  * have to be destroyed when the new values are loaded.
230 */
231 
232 void
split_field0(void)233 split_field0(void)
234 {
235     CELL *cp0;
236     size_t cnt = 0;
237     CELL hold0;			/* copy field[0] here if not string */
238 
239     if (field[0].type < C_STRING) {
240 	cast1_to_s(cellcpy(&hold0, field + 0));
241 	cp0 = &hold0;
242     } else {
243 	cp0 = &field[0];
244     }
245 
246     if (string(cp0)->len > 0) {
247 	switch (fs_shadow.type) {
248 	case C_SNULL:		/* FS == "" */
249 	    cnt = null_split(string(cp0)->str, string(cp0)->len);
250 	    break;
251 
252 	case C_SPACE:
253 	    cnt = space_split(string(cp0)->str, string(cp0)->len);
254 	    break;
255 
256 	default:
257 	    cnt = re_split(string(cp0)->str, string(cp0)->len, fs_shadow.ptr);
258 	    break;
259 	}
260 
261     }
262     /* the above xxx_split() function put the fields in an anonyous
263      * buffer that will be pulled into the fields with a transer call */
264 
265     /* we are done with cp0 */
266     if (cp0 == &hold0)
267 	free_STRING(string(cp0));
268 
269     nf = (int) cnt;
270 
271     cell_destroy(NF);
272     NF->type = C_DOUBLE;
273     NF->dval = (double) nf;
274 
275     if (nf > max_field)
276 	slow_field_ptr(nf);
277     /* fields 1 .. nf are created and valid */
278 
279     /* retrieves the result of xxx_split() */
280     if (cnt > 0) {
281 	transfer_to_fields(cnt);
282     }
283 }
284 
285 static void
invalid_format(CELL * fp)286 invalid_format(CELL *fp)
287 {
288     const char *what = (fp == CONVFMT) ? "CONVFMT" : "OFMT";
289     const char *format = string(fp)->str;
290     rt_error("illegal format assigned to %s: %s", what, format);
291 }
292 
293 /*
294  * We expect only one field, using the same format choices as in do_printf().
295  */
296 static int
valid_format(CELL * fp)297 valid_format(CELL *fp)
298 {
299     int result = 1;
300     char *format = string(fp)->str;
301     char *q = format;
302     int args = 0;
303 
304     while (*q != '\0') {
305 	if (*q++ == '%') {
306 	    int l_flag = 0;
307 	    int h_flag = 0;
308 
309 	    if (++args > 1)
310 		invalid_format(fp);
311 
312 	    while (*q == '-' || *q == '+' || *q == ' ' ||
313 		   *q == '#' || *q == '0' || *q == '\'') {
314 		q++;
315 	    }
316 	    if (*q == '*') {
317 		invalid_format(fp);
318 	    } else {
319 		while (scan_code[*(unsigned char *) q] == SC_DIGIT) {
320 		    q++;
321 		}
322 	    }
323 	    if (*q == '.') {	/* have precision */
324 		q++;
325 		if (*q == '*') {
326 		    invalid_format(fp);
327 		} else {
328 		    while (scan_code[*(unsigned char *) q] == SC_DIGIT) {
329 			q++;
330 		    }
331 		}
332 	    }
333 	    if (*q == 'l') {
334 		q++;
335 	    } else if (*q == 'h') {
336 		q++;
337 	    }
338 	    switch (*q++) {
339 	    case 's':
340 		if (l_flag + h_flag)
341 		    invalid_format(fp);
342 		break;
343 	    case 'c':
344 		if (l_flag + h_flag)
345 		    invalid_format(fp);
346 		break;
347 	    case 'd':
348 	    case 'i':
349 		break;
350 	    case 'o':
351 	    case 'x':
352 	    case 'X':
353 	    case 'u':
354 		break;
355 	    case 'e':
356 	    case 'g':
357 	    case 'f':
358 	    case 'E':
359 	    case 'G':
360 		if (h_flag + l_flag)
361 		    invalid_format(fp);
362 		break;
363 	    default:
364 		invalid_format(fp);
365 	    }
366 	}
367     }
368     return result;
369 }
370 
371 /*
372   assign CELL *cp to field or pseudo field
373   and take care of all side effects
374 */
375 
376 void
field_assign(CELL * fp,CELL * cp)377 field_assign(CELL *fp, CELL *cp)
378 {
379     CELL c;
380     int i, j;
381 
382     /* the most common case first */
383     if (fp == field) {
384 	cell_destroy(field);
385 	cellcpy(fp, cp);
386 	nf = -1;
387 	return;
388     }
389 
390     /* its not important to do any of this fast */
391 
392     if (nf < 0)
393 	split_field0();
394 
395     switch (i = (int) (fp - field)) {
396 
397     case NF_field:
398 
399 	cell_destroy(NF);
400 	cellcpy(NF, cellcpy(&c, cp));
401 	if (c.type != C_DOUBLE)
402 	    cast1_to_d(&c);
403 
404 	if ((j = d_to_i(c.dval)) < 0)
405 	    rt_error("negative value assigned to NF");
406 
407 	if (j > nf)
408 	    for (i = nf + 1; i <= j; i++) {
409 		cp = field_ptr(i);
410 		cell_destroy(cp);
411 		cp->type = C_STRING;
412 		cp->ptr = (PTR) & null_str;
413 		null_str.ref_cnt++;
414 	    }
415 
416 	nf = j;
417 	build_field0();
418 	break;
419 
420     case RS_field:
421 	cell_destroy(RS);
422 	cellcpy(RS, cp);
423 	set_rs_shadow();
424 	break;
425 
426     case FS_field:
427 	cell_destroy(FS);
428 	cast_for_split(cellcpy(&fs_shadow, cellcpy(FS, cp)));
429 	break;
430 
431     case OFMT_field:
432     case CONVFMT_field:
433 	/* If the user does something stupid with OFMT or CONVFMT,
434 	   we could crash.
435 	   We'll make an attempt to protect ourselves here.  This is
436 	   why OFMT and CONVFMT are pseudo fields.
437 
438 	   The ptrs of OFMT and CONVFMT always have a valid STRING,
439 	   even if assigned a DOUBLE or NOINIT
440 	 */
441 
442 	free_STRING(string(fp));
443 	cellcpy(fp, cp);
444 	if (fp->type < C_STRING) {	/* !! */
445 	    fp->ptr = (PTR) new_STRING("%.6g");
446 	} else if (valid_format(fp)) {
447 	    if (fp == CONVFMT) {
448 		/* It's a string, but if it's really goofy and CONVFMT,
449 		   it could still damage us. Test it .
450 		 */
451 		char xbuff[512];
452 
453 		xbuff[256] = 0;
454 		sprintf(xbuff, string(fp)->str, 3.1459);
455 		if (xbuff[256])
456 		    rt_error("CONVFMT assigned unusable value");
457 	    }
458 	}
459 	break;
460 
461 #ifdef MSDOS
462       lm_dos_label:
463 #endif
464 
465     default:			/* $1 or $2 or ... */
466 
467 	cell_destroy(fp);
468 	cellcpy(fp, cp);
469 
470 	if (i < 0 || i >= FBANK_SZ) {
471 	    /* field assigned to was not in field[0..FBANK_SZ-1]
472 	     * or a pseudo field, so compute actual field index
473 	     */
474 	    i = field_addr_to_index(fp);
475 	}
476 
477 	if (i > nf) {
478 	    for (j = nf + 1; j < i; j++) {
479 		cp = field_ptr(j);
480 		cell_destroy(cp);
481 		cp->type = C_STRING;
482 		cp->ptr = (PTR) & null_str;
483 		null_str.ref_cnt++;
484 	    }
485 	    nf = i;
486 	    cell_destroy(NF);
487 	    NF->type = C_DOUBLE;
488 	    NF->dval = (double) i;
489 	}
490 
491 	build_field0();
492 
493     }
494 }
495 
496 /* construct field[0] from the other fields */
497 
498 static void
build_field0(void)499 build_field0(void)
500 {
501 
502 #ifdef DEBUG
503     if (nf < 0)
504 	bozo("nf <0 in build_field0");
505 #endif
506 
507     cell_destroy(field + 0);
508 
509     if (nf == 0) {
510 	field[0].type = C_STRING;
511 	field[0].ptr = (PTR) & null_str;
512 	null_str.ref_cnt++;
513     } else if (nf == 1) {
514 	cellcpy(field, field + 1);
515     } else {
516 	CELL c;
517 	STRING *ofs, *tail;
518 	size_t len;
519 	register CELL *cp;
520 	register char *p, *q;
521 	int cnt;
522 	CELL **fbp, *cp_limit;
523 
524 	cast1_to_s(cellcpy(&c, OFS));
525 	ofs = (STRING *) c.ptr;
526 	cast1_to_s(cellcpy(&c, field_ptr(nf)));
527 	tail = (STRING *) c.ptr;
528 	cnt = nf - 1;
529 
530 	len = ((size_t) cnt) * ofs->len + tail->len;
531 
532 	fbp = fbankv;
533 	cp_limit = field + FBANK_SZ;
534 	cp = field + 1;
535 
536 	while (cnt-- > 0) {
537 	    if (cp->type < C_STRING) {	/* use the string field temporarily */
538 		if (cp->type == C_NOINIT) {
539 		    cp->ptr = (PTR) & null_str;
540 		    null_str.ref_cnt++;
541 		} else {	/* its a double */
542 		    Int ival;
543 		    char xbuff[260];
544 
545 		    ival = d_to_I(cp->dval);
546 		    if (ival == cp->dval)
547 			sprintf(xbuff, INT_FMT, ival);
548 		    else
549 			sprintf(xbuff, string(CONVFMT)->str, cp->dval);
550 
551 		    cp->ptr = (PTR) new_STRING(xbuff);
552 		}
553 	    }
554 
555 	    len += string(cp)->len;
556 
557 	    if (++cp == cp_limit) {
558 		cp = *++fbp;
559 		cp_limit = cp + FBANK_SZ;
560 	    }
561 
562 	}
563 
564 	field[0].type = C_STRING;
565 	field[0].ptr = (PTR) new_STRING0(len);
566 
567 	p = string(field)->str;
568 
569 	/* walk it again , putting things together */
570 	cnt = nf - 1;
571 	fbp = fbankv;
572 	cp = field + 1;
573 	cp_limit = field + FBANK_SZ;
574 	while (cnt-- > 0) {
575 	    memcpy(p, string(cp)->str, string(cp)->len);
576 	    p += string(cp)->len;
577 	    /* if not really string, free temp use of ptr */
578 	    if (cp->type < C_STRING) {
579 		free_STRING(string(cp));
580 	    }
581 	    if (++cp == cp_limit) {
582 		cp = *++fbp;
583 		cp_limit = cp + FBANK_SZ;
584 	    }
585 	    /* add the separator */
586 	    q = ofs->str;
587 	    while (*q)
588 		*p++ = *q++;
589 	}
590 	/* tack tail on the end */
591 	memcpy(p, tail->str, tail->len);
592 
593 	/* cleanup */
594 	if (tail == ofs) {
595 	    free_STRING(tail);
596 	} else {
597 	    free_STRING(tail);
598 	    free_STRING(ofs);
599 	}
600     }
601 }
602 
603 /* We are assigning to a CELL and we aren't sure if its
604    a field
605 */
606 void
slow_cell_assign(CELL * target,CELL * source)607 slow_cell_assign(CELL *target, CELL *source)
608 {
609     if (field <= target && target <= LAST_PFIELD) {
610 	field_assign(target, source);
611     } else {
612 	size_t i;
613 	for (i = 1; i < fbankv_num_chunks * FBANKV_CHUNK_SIZE; i++) {
614 	    CELL *bank_start = fbankv[i];
615 	    CELL *bank_end = bank_start + FBANK_SZ;
616 
617 	    if (bank_start == 0)
618 		break;
619 
620 	    if (bank_start <= target && target < bank_end) {
621 		/* it is a field */
622 		field_assign(target, source);
623 		return;
624 	    }
625 	}
626 	/* its not a field */
627 	cell_destroy(target);
628 	cellcpy(target, source);
629     }
630 }
631 
632 int
field_addr_to_index(CELL * cp)633 field_addr_to_index(CELL *cp)
634 {
635     CELL **p = fbankv;
636 
637     while (cp < *p || cp >= *p + FBANK_SZ)
638 	p++;
639 
640     return (int) (((p - fbankv) << FB_SHIFT) + (cp - *p));
641 }
642 
643 /*------- more than 1 fbank needed  ------------*/
644 
645 /*
646   compute the address of a field $i
647 
648   if CELL $i doesn't exist, because it is bigger than max_field,
649   then it gets created and max_field grows.
650 */
651 
652 CELL *
slow_field_ptr(int i)653 slow_field_ptr(int i)
654 {
655 
656     if (i > max_field) {	/* need to allocate more field memory */
657 	int j;
658 	allocate_fbankv(i);
659 
660 	j = (max_field >> FB_SHIFT) + 1;
661 
662 	assert(j > 0 && fbankv[j - 1] != 0 && fbankv[j] == 0);
663 
664 	do {
665 	    fbankv[j] = (CELL *) zmalloc(sizeof(CELL) * FBANK_SZ);
666 	    memset(fbankv[j], 0, sizeof(CELL) * FBANK_SZ);
667 	    j++;
668 	    max_field += FBANK_SZ;
669 	}
670 	while (i > max_field);
671     }
672 
673     return &fbankv[i >> FB_SHIFT][i & (FBANK_SZ - 1)];
674 }
675 
676 #if USE_BINMODE
677 
678 /* read current value of BINMODE */
679 int
binmode(void)680 binmode(void)
681 {
682     CELL c;
683 
684     cast1_to_d(cellcpy(&c, BINMODE));
685     return d_to_i(c.dval);
686 }
687 
688 /* set BINMODE and RS and ORS
689    from environment or -W binmode=   */
690 
691 void
set_binmode(int x)692 set_binmode(int x)
693 {
694     CELL c;
695     int change = ((x & 4) == 0);
696 
697     /* set RS */
698     c.type = C_STRING;
699     c.ptr = (PTR) new_STRING((change && (x & 1)) ? "\r\n" : "\n");
700     field_assign(RS, &c);
701     free_STRING(string(&c));
702 
703     /* set ORS */
704     cell_destroy(ORS);
705     ORS->type = C_STRING;
706     ORS->ptr = (PTR) new_STRING((change && (x & 2)) ? "\r\n" : "\n");
707 
708     cell_destroy(BINMODE);
709     BINMODE->type = C_DOUBLE;
710     BINMODE->dval = (double) x;
711 }
712 
713 #endif /* USE_BINMODE */
714 
715 #ifdef NO_LEAKS
716 
717 static void
fbank_free(CELL * const fb)718 fbank_free(CELL *const fb)
719 {
720     CELL *end = fb + FBANK_SZ;
721     CELL *cp;
722     for (cp = fb; cp < end; cp++) {
723 	cell_destroy(cp);
724     }
725     zfree(fb, FBANK_SZ * sizeof(CELL));
726 }
727 
728 static void
fbankv_free(void)729 fbankv_free(void)
730 {
731     unsigned i = 1;
732     const size_t cnt = FBANKV_CHUNK_SIZE * fbankv_num_chunks;
733     while (i < cnt && fbankv[i] != 0) {
734 	fbank_free(fbankv[i]);
735 	i++;
736     }
737     for (; i < cnt; i++) {
738 	if (fbankv[i] != 0) {
739 	    bozo("unexpected pointer in fbankv[]");
740 	}
741     }
742     zfree(fbankv, cnt * sizeof(CELL *));
743 }
744 
745 void
field_leaks(void)746 field_leaks(void)
747 {
748     int n;
749 
750     /* everything in field[] */
751     for (n = 0; n < FBANK_SZ + NUM_PFIELDS; n++) {
752 	cell_destroy(&field[n]);
753     }
754     /* fbankv[0] == field
755        this call does all the rest of the fields
756      */
757     fbankv_free();
758 
759     switch (fs_shadow.type) {
760     case C_RE:
761 	re_destroy(fs_shadow.ptr);
762 	break;
763     case C_STRING:
764     case C_STRNUM:
765     case C_MBSTRN:
766 	cell_destroy(&fs_shadow);
767 	break;
768     default:
769 	break;
770     }
771 
772     switch (rs_shadow.type) {
773     case SEP_STR:
774 	free_STRING(((STRING *) (&rs_shadow.ptr)));
775 	break;
776     case SEP_RE:
777 	re_destroy(rs_shadow.ptr);
778 	break;
779     }
780 }
781 #endif
782