1
2 /********************************************
3 field.c
4 copyright 1991, Michael D. Brennan
5
6 This is a source file for mawk, an implementation of
7 the AWK programming language.
8
9 Mawk is distributed without warranty under the terms of
10 the GNU General Public License, version 2, 1991.
11 ********************************************/
12
13 /* $Log: field.c,v $
14 * Revision 5.4.1.2 1993/01/20 12:53:08 mike
15 * d_to_l()
16 *
17 * Revision 5.4.1.1 1993/01/15 03:33:42 mike
18 * patch3: safer double to int conversion
19 *
20 * Revision 5.4 1992/11/29 22:52:11 mike
21 * double->string conversions uses long ints for 16/32 bit
22 * compatibility.
23 * Fixed small LM_DOS bozo.
24 *
25 * Revision 5.3 1992/08/17 14:21:10 brennan
26 * patch2: After parsing, only bi_sprintf() uses string_buff.
27 *
28 * Revision 5.2 1992/07/10 16:17:10 brennan
29 * MsDOS: remove NO_BINMODE macro
30 *
31 * Revision 5.1 1991/12/05 07:55:57 brennan
32 * 1.1 pre-release
33 *
34 */
35
36
37 /* field.c */
38
39 #include "mawk.h"
40 #include "field.h"
41 #include "init.h"
42 #include "memory.h"
43 #include "scan.h"
44 #include "bi_vars.h"
45 #include "repl.h"
46 #include "regexp.h"
47
48 CELL field[FBANK_SZ+NUM_PFIELDS] ;
49
50 CELL *fbank[NUM_FBANK] = {field} ;
51
52 static int max_field = MAX_SPLIT ; /* maximum field actually created*/
53
54 static void PROTO( build_field0, (void) ) ;
55 static void PROTO( set_rs_shadow, (void) ) ;
56 static void PROTO( load_pfield, (char*, CELL*)) ;
57 static void PROTO( load_field_ov, (void)) ;
58
59
60
61 /* a description of how to split based on RS.
62 If RS is changed, so is rs_shadow */
63 SEPARATOR rs_shadow = {SEP_CHAR, '\n'} ;
64 /* a splitting CELL version of FS */
65 CELL fs_shadow = {C_SPACE} ;
66 int nf ;
67 /* nf holds the true value of NF. If nf < 0 , then
68 NF has not been computed, i.e., $0 has not been split
69 */
70
set_rs_shadow()71 static void set_rs_shadow()
72 { CELL c ;
73 STRING *sval ;
74 char *s ;
75 unsigned len ;
76
77 if ( posix_space_flag && mawk_state == EXECUTION )
78 scan_code['\n'] = SC_UNEXPECTED ;
79
80 if ( rs_shadow.type == SEP_STR ) free_STRING((STRING*) rs_shadow.ptr) ;
81
82 cast_for_split( cellcpy(&c, RS) ) ;
83 switch( c.type )
84 {
85 case C_RE :
86 if ( s = is_string_split(c.ptr, &len) )
87 if ( len == 1 )
88 { rs_shadow.type = SEP_CHAR ;
89 rs_shadow.c = s[0] ;
90 }
91 else
92 { rs_shadow.type = SEP_STR ;
93 rs_shadow.ptr = (PTR) new_STRING(s) ;
94 }
95 else
96 { rs_shadow.type = SEP_RE ;
97 rs_shadow.ptr = c.ptr ;
98 }
99 break ;
100
101 case C_SPACE :
102 rs_shadow.type = SEP_CHAR ;
103 rs_shadow.c = ' ' ;
104 break ;
105
106 case C_SNULL : /* RS becomes one or more blank lines */
107 if ( mawk_state == EXECUTION ) scan_code['\n'] = SC_SPACE ;
108 rs_shadow.type = SEP_MLR ;
109 sval = new_STRING( "\n\n+" ) ;
110 rs_shadow.ptr = re_compile(sval) ;
111 free_STRING(sval) ;
112 break ;
113
114 default : bozo("bad cell in set_rs_shadow") ;
115 }
116 }
117
load_pfield(name,cp)118 static void load_pfield(name, cp)
119 char *name ;
120 CELL *cp ;
121 { SYMTAB *stp ;
122
123 stp = insert(name) ; stp->type = ST_FIELD ;
124 stp->stval.cp = cp ;
125 }
126
127 /* initialize $0 and the pseudo fields */
field_init()128 void field_init()
129 {
130 field[0].type = C_STRING ;
131 field[0].ptr = (PTR) & null_str ;
132 null_str.ref_cnt++ ;
133
134 load_pfield("NF",NF) ;
135 NF->type = C_DOUBLE ;
136 NF->dval = 0.0 ;
137
138 load_pfield("RS", RS) ;
139 RS->type = C_STRING ;
140 RS->ptr = (PTR) new_STRING( "\n" ) ;
141 /* rs_shadow already set */
142
143 load_pfield("FS", FS) ;
144 FS->type = C_STRING ;
145 FS->ptr = (PTR) new_STRING( " " ) ;
146 /* fs_shadow is already set */
147
148 load_pfield("OFMT", OFMT) ;
149 OFMT->type = C_STRING ;
150 OFMT->ptr = (PTR) new_STRING( "%.6g" ) ;
151
152 load_pfield("CONVFMT", CONVFMT) ;
153 CONVFMT->type = C_STRING ;
154 CONVFMT->ptr = OFMT->ptr ;
155 string(OFMT)->ref_cnt++ ;
156 }
157
158
159
set_field0(s,len)160 void set_field0( s, len)
161 char *s ;
162 unsigned len ;
163 {
164 cell_destroy( & field[0] ) ;
165 nf = -1 ;
166
167 if ( len )
168 {
169 field[0].type = C_MBSTRN ;
170 field[0].ptr = (PTR) new_STRING( (char *) 0, len) ;
171 (void) memcpy( string(&field[0])->str, s, SIZE_T(len) ) ;
172 }
173 else
174 {
175 field[0].type = C_STRING ;
176 field[0].ptr = (PTR) &null_str ;
177 null_str.ref_cnt++ ;
178 }
179 }
180
181
182
183 /* split field[0] into $1, $2 ... and set NF */
184
split_field0()185 void split_field0()
186 { register CELL *cp ;
187 register int cnt ;
188 CELL c ; /* copy field[0] here if not string */
189
190
191 if ( field[0].type < C_STRING )
192 { cast1_to_s(cellcpy(&c, field+0)) ;
193 cp = &c ;
194 }
195 else cp = &field[0] ;
196
197 if ( string(cp)->len == 0 ) nf = 0 ;
198 else
199 {
200 switch( fs_shadow.type )
201 {
202 case C_SNULL : /* FS == "" */
203 nf = 1 ;
204 cell_destroy(NF) ;
205 NF->type = C_DOUBLE ;
206 NF->dval = 1.0 ;
207 field[1].type = C_MBSTRN ;
208 field[1].ptr = cp->ptr ;
209
210 if ( cp == field ) string(cp)->ref_cnt++ ;
211 /* else we gain one ref_cnt and lose one for a wash */
212
213 return ;
214
215 case C_SPACE :
216 nf = space_split(string(cp)->str, string(cp)->len) ;
217 break ;
218
219 default :
220 nf = re_split(string(cp)->str, fs_shadow.ptr) ;
221 break ;
222 }
223
224 }
225
226 cell_destroy(NF) ;
227 NF->type = C_DOUBLE ;
228 NF->dval = (double) nf ;
229
230 if ( nf > MAX_SPLIT )
231 {
232 cnt = MAX_SPLIT ; load_field_ov() ;
233 }
234 else cnt = nf ;
235
236 while ( cnt > 0 )
237 {
238 cell_destroy(field+cnt) ;
239 field[cnt].ptr = (PTR) split_buff[cnt-1] ;
240 field[cnt--].type = C_MBSTRN ;
241 }
242
243 if ( cp == &c ) free_STRING( string(cp) ) ;
244 }
245
246 /*
247 assign CELL *cp to field or pseudo field
248 and take care of all side effects
249 */
250
field_assign(fp,cp)251 void field_assign( fp, cp)
252 register CELL *fp ;
253 CELL *cp ;
254 {
255 CELL c ;
256 int i , j ;
257
258 /* the most common case first */
259 if ( fp == field )
260 { cell_destroy(field) ;
261 (void) cellcpy(fp, cp) ;
262 nf = -1 ;
263 return ;
264 }
265
266 /* its not important to do any of this fast */
267
268 if ( nf < 0 ) split_field0() ;
269
270 #if LM_DOS
271 if ( !SAMESEG(fp,field) )
272 {
273 i = -1 ;
274 goto lm_dos_label ;
275 }
276 #endif
277
278 switch( i = (fp - field) )
279 {
280
281 case NF_field :
282
283 cell_destroy(NF) ;
284 (void) cellcpy(NF, cellcpy(&c,cp) ) ;
285 if ( c.type != C_DOUBLE ) cast1_to_d(&c) ;
286
287 if ( (j = d_to_i(c.dval)) < 0 )
288 rt_error("negative value assigned to NF") ;
289
290 if ( j > nf )
291 for ( i = nf+1 ; i <= j ; i++ )
292 {
293 cp = field_ptr(i) ;
294 cell_destroy(cp) ;
295 cp->type = C_STRING ;
296 cp->ptr = (PTR) &null_str ;
297 null_str.ref_cnt++ ;
298 }
299
300 nf = j ;
301 build_field0() ;
302 break ;
303
304 case RS_field :
305 cell_destroy(RS) ;
306 (void) cellcpy(RS, cp) ;
307 set_rs_shadow() ;
308 break ;
309
310 case FS_field :
311 cell_destroy(FS) ;
312 cast_for_split( cellcpy(&fs_shadow, cellcpy(FS, cp)) ) ;
313 break ;
314
315 case OFMT_field :
316 case CONVFMT_field:
317 /* If the user does something stupid with OFMT or CONVFMT,
318 we could crash.
319 We'll make an attempt to protect ourselves here. This is
320 why OFMT and CONVFMT are pseudo fields.
321
322 The ptrs of OFMT and CONVFMT always have a valid STRING,
323 even if assigned a DOUBLE or NOINIT
324 */
325
326 free_STRING( string(fp) ) ;
327 (void) cellcpy(fp, cp) ;
328 if ( fp->type < C_STRING ) /* !! */
329 fp->ptr = (PTR) new_STRING( "%.6g" ) ;
330 else
331 if ( fp == CONVFMT )
332 {
333 /* It's a string, but if it's really goofy and CONVFMT,
334 it could still damage us. Test it .
335 */
336 char xbuff[512] ;
337
338 xbuff[256] = 0 ;
339 (void) sprintf( xbuff, string(fp)->str, 3.1459) ;
340 if ( xbuff[256] )
341 rt_error("CONVFMT assigned unusable value") ;
342 }
343 break ;
344
345 #if LM_DOS
346 lm_dos_label :
347 #endif
348
349 default: /* $1 or $2 or ... */
350
351
352 cell_destroy(fp) ;
353 (void) cellcpy(fp, cp) ;
354
355 if ( i < 0 || i > MAX_SPLIT ) i = field_addr_to_index(fp) ;
356
357 if ( i > nf )
358 { for ( j = nf+1 ; j < i ; j++ )
359 {
360 cp = field_ptr(j) ;
361 cell_destroy(cp) ;
362 cp->type = C_STRING ;
363 cp->ptr = (PTR) &null_str ;
364 null_str.ref_cnt++ ;
365 }
366 nf = i ;
367 cell_destroy(NF) ;
368 NF->type = C_DOUBLE ;
369 NF->dval = (double) i ;
370 }
371
372 build_field0() ;
373
374 }
375 }
376
377
378 /* construct field[0] from the other fields */
379
build_field0()380 static void build_field0()
381 {
382
383
384 #ifdef DEBUG
385 if ( nf < 0 )
386 bozo("nf <0 in build_field0") ;
387 #endif
388
389 cell_destroy( field+0 ) ;
390
391 if ( nf == 0 )
392 { field[0].type = C_STRING ;
393 field[0].ptr = (PTR) &null_str ;
394 null_str.ref_cnt++ ;
395 }
396 else
397 if ( nf == 1 ) (void) cellcpy(field, field+1) ;
398
399 else
400 { CELL c ;
401 STRING *ofs, *tail ;
402 unsigned len ;
403 register CELL *cp ;
404 register char *p, *q ;
405 int cnt ;
406 CELL **fbp, *cp_limit ;
407
408
409 cast1_to_s(cellcpy(&c,OFS)) ;
410 ofs = (STRING *) c.ptr ;
411 cast1_to_s(cellcpy(&c, field_ptr(nf))) ;
412 tail = (STRING *) c.ptr ;
413 cnt = nf-1 ;
414
415 len = cnt*ofs->len + tail->len ;
416
417 fbp = fbank ; cp_limit = field + FBANK_SZ ;
418 cp = field + 1 ;
419
420 while ( cnt-- > 0 )
421 {
422 if ( cp->type < C_STRING )
423 { /* use the string field temporarily */
424 if ( cp->type == C_NOINIT )
425 {
426 cp->ptr = (PTR) &null_str ;
427 null_str.ref_cnt++ ;
428 }
429 else /* its a double */
430 {
431 long ival ;
432 char xbuff[260] ;
433
434 ival = d_to_l(cp->dval) ;
435 if ( ival == cp->dval )
436 (void) sprintf(xbuff, INT_FMT, ival) ;
437 else
438 (void) sprintf(xbuff, string(CONVFMT)->str, cp->dval) ;
439
440 cp->ptr = (PTR) new_STRING(xbuff) ;
441 }
442 }
443
444 len += string(cp)->len ;
445
446 if ( ++cp == cp_limit )
447 { cp = * ++fbp ; cp_limit = cp + FBANK_SZ ; }
448 }
449
450 field[0].type = C_STRING ;
451 field[0].ptr = (PTR) new_STRING((char *) 0, len) ;
452
453 p = string(field)->str ;
454
455 /* walk it again , putting things together */
456 cnt = nf-1 ; fbp = fbank ;
457 cp = field+1 ; cp_limit = field + FBANK_SZ ;
458
459 while ( cnt-- > 0 )
460 {
461 (void) memcpy(p, string(cp)->str,SIZE_T(string(cp)->len)) ;
462 p += string(cp)->len ;
463 /* if not really string, free temp use of ptr */
464 if ( cp->type < C_STRING ) free_STRING(string(cp)) ;
465 if ( ++cp == cp_limit )
466 { cp = * ++fbp ; cp_limit = cp + FBANK_SZ ; }
467
468 /* add the separator */
469 q = ofs->str ; while( *q ) *p++ = *q++ ;
470 }
471 /* tack tail on the end */
472 (void) memcpy(p, tail->str, SIZE_T(tail->len)) ;
473
474 /* cleanup */
475 free_STRING(tail) ; free_STRING(ofs) ;
476 }
477 }
478
/* We are assigning to a CELL and we aren't sure if it's
   a field */
481
slow_cell_assign(target,source)482 void slow_cell_assign(target, source)
483 register CELL *target ;
484 CELL *source ;
485 {
486 if (
487
488 #if LM_DOS /* the dreaded segment nonsense */
489 SAMESEG(target,field) &&
490 #endif
491 target >= field && target <= LAST_PFIELD )
492 field_assign(target, source) ;
493 else
494 { CELL **p = fbank + 1 ;
495
496 while ( *p )
497 {
498 if (
499 #if LM_DOS
500 SAMESEG(target, *p) &&
501 #endif
502 target >= *p && target < *p + FBANK_SZ )
503 {
504 field_assign(target, source) ;
505 return ;
506 }
507 p++ ;
508 }
509 /* its not a field */
510 cell_destroy(target) ;
511 (void) cellcpy(target, source) ;
512 }
513 }
514
field_addr_to_index(cp)515 int field_addr_to_index(cp)
516 CELL *cp ;
517 { CELL **p = fbank ;
518
519 while(
520
521 #if LM_DOS
522 ! SAMESEG(cp,*p) ||
523 #endif
524
525 cp < *p || cp >= *p + FBANK_SZ ) p++ ;
526
527 return ((p-fbank)<<FB_SHIFT) + (cp - *p) ;
528 }
529
530 /*------- more than 1 fbank needed ------------*/
531
532 /*
533 compute the address of a field with index
534 > MAX_SPLIT
535 */
536
slow_field_ptr(i)537 CELL *slow_field_ptr(i)
538 register int i ;
539 {
540
541 if ( i > max_field )
542 { int j ;
543
544 if ( i > MAX_FIELD )
545 rt_overflow("maximum number of fields", MAX_FIELD) ;
546
547 j = 1 ; while( fbank[j] ) j++ ;
548 do
549 {
550 fbank[j] = (CELL*)zmalloc(sizeof(CELL)*FBANK_SZ) ;
551 (void) memset(fbank[j], 0, SIZE_T(sizeof(CELL)*FBANK_SZ)) ;
552 j++ ;
553 max_field += FBANK_SZ ;
554 }
555 while ( i > max_field ) ;
556 }
557
558 return & fbank[i>>FB_SHIFT][i & (FBANK_SZ-1)] ;
559 }
560
/*
  $0 split into more than MAX_SPLIT fields,
  $(MAX_SPLIT+1) ... are on the split_ov_list.
  Copy into fields which start at fbank[1]
*/
566
load_field_ov()567 static void load_field_ov()
568 {
569 register SPLIT_OV *p ; /* walks split_ov_list */
570 register CELL *cp ; /* target of copy */
571 int j ; /* current fbank[] */
572 CELL *cp_limit ; /* change fbank[] */
573 SPLIT_OV *q ; /* trails p */
574
575 /* make sure the fields are allocated */
576 (void) slow_field_ptr(nf) ;
577
578 p = split_ov_list ; split_ov_list = (SPLIT_OV*) 0 ;
579 j = 1 ; cp = fbank[j] ; cp_limit = cp + FBANK_SZ ;
580
581 while ( p )
582 {
583 cell_destroy(cp) ;
584 cp->type = C_MBSTRN ;
585 cp->ptr = (PTR) p->sval ;
586
587 if ( ++cp == cp_limit )
588 {
589 cp = fbank[++j] ; cp_limit = cp + FBANK_SZ ;
590 }
591
592 q = p ; p = p->link ; ZFREE(q) ;
593 }
594 }
595
596
597 #if MSDOS
598
binmode()599 int binmode() /* read current value of BINMODE */
600 { CELL c ;
601
602 cast1_to_d(cellcpy(&c, BINMODE)) ;
603 return d_to_i(c.dval) ;
604 }
605
606 /* set BINMODE and RS and ORS
607 from environment or -W binmode= */
608
set_binmode(x)609 void set_binmode(x)
610 int x ;
611 {
612 CELL c ;
613
614 /* set RS */
615 c.type = C_STRING ;
616 c.ptr = (PTR) new_STRING ( (x&1) ? "\r\n" : "\n" ) ;
617 field_assign(RS, &c) ;
618 free_STRING(string(&c)) ;
619
620 /* set ORS */
621 cell_destroy(ORS) ;
622 ORS->type = C_STRING ;
623 ORS->ptr = (PTR) new_STRING( (x&2) ? "\r\n" : "\n") ;
624
625 cell_destroy(BINMODE) ;
626 BINMODE->type = C_DOUBLE ;
627 BINMODE->dval = (double) x ;
628 }
629
630 #endif /* MSDOS */
631
632
633