xref: /386bsd/usr/src/usr.bin/awk/field.c (revision a2142627)
1 
2 /********************************************
3 field.c
4 copyright 1991, Michael D. Brennan
5 
6 This is a source file for mawk, an implementation of
7 the AWK programming language.
8 
9 Mawk is distributed without warranty under the terms of
10 the GNU General Public License, version 2, 1991.
11 ********************************************/
12 
13 /* $Log: field.c,v $
14  * Revision 5.4.1.2  1993/01/20  12:53:08  mike
15  * d_to_l()
16  *
17  * Revision 5.4.1.1  1993/01/15  03:33:42  mike
18  * patch3: safer double to int conversion
19  *
20  * Revision 5.4  1992/11/29  22:52:11  mike
21  * double->string conversions uses long ints for 16/32 bit
22  * compatibility.
23  * Fixed small LM_DOS bozo.
24  *
25  * Revision 5.3  1992/08/17  14:21:10  brennan
26  * patch2: After parsing, only bi_sprintf() uses string_buff.
27  *
28  * Revision 5.2  1992/07/10  16:17:10  brennan
29  * MsDOS: remove NO_BINMODE macro
30  *
31  * Revision 5.1  1991/12/05  07:55:57  brennan
32  * 1.1 pre-release
33  *
34 */
35 
36 
37 /* field.c */
38 
39 #include "mawk.h"
40 #include "field.h"
41 #include "init.h"
42 #include "memory.h"
43 #include "scan.h"
44 #include "bi_vars.h"
45 #include "repl.h"
46 #include "regexp.h"
47 
/* Storage for the first bank of fields plus the pseudo fields.
   The first FBANK_SZ cells are bank 0 (field[0] is $0); the
   remaining NUM_PFIELDS cells hold the pseudo fields such as
   NF, RS, FS, OFMT and CONVFMT. */
CELL  field[FBANK_SZ+NUM_PFIELDS] ;

/* Table of field banks.  fbank[0] is always the static array above;
   further banks are allocated on demand by slow_field_ptr(). */
CELL  *fbank[NUM_FBANK] = {field} ;

static int max_field = MAX_SPLIT ; /* maximum field actually created */

/* forward declarations of the file-local helpers */
static void PROTO( build_field0, (void) ) ;
static void PROTO( set_rs_shadow, (void) ) ;
static void PROTO( load_pfield, (char*, CELL*)) ;
static void PROTO( load_field_ov, (void)) ;



/* A description of how to split input into records, derived from RS.
   Whenever RS is changed, rs_shadow is rebuilt to match. */
SEPARATOR rs_shadow = {SEP_CHAR, '\n'} ;
/* a splitting CELL version of FS */
CELL fs_shadow = {C_SPACE} ;
int   nf ;
  /* nf holds the true value of NF.  If nf < 0 , then
     NF has not been computed, i.e., $0 has not been split
  */
70 
set_rs_shadow()71 static void set_rs_shadow()
72 { CELL c ;
73   STRING  *sval ;
74   char *s ;
75   unsigned len ;
76 
77   if ( posix_space_flag && mawk_state == EXECUTION )
78 		  scan_code['\n'] = SC_UNEXPECTED ;
79 
80   if ( rs_shadow.type == SEP_STR )  free_STRING((STRING*) rs_shadow.ptr) ;
81 
82   cast_for_split( cellcpy(&c, RS) ) ;
83   switch( c.type )
84   {
85     case C_RE :
86         if ( s = is_string_split(c.ptr, &len) )
87             if ( len == 1 )
88             { rs_shadow.type = SEP_CHAR ;
89               rs_shadow.c = s[0] ;
90             }
91             else
92             { rs_shadow.type = SEP_STR ;
93               rs_shadow.ptr = (PTR) new_STRING(s) ;
94             }
95         else
96         { rs_shadow.type = SEP_RE ;
97           rs_shadow.ptr = c.ptr ;
98         }
99         break ;
100 
101     case C_SPACE :
102         rs_shadow.type = SEP_CHAR ;
103         rs_shadow.c = ' ' ;
104         break ;
105 
106     case C_SNULL : /* RS becomes one or more blank lines */
107 	if ( mawk_state == EXECUTION ) scan_code['\n'] = SC_SPACE ;
108         rs_shadow.type = SEP_MLR ;
109         sval = new_STRING( "\n\n+" ) ;
110         rs_shadow.ptr = re_compile(sval) ;
111         free_STRING(sval) ;
112         break ;
113 
114     default : bozo("bad cell in set_rs_shadow") ;
115   }
116 }
117 
load_pfield(name,cp)118 static  void load_pfield(name, cp)
119   char *name ;
120   CELL *cp ;
121 { SYMTAB *stp ;
122 
123   stp = insert(name) ; stp->type = ST_FIELD ;
124   stp->stval.cp = cp ;
125 }
126 
127 /* initialize $0 and the pseudo fields */
field_init()128 void  field_init()
129 {
130   field[0].type = C_STRING ;
131   field[0].ptr = (PTR) & null_str ;
132   null_str.ref_cnt++ ;
133 
134   load_pfield("NF",NF) ;
135   NF->type = C_DOUBLE ;
136   NF->dval = 0.0 ;
137 
138   load_pfield("RS", RS) ;
139   RS->type = C_STRING ;
140   RS->ptr =  (PTR) new_STRING( "\n" ) ;
141   /* rs_shadow already set */
142 
143   load_pfield("FS", FS) ;
144   FS->type = C_STRING ;
145   FS->ptr = (PTR) new_STRING( " " ) ;
146   /* fs_shadow is already set */
147 
148   load_pfield("OFMT", OFMT) ;
149   OFMT->type = C_STRING ;
150   OFMT->ptr = (PTR) new_STRING( "%.6g" ) ;
151 
152   load_pfield("CONVFMT", CONVFMT) ;
153   CONVFMT->type = C_STRING ;
154   CONVFMT->ptr = OFMT->ptr ;
155   string(OFMT)->ref_cnt++ ;
156 }
157 
158 
159 
set_field0(s,len)160 void  set_field0( s, len)
161   char *s ;
162   unsigned len ;
163 {
164   cell_destroy( & field[0] ) ;
165   nf = -1 ;
166 
167   if ( len )
168   {
169     field[0].type = C_MBSTRN ;
170     field[0].ptr = (PTR) new_STRING( (char *) 0, len) ;
171     (void) memcpy( string(&field[0])->str, s, SIZE_T(len) ) ;
172   }
173   else
174   {
175     field[0].type = C_STRING ;
176     field[0].ptr = (PTR) &null_str ;
177     null_str.ref_cnt++ ;
178   }
179 }
180 
181 
182 
183 /* split field[0] into $1, $2 ... and set NF  */
184 
split_field0()185 void  split_field0()
186 { register CELL *cp ;
187   register int cnt ;
188   CELL  c ;  /* copy field[0] here if not string */
189 
190 
191   if ( field[0].type < C_STRING )
192   { cast1_to_s(cellcpy(&c, field+0)) ;
193     cp = &c ;
194   }
195   else   cp = &field[0] ;
196 
197   if ( string(cp)->len == 0 )  nf = 0 ;
198   else
199   {
200     switch( fs_shadow.type )
201     {
202       case   C_SNULL :  /* FS == "" */
203           nf = 1 ;
204           cell_destroy(NF) ;
205           NF->type = C_DOUBLE ;
206           NF->dval = 1.0 ;
207           field[1].type = C_MBSTRN ;
208           field[1].ptr = cp->ptr ;
209 
210           if ( cp == field )  string(cp)->ref_cnt++ ;
211           /* else we gain one ref_cnt and lose one for a wash */
212 
213           return ;
214 
215       case  C_SPACE :
216           nf = space_split(string(cp)->str, string(cp)->len) ;
217           break ;
218 
219       default :
220           nf = re_split(string(cp)->str, fs_shadow.ptr) ;
221           break ;
222     }
223 
224   }
225 
226   cell_destroy(NF) ;
227   NF->type = C_DOUBLE ;
228   NF->dval = (double) nf ;
229 
230   if ( nf > MAX_SPLIT )
231   {
232     cnt = MAX_SPLIT ; load_field_ov() ;
233   }
234   else cnt = nf ;
235 
236   while ( cnt > 0 )
237   {
238     cell_destroy(field+cnt) ;
239     field[cnt].ptr = (PTR) split_buff[cnt-1] ;
240     field[cnt--].type = C_MBSTRN ;
241   }
242 
243   if ( cp == &c )  free_STRING( string(cp) ) ;
244 }
245 
/*
  assign CELL *cp to field or pseudo field
  and take care of all side effects

  fp  the target; must be $0, some $n, or one of the pseudo fields
  cp  the value to copy into it

  Side effects by target:
    $0        record is marked unsplit (nf = -1)
    NF        fields up to the new NF are created empty, $0 is rebuilt
    RS        rs_shadow is rebuilt
    FS        fs_shadow is rebuilt
    OFMT,
    CONVFMT   the cell is forced to keep a valid STRING in ptr
    $n        fields between the old NF and n are created empty,
              NF is raised if needed and $0 is rebuilt
*/

void  field_assign( fp, cp)
  register CELL *fp ;
  CELL *cp ;
{
  CELL c ;
  int i , j ;

  /* the most common case first */
  if ( fp == field )
  { cell_destroy(field) ;
    (void) cellcpy(fp, cp) ;
    nf = -1 ;
    return ;
  }

  /* its not important to do any of this fast */

  /* every remaining case needs the current record split */
  if ( nf < 0 )  split_field0() ;

#if  LM_DOS
  /* fp is in a different segment from field[], so it can only be a
     field in a higher bank; i = -1 forces the index lookup below */
  if ( !SAMESEG(fp,field) )
  {
    i = -1 ;
    goto lm_dos_label ;
  }
#endif

  switch( i = (fp - field) )
  {

    case  NF_field :

        cell_destroy(NF) ;
        /* c is a scratch copy used only to get a numeric value */
        (void) cellcpy(NF, cellcpy(&c,cp) ) ;
        if ( c.type != C_DOUBLE )  cast1_to_d(&c) ;

        if ( (j = d_to_i(c.dval)) < 0 )
            rt_error("negative value assigned to NF") ;

        /* growing NF: the new fields become empty strings */
        if ( j > nf )
            for ( i = nf+1 ; i <= j ; i++ )
            {
	      cp = field_ptr(i) ;
	      cell_destroy(cp) ;
	      cp->type = C_STRING ;
              cp->ptr = (PTR) &null_str ;
              null_str.ref_cnt++ ;
            }

        nf = j ;
        build_field0() ;
        break ;

    case  RS_field :
        cell_destroy(RS) ;
        (void) cellcpy(RS, cp) ;
        set_rs_shadow() ;
        break ;

    case  FS_field :
        cell_destroy(FS) ;
        /* copy cp into FS, copy FS into fs_shadow, then convert
           fs_shadow into its splitting form */
        cast_for_split( cellcpy(&fs_shadow, cellcpy(FS, cp)) ) ;
        break ;

    case OFMT_field :
    case CONVFMT_field:
        /* If the user does something stupid with OFMT or CONVFMT,
	   we could crash.
           We'll make an attempt to protect ourselves here.  This is
           why OFMT and CONVFMT are pseudo fields.

           The ptrs of OFMT and CONVFMT always have a valid STRING,
	   even if assigned a DOUBLE or NOINIT
        */

        /* free_STRING rather than cell_destroy: ptr holds a STRING
           here whatever the cell's type is (see above) */
        free_STRING( string(fp) ) ;
        (void) cellcpy(fp, cp) ;
        if ( fp->type < C_STRING ) /* !! */
             fp->ptr = (PTR) new_STRING( "%.6g" ) ;
        else
	if ( fp == CONVFMT )
        {
          /* It's a string, but if it's really goofy and CONVFMT,
	     it could still damage us. Test it .
	  */
          char xbuff[512] ;

          /* xbuff[256] is a sentinel: if the trial conversion writes
             a non-zero byte there, the format is rejected.
             NOTE(review): the sprintf itself is unbounded, so output
             longer than xbuff is not caught by this check */
	  xbuff[256] = 0 ;
          (void) sprintf( xbuff, string(fp)->str, 3.1459) ;
          if ( xbuff[256] )
	      rt_error("CONVFMT assigned unusable value") ;
        }
        break ;

#if LM_DOS
lm_dos_label :
#endif

    default:  /* $1 or $2 or ... */


        cell_destroy(fp) ;
        (void) cellcpy(fp, cp) ;

        /* fp - field is the field index only for fields in bank 0;
           otherwise look the index up through the bank table */
	if ( i < 0 || i > MAX_SPLIT ) i = field_addr_to_index(fp) ;

        if ( i > nf )
        { /* fields between the old NF and $i become empty strings */
          for ( j = nf+1 ; j < i ; j++ )
          {
	    cp = field_ptr(j) ;
	    cell_destroy(cp) ;
            cp->type = C_STRING ;
	    cp->ptr = (PTR) &null_str ;
            null_str.ref_cnt++ ;
          }
          nf = i ;
          cell_destroy(NF) ;
          NF->type = C_DOUBLE ;
          NF->dval = (double) i ;
        }

        build_field0() ;

  }
}
376 
377 
378 /* construct field[0] from the other fields */
379 
build_field0()380 static void  build_field0()
381 {
382 
383 
384 #ifdef DEBUG
385   if ( nf < 0 )
386       bozo("nf <0 in build_field0") ;
387 #endif
388 
389   cell_destroy( field+0 ) ;
390 
391   if ( nf == 0 )
392   { field[0].type = C_STRING ;
393     field[0].ptr = (PTR) &null_str ;
394     null_str.ref_cnt++ ;
395   }
396   else
397   if ( nf == 1 )  (void) cellcpy(field, field+1) ;
398 
399   else
400   { CELL  c ;
401     STRING *ofs, *tail ;
402     unsigned len ;
403     register CELL *cp ;
404     register char *p, *q ;
405     int cnt ;
406     CELL **fbp, *cp_limit ;
407 
408 
409     cast1_to_s(cellcpy(&c,OFS)) ;
410     ofs = (STRING *) c.ptr ;
411     cast1_to_s(cellcpy(&c, field_ptr(nf))) ;
412     tail = (STRING *) c.ptr ;
413     cnt = nf-1 ;
414 
415     len = cnt*ofs->len + tail->len ;
416 
417     fbp = fbank ; cp_limit = field + FBANK_SZ ;
418     cp = field + 1 ;
419 
420     while ( cnt-- > 0 )
421     {
422       if ( cp->type < C_STRING )
423       { /* use the string field temporarily */
424         if ( cp->type == C_NOINIT )
425 	{
426 	  cp->ptr = (PTR) &null_str ;
427 	  null_str.ref_cnt++ ;
428         }
429 	else /* its a double */
430 	{
431 	  long ival ;
432 	  char xbuff[260] ;
433 
434 	  ival = d_to_l(cp->dval) ;
435 	  if ( ival == cp->dval )
436 	    (void) sprintf(xbuff, INT_FMT, ival) ;
437 	  else
438 	    (void) sprintf(xbuff, string(CONVFMT)->str, cp->dval) ;
439 
440 	  cp->ptr = (PTR) new_STRING(xbuff) ;
441         }
442       }
443 
444       len += string(cp)->len ;
445 
446       if ( ++cp == cp_limit )
447       { cp = * ++fbp ; cp_limit = cp + FBANK_SZ ; }
448     }
449 
450     field[0].type = C_STRING ;
451     field[0].ptr = (PTR) new_STRING((char *) 0, len) ;
452 
453     p = string(field)->str ;
454 
455     /* walk it again , putting things together */
456     cnt = nf-1 ; fbp = fbank ;
457     cp = field+1 ; cp_limit = field + FBANK_SZ ;
458 
459     while ( cnt-- > 0 )
460     {
461       (void) memcpy(p, string(cp)->str,SIZE_T(string(cp)->len)) ;
462       p += string(cp)->len ;
463       /* if not really string, free temp use of ptr */
464       if ( cp->type < C_STRING ) free_STRING(string(cp)) ;
465       if ( ++cp == cp_limit )
466       { cp = * ++fbp ; cp_limit = cp + FBANK_SZ ; }
467 
468       /* add the separator */
469       q = ofs->str ;  while( *q )  *p++ = *q++ ;
470     }
471     /* tack tail on the end */
472     (void) memcpy(p, tail->str, SIZE_T(tail->len)) ;
473 
474     /* cleanup */
475     free_STRING(tail) ; free_STRING(ofs) ;
476   }
477 }
478 
479 /* We are assigning to a CELL and we aren't sure if its
480    a field */
481 
slow_cell_assign(target,source)482 void slow_cell_assign(target, source)
483   register CELL *target ;
484   CELL *source ;
485 {
486   if (
487 
488 #if  LM_DOS  /* the dreaded segment nonsense */
489   SAMESEG(target,field) &&
490 #endif
491        target >= field && target <= LAST_PFIELD )
492        field_assign(target, source) ;
493   else
494   { CELL **p = fbank + 1 ;
495 
496     while ( *p )
497     {
498       if (
499 #if  LM_DOS
500       SAMESEG(target, *p) &&
501 #endif
502 	   target >= *p && target < *p + FBANK_SZ )
503       {
504         field_assign(target, source) ;
505 	return ;
506       }
507       p++ ;
508     }
509     /* its not a field */
510     cell_destroy(target) ;
511     (void) cellcpy(target, source) ;
512   }
513 }
514 
field_addr_to_index(cp)515 int field_addr_to_index(cp)
516   CELL *cp ;
517 { CELL **p = fbank ;
518 
519   while(
520 
521 #if  LM_DOS
522     ! SAMESEG(cp,*p) ||
523 #endif
524 
525 	 cp < *p || cp >= *p + FBANK_SZ )  p++ ;
526 
527   return  ((p-fbank)<<FB_SHIFT) + (cp - *p) ;
528 }
529 
530 /*------- more than 1 fbank needed  ------------*/
531 
532 /*
533   compute the address of a field with index
534   > MAX_SPLIT
535 */
536 
slow_field_ptr(i)537 CELL *slow_field_ptr(i)
538   register int i ;
539 {
540 
541   if ( i > max_field )
542   { int j ;
543 
544     if ( i > MAX_FIELD )
545         rt_overflow("maximum number of fields", MAX_FIELD) ;
546 
547     j = 1 ; while( fbank[j] )  j++ ;
548     do
549     {
550       fbank[j] = (CELL*)zmalloc(sizeof(CELL)*FBANK_SZ) ;
551       (void) memset(fbank[j], 0, SIZE_T(sizeof(CELL)*FBANK_SZ)) ;
552       j++ ;
553       max_field += FBANK_SZ ;
554     }
555     while ( i > max_field ) ;
556   }
557 
558   return  & fbank[i>>FB_SHIFT][i & (FBANK_SZ-1)] ;
559 }
560 
561 /*
562   $0 split into more than MAX_SPLIT fields,
563   $(MAX_FIELD+1) ... are on the split_ov_list.
564   Copy into fields which start at fbank[1]
565 */
566 
load_field_ov()567 static void  load_field_ov()
568 {
569   register SPLIT_OV *p ;  /* walks split_ov_list */
570   register CELL *cp ;  /* target of copy */
571   int j ;  /* current fbank[] */
572   CELL *cp_limit ;  /* change fbank[] */
573   SPLIT_OV *q ;    /* trails p */
574 
575   /* make sure the fields are allocated */
576   (void) slow_field_ptr(nf) ;
577 
578   p = split_ov_list ; split_ov_list = (SPLIT_OV*) 0 ;
579   j = 1 ; cp = fbank[j] ; cp_limit = cp + FBANK_SZ ;
580 
581   while ( p )
582   {
583     cell_destroy(cp) ;
584     cp->type = C_MBSTRN ;
585     cp->ptr = (PTR) p->sval ;
586 
587     if ( ++cp == cp_limit )
588     {
589       cp = fbank[++j] ; cp_limit = cp + FBANK_SZ ;
590     }
591 
592     q = p ; p = p->link ; ZFREE(q) ;
593   }
594 }
595 
596 
597 #if  MSDOS
598 
binmode()599 int binmode()  /* read current value of BINMODE */
600 { CELL c ;
601 
602   cast1_to_d(cellcpy(&c, BINMODE)) ;
603   return  d_to_i(c.dval) ;
604 }
605 
606 /* set BINMODE and RS and ORS
607    from environment or -W binmode=   */
608 
set_binmode(x)609 void  set_binmode(x)
610   int x ;
611 {
612   CELL c ;
613 
614   /* set RS */
615   c.type = C_STRING ;
616   c.ptr = (PTR) new_STRING ( (x&1) ? "\r\n" : "\n" ) ;
617   field_assign(RS, &c) ;
618   free_STRING(string(&c)) ;
619 
620   /* set ORS */
621   cell_destroy(ORS) ;
622   ORS->type = C_STRING ;
623   ORS->ptr = (PTR) new_STRING( (x&2) ? "\r\n" : "\n") ;
624 
625   cell_destroy(BINMODE) ;
626   BINMODE->type = C_DOUBLE ;
627   BINMODE->dval = (double) x ;
628 }
629 
630 #endif /* MSDOS */
631 
632 
633