1 /* sumlines.c - total the numbers appearing in various input lines. */
2 /* B. D. McKay.  Version of March 30, 2017. */
3 
4 #ifndef GMP
5 #define GMP 1  /* Non-zero if gmp multi-precise integers are allowed.
6                   In this case you need the GNU multi-precision library,
7                   available with -lgmp if it is installed. */
8 #endif
9 
10 #define USAGE \
11 "sumlines [-w|-W] [-v] [-d] [-n] [-f fmtfile]...  file file file ..."
12 
13 #define HELPTEXT \
14 "   Sum lines matching specified formats.\n\
15 \n\
16    Any number of input files can be given.  \"-\" means stdin.\n\
17    If there are no files given, just stdin is assumed.\n\
18    File names can contain wildcards, in which case all matching files\n\
19       are used in numerically sorted order.\n\
20 \n\
21    Formats are read from four sources in this order:\n\
22    (1) Any files mentioned with -f on the command line (any number).\n\
23    (2) The file named in the environment variable SUMLINES.FMT (if any)\n\
24    (3) The file sumlines.fmt in the current directory (if it exists)\n\
25    (4) The file sumlines.fmt in the home directory (if it exists)\n\
26    All these are read if they exist and the results concatenated.\n\
27    Formats exactly matching earlier formats (except perhaps for flags)\n\
28         are not used.\n\
29 \n\
30    Each format occupies exactly two lines.  The first line gives a\n\
31    list of flags (DEFAULT FINAL ERROR UNIQUE COUNT CONTINUE NUMERIC\n\
32    SILENT ENDFILE P=# separated by spaces, commas or |s).\n\
33    The second line gives the format itself.\n\
34 \n\
35    Example.  This totals the summary lines of autoson runs:\n\
36      DEFAULT  # comment \n\
37      cpu=%fu,%fs,%fx  pf=%d\n\
38    There can also be blank lines and lines with only comments, but\n\
39    not between the flags line and the format itself.\n\
40 \n\
41    -d don't read sumlines.fmt or ~/sumlines.fmt or $SUMLINES.FMT \n\
42    -w suppresses warning messages about no matching lines or no\n\
43       matching final lines.\n\
44    -W in addition, suppresses warning about missing cases.\n\
45    -n don't write the number of matching lines for each format.\n\
46    -v produces a list of all the formats.\n"
47 
48 #define DEFAULT   0  /* No special flags */
49 #define FINAL     1  /* At least one of these must be in each input file */
50 #define ERROR     2  /* Must be none of these */
51 #define UNIQUE    4  /* The %s and %c parts must be unique over all inputs */
52 #define COUNT     8  /* The output only states how many lines matched */
53 #define CONTINUE 16  /* Try to match later formats too */
54 #define NUMERIC  32  /* Use numerical comparison (see numstrcmp() below) */
55 #define SILENT   64  /* Don't report, just check */
56 #define ENDFILE 128  /* Usually appears at end of output */
57 
58 /* The formats are tried against each input line one at a time, and the
59    first one that matches is accepted.  The entire line must match.
60    If the CONTINUE flag is present, the input line is also matched
61    against further formats.
62 
63    Except in the case of formats with the COUNT flag, each format that
64    matches any lines produces output giving the total value of each of the
65    integers %d or real numbers %f in the lines which match.  If there are
66    any %s or %c controls in the format, the output is given separately for
67    each value of the matching strings which appear in the input lines.
68 
69    In the case of the COUNT flag, the program only reports the number of
70    input lines which matched the format.
71 
72    If a format has the UNIQUE flag, no two input lines may match with the
73    same values of the %s and %c controls present.  Otherwise a warning
74    message is written for each duplicate match.
75 
76    The sequence P=# where # is an integer value defines the base for the
77    %p directive.  There can be no spaces in the sequence "P=#".  The
78    default base is 2.
79 
80    %d  - matches an integer (small enough for 64 bits)
81    %x  - same as %d but accumulates maximum rather than the sum
82    %n  - same as %d but accumulates minimum rather than the sum
83    %p  - same as %d but accumulates the value modulo a base
   %m  - matches an integer of unbounded size (if GMP!=0)
85    %f  - matches a real number of the form ddddd.ddd or -ddddd.ddd
86    %v  - same as %f but reports the average rather than the sum
87    %X  - same as %f but reports the maximum rather than the sum
88    %h  - similar to %d:%d:%f taken as h:m:s with a single floating value
89    %sx - matches a string, where 'x' is any character.
         If 'x' is not a space, match zero or more characters from the
             current position up to but not including the first 'x'.
92          If 'x' is a space, match one or more characters from the current
93              position up to and including the first non-space character
94              which is followed by a space.
95    %c  - matches a non-white character
96    %%  - matches the character '%'
97    %   - (with a space following the '%') matches zero or more spaces or
98             tabs, as many as appear in the input.  In the output, this
99             sequence appears as one space.
100    %   - (appearing exactly at the end of the format) matches zero or
101          more spaces at the end of the line.  In the output, nothing.
102    %*d, %*m, %*x, %*p, %*f, %*sx, %*c - these are similar to the versions
103          without the '*' except that the value is ignored (not used for
104          summing, and not used to divide the output).  In the output,
105          this field appears as a single character '*'.
106    %#  - matches an unsigned integer.  For each format containing this
107          control, a report is made of any breaks or duplicates in the
108          sequence of matching numbers.  (So this is useful for checking a
109          sequence of case numbers.)  At most one %# may appear in each format.
110    %l  - matches a list of arbitrarily many (%d sized) integers
111 
112   At least one FINAL format must match in each file or a warning is given
113   (unless -w is used, in which case no warning is given).
114 
115   A format marked ENDFILE will cause sumlines to act as if it started
116   reading from a new input file.  This can have some effects on the
117   order of output lines.
118 */
119 
120 #define HAS(i,flgs)  ((format[i].flags&(flgs)) != 0)
121 
122 #include <stdio.h>
123 #include <ctype.h>
124 #include <string.h>
125 #include <pwd.h>
126 #include <stdlib.h>
127 #include <glob.h>
128 #include <limits.h>
129 #include <unistd.h>
130 
131 #if GMP
132 #include <gmp.h>
133 #endif
134 
135 #if defined(__alpha)
136 typedef long integer;
137 #define DOUT "%ld"       /* Format used to output %d/%x/%n/%p quantities */
138 #define FOUT "%.2f"      /* Format used to output %f/%X quantities */
139 #define VOUT "%.4f"      /* Format used to output %v quantities */
140 #define HMSOUT1 "%ld:%ld:%ld"
141 #define HMSOUT2 "%ld:%ld:%.2f"
142 #elif defined(__sun) || defined(__GNUC__) || (__STDC_VERSION__ > 199900L)
143 typedef long long integer;
144 #define DOUT "%lld"
145 #define FOUT "%.2f"
146 #define VOUT "%.4f"
147 #define HMSOUT1 "%lld:%lld:%lld"
148 #define HMSOUT2 "%lld:%lld:%.2f"
149 #else
150 typedef long long integer;
151 #define DOUT "%Ld"
152 #define FOUT "%.2f"
153 #define VOUT "%.4f"
154 #define HMSOUT1 "%Ld:%Ld:%Ld"
155 #define HMSOUT2 "%Ld:%Ld:%.2f"
156 #endif
157 
158 static char *dout,*fout,*Xout,*vout,*hmsout1,*hmsout2;
159 static integer maxint;   /* set by find_maxint() */
160 
161 
162 #define INCR(x,inc) \
163      {if (((x) > 0 && maxint-(x) < (inc)) || ((x) < 0 && (maxint)+(x) < -(inc))) \
164            {fprintf(stderr,">E overflow with %%d or %%p format\n"); exit(1);} \
165       x += (inc);}    /*  x += inc   with safety check */
166 
167 typedef int boolean;
168 #define FALSE 0
169 #define TRUE  1
170 
/* A counted list of integers, used for values matched by the %l control. */
typedef struct
{
    int nvals;      /* number of entries of val[] that are in use */
    integer *val;   /* heap-allocated array holding the entries */
} integerlist;
176 
/* One matched or accumulated value.  Which member is meaningful is decided
   by the type code (D, F, M, ...) kept in a parallel valtype[] array. */
typedef union
{
    double f;         /* real value: codes F, V, H and FX */
    integer d;        /* integer value: codes D, X, N and P */
    integerlist *l;   /* list of integers: code LD */
#if GMP
    mpz_t *m;         /* multiprecision integer: code M */
#endif
} number;
186 
187 #define D 0    /* Code for "integer" */
188 #define F 1    /* Code for "real" */
189 #define M 2    /* Code for "multiprecision integer" */
190 #define X 3    /* Code for "integer, take maximum" */
191 #define V 4    /* Code for "real, take average" */
192 #define P 5    /* Code for "integer, modulo some base" */
193 #define LD 6   /* Code for "list of integer" */
194 #define H 8    /* Code for "h:m:s" */
195 #define FX 9   /* Code for "real, take maximum" */
196 #define N 10   /* Code for "integer, take minimum" */
197 
198 #define MAXLINELEN 100000   /* Maximum input line size
199                               (longer lines are broken in bits) */
200 #define MAXVALUES 32  /* Maximum  total number of
201                           %d,%x,%n,%p,%m,%v,%f,%h or %l items in a format */
202 
203 #define MAXFORMATS 1000
204 
/* Table of formats, addressed by format index (see the HAS() macro). */
static struct fmt_st
{
   integer pmod;   /* presumably the base for %p given by the P=# flag --
                      TODO confirm against the code that reads formats */
   int flags;      /* bit mask of the FINAL, ERROR, UNIQUE, ... values */
   char *fmt;      /* the format text; NOTE(review): how it is allocated
                      is not visible from here */
} format[MAXFORMATS];
211 
/* Node of the binary search tree maintained by add_one() and splay():
   one node for each distinct output format string. */
typedef struct countrec
{
    struct countrec *left,*right,*parent;   /* tree links; the root has
                                               parent == NULL */
    char *fmt;                /* malloc'ed copy of the output format;
                                 this is the search key */
    unsigned long count;      /* number of matches added to this node */
    number total[MAXVALUES];  /* accumulated values, one per assigned
                                 item; types are given by valtype[] */
} countnode;
219 
220 static countnode *count_root[MAXFORMATS];
221 static unsigned long matching_lines[MAXFORMATS];
222 static integer total_position[MAXFORMATS];
223 static integer lastseq[MAXFORMATS];
224 
225 #if GMP
226 static mpz_t mp_value[MAXVALUES];
227 #endif
228 
229 static integerlist il[MAXVALUES];
230 
231 #define A 0
232 #define L 1
233 #define R 2
234 #define LL 3
235 #define LR 4
236 #define RL 5
237 #define RR 6
238 
239 #ifndef GLOB_BRACE       /* Allow {} processing -- Linux extension */
240 #define GLOB_BRACE 0
241 #endif
242 
243 #ifndef GLOB_TILDE      /* Allow ~ processing -- Linux extension */
244 #define GLOB_TILDE 0
245 #endif
246 
247 #ifndef GLOB_NOMATCH
248 #define GLOB_NOMATCH 0 /* Some versions don't have a special return for this */
249 #endif
250 
251 #define GLOB_FLAGS (GLOB_ERR|GLOB_NOSORT|GLOB_BRACE|GLOB_TILDE)
252 
253 #define HELP if (argc > 1 && (strcmp(argv[1],"-help")==0 \
254                            || strcmp(argv[1],"--help")==0)) \
255        { printf("\nUsage: %s\n\n%s",USAGE,HELPTEXT); return 0;}
256 
257 /****************************************************************************/
258 
static int
numstrcmp(char *s1, char *s2)
/* Same behaviour as strcmp(), except that when an unsigned integer is
   found in each string, the numerical values are compared instead of
   the ascii values.   Overflow is impossible, because numbers are
   compared by digit count (after removing leading zeros) and then
   digit by digit, never converted to a machine integer.
   Leading spaces before numbers are considered part of the numbers.
   A number in one string is considered less than a non-number in the
   other string (including the end of the string).
   Returns -1, 0 or 1.

   The argument of isdigit() is always converted to unsigned char,
   since passing a negative char value is undefined behaviour. */
{
    char *a1,*a2;

    while (1)
    {
        /* Find the next token of s1: either a run of digits (with any
           preceding spaces skipped) or a single character. */
        for (a1 = s1; *a1 == ' '; ++a1) {}
        if (isdigit((unsigned char)*a1))
        {
            for (s1 = a1+1; isdigit((unsigned char)*s1); ++s1) {}
        }
        else
        {
            a1 = s1;
            ++s1;
        }

        /* Same for s2. */
        for (a2 = s2; *a2 == ' '; ++a2) {}
        if (isdigit((unsigned char)*a2))
        {
            for (s2 = a2+1; isdigit((unsigned char)*s2); ++s2) {}
        }
        else
        {
            a2 = s2;
            ++s2;
        }

        if (!isdigit((unsigned char)*a1))
        {
            if (!isdigit((unsigned char)*a2))
            {
                /* Two ordinary characters. */
                if (*a1 < *a2) return -1;
                if (*a1 > *a2) return 1;
                if (*a1 == '\0') return 0;
            }
            else
                return 1;       /* non-number > number */
        }
        else
        {
            if (!isdigit((unsigned char)*a2))
                return -1;      /* number < non-number */
            else
            {
                /* Two numbers: strip leading zeros, then the longer
                   one is larger; equal lengths compare digitwise. */
                for (; *a1 == '0'; ++a1) {}
                for (; *a2 == '0'; ++a2) {}

                if (s1-a1 < s2-a2) return -1;
                if (s1-a1 > s2-a2) return 1;
                for (; a1 < s1 && *a1 == *a2; ++a1, ++a2) {}
                if (a1 < s1)
                {
                    if (*a1 < *a2) return -1;
                    else           return 1;
                }
            }
        }
    }
}
325 
326 /****************************************************************************/
327 
328 static void
writeline(char * outf,number * val,unsigned long count)329 writeline(char *outf, number *val, unsigned long count)
330 /* Write an output line with the given format and values */
331 {
332     int i,n;
333     integer mins,nsecs;
334     double secs,hms;
335     boolean neg;
336 
337     n = 0;
338 
339     for (; *outf != '\0'; ++outf)
340     {
341         if (*outf == '%')
342         {
343             ++outf;
344             if (*outf == '%' || *outf == '#')
345                 putchar(*outf);
346             else if (*outf == 'd' || *outf == 'x'
347                            || *outf == 'n' || *outf == 'p')
348                 printf(dout,val[n++].d);
349             else if (*outf == 'f')
350                 printf(fout,val[n++].f);
351             else if (*outf == 'v')
352                 printf(vout,val[n++].f/count);
353             else if (*outf == 'X')
354                 printf(Xout,val[n++].f);
355 	    else if (*outf == 'h')
356 	    {
357 		if (val[n].f < 0)
358 		{
359 		    neg = TRUE;
360 		    hms = -val[n].f;
361 		}
362 		else
363 		{
364 		    neg = FALSE;
365 		    hms = val[n].f;
366 		}
367 		mins = hms/60.0;
368 		secs = hms - 60*mins;
369 		nsecs = secs;
370 		++n;
371 		if (neg) printf("-");
372 		if (secs == nsecs)
373 		    printf(hmsout1,mins/60,mins%60,nsecs);
374 		else
375 		    printf(hmsout2,mins/60,mins%60,secs);
376 	    }
377             else if (*outf == 'l')
378             {
379                 for (i = 0; i < val[n].l->nvals; ++i)
380                 {
381                     if (i > 0) printf(" ");
382                     printf(dout,val[n].l->val[i]);
383                 }
384                 ++n;
385             }
386 #if GMP
387             else if (*outf == 'm')
388                 mpz_out_str(NULL,10,*(val[n++].m));
389 #endif
390             else
391             {
392                 fprintf(stderr,">E unknown output format %%%c\n",*outf);
393                 exit(1);
394             }
395         }
396         else
397             putchar(*outf);
398     }
399 }
400 
401 /*********************************************************************/
402 
403 static void
print_counts(countnode * root,boolean printcounts)404 print_counts(countnode *root, boolean printcounts)
405 /* Use a non-recursive inorder traversal to print the tree */
406 {
407     int code;
408     countnode *p;
409 
410     p = root;
411     code = A;
412 
413     while (p)
414     {
415         switch (code)    /* deliberate flow-ons */
416         {
417          case A:
418             if (p->left)
419             {
420                 p = p->left;
421                 break;
422             }
423          case L:
424             if (printcounts) printf("%5lu: ",p->count);
425             writeline(p->fmt,p->total,p->count);
426             if (p->right)
427             {
428                 p = p->right;
429                 code = A;
430                 break;
431             }
432          case R:
433             if (p->parent && p->parent->left == p) code = L;
434             else                                   code = R;
435             p = p->parent;
436             break;
437         }
438     }
439 }
440 
441 /*********************************************************************/
442 
443 static void
print_common(countnode * root)444 print_common(countnode *root)
445 /* Print the common ends of the formats in the tree */
446 {
447     int code;
448     countnode *p;
449     char *s0,*s1,*t0,*t1;
450     int i,comm0,comm1,minlen,maxlen;
451 
452     if (root == NULL) return;
453 
454     p = root;
455     code = A;
456 
457     s0 = s1 = p->fmt;
458     while (*s1 != '\0') ++s1;
459     comm0 = comm1 = minlen = maxlen = s1-s0;
460 
461     while (p)
462     {
463         switch (code)    /* deliberate flow-ons */
464         {
465          case A:
466             if (p->left)
467             {
468                 p = p->left;
469                 break;
470             }
471          case L:
472             t0 = t1 = p->fmt;
473             for (i = 0; i < comm0; ++i)
474                 if (s0[i] != t0[i]) break;
475             comm0 = i;
476 
477             while (*t1 != '\0') ++t1;
478             for (i = 1; i <= comm1; ++i)
479                 if (s1[-i] != t1[-i]) break;
480             comm1 = i-1;
481             if (t1-t0 < minlen) minlen = t1-t0;
482             if (t1-t0 > maxlen) maxlen = t1-t0;
483 
484             if (p->right)
485             {
486                 p = p->right;
487                 code = A;
488                 break;
489             }
490          case R:
491             if (p->parent && p->parent->left == p) code = L;
492             else                                   code = R;
493             p = p->parent;
494             break;
495         }
496     }
497 
498     if (comm0 + comm1 > minlen) comm1 = minlen - comm0;
499 
500     for (i = 0; i < comm0; ++i)
501         printf("%c",s0[i]);
502     if (comm0 + comm1 < maxlen) printf("*");
503     for (i = comm1; i > 0; --i)
504         printf("%c",s1[-i]);
505 }
506 
507 /*********************************************************************/
508 
509 static void
splay(countnode * p)510 splay(countnode *p)
511 /* Splay the node p.  It becomes the new root. */
512 {
513     countnode *q,*r,*s;
514     countnode *a,*b,*c;
515     int code;
516 
517 #define LCHILD(x,y) {(x)->left = y; if (y) (y)->parent = x;}
518 #define RCHILD(x,y) {(x)->right = y; if (y) (y)->parent = x;}
519 
520     while (p->parent)
521     {
522         a = p->left;
523         b = p->right;
524         q = p->parent;
525         if (q->left == p)
526         {
527             code = L;
528             c = q->right;
529         }
530         else
531         {
532             code = R;
533             c = q->left;
534         }
535         r = q->parent;
536         if (r)
537         {
538             if (r->left == q) code = (code == L ? LL : LR);
539             else              code = (code == L ? RL : RR);
540             s = r->parent;
541             p->parent = s;
542             if (s)
543             {
544                 if (s->left == r) s->left = p;
545                 else              s->right = p;
546             }
547         }
548         else
549         {
550             p->parent = NULL;
551         }
552 
553         switch (code)
554         {
555          case L:
556             RCHILD(p,q); LCHILD(q,b); break;
557          case R:
558             LCHILD(p,q); RCHILD(q,a); break;
559          case LL:
560             RCHILD(p,q); RCHILD(q,r); LCHILD(q,b); LCHILD(r,c); break;
561          case RR:
562             LCHILD(p,q); LCHILD(q,r); RCHILD(r,c); RCHILD(q,a); break;
563          case LR:
564             LCHILD(p,q); RCHILD(p,r); RCHILD(q,a); LCHILD(r,b); break;
565          case RL:
566             LCHILD(p,r); RCHILD(p,q); RCHILD(r,a); LCHILD(q,b); break;
567         }
568     }
569 }
570 
571 /*********************************************************************/
572 
573 static void
add_one(countnode ** to_root,char * fmt,integer pmod,int nval,number * val,int * valtype,int which,boolean numcompare)574 add_one(countnode **to_root, char *fmt, integer pmod, int nval,
575         number *val, int *valtype, int which, boolean numcompare)
576 /* Add one match to the node with the given format, creating it if it is new.
577    The tree is then splayed to ensure good efficiency. */
578 {
579     int i,j,cmp;
580     countnode *p,*ppar,*new_node;
581     integer w;
582 
583     p = *to_root;
584     cmp = 0;
585 
586     while (p != NULL)
587     {
588         cmp = (numcompare ? numstrcmp(fmt,p->fmt) : strcmp(fmt,p->fmt));
589         if (cmp == 0)
590         {
591             if (HAS(which,UNIQUE) && p->count == 1)
592                 printf("ERROR: Multiple matches for %s",fmt);
593             for (i = 0; i < nval; ++i)
594                 if (valtype[i] == D)
595                     {INCR(p->total[i].d,val[i].d);}
596                 else if (valtype[i] == X)
597                     {if (val[i].d > p->total[i].d) p->total[i].d = val[i].d;}
598                 else if (valtype[i] == N)
599                     {if (val[i].d < p->total[i].d) p->total[i].d = val[i].d;}
600                 else if (valtype[i] == P)
601                     {w = val[i].d % pmod; INCR(p->total[i].d,w);
602                       p->total[i].d %= pmod;}
603                 else if (valtype[i] == LD)
604                 {
605                     if (p->total[i].l->nvals < val[i].l->nvals)
606                     {
607                         if ((p->total[i].l->val
608                            = (integer*)realloc(p->total[i].l->val,
609                                           sizeof(integer)*val[i].l->nvals))
610                                 == NULL)
611                         {
612                             fprintf(stderr,"Malloc failed\n");
613                             exit(1);
614                         }
615                     }
616                     for (j = 0; j < p->total[i].l->nvals &&
617                                 j < val[i].l->nvals; ++j)
618                         INCR(p->total[i].l->val[j],val[i].l->val[j]);
619                     if (p->total[i].l->nvals < val[i].l->nvals)
620                     {
621                         for (j = p->total[i].l->nvals;
622                                    j < val[i].l->nvals; ++j)
623                             p->total[i].l->val[j] = val[i].l->val[j];
624                         p->total[i].l->nvals = val[i].l->nvals;
625                     }
626                 }
627 #if GMP
628                 else if (valtype[i] == M)
629                     mpz_add(*(p->total[i].m),*(p->total[i].m),*(val[i].m));
630 #endif
631 		else if (valtype[i] == FX)
632 		    {if (val[i].f > p->total[i].f) p->total[i].f = val[i].f;}
633                 else
634                     p->total[i].f += val[i].f;   /* F, V and H */
635             ++p->count;
636             splay(p);
637             *to_root = p;
638             return;
639         }
640         else if (cmp < 0)
641         {
642             ppar = p;
643             p = p->left;
644         }
645         else
646         {
647             ppar = p;
648             p = p->right;
649         }
650     }
651 
652     if ((new_node = (countnode*)malloc(sizeof(countnode))) == NULL)
653     {
654         fprintf(stderr,">E malloc failed in add_one()\n");
655         exit(1);
656     }
657 
658     if ((new_node->fmt = (char*)malloc(strlen(fmt)+1)) == NULL)
659     {
660         fprintf(stderr,">E malloc failed in add_one()\n");
661         exit(1);
662     }
663 
664     new_node->count = 1;
665     strcpy(new_node->fmt,fmt);
666     for (i = 0; i < nval; ++i)
667     {
668 #if GMP
669         if (valtype[i] == M)
670         {
671             if ((new_node->total[i].m
672                                 = (mpz_t*)malloc(sizeof(mpz_t))) == NULL)
673             {
674                 fprintf(stderr,"Malloc failed\n");
675                 exit(1);
676             }
677             mpz_init_set(*(new_node->total[i].m),*(val[i].m));
678         }
679         else
680 #endif
681         if (valtype[i] == LD)
682         {
683             if ((new_node->total[i].l
684                         = (integerlist*)malloc(sizeof(integerlist))) == NULL)
685             {
686                 fprintf(stderr,"Malloc failed\n");
687                 exit(1);
688             }
689             if ((new_node->total[i].l->val
690                  = (integer*)malloc(sizeof(integer)*val[i].l->nvals)) == NULL)
691             {
692                 fprintf(stderr,"Malloc failed\n");
693                 exit(1);
694             }
695             new_node->total[i].l->nvals = val[i].l->nvals;
696             for (j = 0; j < val[i].l->nvals; ++j)
697                 new_node->total[i].l->val[j] = val[i].l->val[j];
698         }
699         else
700             new_node->total[i] = val[i];
701     }
702 
703     new_node->left = new_node->right = NULL;
704 
705     if (cmp == 0)
706     {
707         *to_root = new_node;
708         new_node->parent = NULL;
709     }
710     else if (cmp < 0)
711     {
712         ppar->left = new_node;
713         new_node->parent = ppar;
714     }
715     else
716     {
717         ppar->right = new_node;
718         new_node->parent = ppar;
719     }
720 
721     splay(new_node);
722     *to_root = new_node;
723 }
724 
725 /****************************************************************************/
726 
727 static int
scanline(char * s,char * f,number * val,int * valtype,integer * seqno,char * outf)728 scanline(char *s, char *f, number *val, int *valtype,
729          integer *seqno, char *outf)
730 /* Perform sscanf-like scan of line.
731    The whole format must match.  outf is set to be an output format
732    with unassigned values replaced by '*' and %s replaced by what
733    it matches.  Assigned values except %s are put into val[] with
734    their types in valtype[].  The number of values (not counting %#)
735    is returned.
736    Integers matching %# are put into *seqno, with an error if there
737    are more than one, and -1 if there are none.
738    If the format doesn't match, -1 is returned.
739    WARNING: the gmp and ilist values are pointers to static data,
740    so they need to be copied if the values array is copied.
741    See the comments at the start of the program for more information.
742 */
743 {
744     int n;                   /* Number of values assigned */
745     int digit;
746     boolean doass,neg,oflow,badgmp;
747     integer ival;
748     double dval,digval,comval;
749     char ends,*saves;
750     static boolean gmp_warning = FALSE;
751     integer *ilist;
752     size_t ilist_sz;
753     int nilist;
754 #if GMP
755     char mp_line[MAXLINELEN+1],*mp;
756 #endif
757 
758     n = 0;
759     *seqno = -1;
760     badgmp = oflow = FALSE;
761 
762     while (*f != '\0')
763     {
764         if (*f == '%')
765         {
766             ++f;
767             if (*f == '*')
768             {
769                 doass = FALSE;
770                 ++f;
771             }
772             else
773                 doass = TRUE;
774 
775             if (*f == '%')
776             {
777                 if (!doass)
778                 {
779                     fprintf(stderr,"Bad format item %%*\n");
780                     exit(1);
781                 }
782                 if (*s++ != '%') return -1;
783                 ++f;
784                 *outf++ = '%';
785                 *outf++ = '%';
786             }
787             else if (*f == '\n')
788             {
789                 if (!doass)
790                 {
791                     fprintf(stderr,"Bad format item %%*\n");
792                     exit(1);
793                 }
794                 while (*s != '\0')
795                 {
796                     if (*s != ' ' && *s != '\n') return -1;
797                     ++s;
798                 }
799                 --s;
800             }
801             else if (*f == 'c')
802             {
803                 if (*s == ' ' || *s == '\t' || *s == '\n') return -1;
804                 if (doass) *outf++ = *s;
805                 else       *outf++ = '*';
806                 ++f;
807                 ++s;
808             }
809             else if (*f == 's')
810             {
811                 ends = *(f+1);
812                 if (ends == ' ')
813                 {
814                     while (*s == ' ' || *s == '\t')
815                     {
816                         if (doass) *outf++ = *s;
817                         ++s;
818                     }
819                 }
820                 while (*s != '\n' && *s != ends)
821                 {
822                     if (doass) *outf++ = *s;
823                     ++s;
824                 }
825                 if (!doass) *outf++ = '*';
826                 ++f;
827             }
828 #if GMP
829             else if (*f == 'd' || *f == 'x' || *f == 'n' || *f == 'p')
830             {
831 #else
832             else if (*f == 'd' || *f == 'x' || *f == 'n'
833 				|| *f == 'p' || *f == 'm')
834             {
835                 if (*f == 'm' && !gmp_warning)
836                 {
837                     fprintf(stderr,
838                      ">W not compiled with GMP, treating %%m like %%d\n");
839                     gmp_warning = TRUE;
840                 }
841 #endif
842                 while (*s == ' ' || *s == '\t') ++s;
843                 if (!isdigit(*s) && *s != '-' && *s != '+') return -1;
844                 neg = (*s == '-');
845                 if (*s == '-' || *s == '+') ++s;
846                 ival = 0;
847                 while (isdigit(*s))
848                 {
849                     digit =  *s++ - '0';
850                     if (ival > (maxint-digit)/10)
851                         oflow = TRUE;
852                     else
853                         ival = ival*10 + digit;
854                 }
855                 if (doass)
856                 {
857                     *outf++ = '%';
858                     if (*f == 'd' || *f == 'm')
859                     {
860                         *outf++ = 'd';
861                         valtype[n] = D;
862                     }
863                     else if (*f == 'x')
864                     {
865                         *outf++ = 'x';
866                         valtype[n] = X;
867                     }
868                     else if (*f == 'n')
869                     {
870                         *outf++ = 'n';
871                         valtype[n] = N;
872                     }
873                     else
874                     {
875                         *outf++ = 'p';
876                         valtype[n] = P;
877                     }
878                     val[n++].d = (neg ? -ival : ival);
879                 }
880                 else
881                     *outf++ = '*';
882                 ++f;
883             }
884             else if (*f == 'l')
885             {
886                 nilist = 0;
887                 if ((ilist = (integer*)malloc(200*sizeof(integer)))
888                             == NULL)
889                 {
890                     fprintf(stderr,"Malloc failed\n");
891                     exit(1);
892                 }
893                 ilist_sz = 200;
894                 for (;;)
895                 {
896                     saves = s;
897                     while (*s == ' ' || *s == '\t') ++s;
898                     if (!isdigit(*s) && *s != '-' && *s != '+')
899                     {
900                         s = saves;
901                         break;
902                     }
903                     neg = (*s == '-');
904                     if (*s == '-' || *s == '+') ++s;
905                     ival = 0;
906                     while (isdigit(*s))
907                     {
908                         digit =  *s++ - '0';
909                         if (ival > (maxint-digit)/10)
910                             oflow = TRUE;
911                         else
912                             ival = ival*10 + digit;
913                     }
914                     if (neg) ival = -ival;
915                     if (nilist == ilist_sz)
916                     {
917                         if ((ilist
918                             = (integer*)realloc((void*)ilist,
919                                        (ilist_sz+500)*sizeof(integer)))
920                                 == NULL)
921                         {
922                             fprintf(stderr,"Malloc failed\n");
923                             exit(1);
924                         }
925                         ilist_sz += 500;
926                     }
927                     ilist[nilist++] = ival;
928                 }
929                 if (doass)
930                 {
931                     valtype[n] = LD;
932                     val[n].l = &il[n];
933                     val[n].l->nvals = nilist;
934                     if (val[n].l->val) free(val[n].l->val);
935                     val[n].l->val = ilist;
936                     ++n;
937                     *outf++ = '%';
938                     *outf++ = 'l';
939                 }
940                 else
941                 {
942                     free(ilist);
943                     *outf++ = '*';
944                 }
945                 ++f;
946             }
947 #if GMP
948             else if (*f == 'm')
949             {
950                 while (*s == ' ' || *s == '\t') ++s;
951                 if (!isdigit(*s) && *s != '-' && *s != '+') return -1;
952                 mp = mp_line;
953                 if      (*s == '-') *mp++ = *s++;
954                 else if (*s == '+') s++;
955                 while (isdigit(*s)) *mp++ = *s++;
956                 *mp = '\0';
957                 if (doass)
958                 {
959                     valtype[n] = M;
960                     val[n].m = &mp_value[n];
961                     if (mpz_set_str(mp_value[n],mp_line,10) < 0)
962                         badgmp = TRUE;
963                     ++n;
964                     *outf++ = '%';
965                     *outf++ = 'm';
966                 }
967                 else
968                     *outf++ = '*';
969                 ++f;
970             }
971 #endif
972             else if (*f == '#')
973             {
974                 while (*s == ' ' || *s == '\t') ++s;
975                 if (!isdigit(*s)) return -1;
976                 ival = 0;
977                 while (isdigit(*s))
978                 {
979                     digit =  *s++ - '0';
980                     if (ival > (maxint-digit)/10)
981                         oflow = TRUE;
982                     else
983                         ival = ival*10 + digit;
984                 }
985                 if (*seqno >= 0)
986                 {
987                     fprintf(stderr,
988                             ">E %%# can only be used once per format\n");
989                     exit(1);
990                 }
991                 *seqno = ival;
992                 *outf++ = '#';
993                 ++f;
994             }
995             else if (*f == 'f' || *f == 'v' || *f == 'X')
996             {
997                 while (*s == ' ' || *s == '\t') ++s;
998 
999                 if (!isdigit(*s) && *s != '.' && *s != '-' && *s != '+')
1000                     return -1;
1001                 neg = (*s == '-');
1002                 if (*s == '-' || *s == '+') ++s;
1003                 dval = 0.0;
1004                 while (isdigit(*s)) dval = dval*10.0 + (*s++ - '0');
1005                 if (*s == '.')
1006                 {
1007                     digval = 1.0;
1008                     ++s;
1009                     while (isdigit(*s))
1010                     {
1011                         digval /= 10.0;
1012                         dval += (*s++ - '0') * digval;
1013                     }
1014                 }
1015                 if (doass)
1016                 {
1017                     valtype[n] = (*f == 'f' ? F : (*f == 'v' ? V : FX));
1018                     val[n++].f = (neg ? -dval : dval);
1019                     *outf++ = '%';
1020                     *outf++ = *f;
1021                 }
1022                 else
1023                     *outf++ = '*';
1024                 ++f;
1025             }
1026             else if (*f == 'h')
1027             {
1028                 while (*s == ' ' || *s == '\t') ++s;
1029 
1030                 if (!isdigit(*s) && *s != '.' && *s != '-' && *s != '+' && *s != ':')
1031                     return -1;
1032                 neg = (*s == '-');
1033                 if (*s == '-' || *s == '+') ++s;
1034                 dval = 0.0;
1035 		comval = 0.0;
1036                 while (isdigit(*s) || *s == ':')
1037 		{
1038 		    if (*s == ':')
1039 		    {
1040 			dval = dval*60.0 + comval;
1041 			comval = 0.0;
1042 			++s;
1043 		    }
1044 		    else
1045 		        comval = comval*10.0 + (*s++ - '0');
1046 		}
1047                 if (*s == '.')
1048                 {
1049                     digval = 1.0;
1050                     ++s;
1051                     while (isdigit(*s))
1052                     {
1053                         digval /= 10.0;
1054                         comval += (*s++ - '0') * digval;
1055                     }
1056                 }
1057 		dval = dval*60.0 + comval;
1058                 if (doass)
1059                 {
1060                     valtype[n] = H;
1061                     val[n++].f = (neg ? -dval : dval);
1062                     *outf++ = '%';
1063                     *outf++ = *f;
1064                 }
1065                 else
1066                     *outf++ = '*';
1067                 ++f;
1068             }
1069             else if (*f == ' ')
1070             {
1071                 while (*s == ' ' || *s == '\t') ++s;
1072                 *outf++ = ' ';
1073                 ++f;
1074             }
1075             else
1076             {
1077                 fprintf(stderr,"Bad format item %%%c\n",*f);
1078                 exit(1);
1079             }
1080         }
1081         else
1082         {
1083             if (*s != *f) return -1;
1084             *outf++ = *f;
1085             ++s;
1086             ++f;
1087         }
1088     }
1089 
1090     if (*s != '\0') return -1;
1091 
1092     *outf = '\0';
1093 
1094     if (oflow)
1095     {
1096         fprintf(stderr,"Integer too large\n");
1097         exit(1);
1098     }
1099     if (badgmp)
1100     {
1101         fprintf(stderr,"Illegal multiprecision integer\n");
1102         exit(1);
1103     }
1104 
1105     return n;
1106 }
1107 
1108 /****************************************************************************/
1109 
1110 void
1111 find_maxint(void)
1112 {
1113 /* Put the maximum possible integer value into maxint. */
1114 /* New version with no integer overflow. */
1115     integer x,y;
1116 
1117     x = ((integer)1) << (8*sizeof(integer) - 2);
1118     y = x - 1;
1119     x += y;
1120 
1121     if (x <= 0)
1122     {
1123         fprintf(stderr,">E find_maxint() failed\n");
1124         exit(1);
1125     }
1126 
1127     maxint = x;
1128 }
1129 
1130 /****************************************************************************/
1131 
1132 static void
1133 sort_formats(int *order, int numformats)
1134 /* Make order[0..numformats-1] a permutation of 0..numformats-1 being
1135    a good order to display the results. */
1136 {
1137     double score[MAXFORMATS];
1138     int h,i,j,iw;
1139 
1140     for (i = 0; i < numformats; ++i)
1141     {
1142         if (matching_lines[i] == 0)
1143             score[i] = -1.0;
1144         else
1145             score[i] = i +
1146                ((100.0*total_position[i]) / matching_lines[i]) * numformats;
1147         order[i] = i;
1148     }
1149 
1150     j = numformats / 3;
1151     h = 1;
1152     do
1153         h = 3 * h + 1;
1154     while (h < j);
1155 
1156     do
1157     {
1158         for (i = h; i < numformats; ++i)
1159         {
1160             iw = order[i];
1161             for (j = i; score[order[j-h]] > score[iw]; )
1162             {
1163                 order[j] = order[j-h];
1164                 if ((j -= h) < h) break;
1165             }
1166             order[j] = iw;
1167         }
1168         h /= 3;
1169     }
1170     while (h > 0);
1171 }
1172 
1173 /****************************************************************************/
1174 
1175 static void
1176 read_formats(char *filename, int *numformatsp, boolean mustexist)
1177 /* Read formats from the given file. */
1178 {
1179     FILE *f;
1180     int i,c,flags,ignore;
1181     char flagname[52];
1182     char line[MAXLINELEN+3];
1183     integer pmod;
1184     char *s;
1185     boolean oflow,badpmod;
1186     int digit;
1187 
1188     if (strcmp(filename,"-") == 0)
1189         f = stdin;
1190     else if ((f = fopen(filename,"r")) == NULL)
1191     {
1192         if (mustexist)
1193         {
1194             fprintf(stderr,">E Can't open %s for reading.\n",filename);
1195             exit(1);
1196         }
1197         return;
1198     }
1199 
1200     line[MAXLINELEN+2] = '\0';
1201 
1202     for (;;)
1203     {
1204         if ((c = getc(f)) == EOF) break;
1205 
1206         while (c == ' ' || c == '\t') c = getc(f);
1207         if (c == '\n') continue;
1208         if (c == EOF) break;
1209 
1210         if (c == '#')
1211         {
1212             while (c != '\n' && c != EOF) c = getc(f);
1213             continue;
1214         }
1215 
1216         ungetc(c,f);
1217 
1218         flags = 0;
1219         pmod = 2;
1220         for (;;)
1221         {
1222             while ((c = getc(f)) == ' '
1223                                 || c == '|' || c == ',' || c == '\t') {}
1224             if (c == '#')
1225                 while (c != '\n' && c != EOF) c = getc(f);
1226             if (c == '\n' || c == EOF) break;
1227 
1228             ungetc(c,f);
1229 
1230          /* There appear to be some issues with the [ flag in fscanf,
1231           * as to whether a null is appended.  We'll take no chances. */
1232             for (i = 0; i < 52; ++i) flagname[i] = '\0';
1233             ignore = fscanf(f,"%50[A-Za-z0-9=]",flagname);
1234 
1235             if      (strcmp(flagname,"DEFAULT") == 0)  {}
1236             else if (strcmp(flagname,"FINAL") == 0)    flags |= FINAL;
1237             else if (strcmp(flagname,"ERROR") == 0)    flags |= ERROR;
1238             else if (strcmp(flagname,"UNIQUE") == 0)   flags |= UNIQUE;
1239             else if (strcmp(flagname,"COUNT") == 0)    flags |= COUNT;
1240             else if (strcmp(flagname,"CONTINUE") == 0) flags |= CONTINUE;
1241             else if (strcmp(flagname,"NUMERIC") == 0)  flags |= NUMERIC;
1242             else if (strcmp(flagname,"SILENT") == 0)   flags |= SILENT;
1243             else if (strcmp(flagname,"ENDFILE") == 0)  flags |= ENDFILE;
1244             else if (flagname[0] == 'P' && flagname[1] == '=')
1245             {
1246                 pmod = 0;
1247                 oflow = FALSE;
1248                 badpmod = (flagname[2] == '\0');
1249                 for (s = flagname+2; *s != '\0'; ++s)
1250                 {
1251                     if (isdigit(*s))
1252                     {
1253                         digit =  *s - '0';
1254                         if (pmod > (maxint-digit)/10)
1255                             oflow = TRUE;
1256                         else
1257                             pmod = pmod*10 + digit;
1258                     }
1259                     else
1260                         badpmod = TRUE;
1261                 }
1262                 if (badpmod)
1263                 {
1264                     fprintf(stderr,">E Bad value for P= directive: %s\n",
1265                             flagname+2);
1266                     exit(1);
1267                 }
1268                 else if (oflow)
1269                 {
1270                     fprintf(stderr,">E Value for P= is too large\n");
1271                     exit(1);
1272                 }
1273 
1274             }
1275             else
1276             {
1277                 fprintf(stderr,">E Unknown flag \"%s\" in %s\n",
1278                                flagname,filename);
1279                 exit(1);
1280             }
1281         }
1282 
1283         if (fgets(line,MAXLINELEN,f) == NULL)
1284         {
1285             fprintf(stderr,">E Missing format in %s\n",filename);
1286             exit(1);
1287         }
1288 
1289         for (i = 0; i < *numformatsp; ++i)
1290             if (strcmp(line,format[i].fmt) == 0) break;
1291         if (i < *numformatsp) continue;
1292 
1293         if (*numformatsp == MAXFORMATS)
1294         {
1295             fprintf(stderr,">E Increase MAXFORMATS\n");
1296             exit(1);
1297         }
1298 
1299         format[*numformatsp].flags = flags;
1300         format[*numformatsp].pmod = pmod;
1301         if ((format[*numformatsp].fmt
1302                             = (char*)malloc(strlen(line)+1)) == NULL)
1303         {
1304             fprintf(stderr,">E malloc() failed in read_formats()\n");
1305             exit(1);
1306         }
1307         strcpy(format[*numformatsp].fmt,line);
1308         ++*numformatsp;
1309     }
1310 
1311     if (f != stdin) fclose(f);
1312 }
1313 
1314 /****************************************************************************/
1315 
1316 static void
1317 read_local_formats(int *numformatsp)
1318 /* Read formats from sumlines.fmt in current directory */
1319 {
1320         read_formats("sumlines.fmt",numformatsp,FALSE);
1321 }
1322 
1323 /****************************************************************************/
1324 
1325 static void
1326 read_global_formats(int *numformatsp)
1327 /* Read formats from sumlines.fmt in home directory */
1328 {
1329     struct passwd *pwd;
1330     char *homedir;
1331     char filename[4097];
1332 
1333     homedir = getenv("HOME");
1334     if (homedir == NULL && (pwd = getpwuid(getuid())) != NULL)
1335         homedir = pwd->pw_dir;
1336 
1337     if (homedir == NULL)
1338     {
1339         fprintf(stderr,">W Can't find home directory\n");
1340         return;
1341     }
1342 
1343     sprintf(filename,"%s/sumlines.fmt",homedir);
1344     read_formats(filename,numformatsp,FALSE);
1345 }
1346 
1347 /****************************************************************************/
1348 
1349 static void
1350 read_env_formats(int *numformatsp)
1351 /* Read formats from $SUMLINES.FMT if it exists */
1352 {
1353     char *filename;
1354 
1355     if ((filename = getenv("SUMLINES.FMT")) != 0)
1356         read_formats(filename,numformatsp,FALSE);
1357 }
1358 
1359 /****************************************************************************/
1360 
1361 static boolean
1362 readoneline(FILE *f, char *line, int size, int *nulls)
1363 /* Get a line.  Read at most size-1 chars until EOF or \n.
1364    If \n is read, it is stored.  Then \0 is appended.
1365    *nulls is set to the number of NUL chars (which are also stored). */
1366 {
1367     int i,c;
1368 
1369     *nulls = 0;
1370     for (i = 0; i < size-1; ++i)
1371     {
1372         c = getc(f);
1373         if (c == EOF) break;
1374         line[i] = c;
1375         if (c == '\0') ++*nulls;
1376         if (c == '\n') {++i; break;}
1377     }
1378     line[i] = '\0';
1379 
1380     return i > 0;
1381 }
1382 
1383 /****************************************************************************/
1384 
static int
pnumstrcmp(const void *a, const void *b)
/* Comparison function in the form qsort() wants: a and b each point at
   a char* and the two strings are compared using numstrcmp(). */
{
    char *left,*right;

    left = *(char**)a;
    right = *(char**)b;

    return numstrcmp(left,right);
}
1391 
1392 /****************************************************************************/
1393 
1394 static void
1395 doglob(char *patt, glob_t *globlk)
1396 /* Find all files matching the given pattern, numeric sorting.
1397    Give a warning message if there are none. */
1398 {
1399     int ret;
1400 
1401     ret = glob(patt,GLOB_FLAGS,NULL,globlk);
1402 
1403     if (ret != 0) globlk->gl_pathc = 0;
1404 
1405     if (ret == GLOB_NOSPACE)
1406     {
1407         fprintf(stderr,"ERROR: ran out of space during glob()\n");
1408         exit(1);
1409     }
1410     if (ret == GLOB_ERR)
1411     {
1412         fprintf(stderr,"ERROR: during glob(%s)\n",patt);
1413         exit(1);
1414     }
1415     if (ret != 0 && ret != GLOB_NOMATCH)
1416     {
1417         fprintf(stderr,"ERROR: value %d from glob(%s)\n",ret,patt);
1418         exit(1);
1419     }
1420 
1421 
1422     if (globlk->gl_pathc == 0) printf("WARNING: no files match %s\n",patt);
1423 
1424     if (globlk->gl_pathc >= 2)
1425         qsort(globlk->gl_pathv,globlk->gl_pathc,sizeof(char*),pnumstrcmp);
1426 }
1427 
1428 /****************************************************************************/
1429 
1430 int
1431 main(int argc, char *argv[])
1432 {
1433     int i,j,nvals,argnum;
1434     number val[MAXVALUES];
1435     int valtype[MAXVALUES];
1436     char line[MAXLINELEN+2];
1437     char outf[MAXLINELEN+MAXVALUES+6];
1438     unsigned long matched,unmatched,finalmatched;
1439     unsigned long errorlines,totalerrorlines;
1440     unsigned long line_number,nullcount,numfiles,ifile;
1441     char *filename;
1442     FILE *infile;
1443     int numformats,firstarg,nulls;
1444     boolean havefinal,nowarn,noWarn,listformats,readfiles;
1445     integer seq;
1446     int order[MAXFORMATS];
1447     glob_t globlk,globlk_stdin,*pglob;
1448     char *glob_stdin_v[2];
1449     boolean printcounts;
1450 
1451     HELP;
1452 
1453     find_maxint();
1454 
1455     firstarg = 1;
1456     numformats = 0;
1457     nowarn = noWarn = FALSE;
1458     listformats = FALSE;
1459     readfiles = TRUE;
1460     printcounts = TRUE;
1461 
1462     globlk_stdin.gl_pathc = 1;
1463     globlk_stdin.gl_pathv = glob_stdin_v;
1464     glob_stdin_v[0] = "-";
1465     glob_stdin_v[1] = NULL;
1466 
1467     dout = DOUT;
1468     fout = FOUT;
1469     vout = VOUT;
1470     Xout = FOUT;
1471     hmsout1 = HMSOUT1;
1472     hmsout2 = HMSOUT2;
1473 
1474     for (; firstarg < argc; ++firstarg)
1475     {
1476         if (argv[firstarg][0] == '-' && argv[firstarg][1] == 'f')
1477         {
1478             if (argv[firstarg][2] != '\0')
1479                 read_formats(&argv[firstarg][2],&numformats,TRUE);
1480             else if (firstarg == argc - 1)
1481             {
1482                 fprintf(stderr,">E No argument for -f\n");
1483                 exit(1);
1484             }
1485             else
1486             {
1487                 ++firstarg;
1488                 read_formats(argv[firstarg],&numformats,TRUE);
1489             }
1490         }
1491         else if (strcmp(argv[firstarg],"-W") == 0)
1492             noWarn = TRUE;
1493         else if (strcmp(argv[firstarg],"-w") == 0)
1494             nowarn = TRUE;
1495         else if (strcmp(argv[firstarg],"-v") == 0)
1496             listformats = TRUE;
1497         else if (strcmp(argv[firstarg],"-d") == 0)
1498             readfiles = FALSE;
1499         else if (strcmp(argv[firstarg],"-n") == 0)
1500             printcounts = FALSE;
1501         else if (strcmp(argv[firstarg],"-V") == 0)
1502             vout = argv[++firstarg];
1503         else if (strcmp(argv[firstarg],"-F") == 0)
1504             fout = argv[++firstarg];
1505         else if (strcmp(argv[firstarg],"-D") == 0)
1506             dout = argv[++firstarg];
1507         else
1508             break;
1509     }
1510 
1511 #if GMP
1512     for (i = 0; i < MAXVALUES; ++i) mpz_init(mp_value[i]);
1513 #endif
1514     for (i = 0; i < MAXVALUES; ++i)
1515     {
1516         il[i].nvals = 0;
1517         il[i].val = NULL;
1518     }
1519 
1520     if (noWarn) nowarn = TRUE;
1521 
1522     if (readfiles) read_local_formats(&numformats);
1523     if (readfiles) read_env_formats(&numformats);
1524     if (readfiles) read_global_formats(&numformats);
1525 
1526     if (listformats)
1527     {
1528         printf("%d formats:\n",numformats);
1529         for (i = 0; i < numformats; ++i)
1530             printf("%03x %s",format[i].flags,format[i].fmt);
1531     }
1532 
1533     if (numformats == 0)
1534     {
1535         fprintf(stderr,">E No formats\n");
1536         exit(1);
1537     }
1538 
1539     havefinal = FALSE;
1540     for (i = 0; i < numformats; ++i)
1541     {
1542         count_root[i] = NULL;
1543         matching_lines[i] = 0;
1544         total_position[i] = 0;
1545         if (HAS(i,FINAL)) havefinal = TRUE;
1546     }
1547 
1548     unmatched = totalerrorlines = 0;
1549     numfiles = 0;
1550 
1551     for (argnum = firstarg;
1552          argnum < (argc == firstarg ? argc+1 : argc); ++argnum)
1553     {
1554         if (argnum >= argc || strcmp(argv[argnum],"-") == 0)
1555             pglob = &globlk_stdin;
1556         else
1557         {
1558             pglob = &globlk;
1559             doglob(argv[argnum],pglob);
1560         }
1561 
1562         for (ifile = 0; ifile < pglob->gl_pathc; ++ifile)
1563         {
1564             matched = finalmatched = errorlines = 0;
1565             ++numfiles;
1566 
1567             if (strcmp(pglob->gl_pathv[ifile],"-") == 0)
1568             {
1569                 filename = "stdin";
1570                 infile = stdin;
1571             }
1572             else
1573             {
1574                 filename = pglob->gl_pathv[ifile];
1575                 if ((infile = fopen(filename,"r")) == NULL)
1576                 {
1577                     fprintf(stderr,">E Can't open %s\n",filename);
1578                     exit(1);
1579                 }
1580             }
1581 
1582             line_number = 0;
1583             nullcount = 0;
1584             while (readoneline(infile,line,MAXLINELEN,&nulls))
1585             {
1586                 nullcount += nulls;
1587                 line[MAXLINELEN] = '\n';
1588                 line[MAXLINELEN+1] = '\0';
1589                 if (line[0] == '\n') continue;
1590                 ++line_number;
1591 
1592                 for (i = 0; i < numformats; ++i)
1593                 {
1594                     nvals
1595                       = scanline(line,format[i].fmt,val,valtype,&seq,outf);
1596                     if (nvals >= 0)
1597                     {
1598                         if (HAS(i,ENDFILE)) line_number = 0;
1599                         ++matched;
1600                         if (HAS(i,FINAL)) ++finalmatched;
1601                         if (HAS(i,ERROR)) ++errorlines;
1602                         ++matching_lines[i];
1603                         total_position[i] += line_number;
1604                         add_one(&count_root[i],outf,format[i].pmod,nvals,
1605                              val,valtype,i,HAS(i,NUMERIC));
1606                         if (!noWarn && matching_lines[i] > 1 && seq >= 0
1607                                                && seq != lastseq[i]+1)
1608                         {
1609                             printf("WARNING: Sequence number");
1610                             if (seq == lastseq[i])
1611                             {
1612                                 printf(" ");
1613                                 printf(dout,seq);
1614                                 printf(" is repeated.\n");
1615                             }
1616                             else if (seq != lastseq[i]+2)
1617                             {
1618                                 printf("s ");
1619                                 printf(dout,lastseq[i]+1);
1620                                 printf("-");
1621                                 printf(dout,seq-1);
1622                                 printf(" are missing.\n");
1623                             }
1624                             else
1625                             {
1626                                 printf("  ");
1627                                 printf(dout,seq-1);
1628                                 printf(" is missing.\n");
1629                             }
1630                         }
1631                         lastseq[i] = seq;
1632                         if (!HAS(i,CONTINUE)) break;
1633                     }
1634                 }
1635 
1636                 if (i == numformats) ++unmatched;
1637             }
1638             if (errorlines != 0)
1639                 printf("ERRORS: Error lines in file %s\n",filename);
1640             else if (matched == 0 && !nowarn)
1641                 printf("WARNING: No matching lines in file %s\n",filename);
1642             else if (finalmatched == 0 && havefinal && !nowarn)
1643                 printf("WARNING: No final lines in file %s\n",filename);
1644             if (nullcount > 0)
1645                 printf("WARNING: %ld NULs found in file %s\n",
1646                                                         nullcount,filename);
1647             if (infile != stdin) fclose(infile);
1648 
1649             totalerrorlines += errorlines;
1650         }
1651         if (pglob == &globlk) globfree(pglob);
1652     }
1653 
1654     sort_formats(order,numformats);
1655 
1656     for (j = 0; j < numformats; ++j)
1657     {
1658         i = order[j];
1659         if (HAS(i,SILENT)) continue;
1660 
1661         if (HAS(i,COUNT))
1662         {
1663             if (matching_lines[i] > 0)
1664                 printf("%5lu lines matched ",matching_lines[i]);
1665             print_common(count_root[i]);
1666         }
1667         else
1668             print_counts(count_root[i],printcounts);
1669     }
1670 
1671     if (unmatched > 0)
1672         printf("%5lu non-empty lines not matched\n",unmatched);
1673     if (argc > firstarg) printf("%5lu files read altogether\n",numfiles);
1674     if (totalerrorlines > 0) printf("%5lu errors found\n",totalerrorlines);
1675 
1676     exit(0);
1677 }
1678