/*
	System: Structured text retrieval tool sgrep.
	Module: main.c
	Author: Pekka Kilpeläinen & Jani Jaakkola
	Description: Parsing of command line options,
		     Reading command files
		     Scanning for input files
		     Calling other modules for preprocessing, parsing,
		     pattern matching, evaluation and output
	Version history: Original version February 1995 by JJ & PK
	Copyright: University of Helsinki, Dept. of Computer Science
		   Distributed under GNU General Public License
		   See file COPYING for details
*/
15
16 #include <stdio.h>
17 #include <stdlib.h>
18 #include <unistd.h>
19 #include <errno.h>
20 #include <fcntl.h>
21 #include <string.h>
22 #include <sys/times.h>
23 #include "defines.h"
24
/*
 * Forward declarations for functions that are defined further down in
 * this file but used before their definition.
 */

/* Input handling */
void check_files(int argc, char *argv[], int optind);
int read_stdin();

/* Option and expression handling */
int get_options(char *argv[]);
int environ_options();
void add_command(char *com);
void read_expressions();

/* Evaluation */
void create_constant_lists();
void run_stream(struct TREE_NODE *root, struct PHRASE_NODE *p_list);
void run_one_by_one(struct TREE_NODE *root, struct PHRASE_NODE *p_list);

/* Reporting */
void clear_stats();
void show_stats();
void show_times();
void copyright_notice();
38
39 /*
40 * The global variables common to all modules. See declarations in defines.h
41 */
42 char *output_style=SHORT_OUTPUT; /* default is short */
43 int open_failure=OPEN_FAILURE;
44 struct STATS stats;
45 int print_newline=TRUE;
46 int stdin_fd=-1; /* not opened yet */
47 int print_all=FALSE;
48 int gc_lists_now=0;
49 struct GC_LIST *end_list=NULL;
50 struct GC_LIST *start_list=NULL;
51 struct GC_LIST *chars_list=NULL;
52 int ignore_case=FALSE;
53 #ifdef STREAM_MODE
54 int stream_mode=TRUE;
55 #else
56 int stream_mode=FALSE;
57 #endif
58 #ifdef PROGRESS_REPORTS
59 int progress_output=FALSE;
60 #endif
61
62 /*
63 * Global variables used inside main.c . These are mainly used for storing
64 * information about given options
65 */
66 int have_stats=FALSE; /* Should we show statistics in the end (-T) */
67 int have_times=FALSE; /* Should we show info about used time in the end (-t) */
68 int do_concat=TRUE; /* Shall we do concat operation on result list (-d) */
69 int display_count=FALSE;/* Should we display only count of matching regions (-c) */
70 int no_output=FALSE; /* Should we supress normal output (-q) */
71 int command_file_given=FALSE; /* If a command file name was given with -f
72 option this is set, and no commands are read
73 from command line anymore */
74 int show_expr=FALSE; /* only show expression, don't execute it (-P) */
75 char *preprocessor=PRE_PROCESSOR; /* Which preprocessor to use (-p) */
76 int read_sgreprc; /* are we going to read sgreprc (-n) */
77
78 char com_buf[COMBUF_SIZE]; /* preprosessed command buffer */
79 char com_file_buf[COMBUF_SIZE]; /* not preprocessed command file buffer */
80 int com_buf_size; /* How much it is actually used */
81 int com_file_buf_used;
82 char *home_file; /* pointer to whole path of $HOME/USER_SGREPRC */
83 struct INPUT_FILE *input_files=NULL; /* Table of input files */
84 int last_file=0; /* Index of last input file */
85 int stdin_read=FALSE; /* Since expressions and files can both
86 be read from stdin, we got to make
87 sure that stdin is read only once */
/*
 * Kinds of entries in expr_table: the name of a command file, or
 * expression text given directly.
 */
enum EXPR_TYPES { E_FILE, E_TEXT };

/*
 * The expressions (or expression files) to execute, in the order they
 * are read.  Slot 0 is reserved for the sgreprc file.
 */
struct EXPR_TYPE {
	int type;	/* E_FILE or E_TEXT */
	char *expr;	/* file name for E_FILE, expression text for E_TEXT */
} expr_table[MAX_EXPRESSIONS];

int exprs;		/* number of slots of expr_table in use */
97
/*
 * Process times recorded at the end of each phase of the run.
 * show_times() prints the differences between consecutive points.
 */
struct time_points {
	struct tms start;	/* taken at the beginning of main() */
	struct tms parsing;	/* after parsing and tree optimization */
	struct tms acsearch;	/* after pattern matching */
	struct tms eval;	/* after evaluation */
	struct tms output;	/* after output */
} tps;
108
109 /*
110 * The copyright notice text.
111 */
112 char *copyright_text[]={
113 "sgrep version "VERSION" - search a file for structured pattern",
114 "Copyright (C) 1996 University of Helsinki",
115 "",
116 "This program is free software; you can redistribute it and/or modify",
117 "it under the terms of the GNU General Public License as published by",
118 "the Free Software Foundation; either version 2 of the License, or",
119 "(at your option) any later version.",
120 "",
121 "This program is distributed in the hope that it will be useful,",
122 "but WITHOUT ANY WARRANTY; without even the implied warranty of",
123 "MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the",
124 "GNU General Public License for more details.",
125 "",
126 "You should have received a copy of the GNU General Public License",
127 "along with this program; if not, write to the Free Software",
128 "Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.",
129 "",
130 "Authors: Pekka Kilpel�inen Pekka.Kilpelainen@cc.helsinki.fi",
131 " Jani Jaakkola Jani.Jaakkola@cc.helsinki.fi",
132 NULL
133 };
134
/*
 * Description of one command line option, used for the usage line and
 * for the help text.
 */
struct opt_data {
	char opt;		/* the option letter, 0 ends the table */
	char *have_param;	/* name of the parameter, NULL if the option
				   takes none */
	char *what_does;	/* one line description for print_help() */
};
143
144 /*
145 * List & description of options
146 * If you add more options, add the descriptions here. Put the implementation
147 * of option in get_options()
148 */
149 struct opt_data options[]= {
150 { 'a',NULL,"act as a filter" },
151 { 'C',NULL,"display copyright notice" },
152 { 'c',NULL,"display only count of matching regions" },
153 #ifdef PROGRESS_REPORTS
154 { 'D',NULL,"verbose, show progress" },
155 #endif
156 { 'd',NULL,"don't do concat on result list"},
157 { 'h',NULL,"help (means this text)" },
158 { 'i',NULL,"ignore case distinctions in phrases" },
159 { 'l',NULL,"long output format" },
160 { 'N',NULL,"don't add trailing newline" },
161 { 'n',NULL,"don't read $HOME/"USER_SGREPRC" or "SYSTEM_SGREPRC},
162 { 'P',NULL,"show preprocessed expression, don't execute it." },
163 { 'q',NULL,"supress normal output" },
164 { 'S',NULL,"stream mode (regions extend across files)"},
165 { 's',NULL,"short output format" },
166 { 'T',NULL,"show statistics about what was done" },
167 { 't',NULL,"show information about time spent"},
168 { 'V',NULL,"display version information" },
169 { 'e',"<expression>","execute expression (after preprocessing)" },
170 { 'f',"<file>","reads commands from file" },
171 { 'O',"<file>","reads output style from file"},
172 { 'o',"<style>","set output style. See man page for details"},
173 #ifdef USE_EXEC
174 { 'p',"<program>","preprocess expression using external preprocessor" },
175 #endif
176 { 0,NULL,NULL }
177 };
178
main(int argc,char * argv[])179 int main(int argc, char *argv[])
180 {
181 struct TREE_NODE concat;
182 struct TREE_NODE *root;
183 struct PHRASE_NODE *p_list;
184 int end_options;
185 char *hp;
186
187 /* Initialize the statistics gathering struct */
188 clear_stats();
189 times(&tps.start);
190
191 com_buf_size=0;
192 com_file_buf_used=0;
193
194 /*
195 * If we are going to use either $HOME/sgreprc or system/sgreprc,
196 * we must check if we can read those files
197 */
198 read_sgreprc=FALSE;
199 hp=getenv("HOME");
200 if (hp!=NULL)
201 {
202 home_file=(char *)e_malloc(strlen(hp)+strlen(USER_SGREPRC)+1);
203 strcpy(home_file,hp);
204 strcat(home_file,"/");
205 strcat(home_file,USER_SGREPRC);
206 if (access(home_file,R_OK)==0)
207 {
208 expr_table[0].type=E_FILE;
209 expr_table[0].expr=home_file;
210 read_sgreprc=TRUE;
211 }
212 }
213 if (!read_sgreprc && access(SYSTEM_SGREPRC,R_OK)==0)
214 {
215 expr_table[0].type=E_FILE;
216 expr_table[0].expr=SYSTEM_SGREPRC;
217 read_sgreprc=TRUE;
218 }
219 exprs=1;
220
221 /*
222 * Process environment options
223 */
224 end_options=environ_options();
225
226 /*
227 * Get the command line options
228 */
229 if (end_options!=-1 )
230 end_options=get_options(argv+1);
231 if ( end_options!=-1 && !command_file_given && argv[end_options]==NULL )
232 {
233 /* we need some expression to process */
234 fprintf(stderr,"You have to give an expression line if you don't use -f or -e switch.\n");
235 end_options=-1;
236 }
237 if (end_options==-1)
238 {
239 /* There was error. Let's print usage information */
240 struct opt_data *o=options;
241 fprintf(stderr,"Usage: sgrep [ -");
242 while (o->opt!=0)
243 {
244 if (o->have_param!=NULL)
245 {
246 fprintf(stderr," -%c %s",
247 o->opt,o->have_param);
248 } else fprintf(stderr,"%c",o->opt);
249 o++;
250 }
251 fprintf(stderr," ] \'expr\' [<files...>]\n");
252 fprintf(stderr,"sgrep -h for help\n");
253 exit(2);
254 }
255
256 /*
257 * Shall we get expression from command line
258 */
259 if (!command_file_given)
260 {
261 expr_table[exprs].type=E_TEXT;
262 expr_table[exprs++].expr=(argv[end_options]);
263 end_options++;
264 }
265
266 /*
267 * Reading all expressions to buffer
268 */
269 read_expressions();
270
271 /*
272 * Invoking preprocessor (external because there is no internal yet)
273 */
274 #ifdef DEBUG
275 fprintf(stderr,"Preprocessing expression.\n");
276 #endif
277 preprocess(com_file_buf,com_buf,preprocessor,COMBUF_SIZE);
278
279 /*
280 * If we have show_expr then we show preprocessed expression, and
281 * stop here
282 */
283 if (show_expr)
284 {
285 fprintf(stderr,"%s\n",com_buf);
286 exit(0);
287 }
288
289 /*
290 * Should we read stdin to temp file
291 */
292 if (argv[end_options]==NULL)
293 {
294 #ifdef DEBUG
295 fprintf(stderr,"Reading stdin.\n");
296 #endif
297 if (read_stdin()==0)
298 {
299 fprintf(stderr,"Empty stdin\n");
300 exit(2);
301 }
302 } else
303 /*
304 * If stdin is not used, we check every input file
305 */
306 {
307 #ifdef DEBUG
308 fprintf(stderr,"Scanning through files.");
309 #endif
310 check_files(argc,argv,end_options);
311 }
312
313 /*
314 * Counting the input size
315 */
316 stats.input_size=input_files[last_file-1].start+
317 input_files[last_file-1].length-1;
318
319 /*
320 * Creating constant lists
321 */
322 create_constant_lists();
323
324 /*
325 * Invoking parser
326 */
327 #ifdef DEBUG
328 fprintf(stderr,"Starting parser.\n");
329 #endif
330 root=parse_string(com_buf,&p_list);
331
332 /*
333 * Optimize the operator tree
334 */
335 #ifdef DEBUG
336 fprintf(stderr,"Optimizing operator tree\n");
337 #endif
338 optimize_tree(&root,&p_list);
339
340 if (do_concat)
341 {
342 /* If we do concat on result list, we have to add it to parse tree */
343 concat.oper=CONCAT;
344 concat.left=root;
345 concat.right=NULL;
346 concat.leaf=NULL;
347 concat.parent=NULL;
348 concat.refcount=1;
349 concat.GC_list=NULL;
350 root=&concat;
351 };
352
353 times(&tps.parsing);
354
355 /*
356 * Evaluation style depends on stream_mode
357 */
358 if (stream_mode)
359 run_stream(root,p_list);
360 else
361 run_one_by_one(root,p_list);
362
363 /*
364 * Should we show statistics
365 */
366 if (have_stats) show_stats();
367
368 /*
369 * Should we show information about time spend
370 */
371 if (have_times) show_times();
372
373 if (stats.output==0)
374 return 1; /* Empty result list */
375 /* non empty result list */
376 return 0;
377 }
378
379 /*
380 * Runs sgrep file by file
381 */
run_one_by_one(struct TREE_NODE * root,struct PHRASE_NODE * p_list)382 void run_one_by_one(struct TREE_NODE *root, struct PHRASE_NODE *p_list)
383 {
384 struct GC_LIST *result;
385 int i;
386 int save_print_newline;
387 struct tms t_pmatch,t_eval,t_output,t_last,t_now;
388
389 #define CALC_TIME(TIME) do { \
390 times(&t_now); \
391 (TIME).tms_utime+=t_now.tms_utime-t_last.tms_utime; \
392 (TIME).tms_stime+=t_now.tms_stime-t_last.tms_stime; \
393 t_last=t_now; } while (0)
394
395 t_last=tps.parsing;
396 t_pmatch.tms_utime=0;
397 t_pmatch.tms_stime=0;
398 t_eval=t_pmatch;
399 t_output=t_pmatch;
400
401 #ifdef DEBUG
402 fprintf(stderr,"one by one: input_files=%d\n",last_file);
403 #endif
404 save_print_newline=print_newline;
405 print_newline=FALSE;
406
407 for (i=0;i<last_file;i++)
408 {
409 #ifdef DEBUG
410 fprintf(stderr,"file #%d:%s\n",i,input_files[i].name);
411 #endif
412 /* We got to clear root nodes gc list so that eval won't think
413 that it's already evaluated */
414 root->GC_list=NULL;
415
416 /* end is now the size of file now being evaluated */
417 end_list->first->list[0].start=input_files[i].length-1;
418 end_list->first->list[0].end=input_files[i].length-1;
419 /* chars list size is the size of file being evaluates */
420 chars_list->length=input_files[i].length;
421
422 ACsearch(p_list,&input_files[i],1);
423 CALC_TIME(t_pmatch);
424
425 result=eval(root);
426 stats.output+=LIST_SIZE(result);
427 CALC_TIME(t_eval);
428
429 if (i==last_file-1) print_newline=save_print_newline;
430 if ( !display_count && !no_output && (
431 LIST_SIZE(result)>0 || print_all ))
432 {
433 show_gc_list(result,&input_files[i],1);
434 }
435
436 /* We free result list,except when we got constant list
437 as result list */
438 if (gc_lists_now==stats.constant_lists+1)
439 {
440 free_gclist(result);
441 }
442 CALC_TIME(t_output);
443
444 #ifdef ASSERT
445 /*
446 * Now should only constant lists be left
447 */
448 assert(gc_lists_now==stats.constant_lists);
449 #endif
450 }
451 if ( display_count && !no_output )
452 {
453 printf("%d\n",stats.output);
454 }
455 fflush(stdout);
456
457 tps.acsearch=tps.parsing;
458 tps.acsearch.tms_utime+=t_pmatch.tms_utime;
459 tps.acsearch.tms_stime+=t_pmatch.tms_stime;
460 tps.eval=tps.acsearch;
461 tps.eval.tms_utime+=t_eval.tms_utime;
462 tps.eval.tms_stime+=t_eval.tms_stime;
463 tps.output=tps.eval;
464 tps.output.tms_utime+=t_output.tms_utime;
465 tps.output.tms_stime+=t_output.tms_stime;
466
467
468 }
469
470 #undef DEBUG
471 /*
472 * Runs sgrep in stream mode
473 */
run_stream(struct TREE_NODE * root,struct PHRASE_NODE * p_list)474 void run_stream(struct TREE_NODE *root, struct PHRASE_NODE *p_list)
475 {
476 struct GC_LIST *result;
477
478 /* Pattern matching on input files */
479 #ifdef DEBUG
480 fprintf(stderr,"Starting ACsearch\n");
481 #endif
482 ACsearch(p_list,input_files,last_file);
483 times(&tps.acsearch);
484
485 /* Evaluate the expression */
486 #ifdef DEBUG
487 fprintf(stderr,"Evaluating.\n");
488 #endif
489 result=eval(root);
490 #ifdef ASSERT
491 assert(gc_lists_now<=4);
492 #endif
493 times(&tps.eval);
494
495 /* Outputting result */
496 #ifdef DEBUG
497 fprintf(stderr,"Output result.\n");
498 #endif
499 fflush(stderr);
500
501 stats.output=LIST_SIZE(result);
502 /* Should we show the count of matching regions */
503 if ( display_count )
504 {
505 printf("%d\n",LIST_SIZE(result));
506 }
507 /* We show result list only if there wasn't -c option, and there was
508 something to output */
509 if ( !display_count && !no_output && (
510 stats.output>0 || print_all ))
511 show_gc_list(result,input_files,last_file);
512 fflush(stdout);
513 times(&tps.output);
514 }
515
516 /*
517 * Prints help
518 */
print_help()519 void print_help()
520 {
521 int i;
522
523 printf("Usage: sgrep <options> 'region expression' [<files...>]\n");
524 printf("If no files are given stdin is used instead.\n");
525 printf("\noptions are:\n");
526 for (i=0;options[i].opt!=0;i++)
527 {
528 printf("\t-%c %s\t%s\n",
529 options[i].opt,
530 (options[i].have_param==NULL) ?
531 (char *)"\t":
532 options[i].have_param,
533 options[i].what_does);
534 }
535 printf("\t--\t\tno more options\n");
536 printf("Options can also be specified with "ENV_OPTIONS" environment variable\n");
537 printf("\nCopyright (C) 1996 University of Helsinki. Use sgrep -C for details,\n\n");
538 exit(0);
539 }
540
541 /*
542 * Creates and initializes the constant lists, start end and chars.
543 * They may need to be modified later, because when scanning each
544 * file separately end point keeps changing
545 */
create_constant_lists()546 void create_constant_lists()
547 {
548 /* start list always is just (0,0) */
549 start_list=new_gclist();
550 add_region(start_list,0,0);
551
552 /* if in one by one mode, end lists region will be changed
553 to the file size being evaluated */
554 end_list=new_gclist();
555 add_region(end_list,stats.input_size,stats.input_size);
556
557 /* Chars list is optimized and created in a special way */
558 chars_list=new_gclist();
559 to_chars(chars_list,1);
560
561 stats.constant_lists+=3;
562 }
563
/*
 * Returns the parameter of an option given either as "-o<arg>" or as
 * "-o <arg>".
 *
 * argv - pointer to the caller's argument pointer; **argv is the word
 *        holding the option.  Advanced by one word when the parameter
 *        is the following word.
 * i    - the caller's word counter, incremented when argv is advanced
 * j    - position of the option letter in the word; set to the index
 *        of the last character of the word that holds the parameter,
 *        so that the caller moves on to the next word
 *
 * Prints an error and exits with status 2 when the parameter is missing.
 */
char *get_arg(char *(*argv[]),int *i,int *j)
{
	char *word=**argv;
	char *value;

	if (word[*j+1]!=0)
	{
		/* The parameter is the rest of this word */
		value=&word[*j+1];
		*j=strlen(word)-1;
	}
	else
	{
		/* The parameter is the next word, which has to exist */
		char *next=(*argv)[1];

		if (next==NULL)
		{
			fprintf(stderr,"-%c requires an argument\n",word[*j]);
			exit (2);
		}
		(*argv)++;
		(*i)++;
		value=next;
		*j=strlen(value)-1;
	}
#ifdef DEBUG
	fprintf(stderr,"Got argument %s\n",value);
#endif
	return value;
}
592
593 /*
594 * Adds a command to com_file_buf
595 */
add_command(char * com)596 void add_command(char *com)
597 {
598 if (COMBUF_SIZE-com_file_buf_used < (int)strlen(com)+2)
599 {
600 fprintf(stderr,"Expression too long (>%d)\n",COMBUF_SIZE);
601 exit(2);
602 }
603 strcpy(&com_file_buf[com_file_buf_used],com);
604 com_file_buf_used+=strlen(com);
605 }
606
607 /*
608 * Reads command file to command buffer
609 */
read_com_file(char * fname)610 void read_com_file(char *fname)
611 {
612 int i;
613 int size;
614 int r;
615
616 if (fname[0]=='-' && fname[1]==0)
617 {
618 /* Commands are coming from stdin */
619 if (stdin_read) {
620 fprintf(stderr,
621 "Stdin already read, Can't read expressions from stdin\n");
622 exit(2);
623 }
624 stdin_read=TRUE;
625 i=0;
626 }
627 else {
628 i=open(fname,O_RDONLY);
629 if (i==-1)
630 {
631 fprintf(stderr,"Command file %s : %s\n",
632 fname,strerror(errno));
633 exit(2);
634 }
635 }
636 size=0;
637
638 /*
639 * When reading from file this loop is done only once
640 * When reading from pipe (file descriptor==i==0 )
641 * this loop is done as long as there is input coming
642 */
643 do {
644 r=read(i,&com_file_buf[com_file_buf_used+size],
645 COMBUF_SIZE-com_file_buf_used-size);
646 if ( r==-1 )
647 {
648 perror("Read command file");
649 exit(2);
650 }
651 if ( r==0 && ( i!=0 || (i==0 && size==0) ) )
652 {
653 fprintf(stderr,"Empty command file %s\n",fname);
654 exit(2);
655 }
656 size+=r;
657 } while ( i==0 && r!=0 );
658
659 if ( size-2>COMBUF_SIZE-com_file_buf_used)
660 {
661 fprintf(stderr,"Expression too long (>%d)\n",COMBUF_SIZE);
662 exit(2);
663 }
664 com_file_buf_used+=size;
665 command_file_given=TRUE;
666 if (i!=0) close(i);
667 }
668
669 /*
670 * Reads the expression commands to com_file_buf
671 */
read_expressions()672 void read_expressions()
673 {
674 int i;
675
676 i= (read_sgreprc) ? 0:1;
677 #ifdef ASSERT
678 assert(exprs>0);
679 #endif
680 while (i<exprs)
681 {
682 switch(expr_table[i].type){
683 case E_FILE:
684 read_com_file(expr_table[i].expr);
685 break;
686 case E_TEXT:
687 add_command(expr_table[i].expr);
688 break;
689 default:
690 fprintf(stderr,"Strange expression type\n");
691 exit(3);
692 break;
693 }
694 /* If there wasn't nl between command expressions we add one */
695 if (com_file_buf_used>0)
696 if (com_file_buf[com_file_buf_used-1]!='\n')
697 com_file_buf[com_file_buf_used++]='\n';
698 i++;
699 }
700 com_file_buf[com_file_buf_used]=0;
701 }
702
703 /*
704 * Reads output style from file
705 */
read_style_file(char * fname)706 void read_style_file(char *fname)
707 {
708 int fd;
709 int l,r;
710
711 fd=open(fname,O_RDONLY);
712 if (fd==-1)
713 {
714 fprintf(stderr,"open style file %s : %s\n",fname,strerror(errno));
715 exit(2);
716 }
717 l=lseek(fd,0,SEEK_END);
718 if (l==-1)
719 {
720 fprintf(stderr,"lseek style file %s : %s\n",fname,strerror(errno));
721 exit(2);
722 }
723 lseek(fd,0,SEEK_SET);
724 output_style=(char *)e_malloc(l+1);
725 r=read(fd,output_style,l);
726 if (r==-1)
727 {
728 fprintf(stderr,"read style file %s : %s\n",fname,strerror(errno));
729 exit(2);
730 }
731 if (r==0)
732 {
733 fprintf(stderr,"Empty style file %s\n",fname);
734 exit(2);
735 }
736 output_style[r]=0;
737 close(fd);
738 }
739
740 /*
741 * Checks the command line options
742 */
get_options(char * argv[])743 int get_options(char *argv[])
744 {
745 int o,i,j;
746
747 i=1;
748 j=1;
749
750 while ( *argv!=NULL && *argv[0]=='-' )
751 {
752 /* option -- means no more options */
753 if (strcmp(*argv,"--")==0) return i+1;
754 o=0;
755 while (options[o].opt!=0)
756 {
757 if (options[o].opt==(*argv)[j]) break;
758 o++;
759 }
760 switch((*argv)[j])
761 {
762 case 'h':
763 print_help();
764 break;
765 case 'V':
766 printf("sgrep version %s compiled at %s\n",
767 VERSION,__DATE__);
768 exit(0);
769 break;
770 case 'T':
771 have_stats=TRUE;
772 break;
773 case 't':
774 have_times=TRUE;
775 break;
776 case 'a':
777 print_all=TRUE;
778 break;
779 case 'i':
780 ignore_case=TRUE;
781 break;
782 case 'l':
783 output_style=LONG_OUTPUT;
784 do_concat=FALSE;
785 break;
786 case 's':
787 output_style=SHORT_OUTPUT;
788 do_concat=TRUE;
789 break;
790 case 'o':
791 output_style=get_arg(&argv,&i,&j);
792 do_concat=FALSE;
793 break;
794 case 'c':
795 display_count=TRUE;
796 do_concat=FALSE;
797 no_output=FALSE;
798 break;
799 case 'd':
800 do_concat=FALSE;
801 break;
802 case 'N':
803 print_newline=FALSE;
804 break;
805 case 'C':
806 copyright_notice();
807 exit(0);
808 break;
809 case 'f':
810 if (exprs==MAX_EXPRESSIONS)
811 {
812 fprintf(stderr,
813 "too many expressions. (-e and -f options more than %d)\n",
814 MAX_EXPRESSIONS);
815 exit(2);
816 }
817 expr_table[exprs].expr=get_arg(&argv,&i,&j);
818 expr_table[exprs++].type=E_FILE;
819 command_file_given=TRUE;
820 break;
821 case 'e':
822 if (exprs==MAX_EXPRESSIONS)
823 {
824 fprintf(stderr,
825 "too many expressions. (-e and -f options more than %d)\n",
826 MAX_EXPRESSIONS);
827 exit(2);
828 }
829 expr_table[exprs].expr=get_arg(&argv,&i,&j);
830 expr_table[exprs++].type=E_TEXT;
831 command_file_given=TRUE;
832 break;
833 case 'p':
834 preprocessor=get_arg(&argv,&i,&j);
835 break;
836 case 'n':
837 read_sgreprc=FALSE;
838 break;
839 case 'O':
840 read_style_file(get_arg(&argv,&i,&j));
841 break;
842 case 'P':
843 show_expr=TRUE;
844 break;
845 #ifdef PROGRESS_REPORTS
846 case 'D':
847 progress_output=TRUE;
848 break;
849 #endif
850 case 'S':
851 stream_mode=TRUE;
852 break;
853
854 case 'q':
855 no_output=TRUE;
856 break;
857
858 /* case '�':
859 fprintf(stderr,"Option not implemented yet.\n");
860 exit(2);
861 break; */
862 default:
863 fprintf(stderr,"Illegal option -%c\n",(*argv)[j]);
864 return -1;
865 break;
866 }
867 if ((*argv)[++j]==0)
868 {
869 argv++;
870 i++;
871 j=1;
872 }
873 }
874 return i;
875 }
876
877 /*
878 * Clears the stats struct which we use for gathering statistical information
879 */
clear_stats()880 void clear_stats()
881 {
882 /* Everything is zero. At least so far */
883 memset(&stats,0,sizeof(stats));
884 }
885
886 /*
887 * Shows the statistics ( from stats struct )
888 */
show_stats()889 void show_stats()
890 {
891 fprintf(stderr,
892 "Scanned %d files, having total of %dK size finding %d phrases.\n",
893 last_file,
894 (input_files[last_file-1].start+
895 input_files[last_file-1].length)/1024,
896 stats.phrases);
897 fprintf(stderr,"Operator tree size was %d, optimized %d\n",
898 stats.tree_size,stats.tree_size-stats.opt_nodes);
899 fprintf(stderr,"Output list size was %d regions.\n",stats.output);
900 fprintf(stderr,
901 "Operations:\n%15s:%-4d%6s:%-4d%5s:%-4d%5s:%-4d%11s:%-4d%3s:%-4d\n",
902 "containing",stats.containing,
903 "in",stats.in,
904 "order",stats.order,
905 "or",stats.or,
906 "extracting",stats.extracting,
907 "quote",stats.quote);
908 fprintf(stderr,"%15s:%-4d%6s:%-4d%5s:%-4d%5s:%-4d%11s:%-4d%4s:%-4d\n",
909 "not containing",stats.not_containing,
910 "not in",stats.not_in,
911 "inner",stats.inner,
912 "outer",stats.outer,
913 "concat",stats.concat,
914 "join",stats.join);
915 fprintf(stderr,"%15s:%-4d%6s:%-4d\n",
916 "equal",stats.equal,
917 "not equal",stats.not_equal);
918 fprintf(stderr,"Memory:\n %dK memory allocated, %d realloc operations\n",
919 stats.e_mallocs/1024,stats.reallocs);
920 fprintf(stderr," %d gc lists, %d gc lists allocated\n",
921 stats.gc_lists,stats.gc_lists_allocated);
922 fprintf(stderr," %d gc blocks used, %d gc blocks allocated.\n",
923 stats.gc_nodes,stats.gc_nodes_allocated);
924 fprintf(stderr," Longest list size was %d regions.\n",
925 stats.longest_list);
926 fprintf(stderr,
927 " %dK nest stack size, %dK inner tablesize\n",
928 stats.nest_stacksize/1024,
929 stats.inner_tablesize/1024);
930 fprintf(stderr,
931 #ifdef REMOVE_DUPLICATES
932 "Things done:\n %d %s, %d %s, %d %s\n %d %s, %d %s, %d %s\n",
933 #else
934 "Things done:\n %d %s, %d %s, %d %s\n %d %s, %d %s\n",
935 #endif
936 stats.regions,"regions created",
937 stats.scans,"gc lists scanned",
938 stats.scanned_regions,"regions scanned",
939 stats.sorts_by_start,"sorts by start point",
940 stats.sorts_by_end,"sorts by end point"
941 #ifdef REMOVE_DUPLICATES
942 ,stats.remove_duplicates,"remove duplicates"
943 #endif
944 );
945 #ifdef OPTIMIZE_SORTS
946 fprintf(stderr," %d sorts optimized\n",stats.sorts_optimized);
947 #endif
948 if (stats.skipped_phrases)
949 {
950 fprintf(stderr," %d same phrases\n",stats.skipped_phrases);
951 }
952 }
953
954 /*
955 * Checks that files which are given in the command line really exist.
956 * If open_failure==true nonexistent files are considered fatal.
957 * Creates input_file list, skipping zero length files
958 */
check_files(int argc,char * argv[],int optind)959 void check_files(int argc, char *argv[], int optind)
960 {
961 int fd,ls,r=0;
962 int pos;
963 char buf[1];
964
965 input_files=(struct INPUT_FILE *)
966 e_malloc( sizeof(struct INPUT_FILE) * (argc-optind) );
967 last_file=0;
968 pos=0;
969 ls=0;
970 while (optind<argc)
971 {
972 if (strcmp(argv[optind],"-")==0)
973 {
974 optind++;
975 /* We try to read stdin */
976 pos+=read_stdin();
977 continue;
978 }
979 #ifdef DEBUG
980 fprintf(stderr,"checking file %s\n",argv[optind]);
981 #endif
982 /* We do sgrep only on files which we can open,read, lseek
983 and which are not empty */
984 fd=open(argv[optind],O_RDONLY);
985 if (fd!=-1) r=read(fd,buf,1);
986 if (fd!=-1 && r!=-1 ) ls=lseek(fd,0,SEEK_END);
987 if (fd==-1 || ls==-1 || r==-1 )
988 {
989 fprintf(stderr,"sgrep: %s: %s\n",argv[optind],strerror(errno));
990 if (open_failure) exit(2);
991 } else if (ls>0)
992 {
993 input_files[last_file].start=pos;
994 input_files[last_file].length=ls;
995 input_files[last_file].name=argv[optind];
996 pos+=ls;
997 last_file++;
998 }
999 close(fd);
1000 optind++;
1001 }
1002 if (last_file==0)
1003 {
1004 fprintf(stderr,"No valid files\n");
1005 exit(2);
1006 }
1007 }
1008
/*
 * Converts the difference of two clock tick values to seconds.
 *
 * b - tick count at the beginning
 * e - tick count at the end
 *
 * The number of ticks per second is asked from sysconf() on the first
 * call and remembered.  If it is not available the result is 0.
 */
float calc_time(clock_t b,clock_t e)
{
	static long ticks_per_second=0;
	float seconds;

	if (!ticks_per_second)
		ticks_per_second=sysconf(_SC_CLK_TCK);

	if (ticks_per_second>=0)
	{
		seconds=(float)(e-b)/(float)ticks_per_second;
		return seconds;
	}
	return 0;
}
1022
/*
 * Prints one line of the time report to stderr: the label followed by
 * the user time, the system time and their sum, in seconds, spent
 * between the time points b and e.
 */
void print_time(char *label,struct tms *b,struct tms *e)
{
	float usr=calc_time(b->tms_utime,e->tms_utime);
	float sys=calc_time(b->tms_stime,e->tms_stime);

	fprintf(stderr," %-18s%6.2fs %6.2fs %6.2fs\n",label,usr,sys,usr+sys);
}
1034
1035 /*
1036 * Prints information about time used to stderr
1037 */
show_times()1038 void show_times()
1039 {
1040 fprintf(stderr,"%-18s%8s%8s%8s\n",
1041 "sgrep time usage","usr","sys","total");
1042 print_time("parsing",&tps.start,&tps.parsing);
1043 print_time("acsearch",&tps.parsing,&tps.acsearch);
1044 print_time("evaluating",&tps.acsearch,&tps.eval);
1045 print_time("output",&tps.eval,&tps.output);
1046 fprintf(stderr," -----------------------------------------\n");
1047 print_time("total",&tps.start,&tps.output);
1048 if (tps.output.tms_cutime>0)
1049 {
1050 fprintf(stderr,"\n");
1051 print_time("preprocessor",
1052 (struct tms *)&tps.start.tms_cutime,
1053 (struct tms *)&tps.output.tms_cutime);
1054 }
1055
1056 }
1057
1058 /*
1059 * Reads stdin to a temp file. Leaves temp file open and stdin_fd pointing
1060 * to it. File name will be NULL. Unlinks temp file, so that it will be
1061 * removed when program exits.
1062 * returns size of input file read
1063 */
read_stdin()1064 int read_stdin()
1065 {
1066 char buf[4096];
1067 char *temp_file;
1068 int r,w;
1069 static int length=-1;
1070
1071 if (length==0)
1072 return 0; /* If stdin was already read, and was empty */
1073
1074 if ( input_files==NULL )
1075 {
1076 /*
1077 * If input_files hasn't been malloced, there is none,
1078 * and we read only from stdin
1079 */
1080 input_files= (struct INPUT_FILE *)
1081 e_malloc(sizeof(*input_files));
1082 last_file=0;
1083 }
1084 if (last_file==0)
1085 {
1086 input_files[0].start=0;
1087 } else
1088 {
1089 input_files[last_file].start=
1090 input_files[last_file-1].start+
1091 input_files[last_file-1].length;
1092 }
1093 input_files[last_file].name=NULL;
1094 if (length>0)
1095 {
1096 /* We have already read stdin, so we just return what
1097 we already know */
1098 input_files[last_file++].length=length;
1099 return length;
1100 }
1101
1102 if (stdin_read) {
1103 /*
1104 * Somebody had already used stdin for something
1105 */
1106 fprintf(stderr,
1107 "Can't read input from stdin, it's already used\n");
1108 exit(2);
1109 }
1110 /* We read stdin to temporary file */
1111 temp_file=tmpnam(NULL);
1112 stdin_fd=open(temp_file,O_RDWR | O_CREAT);
1113 if (unlink(temp_file)==-1)
1114 {
1115 perror("sgrep warning: unlinking temp file failed");
1116 }
1117 if (stdin_fd==-1)
1118 {
1119 perror("creating tempfile: open");
1120 exit(2);
1121 }
1122 length=0;
1123 while ( (r=read(0,buf,4096))!=0 )
1124 {
1125 if (r==-1)
1126 {
1127 perror("read stdin");
1128 exit(2);
1129 }
1130 length+=r;
1131 w=write(stdin_fd,buf,r);
1132 if (w==-1)
1133 {
1134 perror("write tempfile");
1135 exit(2);
1136 }
1137 if (w!=r)
1138 {
1139 fprintf(stderr,"Short write to tempfile\n");
1140 exit(2);
1141 }
1142 }
1143 if (length>0)
1144 {
1145 return (input_files[last_file++].length=length);
1146 }
1147 return 0;
1148 }
1149
1150 /*
1151 * Reads the options from environ variable ENV_OPTIONS
1152 */
environ_options()1153 int environ_options()
1154 {
1155 char *av[100];
1156 int i=0;
1157 int j=0;
1158 char *o;
1159
1160 if (getenv(ENV_OPTIONS)==NULL) return 0;
1161
1162 o=(char *)e_malloc(strlen(getenv(ENV_OPTIONS)+1));
1163 strcpy(o,getenv(ENV_OPTIONS));
1164
1165 do {
1166 while( o[i]==' ' )
1167 {
1168 o[i++]=0;
1169 }
1170 if (!o[i]) break;
1171 av[j++]=&o[i];
1172 if (j==100)
1173 {
1174 fprintf(stderr,"Too complex "ENV_OPTIONS"\n");
1175 exit(2);
1176 }
1177 while( o[i]!=' ' && o[i]!=0 ) i++;
1178 } while (o[i]);
1179 av[j]=NULL;
1180
1181 #ifdef DEBUG
1182 fprintf(stderr,"Environment options: ");
1183 for (i=0;av[i]!=NULL;i++)
1184 {
1185 fprintf(stderr,"'%s' ",av[i]);
1186 }
1187 fprintf(stderr,"\n");
1188 #endif
1189 i=get_options(av);
1190 if (i==-1)
1191 {
1192 fprintf(stderr,"Invalid "ENV_OPTIONS" ("ENV_OPTIONS"=%s)\n",getenv(ENV_OPTIONS));
1193 return -1;
1194 }
1195 if (i<=j)
1196 {
1197 fprintf(stderr,"No files or expressions allowed in "ENV_OPTIONS"\n");
1198 return -1;
1199 }
1200 return 0;
1201 }
1202
1203 /*
1204 * Displays the copyright notice.
1205 */
copyright_notice()1206 void copyright_notice()
1207 {
1208 int i;
1209
1210 for (i=0;copyright_text[i]!=NULL;i++)
1211 {
1212 printf("\t%s\n",copyright_text[i]);
1213 }
1214 }
1215