/*
	System: Structured text retrieval tool sgrep.
	Module: main.c
	Author: Pekka Kilpeläinen & Jani Jaakkola
	Description: Parsing of command line options,
		     Reading command files
		     Scanning for input files
		     Calling other modules for preprocessing, parsing,
		     	pattern matching, evaluation and output
	Version history: Original version February 1995 by JJ & PK
	Copyright: University of Helsinki, Dept. of Computer Science
		   Distributed under GNU General Public License
		   See file COPYING for details
*/
15 
16 #include <stdio.h>
17 #include <stdlib.h>
18 #include <unistd.h>
19 #include <errno.h>
20 #include <fcntl.h>
21 #include <string.h>
22 #include <sys/times.h>
23 #include "defines.h"
24 
/*
 * Forward declarations of functions defined later in this file, so that
 * they can be called before their definitions appear.
 */
void check_files(int ,char *[],int);
void clear_stats();
void show_stats();
void show_times();
int get_options(char *[]);
void add_command(char *);
int read_stdin();
void read_expressions();
int environ_options();
void run_stream(struct TREE_NODE *, struct PHRASE_NODE *p_list);
void run_one_by_one(struct TREE_NODE *, struct PHRASE_NODE *p_list);
void create_constant_lists();
void copyright_notice();
38 
/*
 * The global variables common to all modules. See declarations in defines.h
 */
char *output_style=SHORT_OUTPUT; /* default is short; changed by -l, -s, -o and -O */
int open_failure=OPEN_FAILURE;	/* nonzero: check_files() exits on an unusable file */
struct STATS stats;		/* statistics counters, zeroed by clear_stats() */
int print_newline=TRUE;		/* cleared by -N */
int stdin_fd=-1;	/* not opened yet; read_stdin() points it to its temp file */
int print_all=FALSE;		/* set by -a */
int gc_lists_now=0;
struct GC_LIST *end_list=NULL;	/* the three constant lists are built by */
struct GC_LIST *start_list=NULL;	/* create_constant_lists() */
struct GC_LIST *chars_list=NULL;
int ignore_case=FALSE;		/* set by -i */
/* Compile time default of the evaluation mode; -S turns stream mode on */
#ifdef STREAM_MODE
 int stream_mode=TRUE;
#else
 int stream_mode=FALSE;
#endif
#ifdef PROGRESS_REPORTS
int progress_output=FALSE;	/* set by -D */
#endif
61 
/*
 * Global variables used inside main.c. These are mainly used for storing
 * information about given options
 */
int have_stats=FALSE;	/* Should we show statistics in the end (-T) */
int have_times=FALSE;   /* Should we show info about used time in the end (-t) */
int do_concat=TRUE;	/* Shall we do concat operation on result list (-d) */
int display_count=FALSE;/* Should we display only count of matching regions (-c) */
int no_output=FALSE;	/* Should we suppress normal output (-q) */
int command_file_given=FALSE; /* If a command file name was given with -f
				option (or an expression with -e) this is
				set, and no commands are read from command
				line anymore */
int show_expr=FALSE;		/* only show expression, don't execute it (-P) */
char *preprocessor=PRE_PROCESSOR; /* Which preprocessor to use (-p) */
int read_sgreprc; 		/* are we going to read sgreprc (cleared by -n) */

char com_buf[COMBUF_SIZE]; 	/* preprocessed command buffer */
char com_file_buf[COMBUF_SIZE]; /* not preprocessed command file buffer */
int com_buf_size; 		/* How much it is actually used */
int com_file_buf_used;		/* How many bytes of com_file_buf are in use */
char *home_file; 		/* pointer to whole path of $HOME/USER_SGREPRC */
struct INPUT_FILE *input_files=NULL; /* Table of input files */
int last_file=0;                 /* Number of entries in use in input_files */
int stdin_read=FALSE;		/* Since expressions and files can both
				   be read from stdin, we have to make
				   sure that stdin is read only once */
/*
 * One entry of the list of expression sources to be executed: either the
 * name of a command file or an expression string.
 * Slot 0 of expr_table is reserved by main() for the sgreprc file;
 * -e and -f options append entries after it.
 */
struct EXPR_TYPE {
	int type; 		/* E_FILE or E_TEXT (see enum EXPR_TYPES) */
	char *expr; 		/* Pointer to either filename or expression */
} expr_table[MAX_EXPRESSIONS];
int exprs;			/* How many entries of expr_table are in use */
enum EXPR_TYPES { E_FILE,E_TEXT };
97 
/*
 * Process times (as returned by times()) recorded at the end of each
 * processing phase. show_times() reports the differences between
 * consecutive points.
 */
struct time_points {
	struct tms start;	/* taken at the beginning of main() */
	struct tms parsing;	/* after parsing and optimizing the expression */
	struct tms acsearch;	/* after pattern matching */
	struct tms eval;	/* after evaluating the expression */
	struct tms output;	/* after printing the results */
} tps;
108 
/*
 * The copyright notice text, one output line per entry, terminated by
 * NULL. Printed by copyright_notice().
 * NOTE(review): the first author's name contains a non-ASCII character
 * that looks mis-encoded in this copy of the file - confirm the intended
 * source encoding before touching the string.
 */
char *copyright_text[]={
	"sgrep version "VERSION" - search a file for structured pattern",
	"Copyright (C) 1996  University of Helsinki",
	"",
	"This program is free software; you can redistribute it and/or modify",
	"it under the terms of the GNU General Public License as published by",
	"the Free Software Foundation; either version 2 of the License, or",
	"(at your option) any later version.",
	"",
	"This program is distributed in the hope that it will be useful,",
	"but WITHOUT ANY WARRANTY; without even the implied warranty of",
	"MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the",
	"GNU General Public License for more details.",
	"",
	"You should have received a copy of the GNU General Public License",
	"along with this program; if not, write to the Free Software",
	"Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.",
	"",
	"Authors: Pekka Kilpel�inen       Pekka.Kilpelainen@cc.helsinki.fi",
	"         Jani Jaakkola           Jani.Jaakkola@cc.helsinki.fi",
	NULL
};
134 
/*
 * Description of one command line option, used for the usage message
 * and the help text.
 */
struct opt_data {
	char opt;		/* option letter; 0 terminates the table */
	char *have_param;	/* name of the parameter, NULL if option takes none */
	char *what_does;	/* one line description shown by print_help() */
};
143 
/*
 * List & description of options
 * If you add more options, add the descriptions here. Put the implementation
 * of option in get_options()
 * The table is terminated by an entry whose opt is 0. Options without a
 * parameter come first; the usage message in main() prints them as one
 * cluster and the parameterized ones separately.
 */
struct opt_data options[]= {
	{ 'a',NULL,"act as a filter" },
	{ 'C',NULL,"display copyright notice" },
	{ 'c',NULL,"display only count of matching regions" },
#ifdef PROGRESS_REPORTS
	{ 'D',NULL,"verbose, show progress" },
#endif
	{ 'd',NULL,"don't do concat on result list"},
	{ 'h',NULL,"help (means this text)" },
	{ 'i',NULL,"ignore case distinctions in phrases" },
	{ 'l',NULL,"long output format" },
	{ 'N',NULL,"don't add trailing newline" },
	{ 'n',NULL,"don't read $HOME/"USER_SGREPRC" or "SYSTEM_SGREPRC},
	{ 'P',NULL,"show preprocessed expression, don't execute it." },
	{ 'q',NULL,"supress normal output" },
	{ 'S',NULL,"stream mode (regions extend across files)"},
	{ 's',NULL,"short output format" },
	{ 'T',NULL,"show statistics about what was done" },
	{ 't',NULL,"show information about time spent"},
	{ 'V',NULL,"display version information" },
	{ 'e',"<expression>","execute expression (after preprocessing)" },
	{ 'f',"<file>","reads commands from file" },
	{ 'O',"<file>","reads output style from file"},
	{ 'o',"<style>","set output style. See man page for details"},
#ifdef USE_EXEC
	{ 'p',"<program>","preprocess expression using external preprocessor" },
#endif
	{ 0,NULL,NULL }
};
178 
main(int argc,char * argv[])179 int main(int argc, char *argv[])
180 {
181 	struct TREE_NODE concat;
182 	struct TREE_NODE *root;
183 	struct PHRASE_NODE *p_list;
184 	int end_options;
185 	char *hp;
186 
187 	/* Initialize the statistics gathering struct */
188 	clear_stats();
189 	times(&tps.start);
190 
191 	com_buf_size=0;
192 	com_file_buf_used=0;
193 
194 	/*
195 	 * If we are going to use either $HOME/sgreprc or system/sgreprc,
196 	 *  we must check if we can read those files
197 	 */
198 	read_sgreprc=FALSE;
199 	hp=getenv("HOME");
200 	if (hp!=NULL)
201 	{
202 		home_file=(char *)e_malloc(strlen(hp)+strlen(USER_SGREPRC)+1);
203 		strcpy(home_file,hp);
204 		strcat(home_file,"/");
205 		strcat(home_file,USER_SGREPRC);
206 		if (access(home_file,R_OK)==0)
207 		{
208 			expr_table[0].type=E_FILE;
209 			expr_table[0].expr=home_file;
210 			read_sgreprc=TRUE;
211 		}
212 	}
213 	if (!read_sgreprc && access(SYSTEM_SGREPRC,R_OK)==0)
214 	{
215 		expr_table[0].type=E_FILE;
216 		expr_table[0].expr=SYSTEM_SGREPRC;
217 		read_sgreprc=TRUE;
218 	}
219 	exprs=1;
220 
221 	/*
222 	 * Process environment options
223 	 */
224 	end_options=environ_options();
225 
226 	/*
227 	 * Get the command line options
228 	 */
229 	if (end_options!=-1 )
230 		end_options=get_options(argv+1);
231 	if ( end_options!=-1 && !command_file_given && argv[end_options]==NULL )
232 	{
233 		/* we need some expression to process */
234 		fprintf(stderr,"You have to give an expression line if you don't use -f or -e switch.\n");
235 		end_options=-1;
236 	}
237 	if (end_options==-1)
238 	{
239 		/* There was error. Let's print usage information */
240 		struct opt_data *o=options;
241 		fprintf(stderr,"Usage: sgrep [ -");
242 		while (o->opt!=0)
243 		{
244 			if (o->have_param!=NULL)
245 			{
246 				fprintf(stderr," -%c %s",
247 					o->opt,o->have_param);
248 	         		} else fprintf(stderr,"%c",o->opt);
249 			o++;
250 		}
251 		fprintf(stderr," ] \'expr\' [<files...>]\n");
252 		fprintf(stderr,"sgrep -h for help\n");
253 		exit(2);
254 	}
255 
256 	/*
257 	 * Shall we get expression from command line
258 	 */
259 	if (!command_file_given)
260 	{
261 		expr_table[exprs].type=E_TEXT;
262 		expr_table[exprs++].expr=(argv[end_options]);
263 		end_options++;
264 	}
265 
266 	/*
267 	 * Reading all expressions to buffer
268 	 */
269 	read_expressions();
270 
271 	/*
272 	 * Invoking preprocessor (external because there is no internal yet)
273 	 */
274 #ifdef DEBUG
275 	fprintf(stderr,"Preprocessing expression.\n");
276 #endif
277 	preprocess(com_file_buf,com_buf,preprocessor,COMBUF_SIZE);
278 
279 	/*
280 	 * If we have show_expr then we show preprocessed expression, and
281 	 * stop here
282 	 */
283 	if (show_expr)
284 	{
285 		fprintf(stderr,"%s\n",com_buf);
286 		exit(0);
287 	}
288 
289 	/*
290 	 * Should we read stdin to temp file
291 	 */
292 	if (argv[end_options]==NULL)
293 	{
294 #ifdef DEBUG
295 		fprintf(stderr,"Reading stdin.\n");
296 #endif
297 		if (read_stdin()==0)
298 		{
299 			fprintf(stderr,"Empty stdin\n");
300 			exit(2);
301 		}
302 	} else
303 	/*
304 	 * If stdin is not used, we check every input file
305 	 */
306 	{
307 #ifdef DEBUG
308 			fprintf(stderr,"Scanning through files.");
309 #endif
310 		check_files(argc,argv,end_options);
311 	}
312 
313 	/*
314 	 * Counting the input size
315 	 */
316 	stats.input_size=input_files[last_file-1].start+
317 		input_files[last_file-1].length-1;
318 
319 	/*
320 	 * Creating constant lists
321 	 */
322 	create_constant_lists();
323 
324 	/*
325 	 * Invoking parser
326 	 */
327 #ifdef DEBUG
328 	fprintf(stderr,"Starting parser.\n");
329 #endif
330 	root=parse_string(com_buf,&p_list);
331 
332 	/*
333 	 * Optimize the operator tree
334 	 */
335 #ifdef DEBUG
336 	fprintf(stderr,"Optimizing operator tree\n");
337 #endif
338 	optimize_tree(&root,&p_list);
339 
340 	if (do_concat)
341 	{
342 	/* If we do concat on result list, we have to add it to parse tree */
343 		concat.oper=CONCAT;
344 		concat.left=root;
345 		concat.right=NULL;
346 		concat.leaf=NULL;
347 		concat.parent=NULL;
348 		concat.refcount=1;
349 		concat.GC_list=NULL;
350 		root=&concat;
351 	};
352 
353 	times(&tps.parsing);
354 
355 	/*
356 	 * Evaluation style depends on stream_mode
357 	 */
358 	if (stream_mode)
359 		run_stream(root,p_list);
360 	else
361 		run_one_by_one(root,p_list);
362 
363 	/*
364 	 * Should we show statistics
365 	 */
366 	if (have_stats) show_stats();
367 
368 	/*
369 	 * Should we show information about time spend
370 	 */
371 	if (have_times) show_times();
372 
373 	if (stats.output==0)
374 		return 1; /* Empty result list */
375 	/* non empty result list */
376 	return 0;
377 }
378 
379 /*
380  * Runs sgrep file by file
381  */
run_one_by_one(struct TREE_NODE * root,struct PHRASE_NODE * p_list)382 void run_one_by_one(struct TREE_NODE *root, struct PHRASE_NODE *p_list)
383 {
384 	struct GC_LIST *result;
385 	int i;
386 	int save_print_newline;
387 	struct tms t_pmatch,t_eval,t_output,t_last,t_now;
388 
389 #define CALC_TIME(TIME)	do { \
390 	times(&t_now);  \
391 	(TIME).tms_utime+=t_now.tms_utime-t_last.tms_utime; \
392 	(TIME).tms_stime+=t_now.tms_stime-t_last.tms_stime; \
393 	t_last=t_now; } while (0)
394 
395 	t_last=tps.parsing;
396 	t_pmatch.tms_utime=0;
397 	t_pmatch.tms_stime=0;
398 	t_eval=t_pmatch;
399 	t_output=t_pmatch;
400 
401 #ifdef DEBUG
402 	fprintf(stderr,"one by one: input_files=%d\n",last_file);
403 #endif
404 	save_print_newline=print_newline;
405 	print_newline=FALSE;
406 
407 	for (i=0;i<last_file;i++)
408 	{
409 #ifdef DEBUG
410 		fprintf(stderr,"file #%d:%s\n",i,input_files[i].name);
411 #endif
412 		/* We got to clear root nodes gc list so that eval won't think
413 		   that it's already evaluated */
414 		root->GC_list=NULL;
415 
416 		/* end is now the size of file now being evaluated */
417 		end_list->first->list[0].start=input_files[i].length-1;
418 		end_list->first->list[0].end=input_files[i].length-1;
419 		/* chars list size is the size of file being evaluates */
420 		chars_list->length=input_files[i].length;
421 
422 		ACsearch(p_list,&input_files[i],1);
423 		CALC_TIME(t_pmatch);
424 
425 		result=eval(root);
426 		stats.output+=LIST_SIZE(result);
427 		CALC_TIME(t_eval);
428 
429 		if (i==last_file-1) print_newline=save_print_newline;
430 		if ( !display_count && !no_output && (
431 			LIST_SIZE(result)>0 || print_all ))
432 		{
433 			show_gc_list(result,&input_files[i],1);
434 		}
435 
436 		/* We free result list,except when we got constant list
437 		   as result list */
438 		if (gc_lists_now==stats.constant_lists+1)
439 		{
440 			free_gclist(result);
441 		}
442 		CALC_TIME(t_output);
443 
444 #ifdef ASSERT
445 		/*
446 		 * Now should only constant lists be left
447 		 */
448 		assert(gc_lists_now==stats.constant_lists);
449 #endif
450 	}
451 	if ( display_count && !no_output )
452 	{
453 		printf("%d\n",stats.output);
454 	}
455 	fflush(stdout);
456 
457 	tps.acsearch=tps.parsing;
458 	tps.acsearch.tms_utime+=t_pmatch.tms_utime;
459 	tps.acsearch.tms_stime+=t_pmatch.tms_stime;
460 	tps.eval=tps.acsearch;
461 	tps.eval.tms_utime+=t_eval.tms_utime;
462 	tps.eval.tms_stime+=t_eval.tms_stime;
463 	tps.output=tps.eval;
464 	tps.output.tms_utime+=t_output.tms_utime;
465 	tps.output.tms_stime+=t_output.tms_stime;
466 
467 
468 }
469 
470 #undef DEBUG
471 /*
472  * Runs sgrep in stream mode
473  */
run_stream(struct TREE_NODE * root,struct PHRASE_NODE * p_list)474 void run_stream(struct TREE_NODE *root, struct PHRASE_NODE *p_list)
475 {
476 	struct GC_LIST *result;
477 
478 	/* Pattern matching on input files */
479 #ifdef DEBUG
480 	fprintf(stderr,"Starting ACsearch\n");
481 #endif
482 	ACsearch(p_list,input_files,last_file);
483 	times(&tps.acsearch);
484 
485 	/* Evaluate the expression */
486 #ifdef DEBUG
487 	fprintf(stderr,"Evaluating.\n");
488 #endif
489 	result=eval(root);
490 #ifdef ASSERT
491 	assert(gc_lists_now<=4);
492 #endif
493 	times(&tps.eval);
494 
495 	/* Outputting result */
496 #ifdef DEBUG
497 	fprintf(stderr,"Output result.\n");
498 #endif
499 	fflush(stderr);
500 
501 	stats.output=LIST_SIZE(result);
502 	/* Should we show the count of matching regions */
503 	if ( display_count )
504 	{
505 		printf("%d\n",LIST_SIZE(result));
506 	}
507 	/* We show result list only if there wasn't -c option, and there was
508 	   something to output */
509 	if ( !display_count && !no_output && (
510 			stats.output>0 || print_all ))
511 		show_gc_list(result,input_files,last_file);
512 	fflush(stdout);
513 	times(&tps.output);
514 }
515 
516 /*
517  * Prints help
518  */
print_help()519 void print_help()
520 {
521 	int i;
522 
523 	printf("Usage: sgrep <options> 'region expression' [<files...>]\n");
524 	printf("If no files are given stdin is used instead.\n");
525 	printf("\noptions are:\n");
526 	for (i=0;options[i].opt!=0;i++)
527 	{
528 		printf("\t-%c %s\t%s\n",
529 			options[i].opt,
530 			(options[i].have_param==NULL) ?
531 				(char *)"\t":
532 				options[i].have_param,
533 			options[i].what_does);
534 	}
535 	printf("\t--\t\tno more options\n");
536 	printf("Options can also be specified with "ENV_OPTIONS" environment variable\n");
537 	printf("\nCopyright (C) 1996 University of Helsinki. Use sgrep -C for details,\n\n");
538 	exit(0);
539 }
540 
541 /*
542  * Creates and initializes the constant lists, start end and chars.
543  * They may need to be modified later, because when scanning each
544  * file separately end point keeps changing
545  */
create_constant_lists()546 void create_constant_lists()
547 {
548 	/* start list always is just (0,0) */
549 	start_list=new_gclist();
550 	add_region(start_list,0,0);
551 
552 	/* if in one by one mode, end lists region will be changed
553 	   to the file size being evaluated */
554 	end_list=new_gclist();
555 	add_region(end_list,stats.input_size,stats.input_size);
556 
557 	/* Chars list is optimized and created in a special way */
558 	chars_list=new_gclist();
559 	to_chars(chars_list,1);
560 
561 	stats.constant_lists+=3;
562 }
563 
/*
 * Fetches the argument of an option given either as -o<arg> or as
 * -o <arg>.
 *
 * argv points to the caller's argument cursor, *j is the index of the
 * option letter inside the current argument and *i is the caller's
 * argument counter.
 * In the attached form the rest of the current argument is returned.
 * In the separate form the cursor and *i are advanced to the next
 * argument, which is returned. If there is no next argument, an error
 * is printed and the program exits with status 2.
 * In both cases *j is left at the last character of the argument in
 * which the value was found.
 */
char *get_arg(char *(*argv[]),int *i,int *j)
{
	char **cur=*argv;
	char *value;

	if (cur[0][*j+1]!=0)
	{
		/* The value is attached to the option letter */
		value=&cur[0][(*j)+1];
		*j=strlen(cur[0])-1;
	}
	else {
		/* The value is the next argument, which must exist */
		if (cur[1]==NULL)
		{
			fprintf(stderr,"-%c requires an argument\n",
					cur[0][*j]);
			exit (2);
		}
		value=cur[1];
		*argv=cur+1;
		(*i)++;
		*j=strlen(value)-1;
	}
#ifdef DEBUG
	fprintf(stderr,"Got argument %s\n",value);
#endif
	return value;
}
592 
593 /*
594  * Adds a command to com_file_buf
595  */
add_command(char * com)596 void add_command(char *com)
597 {
598 	if (COMBUF_SIZE-com_file_buf_used < (int)strlen(com)+2)
599 	{
600 		fprintf(stderr,"Expression too long (>%d)\n",COMBUF_SIZE);
601 		exit(2);
602 	}
603 	strcpy(&com_file_buf[com_file_buf_used],com);
604 	com_file_buf_used+=strlen(com);
605 }
606 
607 /*
608  * Reads command file to command buffer
609  */
read_com_file(char * fname)610 void read_com_file(char *fname)
611 {
612 	int i;
613 	int size;
614 	int r;
615 
616 	if (fname[0]=='-' && fname[1]==0)
617 	{
618 		/* Commands are coming from stdin */
619 		if (stdin_read) {
620 			fprintf(stderr,
621 		"Stdin already read, Can't read expressions from stdin\n");
622 			exit(2);
623 		}
624 		stdin_read=TRUE;
625 		i=0;
626 	}
627 	else {
628 	 	i=open(fname,O_RDONLY);
629 		if (i==-1)
630 		{
631 			fprintf(stderr,"Command file %s : %s\n",
632 				fname,strerror(errno));
633 			exit(2);
634 		}
635 	}
636 	size=0;
637 
638 	/*
639 	 * When reading from file this loop is done only once
640 	 * When reading from pipe (file descriptor==i==0 )
641 	 * this loop is done as long as there is input coming
642 	 */
643 	do {
644 		r=read(i,&com_file_buf[com_file_buf_used+size],
645 			COMBUF_SIZE-com_file_buf_used-size);
646 		if ( r==-1 )
647 		{
648 			perror("Read command file");
649 			exit(2);
650 		}
651 		if ( r==0 && ( i!=0 ||  (i==0 && size==0) ) )
652 		{
653 			fprintf(stderr,"Empty command file %s\n",fname);
654 			exit(2);
655 		}
656 		size+=r;
657 	} while ( i==0 && r!=0 );
658 
659 	if ( size-2>COMBUF_SIZE-com_file_buf_used)
660 	{
661 		fprintf(stderr,"Expression too long (>%d)\n",COMBUF_SIZE);
662 		exit(2);
663 	}
664 	com_file_buf_used+=size;
665 	command_file_given=TRUE;
666 	if (i!=0) close(i);
667 }
668 
669 /*
670  * Reads the expression commands to com_file_buf
671  */
read_expressions()672 void read_expressions()
673 {
674 	int i;
675 
676 	i= (read_sgreprc) ? 0:1;
677 #ifdef ASSERT
678 	assert(exprs>0);
679 #endif
680 	while (i<exprs)
681 	{
682 		switch(expr_table[i].type){
683 		case E_FILE:
684 			read_com_file(expr_table[i].expr);
685 			break;
686 		case E_TEXT:
687 			add_command(expr_table[i].expr);
688 			break;
689 		default:
690 			fprintf(stderr,"Strange expression type\n");
691 			exit(3);
692 			break;
693 		}
694 		/* If there wasn't nl between command expressions we add one */
695 		if (com_file_buf_used>0)
696 			if (com_file_buf[com_file_buf_used-1]!='\n')
697 			 com_file_buf[com_file_buf_used++]='\n';
698 		i++;
699 	}
700 	com_file_buf[com_file_buf_used]=0;
701 }
702 
703 /*
704  * Reads output style from file
705  */
read_style_file(char * fname)706 void read_style_file(char *fname)
707 {
708 	int fd;
709 	int l,r;
710 
711 	fd=open(fname,O_RDONLY);
712 	if (fd==-1)
713 	{
714 		fprintf(stderr,"open style file %s : %s\n",fname,strerror(errno));
715 		exit(2);
716 	}
717 	l=lseek(fd,0,SEEK_END);
718 	if (l==-1)
719 	{
720 		fprintf(stderr,"lseek style file %s : %s\n",fname,strerror(errno));
721 		exit(2);
722 	}
723 	lseek(fd,0,SEEK_SET);
724 	output_style=(char *)e_malloc(l+1);
725 	r=read(fd,output_style,l);
726 	if (r==-1)
727 	{
728 		fprintf(stderr,"read style file %s : %s\n",fname,strerror(errno));
729 		exit(2);
730 	}
731 	if (r==0)
732 	{
733 		fprintf(stderr,"Empty style file %s\n",fname);
734 		exit(2);
735 	}
736 	output_style[r]=0;
737 	close(fd);
738 }
739 
740 /*
741  * Checks the command line options
742  */
get_options(char * argv[])743 int get_options(char *argv[])
744 {
745 	int o,i,j;
746 
747 	i=1;
748 	j=1;
749 
750 	while ( *argv!=NULL && *argv[0]=='-' )
751 	{
752 		/* option -- means no more options */
753 		if (strcmp(*argv,"--")==0) return i+1;
754 		o=0;
755 		while (options[o].opt!=0)
756 		{
757 			if (options[o].opt==(*argv)[j]) break;
758 			o++;
759 		}
760 		switch((*argv)[j])
761 		{
762 		case 'h':
763 			print_help();
764 			break;
765 		case 'V':
766 			printf("sgrep version %s compiled at %s\n",
767 				VERSION,__DATE__);
768 			exit(0);
769 			break;
770 		case 'T':
771 			have_stats=TRUE;
772 			break;
773 		case 't':
774 			have_times=TRUE;
775 			break;
776 		case 'a':
777 			print_all=TRUE;
778 			break;
779 		case 'i':
780 			ignore_case=TRUE;
781 			break;
782 		case 'l':
783 			output_style=LONG_OUTPUT;
784 			do_concat=FALSE;
785 			break;
786 		case 's':
787 			output_style=SHORT_OUTPUT;
788 			do_concat=TRUE;
789 			break;
790 		case 'o':
791 			output_style=get_arg(&argv,&i,&j);
792 			do_concat=FALSE;
793 			break;
794 		case 'c':
795 			display_count=TRUE;
796 			do_concat=FALSE;
797 			no_output=FALSE;
798 			break;
799 		case 'd':
800 			do_concat=FALSE;
801 			break;
802 		case 'N':
803 			print_newline=FALSE;
804 			break;
805 		case 'C':
806 			copyright_notice();
807 			exit(0);
808 			break;
809 		case 'f':
810 			if (exprs==MAX_EXPRESSIONS)
811 			{
812 				fprintf(stderr,
813 	"too many expressions. (-e and -f options more than %d)\n",
814 				MAX_EXPRESSIONS);
815 				exit(2);
816 			}
817 			expr_table[exprs].expr=get_arg(&argv,&i,&j);
818 			expr_table[exprs++].type=E_FILE;
819 			command_file_given=TRUE;
820 			break;
821 		case 'e':
822 			if (exprs==MAX_EXPRESSIONS)
823 			{
824 				fprintf(stderr,
825 	"too many expressions. (-e and -f options more than %d)\n",
826 				MAX_EXPRESSIONS);
827 				exit(2);
828 			}
829 			expr_table[exprs].expr=get_arg(&argv,&i,&j);
830 			expr_table[exprs++].type=E_TEXT;
831 			command_file_given=TRUE;
832 			break;
833 		case 'p':
834 			preprocessor=get_arg(&argv,&i,&j);
835 			break;
836 		case 'n':
837 			read_sgreprc=FALSE;
838 			break;
839 		case 'O':
840 			read_style_file(get_arg(&argv,&i,&j));
841 			break;
842 		case 'P':
843 			show_expr=TRUE;
844 			break;
845 #ifdef PROGRESS_REPORTS
846 		case 'D':
847 			progress_output=TRUE;
848 			break;
849 #endif
850 		case 'S':
851 			stream_mode=TRUE;
852 			break;
853 
854 		case 'q':
855 			no_output=TRUE;
856 			break;
857 
858 /*		case '�':
859 			fprintf(stderr,"Option not implemented yet.\n");
860 			exit(2);
861 			break; */
862 		default:
863 			fprintf(stderr,"Illegal option -%c\n",(*argv)[j]);
864 			return -1;
865 			break;
866 		}
867 		if ((*argv)[++j]==0)
868 		{
869 			argv++;
870 			i++;
871 			j=1;
872 		}
873 	}
874 	return i;
875 }
876 
877 /*
878  * Clears the stats struct which we use for gathering statistical information
879  */
clear_stats()880 void clear_stats()
881 {
882 	/* Everything is zero. At least so far */
883 	memset(&stats,0,sizeof(stats));
884 }
885 
886 /*
887  * Shows the statistics ( from stats struct )
888  */
show_stats()889 void show_stats()
890 {
891 	fprintf(stderr,
892 	"Scanned %d files, having total of %dK size finding %d phrases.\n",
893 		last_file,
894 		(input_files[last_file-1].start+
895 			input_files[last_file-1].length)/1024,
896 		stats.phrases);
897 	fprintf(stderr,"Operator tree size was %d, optimized %d\n",
898 		stats.tree_size,stats.tree_size-stats.opt_nodes);
899 	fprintf(stderr,"Output list size was %d regions.\n",stats.output);
900 	fprintf(stderr,
901 		"Operations:\n%15s:%-4d%6s:%-4d%5s:%-4d%5s:%-4d%11s:%-4d%3s:%-4d\n",
902 		"containing",stats.containing,
903 		"in",stats.in,
904 		"order",stats.order,
905 		"or",stats.or,
906 		"extracting",stats.extracting,
907 		"quote",stats.quote);
908 	fprintf(stderr,"%15s:%-4d%6s:%-4d%5s:%-4d%5s:%-4d%11s:%-4d%4s:%-4d\n",
909 		"not containing",stats.not_containing,
910 		"not in",stats.not_in,
911 		"inner",stats.inner,
912 		"outer",stats.outer,
913 		"concat",stats.concat,
914 		"join",stats.join);
915 	fprintf(stderr,"%15s:%-4d%6s:%-4d\n",
916 		"equal",stats.equal,
917 		"not equal",stats.not_equal);
918 	fprintf(stderr,"Memory:\n %dK memory allocated, %d realloc operations\n",
919 		stats.e_mallocs/1024,stats.reallocs);
920 	fprintf(stderr," %d gc lists, %d gc lists allocated\n",
921 		stats.gc_lists,stats.gc_lists_allocated);
922   	fprintf(stderr," %d gc blocks used, %d gc blocks allocated.\n",
923 		stats.gc_nodes,stats.gc_nodes_allocated);
924 	fprintf(stderr," Longest list size was %d regions.\n",
925 		stats.longest_list);
926 	fprintf(stderr,
927 			" %dK nest stack size, %dK inner tablesize\n",
928 			stats.nest_stacksize/1024,
929 			stats.inner_tablesize/1024);
930 	fprintf(stderr,
931 #ifdef REMOVE_DUPLICATES
932 		"Things done:\n %d %s, %d %s, %d %s\n %d %s, %d %s, %d %s\n",
933 #else
934 		"Things done:\n %d %s, %d %s, %d %s\n %d %s, %d %s\n",
935 #endif
936 		stats.regions,"regions created",
937 		stats.scans,"gc lists scanned",
938 		stats.scanned_regions,"regions scanned",
939 		stats.sorts_by_start,"sorts by start point",
940 		stats.sorts_by_end,"sorts by end point"
941 #ifdef REMOVE_DUPLICATES
942 		,stats.remove_duplicates,"remove duplicates"
943 #endif
944 		);
945 #ifdef OPTIMIZE_SORTS
946 	fprintf(stderr," %d sorts optimized\n",stats.sorts_optimized);
947 #endif
948 	if (stats.skipped_phrases)
949 	{
950 		fprintf(stderr," %d same phrases\n",stats.skipped_phrases);
951 	}
952 }
953 
954 /*
955  * Checks that files which are given in the command line really exist.
956  * If open_failure==true nonexistent files are considered fatal.
957  * Creates input_file list, skipping zero length files
958  */
check_files(int argc,char * argv[],int optind)959 void check_files(int argc, char *argv[], int optind)
960 {
961 	int fd,ls,r=0;
962 	int pos;
963 	char buf[1];
964 
965 	input_files=(struct INPUT_FILE *)
966 		e_malloc( sizeof(struct INPUT_FILE) * (argc-optind) );
967 	last_file=0;
968 	pos=0;
969 	ls=0;
970 	while (optind<argc)
971 	{
972 		if (strcmp(argv[optind],"-")==0)
973 		{
974 			optind++;
975 			/* We try to read stdin */
976 			pos+=read_stdin();
977 			continue;
978 		}
979 #ifdef DEBUG
980 		fprintf(stderr,"checking file %s\n",argv[optind]);
981 #endif
982 		/* We do sgrep only on files which we can open,read, lseek
983 		   and which are not empty */
984 		fd=open(argv[optind],O_RDONLY);
985 		if (fd!=-1) r=read(fd,buf,1);
986 		if (fd!=-1 && r!=-1 ) ls=lseek(fd,0,SEEK_END);
987 		if (fd==-1 || ls==-1 || r==-1 )
988 		{
989 			fprintf(stderr,"sgrep: %s: %s\n",argv[optind],strerror(errno));
990 			if (open_failure) exit(2);
991 		} else if (ls>0)
992 		{
993 			input_files[last_file].start=pos;
994 			input_files[last_file].length=ls;
995 			input_files[last_file].name=argv[optind];
996 			pos+=ls;
997 			last_file++;
998 		}
999 		close(fd);
1000 		optind++;
1001 	}
1002 	if (last_file==0)
1003 	{
1004 		fprintf(stderr,"No valid files\n");
1005 		exit(2);
1006 	}
1007 }
1008 
/*
 * Converts the difference of two clock tick counts, b (begin) and
 * e (end), to seconds.
 * The number of ticks per second is asked from sysconf() on the first
 * call and remembered. If it is not available, 0 is returned.
 */
float calc_time(clock_t b,clock_t e)
{
	static long ticks_per_sec=0;
	float elapsed;

	if (ticks_per_sec==0)
		ticks_per_sec=sysconf(_SC_CLK_TCK);
	if (ticks_per_sec<0)
		return 0;

	elapsed=(float)(e-b);
	return elapsed/(float)ticks_per_sec;
}
1022 
/*
 * Prints to stderr one line of time information: the label and the
 * user, system and total time in seconds which passed between the time
 * points b (begin) and e (end).
 */
void print_time(char *label,struct tms *b,struct tms *e)
{
	float usr=calc_time(b->tms_utime,e->tms_utime);
	float sys=calc_time(b->tms_stime,e->tms_stime);

	fprintf(stderr,"  %-18s%6.2fs %6.2fs %6.2fs\n",label,usr,sys,usr+sys);
}
1034 
1035 /*
1036  * Prints information about time used to stderr
1037  */
show_times()1038 void show_times()
1039 {
1040 	fprintf(stderr,"%-18s%8s%8s%8s\n",
1041 		"sgrep time usage","usr","sys","total");
1042 	print_time("parsing",&tps.start,&tps.parsing);
1043 	print_time("acsearch",&tps.parsing,&tps.acsearch);
1044 	print_time("evaluating",&tps.acsearch,&tps.eval);
1045 	print_time("output",&tps.eval,&tps.output);
1046 	fprintf(stderr,"  -----------------------------------------\n");
1047 	print_time("total",&tps.start,&tps.output);
1048 	if (tps.output.tms_cutime>0)
1049 	{
1050 		fprintf(stderr,"\n");
1051 		print_time("preprocessor",
1052 		 (struct tms *)&tps.start.tms_cutime,
1053 		 (struct tms *)&tps.output.tms_cutime);
1054 	}
1055 
1056 }
1057 
1058 /*
1059  * Reads stdin to a temp file. Leaves temp file open and stdin_fd pointing
1060  * to it. File name will be NULL. Unlinks temp file, so that it will be
1061  * removed when program exits.
1062  * returns size of input file read
1063  */
read_stdin()1064 int read_stdin()
1065 {
1066 	char buf[4096];
1067 	char *temp_file;
1068 	int r,w;
1069 	static int length=-1;
1070 
1071 	if (length==0)
1072 		return 0; /* If stdin was already read, and was empty */
1073 
1074 	if ( input_files==NULL )
1075 	{
1076 		/*
1077 		 * If input_files hasn't been malloced, there is none,
1078 		 * and we read only from stdin
1079 		 */
1080 		input_files= (struct INPUT_FILE *)
1081 			e_malloc(sizeof(*input_files));
1082 		last_file=0;
1083 	}
1084 	if (last_file==0)
1085 	{
1086 		input_files[0].start=0;
1087 	} else
1088 	{
1089 		input_files[last_file].start=
1090 			input_files[last_file-1].start+
1091 			input_files[last_file-1].length;
1092 	}
1093 	input_files[last_file].name=NULL;
1094 	if (length>0)
1095 	{
1096 		/* We have already read stdin, so we just return what
1097 		   we already know */
1098 		input_files[last_file++].length=length;
1099 		return length;
1100 	}
1101 
1102 	if (stdin_read) {
1103 		/*
1104 		 * Somebody had already used stdin for something
1105 		 */
1106 		 fprintf(stderr,
1107 	"Can't read input from stdin, it's already used\n");
1108 		exit(2);
1109 	}
1110 	/* We read stdin to temporary file */
1111 	temp_file=tmpnam(NULL);
1112 	stdin_fd=open(temp_file,O_RDWR | O_CREAT);
1113 	if (unlink(temp_file)==-1)
1114 	{
1115 		perror("sgrep warning: unlinking temp file failed");
1116 	}
1117 	if (stdin_fd==-1)
1118 	{
1119 		 perror("creating tempfile: open");
1120 		 exit(2);
1121 	}
1122 	length=0;
1123 	while ( (r=read(0,buf,4096))!=0 )
1124 	{
1125 		if (r==-1)
1126 		{
1127 			perror("read stdin");
1128 			exit(2);
1129 		}
1130 		length+=r;
1131 		w=write(stdin_fd,buf,r);
1132 		if (w==-1)
1133 		{
1134 			perror("write tempfile");
1135 			exit(2);
1136 		}
1137 		if (w!=r)
1138 		{
1139 			fprintf(stderr,"Short write to tempfile\n");
1140 			exit(2);
1141 		}
1142 	}
1143 	if (length>0)
1144 	{
1145 		return (input_files[last_file++].length=length);
1146 	}
1147 	return 0;
1148 }
1149 
1150 /*
1151  * Reads the options from environ variable ENV_OPTIONS
1152  */
environ_options()1153 int environ_options()
1154 {
1155 	char *av[100];
1156 	int i=0;
1157 	int j=0;
1158 	char *o;
1159 
1160 	if (getenv(ENV_OPTIONS)==NULL) return 0;
1161 
1162 	o=(char *)e_malloc(strlen(getenv(ENV_OPTIONS)+1));
1163 	strcpy(o,getenv(ENV_OPTIONS));
1164 
1165 	do {
1166 		while( o[i]==' ' )
1167 		{
1168 			o[i++]=0;
1169 		}
1170 		if (!o[i]) break;
1171 		av[j++]=&o[i];
1172 		if (j==100)
1173 		{
1174 			fprintf(stderr,"Too complex "ENV_OPTIONS"\n");
1175 			exit(2);
1176 		}
1177 		while( o[i]!=' ' && o[i]!=0 ) i++;
1178 	} while (o[i]);
1179 	av[j]=NULL;
1180 
1181 #ifdef DEBUG
1182 	fprintf(stderr,"Environment options: ");
1183 	for (i=0;av[i]!=NULL;i++)
1184 	{
1185 		fprintf(stderr,"'%s' ",av[i]);
1186 	}
1187 	fprintf(stderr,"\n");
1188 #endif
1189 	i=get_options(av);
1190 	if (i==-1)
1191 	{
1192 		fprintf(stderr,"Invalid "ENV_OPTIONS" ("ENV_OPTIONS"=%s)\n",getenv(ENV_OPTIONS));
1193 		return -1;
1194 	}
1195 	if (i<=j)
1196 	{
1197 		fprintf(stderr,"No files or expressions allowed in "ENV_OPTIONS"\n");
1198 		return -1;
1199 	}
1200 	return 0;
1201 }
1202 
1203 /*
1204  * Displays the copyright notice.
1205  */
copyright_notice()1206 void copyright_notice()
1207 {
1208 	int i;
1209 
1210 	for (i=0;copyright_text[i]!=NULL;i++)
1211 	{
1212 		printf("\t%s\n",copyright_text[i]);
1213 	}
1214 }
1215