1 /* @(#)match.c	1.41 21/08/20 Copyright 1985-2021 J. Schilling */
2 #include <schily/mconfig.h>
3 #ifndef lint
4 static	UConst char sccsid[] =
5 	"@(#)match.c	1.41 21/08/20 Copyright 1985-2021 J. Schilling";
6 #endif
7 /*
8  *	search file(s) for a pattern
9  *
10  *	Copyright (c) 1985-2021 J. Schilling
11  */
12 /*
13  * The contents of this file are subject to the terms of the
14  * Common Development and Distribution License, Version 1.0 only
15  * (the "License").  You may not use this file except in compliance
16  * with the License.
17  *
18  * See the file CDDL.Schily.txt in this distribution for details.
19  * A copy of the CDDL is also available via the Internet at
20  * http://www.opensource.org/licenses/cddl1.txt
21  *
22  * When distributing Covered Code, include this CDDL HEADER in each
23  * file and include the License file CDDL.Schily.txt from this distribution.
24  */
25 
26 #include <schily/stdio.h>
27 #include <schily/stdlib.h>
28 #include <schily/unistd.h>	/* Include sys/types.h to make off_t available */
29 #include <schily/string.h>
30 #include <schily/utypes.h>
31 #include <schily/patmatch.h>
32 #include <schily/standard.h>
33 #include <schily/ctype.h>
34 #define	GT_COMERR		/* #define comerr gtcomerr */
35 #define	GT_ERROR		/* #define error gterror   */
36 #include <schily/schily.h>
37 #include <schily/nlsdefs.h>
38 
39 #define	UC	(unsigned char *)
40 
41 #ifndef	HAVE_VALLOC
42 #define	valloc(a)	malloc(a)
43 #endif
44 
45 #define	BUFSIZE	8192
46 #define	MAXLINE	8192
47 
/*
 * Characters that make a pattern "magic": issimple() reports a pattern as
 * not simple as soon as it contains one of them. The trailing 0 terminates
 * the array so that it can be searched with strchr().
 */
LOCAL	char	mchars[] = { ALT, REP, NIL, STAR, LBRACK, RBRACK,
			LCLASS, RCLASS, QUOTE, ANY, START, END,
			0,
};
/*
 * Pattern fragment that main() puts in front of and behind the user pattern
 * when -w is given.
 * NOTE(review): presumably matches line start, line end or a character that
 * cannot be part of a word - confirm against the patmatch syntax.
 */
LOCAL	char	notletter[] = "{^!$![^_A-Za-z0-9]}";

/*
 * One allocation (done in main()) holds the line buffer followed by the
 * read buffer: the first linelen bytes are the line buffer, the following
 * rblen bytes are the read buffer.
 */
LOCAL	char	*buf;			/* buffer		*/
LOCAL	int	rblen;			/* read buffer len	*/
LOCAL	int	linelen;		/* line buffer len	*/
#define	rbuf	(&buf[linelen])		/* read buffer		*/
#define	line	(&buf[0])		/* line buffer		*/
LOCAL	char	*lcasebuf;		/* low case line buffer	*/

/*
 * Option flags, filled in by getallargs() in main().
 */
LOCAL	int	notflag = 0;		/* -not/-v: select non matching lines */
LOCAL	int	igncase = 0;		/* -i: ignore the case of letters */
LOCAL	int	magic = 0;		/* -M: force the magic (pattern) mode */
LOCAL	int	nomagic = 0;		/* -m: force the non magic mode */
LOCAL	int	wordflag = 0;		/* -w: search for pattern as a word */
LOCAL	int	xflag = 0;		/* -x: whole line must match */
LOCAL	int	cntflag = 0;		/* -c: print match count per file */
LOCAL	int	lflag = 0;		/* -l: print names of matching files */
LOCAL	int	Lflag = 0;		/* -L: print first matching line only */
LOCAL	int	Vflag = 0;		/* -V: print names of files w/o match */
LOCAL	int	sflag = 0;		/* -s: silent, also implied by -V */
LOCAL	int	hflag = 0;		/* -h: no file name prefix, set for stdin */
LOCAL	int	nflag = 0;		/* -n: prefix lines with line number */
LOCAL	int	bflag = 0;		/* -b: prefix lines with block number */
LOCAL	int	debug = 0;		/* -d: print the pattern via printpat() */
LOCAL	int	dosimple = 0;		/* use smatch() instead of pmatch() */
77 
78 LOCAL	void	usage		__PR((int exitcode));
79 EXPORT	int	main		__PR((int ac, char **av));
80 LOCAL	int	domatch		__PR((FILE *file, char *name, char *pat, int plen, int *aux, int alt, int *state));
81 LOCAL	void	strlower	__PR((char *s, int slen));
82 
83 LOCAL	BOOL	issimple	__PR((char *p));
84 LOCAL	int	smatch		__PR((char *linep, int llen, char *pat, int plen));
85 LOCAL	void	printpat	__PR((char *pat, int plen, int alt, int *aux));
86 LOCAL	BOOL	pmatch		__PR((char *linep, int llen, char *pat, int *aux, int alt, int *state));
87 
88 LOCAL void
usage(exitcode)89 usage(exitcode)
90 	int	exitcode;
91 {
92 	error("Usage:	match [options] pattern [file1...filen]\n");
93 	error("Options:\n");
94 	error("	-not,-v	Print all lines that do not match\n");
95 	error("	-i	Ignore the case of letters\n");
96 	error("	-m	Force not to use the magic mode\n");
97 	error("	-M	Force to use the magic mode\n");
98 	error("	-w	Search for pattern as a word\n");
99 	error("	-x	Display only those lines which match exactly\n");
100 	error("	-c	Display matching count for each file\n");
101 	error("	-V	Display name of each file whith no matches\n");
102 	error("	-l	Display name of each file which matches\n");
103 	error("	-L	Display first matching line of each file which matches\n");
104 	error("	-s	Be silent indicate match in exitcode\n");
105 	error("	-h	Do not display filenames\n");
106 	error("	-n	Precede matching lines with line number\n");
107 	error("	-b	Precede matching lines with block number\n");
108 	error("	-help	Print this help.\n");
109 	error("	-version Print version number.\n");
110 	error("	Standard in is used if no files are specified.\n");
111 	exit(exitcode);
112 }
113 
114 EXPORT int
main(ac,av)115 main(ac, av)
116 	int ac;
117 	char **av;
118 {
119 	FILE *f;
120 	char *pat;
121 	int *aux   = NULL;
122 	int *state = NULL;
123 	int alt = 0;
124 	char *options = "not,v,V,i,M,m,w,x,c,l,L,s,h,n,b,help,version,d";
125 	int help = 0;
126 	int	cac		= ac;
127 	char	* const *cav	= av;
128 	char	*name;
129 	int	plen;
130 	int	matches;
131 	int	anymatch = 0;
132 	BOOL	prversion = 0;
133 
134 	save_args(ac, av);
135 
136 	(void) setlocale(LC_ALL, "");
137 
138 #ifdef  USE_NLS
139 #if !defined(TEXT_DOMAIN)	/* Should be defined by cc -D */
140 #define	TEXT_DOMAIN "match"	/* Use this only if it weren't */
141 #endif
142 	{ char	*dir;
143 	dir = searchfileinpath("share/locale", F_OK,
144 					SIP_ANY_FILE|SIP_NO_PATH, NULL);
145 	if (dir)
146 		(void) bindtextdomain(TEXT_DOMAIN, dir);
147 	else
148 #if defined(PROTOTYPES) && defined(INS_BASE)
149 	(void) bindtextdomain(TEXT_DOMAIN, INS_BASE "/share/locale");
150 #else
151 	(void) bindtextdomain(TEXT_DOMAIN, "/usr/share/locale");
152 #endif
153 	(void) textdomain(TEXT_DOMAIN);
154 	}
155 #endif 	/* USE_NLS */
156 
157 	if (getallargs(&cac, &cav, options,
158 			&notflag, &notflag,
159 			&Vflag,
160 			&igncase,
161 			&magic,
162 			&nomagic,
163 			&wordflag,
164 			&xflag,
165 			&cntflag, &lflag, &Lflag, &sflag,
166 			&hflag, &nflag, &bflag, &help, &prversion,
167 			&debug) < 0) {
168 		errmsgno(EX_BAD, "Bad flag: %s.\n", cav[0]);
169 		usage(EX_BAD);
170 	}
171 	if (help)
172 		usage(0);
173 	if (prversion) {
174 		gtprintf("Match release %s (%s-%s-%s) Copyright (C) 1985-2021 %s\n",
175 				"1.41",
176 				HOST_CPU, HOST_VENDOR, HOST_OS,
177 				_("J�rg Schilling"));
178 		exit(0);
179 	}
180 
181 	if (Vflag)
182 		sflag++;	/* Be silent while searching */
183 
184 	cac = ac;
185 	cav = av;
186 	cac--, cav++;
187 	if (getfiles(&cac, &cav, options) <= 0) {
188 		errmsgno(EX_BAD, "No pattern given.\n");
189 		usage(EX_BAD);
190 	}
191 	pat = cav[0];
192 	cac--, cav++;
193 
194 	plen = strlen(pat);
195 	if (magic)
196 		nomagic = 0;
197 	if (wordflag) {
198 		if (nomagic)
199 			comerrno(EX_BAD,
200 				"Cannot match words in nomagic mode.\n");
201 		plen += 2 * (sizeof (notletter) - 1);
202 		if ((name = malloc(plen+1)) == NULL)
203 			comerrno(EX_BAD, "No memory for pattern");
204 		strcatl(name, notletter, pat, notletter, (char *)NULL);
205 		pat = name;
206 	}
207 	if (igncase)
208 		strlower(pat, plen);
209 	if (nomagic || (!magic && issimple(pat))) {
210 		dosimple = TRUE;
211 	} else {
212 		aux = malloc(sizeof (int)*plen);
213 		state = malloc(sizeof (int)*(plen+1));
214 		if (aux == NULL || state == NULL)
215 			comerrno(EX_BAD, "No memory for pattern compiler.");
216 
217 		if ((alt = patcompile(UC pat, plen, aux)) == 0)
218 			comerrno(EX_BAD, "Bad pattern: '%s'.\n", pat);
219 	}
220 	if (debug)
221 		printpat(pat, plen, alt, aux);
222 
223 	while (rblen < BUFSIZE)
224 		rblen += getpagesize();
225 	while (linelen < MAXLINE)
226 		linelen += getpagesize();
227 
228 	buf = valloc(linelen+rblen);
229 	lcasebuf = valloc(linelen);
230 	if (buf == NULL || lcasebuf == NULL)
231 		comerr("No memory for read buffer.\n");
232 
233 	if (getfiles(&cac, &cav, options) <= 0) {	/* match stdin */
234 		name = "stdin";
235 		hflag++;
236 #ifdef	_FASCII		/* Mark Williams C 	*/
237 		stdin->_ff &= ~_FASCII;
238 #endif
239 		if ((matches = domatch(stdin, name, pat, plen, aux, alt, state)) != 0)
240 			anymatch++;
241 		if (cntflag)
242 			printf("%s:%d\n", name, matches);
243 		else if (Vflag && !matches)
244 			printf("%s\n", name);
245 		else if (lflag && matches)
246 			printf("%s\n", name);
247 	} else for (; getfiles(&cac, &cav, options); cac--, cav++) {
248 		name = cav[0];
249 		f = fileopen(name, "ru");
250 		if (f == NULL)
251 			errmsg("Cannot open '%s'.\n", name);
252 		else {
253 #ifdef	_FASCII		/* Mark Williams C 	*/
254 			f->_ff &= ~_FASCII;
255 #endif
256 			file_raise(f, FALSE);
257 			if ((matches = domatch(f, name, pat, plen, aux, alt, state)) != 0)
258 				anymatch++;
259 			fclose(f);
260 			if (cntflag)
261 				printf("%s:%d\n", name, matches);
262 			else if (Vflag && !matches)
263 				printf("%s\n", name);
264 			else if (lflag && matches)
265 				printf("%s\n", name);
266 		}
267 	}
268 	exit(anymatch ? 0 : 1);
269 	return (anymatch ? 0 : 1);	/* Keep lint happy */
270 }
271 
/*
 * Search one file for a pattern.
 *
 * f		the open input stream
 * name		name used in error messages and as output prefix
 * pat, plen	the pattern and its length, handed to smatch()/pmatch()
 * aux, alt,
 * state	pattern matcher data, handed to pmatch()
 *
 * The input is read in chunks of up to rblen bytes into rbuf and copied
 * byte by byte into the line buffer until a newline, the end of the line
 * buffer or EOF is reached. Each such piece is then checked with smatch()
 * or pmatch(); with notflag set, the pieces that do not match are selected.
 *
 * Returns the number of selected lines. With lflag set it returns 1 at the
 * first selected line, with Lflag set it returns 1 after the first selected
 * line has been printed. After a read error, the count so far is returned.
 */
LOCAL int
domatch(f, name, pat, plen, aux, alt, state)
	register FILE *f;
	char *name;
	char *pat;
	register int plen;
	int *aux;
	int alt;
	int *state;
{
	register char *linep;		/* pointer to fill up line */
	register char *pbuf = rbuf;	/* pointer to read buffer */
	register int lbuf;		/* chars in read buffer */
	register int llen;
	register char c;		/* temp */
	off_t total = 0;		/* total number of bytes read */
	int lineno = 0;			/* current line number */
	int matches = 0;		/* current match count */
	BOOL matched = TRUE;		/* last line has match */
	BOOL eof = FALSE;
	int nl = 0;			/* line has nl */
	int r;

	lbuf = 0;
	for (;;) {
		if (!matched && !eof && nl == 0 && plen > 1) {
			/*
			 * If we are going to continue matching and the last
			 * match was for a long line (llen > linelen) then
			 * move the unmatched part of our line buffer to the
			 * beginning.
			 */
			/*
			 * The last plen-1 bytes of the line buffer are kept;
			 * linep is set to the value returned by movebytes()
			 * and llen to the space left behind the kept bytes.
			 */
			linep = movebytes(line-plen+linelen+1, line, plen-1);
			llen = linelen+1-plen;
		} else {
			/*
			 * Start filling up a new line.
			 */
			linep = line;
			llen = linelen;
		}
		matched = FALSE;
		nl = 0;
		for (;;) {
			/*
			 * Refill the read buffer when all of its bytes
			 * have been consumed.
			 */
			if (--lbuf < 0) {
				lbuf = ffileread(f, rbuf, rblen);
				if (lbuf < 0) {
					/*
					 * This may happen on NFS-mounted
					 * directories or OS that do not allow
					 * to read(2) directories, so we have
					 * to tolerate it.
					 */
					errmsg("Cannot read '%s'.\n", name);
					return (matches);
				}
				if (lbuf == 0) {	/* read hit EOF */
					eof = TRUE;
					/*
					 * Check a pending partial line
					 * before giving up.
					 */
					if (linep != line)
						break;
					else
						return (matches);
				}
				pbuf = rbuf;
				total += lbuf;
				lbuf--;	/* for the byte fetched below */
			}
			if ((c = *pbuf++) == '\n') {
				nl = 1;
				lineno++;
				break;
			}
			if (--llen >= 0) {
				*linep++ = c;
			} else {
				/*
				 * The line buffer is full: put the byte
				 * back into the read buffer and check what
				 * has been collected so far.
				 */
				lbuf++;
				pbuf--;
				break;
			}
		}
		/**plin = 0;*/
		/*
		 * Turn the remaining space into the number of bytes in the
		 * line buffer; llen < 0 means the buffer was filled up.
		 */
		llen = llen < 0 ? linelen : linelen - llen;

		/*
		 * r is non-zero if the matcher found the pattern.
		 * Skip this piece unless it is selected (with notflag
		 * the selection is inverted).
		 */
		if ((r = dosimple	? smatch(line, llen, pat, plen)
					: pmatch(line, llen, pat, aux, alt, state)) != 0) {
			if (notflag)
				continue;
		} else {
			if (!notflag)
				continue;
		}
		matches++;
		matched = TRUE;
		if (lflag)
			return (1);
		if (cntflag || sflag)
			continue;	/* count only, no line output */
		if (name && !hflag)
			printf("%s:", name);
		if (nflag)
			printf("%d:", lineno);
		/*
		 * The block number is printed in units of 512 bytes.
		 */
		if (bflag)
			printf("%lld:", (Llong)((total-lbuf-r-nl)/512));
		(void) filewrite(stdout, line, llen);
		putchar('\n');
		flush();
		if (Lflag)
			return (1);
	}
}
385 
386 /*
387  * Convert a string in place to lower case.
388  */
389 LOCAL void
strlower(s,slen)390 strlower(s, slen)
391 	register char	*s;
392 	register int	slen;
393 {
394 	register Uchar	c;
395 
396 	while (--slen >= 0) {
397 		c = (Uchar)*s;
398 		if (isupper(c))
399 			*s = (char)tolower(c);
400 		s++;
401 	}
402 }
403 
404 /*
405  * Check whether the pattern only has non-magic chars.
406  */
407 LOCAL BOOL
issimple(p)408 issimple(p)
409 	register char *p;
410 {
411 	while (*p) {
412 		if (strchr(mchars, *p++))
413 			return (FALSE);
414 	}
415 	return (TRUE);
416 }
417 
418 /*
419  * Simple (non regular expression) match.
420  *
421  * Check one line (or the buffer if no newline was found) for matches.
422  */
423 LOCAL int
smatch(linep,llen,pat,plen)424 smatch(linep, llen, pat, plen)
425 	register char	*linep;
426 	register int	llen;
427 		char	*pat;
428 		int	plen;
429 {
430 	register char	*lp;		/* Line pointer		*/
431 	register char	*pp;		/* Pattern pointer	*/
432 	register char	*rpat = pat;
433 	register char	c = *pat;
434 
435 	if (igncase) {
436 		movebytes(linep, lcasebuf, llen);
437 		strlower(linep = lcasebuf, llen);
438 	}
439 #ifdef	MDEBUG
440 	printf("llen0 %d %.*s\n", llen, llen, linep);
441 #endif
442 	if (xflag) {
443 		if (llen == 0)
444 			return (*rpat == '\0');
445 		if (llen != plen)
446 			return (0);
447 		for (lp = linep, pp = rpat; --llen >= 0; )
448 			if (*lp++ != *pp++)
449 				return (0);
450 		return (1);
451 
452 		/* CSTYLED */
453 	} else for (llen -= plen-2; --llen > 0; ) {
454 #ifdef	MDEBUG
455 		printf("llen1 %d %.*s\n", llen, llen, linep);
456 #endif
457 		/*
458 		 * With a linelength of 16 and above, findbytes() is faster
459 		 */
460 		if (llen < 16) {
461 			while (llen > 0 && *linep != c) {
462 				linep++;
463 				llen--;
464 			}
465 			if (llen <= 0)
466 				return (0);
467 		} else {
468 			lp = findbytes(linep, llen, c);
469 			if (lp == NULL)
470 				return (0);
471 			llen -= lp - linep;
472 			linep = lp;
473 		}
474 #ifdef	MDEBUG
475 		printf("llen2 %d %.*s\n", llen, llen, linep);
476 #endif
477 
478 		for (lp = linep++, pp = rpat; ; )
479 			if (*pp == 0)
480 				return (llen+plen);
481 #ifdef	__needed__
482 			else if (*lp == 0)
483 				return (0);
484 #endif
485 			else if (*pp++ != *lp++)
486 				break;
487 	}
488 	return (0);
489 }
490 
491 LOCAL void
printpat(pat,plen,alt,aux)492 printpat(pat, plen, alt, aux)
493 	char	*pat;
494 	int	plen;
495 	int	alt;
496 	int	aux[];
497 {
498 	register int	i;
499 
500 	printf("pattern: '%s'.\n", pat);
501 	printf("patlen : %d.\n", plen);
502 	if (!dosimple) {
503 		printf("alt    : %d.\n", alt);
504 		printf("aux    :");
505 		for (i = 0; i < plen; i++)
506 			printf(" %d", aux[i]);
507 		printf(".\n");
508 	}
509 }
510 
511 /*
512  * Pattern (using regular expressions) match.
513  *
514  * Check one line (or the buffer if no newline was found) for matches.
515  */
516 LOCAL BOOL
pmatch(linep,llen,pat,aux,alt,state)517 pmatch(linep, llen, pat, aux, alt, state)
518 	char	*linep;
519 	int	llen;
520 	char	*pat;
521 	int	*aux;
522 	int	alt;
523 	int	*state;
524 {
525 	if (igncase) {
526 		movebytes(linep, lcasebuf, llen);
527 		strlower(linep = lcasebuf, llen);
528 	}
529 	if (xflag) {
530 		return (((long)((char *)patmatch(UC pat, aux, UC linep, 0, llen, alt, state) - linep))
531 								== llen);
532 	} else {
533 		return (patlmatch(UC pat, aux, UC linep, 0, llen, alt, state) != 0);
534 	}
535 }
536