xref: /openbsd/regress/lib/libc/regex/main.c (revision 905646f0)
1 /*	$OpenBSD: main.c,v 1.11 2020/02/14 19:17:34 schwarze Exp $	*/
2 /*	$NetBSD: main.c,v 1.2 1995/04/20 22:39:51 cgd Exp $	*/
3 
4 #include <stdio.h>
5 #include <stdlib.h>
6 #include <string.h>
7 #include <sys/types.h>
8 #include <regex.h>
9 #include <assert.h>
10 #include <unistd.h>
11 
12 #include "main.ih"
13 
/* Name the program was started under (argv[0]); shown in the usage text. */
char *progname;
/* Debug level: bumped once per -x; regress() echoes line numbers when set. */
int debug = 0;
/* Number of the regression-script line currently being processed. */
int line = 0;
/* Exit status of the run; set to 1 as soon as any check fails. */
int status = 0;

/* Baseline regcomp() flags (-c); options('c', ...) starts from these. */
int copts = REG_EXTENDED;
/* Baseline regexec() flags (-e); options('e', ...) starts from these. */
int eopts = 0;
/* -S: start of the match window, used by main() under REG_STARTEND. */
regoff_t startoff = 0;
/* -E: distance from the end of the string to the window's end. */
regoff_t endoff = 0;


/* Helpers that are declared here but not defined in this part of the source. */
extern int split(char *, char *[], int, char *);
extern void regprint(regex_t *, FILE *);
27 
28 /*
29  - main - do the simple case, hand off to regress() for regression
30  */
31 int
32 main(int argc, char *argv[])
33 
34 {
35 	regex_t re;
36 #	define	NS	10
37 	regmatch_t subs[NS];
38 	char erbuf[100];
39 	int err;
40 	size_t len;
41 	int c;
42 	int errflg = 0;
43 	register int i;
44 
45 	progname = argv[0];
46 
47 	while ((c = getopt(argc, argv, "c:E:e:S:x")) != -1)
48 		switch (c) {
49 		case 'c':	/* compile options */
50 			copts = options('c', optarg);
51 			break;
52 		case 'E':	/* end offset */
53 			endoff = (regoff_t)atoi(optarg);
54 			break;
55 		case 'e':	/* execute options */
56 			eopts = options('e', optarg);
57 			break;
58 		case 'S':	/* start offset */
59 			startoff = (regoff_t)atoi(optarg);
60 			break;
61 		case 'x':	/* Debugging. */
62 			debug++;
63 			break;
64 		case '?':
65 		default:
66 			errflg++;
67 			break;
68 		}
69 	if (errflg) {
70 		fprintf(stderr, "usage: %s ", progname);
71 		fprintf(stderr, "[-x] [-c copt] [-E endoff] [-e eopt] [-S startoff] [re]\n");
72 		exit(2);
73 	}
74 
75 	if (optind >= argc) {
76 		regress(stdin);
77 		exit(status);
78 	}
79 
80 	err = regcomp(&re, argv[optind++], copts);
81 	if (err) {
82 		len = regerror(err, &re, erbuf, sizeof(erbuf));
83 		fprintf(stderr, "error %s, %zu/%zu `%s'\n",
84 			eprint(err), len, sizeof(erbuf), erbuf);
85 		exit(status);
86 	}
87 	regprint(&re, stdout);
88 
89 	if (optind >= argc) {
90 		regfree(&re);
91 		exit(status);
92 	}
93 
94 	if (eopts&REG_STARTEND) {
95 		subs[0].rm_so = startoff;
96 		subs[0].rm_eo = strlen(argv[optind]) - endoff;
97 	}
98 	err = regexec(&re, argv[optind], (size_t)NS, subs, eopts);
99 	if (err) {
100 		len = regerror(err, &re, erbuf, sizeof(erbuf));
101 		fprintf(stderr, "error %s, %zu/%zu `%s'\n",
102 			eprint(err), len, sizeof(erbuf), erbuf);
103 		exit(status);
104 	}
105 	if (!(copts&REG_NOSUB)) {
106 		len = (size_t)(subs[0].rm_eo - subs[0].rm_so);
107 		if (subs[0].rm_so != -1) {
108 			if (len != 0)
109 				printf("match `%.*s'\n", (int)len,
110 					argv[optind] + subs[0].rm_so);
111 			else
112 				printf("match `'@%.1s\n",
113 					argv[optind] + subs[0].rm_so);
114 		}
115 		for (i = 1; i < NS; i++)
116 			if (subs[i].rm_so != -1)
117 				printf("(%d) `%.*s'\n", i,
118 					(int)(subs[i].rm_eo - subs[i].rm_so),
119 					argv[optind] + subs[i].rm_so);
120 	}
121 	exit(status);
122 }
123 
124 /*
125  - regress - main loop of regression test
126  == void regress(FILE *in);
127  */
128 void
129 regress(in)
130 FILE *in;
131 {
132 	char inbuf[1000];
133 #	define	MAXF	10
134 	char *f[MAXF];
135 	int nf;
136 	int i;
137 	char erbuf[100];
138 	size_t ne;
139 	char *badpat = "invalid regular expression";
140 #	define	SHORT	10
141 	char *bpname = "REG_BADPAT";
142 	regex_t re;
143 
144 	while (fgets(inbuf, sizeof(inbuf), in) != NULL) {
145 		line++;
146 		if (inbuf[0] == '#' || inbuf[0] == '\n')
147 			continue;			/* NOTE CONTINUE */
148 		inbuf[strcspn(inbuf, "\n")] = '\0';	/* get rid of stupid \n */
149 		if (debug)
150 			fprintf(stdout, "%d:\n", line);
151 		nf = split(inbuf, f, MAXF, "\t\t");
152 		if (nf < 3) {
153 			fprintf(stderr, "bad input, line %d\n", line);
154 			exit(1);
155 		}
156 		for (i = 0; i < nf; i++)
157 			if (strcmp(f[i], "\"\"") == 0)
158 				f[i] = "";
159 		if (nf <= 3)
160 			f[3] = NULL;
161 		if (nf <= 4)
162 			f[4] = NULL;
163 		try(f[0], f[1], f[2], f[3], f[4], options('c', f[1]));
164 		if (opt('&', f[1]))	/* try with either type of RE */
165 			try(f[0], f[1], f[2], f[3], f[4],
166 					options('c', f[1]) &~ REG_EXTENDED);
167 	}
168 
169 	ne = regerror(REG_BADPAT, (regex_t *)NULL, erbuf, sizeof(erbuf));
170 	if (strcmp(erbuf, badpat) != 0 || ne != strlen(badpat)+1) {
171 		fprintf(stderr, "end: regerror() test gave `%s' not `%s'\n",
172 							erbuf, badpat);
173 		status = 1;
174 	}
175 	ne = regerror(REG_BADPAT, (regex_t *)NULL, erbuf, (size_t)SHORT);
176 	if (strncmp(erbuf, badpat, SHORT-1) != 0 || erbuf[SHORT-1] != '\0' ||
177 						ne != strlen(badpat)+1) {
178 		fprintf(stderr, "end: regerror() short test gave `%s' not `%.*s'\n",
179 						erbuf, SHORT-1, badpat);
180 		status = 1;
181 	}
182 	ne = regerror(REG_ITOA|REG_BADPAT, (regex_t *)NULL, erbuf, sizeof(erbuf));
183 	if (strcmp(erbuf, bpname) != 0 || ne != strlen(bpname)+1) {
184 		fprintf(stderr, "end: regerror() ITOA test gave `%s' not `%s'\n",
185 						erbuf, bpname);
186 		status = 1;
187 	}
188 	re.re_endp = bpname;
189 	ne = regerror(REG_ATOI, &re, erbuf, sizeof(erbuf));
190 	if (atoi(erbuf) != (int)REG_BADPAT) {
191 		fprintf(stderr, "end: regerror() ATOI test gave `%s' not `%ld'\n",
192 						erbuf, (long)REG_BADPAT);
193 		status = 1;
194 	} else if (ne != strlen(erbuf)+1) {
195 		fprintf(stderr, "end: regerror() ATOI test len(`%s') = %ld\n",
196 						erbuf, (long)REG_BADPAT);
197 		status = 1;
198 	}
199 }
200 
201 /*
202  - try - try it, and report on problems
203  == void try(char *f0, char *f1, char *f2, char *f3, char *f4, int opts);
204  */
205 void
206 try(f0, f1, f2, f3, f4, opts)
207 char *f0;
208 char *f1;
209 char *f2;
210 char *f3;
211 char *f4;
212 int opts;			/* may not match f1 */
213 {
214 	regex_t re;
215 #	define	NSUBS	10
216 	regmatch_t subs[NSUBS];
217 #	define	NSHOULD	15
218 	char *should[NSHOULD];
219 	int nshould;
220 	char erbuf[100];
221 	int err;
222 	int len;
223 	char *type = (opts & REG_EXTENDED) ? "ERE" : "BRE";
224 	register int i;
225 	char *grump;
226 	char f0copy[1000];
227 	char f2copy[1000];
228 
229 	strlcpy(f0copy, f0, sizeof f0copy);
230 	re.re_endp = (opts&REG_PEND) ? f0copy + strlen(f0copy) : NULL;
231 	fixstr(f0copy);
232 	err = regcomp(&re, f0copy, opts);
233 	if (err != 0 && (!opt('C', f1) || err != efind(f2))) {
234 		/* unexpected error or wrong error */
235 		len = regerror(err, &re, erbuf, sizeof(erbuf));
236 		fprintf(stderr, "%d: %s error %s, %d/%zu `%s'\n",
237 					line, type, eprint(err), len,
238 					sizeof(erbuf), erbuf);
239 		status = 1;
240 	} else if (err == 0 && opt('C', f1)) {
241 		/* unexpected success */
242 		fprintf(stderr, "%d: %s should have given REG_%s\n",
243 						line, type, f2);
244 		status = 1;
245 		err = 1;	/* so we won't try regexec */
246 	}
247 
248 	if (err != 0) {
249 		regfree(&re);
250 		return;
251 	}
252 
253 	strlcpy(f2copy, f2, sizeof f2copy);
254 	fixstr(f2copy);
255 
256 	if (options('e', f1)&REG_STARTEND) {
257 		if (strchr(f2, '(') == NULL || strchr(f2, ')') == NULL)
258 			fprintf(stderr, "%d: bad STARTEND syntax\n", line);
259 		subs[0].rm_so = strchr(f2, '(') - f2 + 1;
260 		subs[0].rm_eo = strchr(f2, ')') - f2;
261 		/* the preceding character is relevant with REG_NOTBOL */
262 		f2copy[subs[0].rm_so - 1] = subs[0].rm_so > 1 ?
263 		    f2copy[subs[0].rm_so - 2] : 'X';
264 	}
265 	err = regexec(&re, f2copy, NSUBS, subs, options('e', f1));
266 
267 	if (err != 0 && (f3 != NULL || err != REG_NOMATCH)) {
268 		/* unexpected error or wrong error */
269 		len = regerror(err, &re, erbuf, sizeof(erbuf));
270 		fprintf(stderr, "%d: %s exec error %s, %d/%zu `%s'\n",
271 					line, type, eprint(err), len,
272 					sizeof(erbuf), erbuf);
273 		status = 1;
274 	} else if (err != 0) {
275 		/* nothing more to check */
276 	} else if (f3 == NULL) {
277 		/* unexpected success */
278 		fprintf(stderr, "%d: %s exec should have failed\n",
279 						line, type);
280 		status = 1;
281 		err = 1;		/* just on principle */
282 	} else if (opts&REG_NOSUB) {
283 		/* nothing more to check */
284 	} else if ((grump = check(f2, subs[0], f3)) != NULL) {
285 		fprintf(stderr, "%d: %s %s\n", line, type, grump);
286 		status = 1;
287 		err = 1;
288 	}
289 
290 	if (err != 0 || f4 == NULL) {
291 		regfree(&re);
292 		return;
293 	}
294 
295 	for (i = 1; i < NSHOULD; i++)
296 		should[i] = NULL;
297 	nshould = split(f4, should+1, NSHOULD-1, ",");
298 	if (nshould == 0) {
299 		nshould = 1;
300 		should[1] = "";
301 	}
302 	for (i = 1; i < NSUBS; i++) {
303 		grump = check(f2, subs[i], should[i]);
304 		if (grump != NULL) {
305 			fprintf(stderr, "%d: %s $%d %s\n", line,
306 							type, i, grump);
307 			status = 1;
308 			err = 1;
309 		}
310 	}
311 
312 	regfree(&re);
313 }
314 
315 /*
316  - options - pick options out of a regression-test string
317  == int options(int type, char *s);
318  */
319 int
320 options(type, s)
321 int type;			/* 'c' compile, 'e' exec */
322 char *s;
323 {
324 	register char *p;
325 	register int o = (type == 'c') ? copts : eopts;
326 	register char *legal = (type == 'c') ? "bisnmp" : "^$#tl";
327 
328 	for (p = s; *p != '\0'; p++)
329 		if (strchr(legal, *p) != NULL)
330 			switch (*p) {
331 			case 'b':
332 				o &= ~REG_EXTENDED;
333 				break;
334 			case 'i':
335 				o |= REG_ICASE;
336 				break;
337 			case 's':
338 				o |= REG_NOSUB;
339 				break;
340 			case 'n':
341 				o |= REG_NEWLINE;
342 				break;
343 			case 'm':
344 				o &= ~REG_EXTENDED;
345 				o |= REG_NOSPEC;
346 				break;
347 			case 'p':
348 				o |= REG_PEND;
349 				break;
350 			case '^':
351 				o |= REG_NOTBOL;
352 				break;
353 			case '$':
354 				o |= REG_NOTEOL;
355 				break;
356 			case '#':
357 				o |= REG_STARTEND;
358 				break;
359 			case 't':	/* trace */
360 				o |= REG_TRACE;
361 				break;
362 			case 'l':	/* force long representation */
363 				o |= REG_LARGE;
364 				break;
365 			case 'r':	/* force backref use */
366 				o |= REG_BACKR;
367 				break;
368 			}
369 	return(o);
370 }
371 
/*
 - opt - is a particular option in a regression string?
 == int opt(int c, char *s);
 *
 *   c  the flag letter to look for
 *   s  the flag string to search; must not be NULL
 *
 * Returns 1 if `c' occurs in `s', otherwise 0.
 */
int				/* predicate */
opt(int c, char *s)
{
	return(strchr(s, c) != NULL);
}
383 
/*
 - fixstr - transform magic characters in strings
 == void fixstr(register char *p);
 *
 * Rewrites `p' in place: 'N' becomes a newline, 'T' a tab, 'S' a space
 * and 'Z' a NUL byte.  A NULL `p' is accepted and ignored.
 *
 * The scan stops at the first NUL that was already in the string; a NUL
 * produced from 'Z' does not stop it, so characters after a 'Z' are
 * still translated.
 */
void
fixstr(char *p)
{
	if (p == NULL)
		return;

	for (; *p != '\0'; p++) {
		switch (*p) {
		case 'N':
			*p = '\n';
			break;
		case 'T':
			*p = '\t';
			break;
		case 'S':
			*p = ' ';
			break;
		case 'Z':
			/* the loop test runs after p++, so scanning goes on */
			*p = '\0';
			break;
		default:
			break;
		}
	}
}
405 
/*
 - check - check a substring match
 == char *check(char *str, regmatch_t sub, char *should);
 *
 * Compares one reported match against what the test expects.
 *
 *   str     the subject string the offsets in `sub' refer to
 *   sub     the match as filled in by regexec()
 *   should  the expected text; NULL or "-" means "must not match";
 *           "@xyz" means "empty match located just before xyz", and a
 *           bare "@" means "empty match at the end of the string"
 *
 * Returns NULL when everything agrees, otherwise a complaint.  The
 * complaint may live in a static buffer that the next call overwrites.
 */
char *				/* NULL or complaint */
check(char *str, regmatch_t sub, char *should)
{
	int len;
	int shlen;
	char *p;
	static char grump[500];
	char *at = NULL;

	if (should != NULL && strcmp(should, "-") == 0)
		should = NULL;
	if (should != NULL && should[0] == '@') {
		at = should + 1;
		should = "";
	}

	/* check rm_so and rm_eo for consistency */
	if (sub.rm_so > sub.rm_eo || (sub.rm_so == -1 && sub.rm_eo != -1) ||
				(sub.rm_so != -1 && sub.rm_eo == -1) ||
				(sub.rm_so != -1 && sub.rm_so < 0) ||
				(sub.rm_eo != -1 && sub.rm_eo < 0) ) {
		snprintf(grump, sizeof grump,
		    "start %ld end %ld", (long)sub.rm_so,
		    (long)sub.rm_eo);
		return(grump);
	}

	/* check for no match */
	if (sub.rm_so == -1 && should == NULL)
		return(NULL);
	if (sub.rm_so == -1)
		return("did not match");

	/* check for in range; rm_eo is known to be non-negative here */
	if ((size_t)sub.rm_eo > strlen(str)) {
		snprintf(grump, sizeof grump,
			"start %ld end %ld, past end of string",
			(long)sub.rm_so, (long)sub.rm_eo);
		return(grump);
	}

	len = (int)(sub.rm_eo - sub.rm_so);
	p = str + sub.rm_so;

	/* check for not supposed to match */
	if (should == NULL) {
		snprintf(grump, sizeof grump, "matched `%.*s'", len, p);
		return(grump);
	}

	/* check for wrong match */
	shlen = (int)strlen(should);
	if (len != shlen || strncmp(p, should, (size_t)shlen) != 0) {
		snprintf(grump, sizeof grump, "matched `%.*s' instead", len, p);
		return(grump);
	}
	if (shlen > 0)
		return(NULL);

	/* check null match in right place */
	if (at == NULL)
		return(NULL);
	shlen = (int)strlen(at);
	if (shlen == 0)
		shlen = 1;	/* force check for end-of-string */
	if (strncmp(p, at, (size_t)shlen) != 0) {
		snprintf(grump, sizeof grump, "matched null at `%.20s'", p);
		return(grump);
	}
	return(NULL);
}
484 
485 /*
486  - eprint - convert error number to name
487  == static char *eprint(int err);
488  */
489 static char *
490 eprint(err)
491 int err;
492 {
493 	static char epbuf[100];
494 	size_t len;
495 
496 	len = regerror(REG_ITOA|err, (regex_t *)NULL, epbuf, sizeof(epbuf));
497 	assert(len <= sizeof(epbuf));
498 	return(epbuf);
499 }
500 
501 /*
502  - efind - convert error name to number
503  == static int efind(char *name);
504  */
505 static int
506 efind(name)
507 char *name;
508 {
509 	static char efbuf[100];
510 	regex_t re;
511 
512 	snprintf(efbuf, sizeof efbuf, "REG_%s", name);
513 	assert(strlen(efbuf) < sizeof(efbuf));
514 	re.re_endp = efbuf;
515 	(void) regerror(REG_ATOI, &re, efbuf, sizeof(efbuf));
516 	return(atoi(efbuf));
517 }
518