1 /* main.c
2   re test source
3 */
#include <sys/types.h>
#include <stdlib.h> // for atoi, ...
#include <stdio.h>
#include <string.h>
#include <assert.h>
/* local headers */
#include "regex.h"
#ifdef _MSC_VER // local getopt source
#include "getopt\getopt.h"
#else
#include <unistd.h> // for getopt, optind, optarg
#endif
14 
15 /* #include "main.ih" // why generate this? */
/* forward refs */
void regress(FILE *in);
void try(char *f0, char *f1, char *f2, char *f3, char *f4, int opts);
int options(int type, char *s);
int opt(int c, char *s);
void fixstr(register char *p);
char *check(char *str, regmatch_t sub, char *should);
static char *eprint(int err);
static int efind(char *name);

/* argv[0] as seen by main(); printed in the usage message */
char *progname;
/* incremented once per -x; when nonzero regress() echoes each line number */
int debug = 0;
/* number of the regression-script line being processed; used in diagnostics */
int line = 0;
/* set to 1 when a regression check fails; passed to exit() by main() */
int status = 0;

/* base regcomp() flags: the starting value in options('c', ...), updated by -c */
int copts = REG_EXTENDED;
/* base regexec() flags: the starting value in options('e', ...), updated by -e */
int eopts = 0;
/* -S value: main() stores it in subs[0].rm_so when REG_STARTEND is set */
regoff_t startoff = 0;
/* -E value: main() subtracts it from the string length to get subs[0].rm_eo */
regoff_t endoff = 0;


/*
 * Defined in other source files and declared here without prototypes.
 * split() is called by regress() and try() to break a string into fields;
 * regprint() is called by main() on the compiled RE.
 */
extern int split();
extern void regprint();
39 
40 /*
41  - main - do the simple case, hand off to regress() for regression
42  */
main(argc,argv)43 main(argc, argv)
44 int argc;
45 char *argv[];
46 {
47 	regex_t re;
48 #	define	NS	10
49 	regmatch_t subs[NS];
50 	char erbuf[100];
51 	int err;
52 	size_t len;
53 	int c;
54 	int errflg = 0;
55 	register int i;
56 	extern int optind;
57 	extern char *optarg;
58 
59 	progname = argv[0];
60 
61 	while ((c = getopt(argc, argv, "c:e:S:E:x")) != EOF)
62 		switch (c) {
63 		case 'c':	/* compile options */
64 			copts = options('c', optarg);
65 			break;
66 		case 'e':	/* execute options */
67 			eopts = options('e', optarg);
68 			break;
69 		case 'S':	/* start offset */
70 			startoff = (regoff_t)atoi(optarg);
71 			break;
72 		case 'E':	/* end offset */
73 			endoff = (regoff_t)atoi(optarg);
74 			break;
75 		case 'x':	/* Debugging. */
76 			debug++;
77 			break;
78 		case '?':
79 		default:
80 			errflg++;
81 			break;
82 		}
83 	if (errflg) {
84 		fprintf(stderr, "usage: %s ", progname);
85 		fprintf(stderr, "[-c copt][-C][-d] [re]\n");
86 		exit(2);
87 	}
88 
89 	if (optind >= argc) {
90 		regress(stdin);
91 		exit(status);
92 	}
93 
94 	err = regcomp(&re, argv[optind++], copts);
95 	if (err) {
96 		len = regerror(err, &re, erbuf, sizeof(erbuf));
97 		fprintf(stderr, "error %s, %d/%d `%s'\n",
98 			eprint(err), (int)len, (int)sizeof(erbuf), erbuf);
99 		exit(status);
100 	}
101 	regprint(&re, stdout);
102 
103 	if (optind >= argc) {
104 		regfree(&re);
105 		exit(status);
106 	}
107 
108 	if (eopts&REG_STARTEND) {
109 		subs[0].rm_so = startoff;
110 		subs[0].rm_eo = strlen(argv[optind]) - endoff;
111 	}
112 	err = regexec(&re, argv[optind], (size_t)NS, subs, eopts);
113 	if (err) {
114 		len = regerror(err, &re, erbuf, sizeof(erbuf));
115 		fprintf(stderr, "error %s, %d/%d `%s'\n",
116 			eprint(err), (int)len, (int)sizeof(erbuf), erbuf);
117 		exit(status);
118 	}
119 	if (!(copts&REG_NOSUB)) {
120 		len = (int)(subs[0].rm_eo - subs[0].rm_so);
121 		if (subs[0].rm_so != -1) {
122 			if (len != 0)
123 				printf("match `%.*s'\n", (int)len,
124 					argv[optind] + subs[0].rm_so);
125 			else
126 				printf("match `'@%.1s\n",
127 					argv[optind] + subs[0].rm_so);
128 		}
129 		for (i = 1; i < NS; i++)
130 			if (subs[i].rm_so != -1)
131 				printf("(%d) `%.*s'\n", i,
132 					(int)(subs[i].rm_eo - subs[i].rm_so),
133 					argv[optind] + subs[i].rm_so);
134 	}
135 	exit(status);
136 }
137 
138 /*
139  - regress - main loop of regression test
140  == void regress(FILE *in);
141  */
142 void
regress(in)143 regress(in)
144 FILE *in;
145 {
146 	char inbuf[1000];
147 #	define	MAXF	10
148 	char *f[MAXF];
149 	int nf;
150 	int i;
151 	char erbuf[100];
152 	size_t ne;
153 	char *badpat = "invalid regular expression";
154 #	define	SHORT	10
155 	char *bpname = "REG_BADPAT";
156 	regex_t re;
157 
158 	while (fgets(inbuf, sizeof(inbuf), in) != NULL) {
159 		line++;
160 		if (inbuf[0] == '#' || inbuf[0] == '\n')
161 			continue;			/* NOTE CONTINUE */
162 		inbuf[strlen(inbuf)-1] = '\0';	/* get rid of stupid \n */
163 		if (debug)
164 			fprintf(stdout, "%d:\n", line);
165 		nf = split(inbuf, f, MAXF, "\t\t");
166 		if (nf < 3) {
167 			fprintf(stderr, "bad input, line %d\n", line);
168 			exit(1);
169 		}
170 		for (i = 0; i < nf; i++)
171 			if (strcmp(f[i], "\"\"") == 0)
172 				f[i] = "";
173 		if (nf <= 3)
174 			f[3] = NULL;
175 		if (nf <= 4)
176 			f[4] = NULL;
177 		try(f[0], f[1], f[2], f[3], f[4], options('c', f[1]));
178 		if (opt('&', f[1]))	/* try with either type of RE */
179 			try(f[0], f[1], f[2], f[3], f[4],
180 					options('c', f[1]) &~ REG_EXTENDED);
181 	}
182 
183 	ne = regerror(REG_BADPAT, (regex_t *)NULL, erbuf, sizeof(erbuf));
184 	if (strcmp(erbuf, badpat) != 0 || ne != strlen(badpat)+1) {
185 		fprintf(stderr, "end: regerror() test gave `%s' not `%s'\n",
186 							erbuf, badpat);
187 		status = 1;
188 	}
189 	ne = regerror(REG_BADPAT, (regex_t *)NULL, erbuf, (size_t)SHORT);
190 	if (strncmp(erbuf, badpat, SHORT-1) != 0 || erbuf[SHORT-1] != '\0' ||
191 						ne != strlen(badpat)+1) {
192 		fprintf(stderr, "end: regerror() short test gave `%s' not `%.*s'\n",
193 						erbuf, SHORT-1, badpat);
194 		status = 1;
195 	}
196 	ne = regerror(REG_ITOA|REG_BADPAT, (regex_t *)NULL, erbuf, sizeof(erbuf));
197 	if (strcmp(erbuf, bpname) != 0 || ne != strlen(bpname)+1) {
198 		fprintf(stderr, "end: regerror() ITOA test gave `%s' not `%s'\n",
199 						erbuf, bpname);
200 		status = 1;
201 	}
202 	re.re_endp = bpname;
203 	ne = regerror(REG_ATOI, &re, erbuf, sizeof(erbuf));
204 	if (atoi(erbuf) != (int)REG_BADPAT) {
205 		fprintf(stderr, "end: regerror() ATOI test gave `%s' not `%ld'\n",
206 						erbuf, (long)REG_BADPAT);
207 		status = 1;
208 	} else if (ne != strlen(erbuf)+1) {
209 		fprintf(stderr, "end: regerror() ATOI test len(`%s') = %ld\n",
210 						erbuf, (long)REG_BADPAT);
211 		status = 1;
212 	}
213 }
214 
215 /*
216  - try - try it, and report on problems
217  == void try(char *f0, char *f1, char *f2, char *f3, char *f4, int opts);
218  */
219 void
try(f0,f1,f2,f3,f4,opts)220 try(f0, f1, f2, f3, f4, opts)
221 char *f0;
222 char *f1;
223 char *f2;
224 char *f3;
225 char *f4;
226 int opts;			/* may not match f1 */
227 {
228 	regex_t re;
229 #	define	NSUBS	10
230 	regmatch_t subs[NSUBS];
231 #	define	NSHOULD	15
232 	char *should[NSHOULD];
233 	int nshould;
234 	char erbuf[100];
235 	int err;
236 	int len;
237 	char *type = (opts & REG_EXTENDED) ? "ERE" : "BRE";
238 	register int i;
239 	char *grump;
240 	char f0copy[1000];
241 	char f2copy[1000];
242 
243 	strcpy(f0copy, f0);
244 	re.re_endp = (opts&REG_PEND) ? f0copy + strlen(f0copy) : NULL;
245 	fixstr(f0copy);
246 	err = regcomp(&re, f0copy, opts);
247 	if (err != 0 && (!opt('C', f1) || err != efind(f2))) {
248 		/* unexpected error or wrong error */
249 		len = regerror(err, &re, erbuf, sizeof(erbuf));
250 		fprintf(stderr, "%d: %s error %s, %d/%d `%s'\n",
251 					line, type, eprint(err), (int)len,
252 					(int)sizeof(erbuf), erbuf);
253 		status = 1;
254 	} else if (err == 0 && opt('C', f1)) {
255 		/* unexpected success */
256 		fprintf(stderr, "%d: %s should have given REG_%s\n",
257 						line, type, f2);
258 		status = 1;
259 		err = 1;	/* so we won't try regexec */
260 	}
261 
262 	if (err != 0) {
263 		regfree(&re);
264 		return;
265 	}
266 
267 	strcpy(f2copy, f2);
268 	fixstr(f2copy);
269 
270 	if (options('e', f1)&REG_STARTEND) {
271 		if (strchr(f2, '(') == NULL || strchr(f2, ')') == NULL)
272 			fprintf(stderr, "%d: bad STARTEND syntax\n", line);
273 		subs[0].rm_so = strchr(f2, '(') - f2 + 1;
274 		subs[0].rm_eo = strchr(f2, ')') - f2;
275 	}
276 	err = regexec(&re, f2copy, NSUBS, subs, options('e', f1));
277 
278 	if (err != 0 && (f3 != NULL || err != REG_NOMATCH)) {
279 		/* unexpected error or wrong error */
280 		len = regerror(err, &re, erbuf, sizeof(erbuf));
281 		fprintf(stderr, "%d: %s exec error %s, %d/%d `%s'\n",
282 					line, type, eprint(err), (int)len,
283 					(int)sizeof(erbuf), erbuf);
284 		status = 1;
285 	} else if (err != 0) {
286 		/* nothing more to check */
287 	} else if (f3 == NULL) {
288 		/* unexpected success */
289 		fprintf(stderr, "%d: %s exec should have failed\n",
290 						line, type);
291 		status = 1;
292 		err = 1;		/* just on principle */
293 	} else if (opts&REG_NOSUB) {
294 		/* nothing more to check */
295 	} else if ((grump = check(f2, subs[0], f3)) != NULL) {
296 		fprintf(stderr, "%d: %s %s\n", line, type, grump);
297 		status = 1;
298 		err = 1;
299 	}
300 
301 	if (err != 0 || f4 == NULL) {
302 		regfree(&re);
303 		return;
304 	}
305 
306 	for (i = 1; i < NSHOULD; i++)
307 		should[i] = NULL;
308 	nshould = split(f4, should+1, NSHOULD-1, ",");
309 	if (nshould == 0) {
310 		nshould = 1;
311 		should[1] = "";
312 	}
313 	for (i = 1; i < NSUBS; i++) {
314 		grump = check(f2, subs[i], should[i]);
315 		if (grump != NULL) {
316 			fprintf(stderr, "%d: %s $%d %s\n", line,
317 							type, i, grump);
318 			status = 1;
319 			err = 1;
320 		}
321 	}
322 
323 	regfree(&re);
324 }
325 
326 /*
327  - options - pick options out of a regression-test string
328  == int options(int type, char *s);
329  */
330 int
options(type,s)331 options(type, s)
332 int type;			/* 'c' compile, 'e' exec */
333 char *s;
334 {
335 	register char *p;
336 	register int o = (type == 'c') ? copts : eopts;
337 	register char *legal = (type == 'c') ? "bisnmp" : "^$#tl";
338 
339 	for (p = s; *p != '\0'; p++)
340 		if (strchr(legal, *p) != NULL)
341 			switch (*p) {
342 			case 'b':
343 				o &= ~REG_EXTENDED;
344 				break;
345 			case 'i':
346 				o |= REG_ICASE;
347 				break;
348 			case 's':
349 				o |= REG_NOSUB;
350 				break;
351 			case 'n':
352 				o |= REG_NEWLINE;
353 				break;
354 			case 'm':
355 				o &= ~REG_EXTENDED;
356 				o |= REG_NOSPEC;
357 				break;
358 			case 'p':
359 				o |= REG_PEND;
360 				break;
361 			case '^':
362 				o |= REG_NOTBOL;
363 				break;
364 			case '$':
365 				o |= REG_NOTEOL;
366 				break;
367 			case '#':
368 				o |= REG_STARTEND;
369 				break;
370 			case 't':	/* trace */
371 				o |= REG_TRACE;
372 				break;
373 			case 'l':	/* force long representation */
374 				o |= REG_LARGE;
375 				break;
376 			case 'r':	/* force backref use */
377 				o |= REG_BACKR;
378 				break;
379 			}
380 	return(o);
381 }
382 
/*
 - opt - is a particular option in a regression string?
 == int opt(int c, char *s);
 *
 * Returns 1 when the character c occurs in s, 0 when it does not.
 */
int				/* predicate */
opt(int c, char *s)
{
	char *hit = strchr(s, c);

	if (hit == NULL)
		return(0);
	return(1);
}
394 
/*
 - fixstr - transform magic characters in strings
 == void fixstr(register char *p);
 *
 * Rewrites the string in place: N becomes newline, T becomes tab,
 * S becomes space and Z becomes a NUL byte.  The scan runs to the
 * original terminator, so characters after a Z are translated as well.
 * A NULL pointer is accepted and ignored.
 */
void
fixstr(register char *p)
{
	if (p == NULL)
		return;

	while (*p != '\0') {
		switch (*p) {
		case 'N':
			*p = '\n';
			break;
		case 'T':
			*p = '\t';
			break;
		case 'S':
			*p = ' ';
			break;
		case 'Z':
			*p = '\0';
			break;
		default:
			break;
		}
		p++;	/* step past the slot even if it now holds a NUL */
	}
}
416 
/*
 - check - check a substring match
 == char *check(char *str, regmatch_t sub, char *should);
 *
 * Compares one regmatch_t against its expectation.
 *
 *	str	the subject string the offsets in sub refer to
 *	sub	the reported match (rm_so/rm_eo; -1/-1 means no match)
 *	should	expected text; NULL or "-" means "must not match";
 *		"@text" means "empty match located right before text"
 *		(a bare "@" means an empty match at the end of the string)
 *
 * Returns NULL when the match is as expected, otherwise a complaint.
 * The complaint is either a string literal or a static buffer that the
 * next call overwrites.
 */
char *				/* NULL or complaint */
check(char *str, regmatch_t sub, char *should)
{
	int len;
	int shlen;
	char *p;
	static char grump[500];
	char *at = NULL;

	if (should != NULL && strcmp(should, "-") == 0)
		should = NULL;
	if (should != NULL && should[0] == '@') {
		at = should + 1;
		should = "";
	}

	/* check rm_so and rm_eo for consistency */
	if (sub.rm_so > sub.rm_eo || (sub.rm_so == -1 && sub.rm_eo != -1) ||
				(sub.rm_so != -1 && sub.rm_eo == -1) ||
				(sub.rm_so != -1 && sub.rm_so < 0) ||
				(sub.rm_eo != -1 && sub.rm_eo < 0) ) {
		snprintf(grump, sizeof(grump), "start %ld end %ld",
					(long)sub.rm_so, (long)sub.rm_eo);
		return(grump);
	}

	/* check for no match */
	if (sub.rm_so == -1 && should == NULL)
		return(NULL);
	if (sub.rm_so == -1)
		return("did not match");

	/* check for in range; rm_eo is known to be non-negative here */
	if ((size_t)sub.rm_eo > strlen(str)) {
		snprintf(grump, sizeof(grump),
					"start %ld end %ld, past end of string",
					(long)sub.rm_so, (long)sub.rm_eo);
		return(grump);
	}

	len = (int)(sub.rm_eo - sub.rm_so);
	p = str + sub.rm_so;

	/* check for not supposed to match */
	if (should == NULL) {
		snprintf(grump, sizeof(grump), "matched `%.*s'", len, p);
		return(grump);
	}

	/* should is known to be non-NULL from here on */
	shlen = (int)strlen(should);

	/* check for wrong match */
	if (len != shlen || strncmp(p, should, (size_t)shlen) != 0) {
		snprintf(grump, sizeof(grump), "matched `%.*s' instead",
								len, p);
		return(grump);
	}
	if (shlen > 0)
		return(NULL);

	/* check null match in right place */
	if (at == NULL)
		return(NULL);
	shlen = (int)strlen(at);
	if (shlen == 0)
		shlen = 1;	/* force check for end-of-string */
	if (strncmp(p, at, (size_t)shlen) != 0) {
		snprintf(grump, sizeof(grump), "matched null at `%.20s'", p);
		return(grump);
	}
	return(NULL);
}
493 
494 /*
495  - eprint - convert error number to name
496  == static char *eprint(int err);
497  */
498 static char *
eprint(err)499 eprint(err)
500 int err;
501 {
502 	static char epbuf[100];
503 	size_t len;
504 
505 	len = regerror(REG_ITOA|err, (regex_t *)NULL, epbuf, sizeof(epbuf));
506 	assert(len <= sizeof(epbuf));
507 	return(epbuf);
508 }
509 
510 /*
511  - efind - convert error name to number
512  == static int efind(char *name);
513  */
514 static int
efind(name)515 efind(name)
516 char *name;
517 {
518 	static char efbuf[100];
519 	// size_t n;
520 	regex_t re;
521 
522 	sprintf(efbuf, "REG_%s", name);
523 	assert(strlen(efbuf) < sizeof(efbuf));
524 	re.re_endp = efbuf;
525 	(void) regerror(REG_ATOI, &re, efbuf, sizeof(efbuf));
526 	return(atoi(efbuf));
527 }
528