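/* main.c
 * Test driver for the regex library: try a single RE given on the command
 * line, or run the regression test read from standard input.
 */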
4 #include <sys/types.h>
5 #include <stdlib.h> // for atoi, ...
6 #include <stdio.h>
7 #include <string.h>
8 #include <assert.h>
9 /* local headers */
10 #include "regex.h"
11 #ifdef _MSC_VER // local getopt source
12 #include "getopt\getopt.h"
13 #endif
14
15 /* #include "main.ih" // why generate this? */
16 /* forward refs */
17 void regress(FILE *in);
18 void try(char *f0, char *f1, char *f2, char *f3, char *f4, int opts);
19 int options(int type, char *s);
20 int opt(int c, char *s);
21 void fixstr(register char *p);
22 char *check(char *str, regmatch_t sub, char *should);
23 static char *eprint(int err);
24 static int efind(char *name);
25
26 char *progname;
27 int debug = 0;
28 int line = 0;
29 int status = 0;
30
31 int copts = REG_EXTENDED;
32 int eopts = 0;
33 regoff_t startoff = 0;
34 regoff_t endoff = 0;
35
36
37 extern int split();
38 extern void regprint();
39
40 /*
41 - main - do the simple case, hand off to regress() for regression
42 */
main(argc,argv)43 main(argc, argv)
44 int argc;
45 char *argv[];
46 {
47 regex_t re;
48 # define NS 10
49 regmatch_t subs[NS];
50 char erbuf[100];
51 int err;
52 size_t len;
53 int c;
54 int errflg = 0;
55 register int i;
56 extern int optind;
57 extern char *optarg;
58
59 progname = argv[0];
60
61 while ((c = getopt(argc, argv, "c:e:S:E:x")) != EOF)
62 switch (c) {
63 case 'c': /* compile options */
64 copts = options('c', optarg);
65 break;
66 case 'e': /* execute options */
67 eopts = options('e', optarg);
68 break;
69 case 'S': /* start offset */
70 startoff = (regoff_t)atoi(optarg);
71 break;
72 case 'E': /* end offset */
73 endoff = (regoff_t)atoi(optarg);
74 break;
75 case 'x': /* Debugging. */
76 debug++;
77 break;
78 case '?':
79 default:
80 errflg++;
81 break;
82 }
83 if (errflg) {
84 fprintf(stderr, "usage: %s ", progname);
85 fprintf(stderr, "[-c copt][-C][-d] [re]\n");
86 exit(2);
87 }
88
89 if (optind >= argc) {
90 regress(stdin);
91 exit(status);
92 }
93
94 err = regcomp(&re, argv[optind++], copts);
95 if (err) {
96 len = regerror(err, &re, erbuf, sizeof(erbuf));
97 fprintf(stderr, "error %s, %d/%d `%s'\n",
98 eprint(err), (int)len, (int)sizeof(erbuf), erbuf);
99 exit(status);
100 }
101 regprint(&re, stdout);
102
103 if (optind >= argc) {
104 regfree(&re);
105 exit(status);
106 }
107
108 if (eopts®_STARTEND) {
109 subs[0].rm_so = startoff;
110 subs[0].rm_eo = strlen(argv[optind]) - endoff;
111 }
112 err = regexec(&re, argv[optind], (size_t)NS, subs, eopts);
113 if (err) {
114 len = regerror(err, &re, erbuf, sizeof(erbuf));
115 fprintf(stderr, "error %s, %d/%d `%s'\n",
116 eprint(err), (int)len, (int)sizeof(erbuf), erbuf);
117 exit(status);
118 }
119 if (!(copts®_NOSUB)) {
120 len = (int)(subs[0].rm_eo - subs[0].rm_so);
121 if (subs[0].rm_so != -1) {
122 if (len != 0)
123 printf("match `%.*s'\n", (int)len,
124 argv[optind] + subs[0].rm_so);
125 else
126 printf("match `'@%.1s\n",
127 argv[optind] + subs[0].rm_so);
128 }
129 for (i = 1; i < NS; i++)
130 if (subs[i].rm_so != -1)
131 printf("(%d) `%.*s'\n", i,
132 (int)(subs[i].rm_eo - subs[i].rm_so),
133 argv[optind] + subs[i].rm_so);
134 }
135 exit(status);
136 }
137
138 /*
139 - regress - main loop of regression test
140 == void regress(FILE *in);
141 */
142 void
regress(in)143 regress(in)
144 FILE *in;
145 {
146 char inbuf[1000];
147 # define MAXF 10
148 char *f[MAXF];
149 int nf;
150 int i;
151 char erbuf[100];
152 size_t ne;
153 char *badpat = "invalid regular expression";
154 # define SHORT 10
155 char *bpname = "REG_BADPAT";
156 regex_t re;
157
158 while (fgets(inbuf, sizeof(inbuf), in) != NULL) {
159 line++;
160 if (inbuf[0] == '#' || inbuf[0] == '\n')
161 continue; /* NOTE CONTINUE */
162 inbuf[strlen(inbuf)-1] = '\0'; /* get rid of stupid \n */
163 if (debug)
164 fprintf(stdout, "%d:\n", line);
165 nf = split(inbuf, f, MAXF, "\t\t");
166 if (nf < 3) {
167 fprintf(stderr, "bad input, line %d\n", line);
168 exit(1);
169 }
170 for (i = 0; i < nf; i++)
171 if (strcmp(f[i], "\"\"") == 0)
172 f[i] = "";
173 if (nf <= 3)
174 f[3] = NULL;
175 if (nf <= 4)
176 f[4] = NULL;
177 try(f[0], f[1], f[2], f[3], f[4], options('c', f[1]));
178 if (opt('&', f[1])) /* try with either type of RE */
179 try(f[0], f[1], f[2], f[3], f[4],
180 options('c', f[1]) &~ REG_EXTENDED);
181 }
182
183 ne = regerror(REG_BADPAT, (regex_t *)NULL, erbuf, sizeof(erbuf));
184 if (strcmp(erbuf, badpat) != 0 || ne != strlen(badpat)+1) {
185 fprintf(stderr, "end: regerror() test gave `%s' not `%s'\n",
186 erbuf, badpat);
187 status = 1;
188 }
189 ne = regerror(REG_BADPAT, (regex_t *)NULL, erbuf, (size_t)SHORT);
190 if (strncmp(erbuf, badpat, SHORT-1) != 0 || erbuf[SHORT-1] != '\0' ||
191 ne != strlen(badpat)+1) {
192 fprintf(stderr, "end: regerror() short test gave `%s' not `%.*s'\n",
193 erbuf, SHORT-1, badpat);
194 status = 1;
195 }
196 ne = regerror(REG_ITOA|REG_BADPAT, (regex_t *)NULL, erbuf, sizeof(erbuf));
197 if (strcmp(erbuf, bpname) != 0 || ne != strlen(bpname)+1) {
198 fprintf(stderr, "end: regerror() ITOA test gave `%s' not `%s'\n",
199 erbuf, bpname);
200 status = 1;
201 }
202 re.re_endp = bpname;
203 ne = regerror(REG_ATOI, &re, erbuf, sizeof(erbuf));
204 if (atoi(erbuf) != (int)REG_BADPAT) {
205 fprintf(stderr, "end: regerror() ATOI test gave `%s' not `%ld'\n",
206 erbuf, (long)REG_BADPAT);
207 status = 1;
208 } else if (ne != strlen(erbuf)+1) {
209 fprintf(stderr, "end: regerror() ATOI test len(`%s') = %ld\n",
210 erbuf, (long)REG_BADPAT);
211 status = 1;
212 }
213 }
214
215 /*
216 - try - try it, and report on problems
217 == void try(char *f0, char *f1, char *f2, char *f3, char *f4, int opts);
218 */
219 void
try(f0,f1,f2,f3,f4,opts)220 try(f0, f1, f2, f3, f4, opts)
221 char *f0;
222 char *f1;
223 char *f2;
224 char *f3;
225 char *f4;
226 int opts; /* may not match f1 */
227 {
228 regex_t re;
229 # define NSUBS 10
230 regmatch_t subs[NSUBS];
231 # define NSHOULD 15
232 char *should[NSHOULD];
233 int nshould;
234 char erbuf[100];
235 int err;
236 int len;
237 char *type = (opts & REG_EXTENDED) ? "ERE" : "BRE";
238 register int i;
239 char *grump;
240 char f0copy[1000];
241 char f2copy[1000];
242
243 strcpy(f0copy, f0);
244 re.re_endp = (opts®_PEND) ? f0copy + strlen(f0copy) : NULL;
245 fixstr(f0copy);
246 err = regcomp(&re, f0copy, opts);
247 if (err != 0 && (!opt('C', f1) || err != efind(f2))) {
248 /* unexpected error or wrong error */
249 len = regerror(err, &re, erbuf, sizeof(erbuf));
250 fprintf(stderr, "%d: %s error %s, %d/%d `%s'\n",
251 line, type, eprint(err), (int)len,
252 (int)sizeof(erbuf), erbuf);
253 status = 1;
254 } else if (err == 0 && opt('C', f1)) {
255 /* unexpected success */
256 fprintf(stderr, "%d: %s should have given REG_%s\n",
257 line, type, f2);
258 status = 1;
259 err = 1; /* so we won't try regexec */
260 }
261
262 if (err != 0) {
263 regfree(&re);
264 return;
265 }
266
267 strcpy(f2copy, f2);
268 fixstr(f2copy);
269
270 if (options('e', f1)®_STARTEND) {
271 if (strchr(f2, '(') == NULL || strchr(f2, ')') == NULL)
272 fprintf(stderr, "%d: bad STARTEND syntax\n", line);
273 subs[0].rm_so = strchr(f2, '(') - f2 + 1;
274 subs[0].rm_eo = strchr(f2, ')') - f2;
275 }
276 err = regexec(&re, f2copy, NSUBS, subs, options('e', f1));
277
278 if (err != 0 && (f3 != NULL || err != REG_NOMATCH)) {
279 /* unexpected error or wrong error */
280 len = regerror(err, &re, erbuf, sizeof(erbuf));
281 fprintf(stderr, "%d: %s exec error %s, %d/%d `%s'\n",
282 line, type, eprint(err), (int)len,
283 (int)sizeof(erbuf), erbuf);
284 status = 1;
285 } else if (err != 0) {
286 /* nothing more to check */
287 } else if (f3 == NULL) {
288 /* unexpected success */
289 fprintf(stderr, "%d: %s exec should have failed\n",
290 line, type);
291 status = 1;
292 err = 1; /* just on principle */
293 } else if (opts®_NOSUB) {
294 /* nothing more to check */
295 } else if ((grump = check(f2, subs[0], f3)) != NULL) {
296 fprintf(stderr, "%d: %s %s\n", line, type, grump);
297 status = 1;
298 err = 1;
299 }
300
301 if (err != 0 || f4 == NULL) {
302 regfree(&re);
303 return;
304 }
305
306 for (i = 1; i < NSHOULD; i++)
307 should[i] = NULL;
308 nshould = split(f4, should+1, NSHOULD-1, ",");
309 if (nshould == 0) {
310 nshould = 1;
311 should[1] = "";
312 }
313 for (i = 1; i < NSUBS; i++) {
314 grump = check(f2, subs[i], should[i]);
315 if (grump != NULL) {
316 fprintf(stderr, "%d: %s $%d %s\n", line,
317 type, i, grump);
318 status = 1;
319 err = 1;
320 }
321 }
322
323 regfree(&re);
324 }
325
326 /*
327 - options - pick options out of a regression-test string
328 == int options(int type, char *s);
329 */
330 int
options(type,s)331 options(type, s)
332 int type; /* 'c' compile, 'e' exec */
333 char *s;
334 {
335 register char *p;
336 register int o = (type == 'c') ? copts : eopts;
337 register char *legal = (type == 'c') ? "bisnmp" : "^$#tl";
338
339 for (p = s; *p != '\0'; p++)
340 if (strchr(legal, *p) != NULL)
341 switch (*p) {
342 case 'b':
343 o &= ~REG_EXTENDED;
344 break;
345 case 'i':
346 o |= REG_ICASE;
347 break;
348 case 's':
349 o |= REG_NOSUB;
350 break;
351 case 'n':
352 o |= REG_NEWLINE;
353 break;
354 case 'm':
355 o &= ~REG_EXTENDED;
356 o |= REG_NOSPEC;
357 break;
358 case 'p':
359 o |= REG_PEND;
360 break;
361 case '^':
362 o |= REG_NOTBOL;
363 break;
364 case '$':
365 o |= REG_NOTEOL;
366 break;
367 case '#':
368 o |= REG_STARTEND;
369 break;
370 case 't': /* trace */
371 o |= REG_TRACE;
372 break;
373 case 'l': /* force long representation */
374 o |= REG_LARGE;
375 break;
376 case 'r': /* force backref use */
377 o |= REG_BACKR;
378 break;
379 }
380 return(o);
381 }
382
/*
 - opt - is a particular option in a regression string?
 == int opt(int c, char *s);
 *
 * Returns 1 if the character `c' occurs in `s', 0 if it does not.
 */
int				/* predicate */
opt(int c, char *s)
{
	const char *hit = strchr(s, c);

	if (hit == NULL)
		return(0);
	return(1);
}
394
/*
 - fixstr - transform magic characters in strings
 == void fixstr(register char *p);
 *
 * Rewrites the string in place:  'N' becomes newline, 'T' tab, 'S' space
 * and 'Z' a NUL byte.  The scan runs on past a NUL it has just planted
 * and stops only at the original terminator.  A NULL `p' is ignored.
 */
void
fixstr(register char *p)
{
	if (p == NULL)
		return;

	while (*p != '\0') {
		switch (*p) {
		case 'N':
			*p = '\n';
			break;
		case 'T':
			*p = '\t';
			break;
		case 'S':
			*p = ' ';
			break;
		case 'Z':
			*p = '\0';
			break;
		default:
			break;
		}
		/* step first, test after:  a fresh NUL does not end the scan */
		p++;
	}
}
416
/*
 - check - check a substring match
 == char *check(char *str, regmatch_t sub, char *should);
 *
 * str		the string that was searched
 * sub		the offsets reported for one (sub)match
 * should	the text that should have matched.  NULL or "-" means no
 *		match is expected; "@xyz" means an empty match is expected
 *		at a point where the string continues with "xyz" (a bare
 *		"@" means at the end of the string).
 *
 * Returns NULL if all is well, else a complaint.  The complaint is either
 * a string constant or a static buffer that the next call overwrites.
 */
char *				/* NULL or complaint */
check(char *str, regmatch_t sub, char *should)
{
	int len;
	int shlen;
	char *p;
	static char grump[500];
	char *at = NULL;

	if (should != NULL && strcmp(should, "-") == 0)
		should = NULL;
	if (should != NULL && should[0] == '@') {
		at = should + 1;
		should = "";
	}

	/* check rm_so and rm_eo for consistency */
	if (sub.rm_so > sub.rm_eo || (sub.rm_so == -1 && sub.rm_eo != -1) ||
	    (sub.rm_so != -1 && sub.rm_eo == -1) ||
	    (sub.rm_so != -1 && sub.rm_so < 0) ||
	    (sub.rm_eo != -1 && sub.rm_eo < 0)) {
		snprintf(grump, sizeof(grump), "start %ld end %ld",
		    (long)sub.rm_so, (long)sub.rm_eo);
		return(grump);
	}

	/* check for no match */
	if (sub.rm_so == -1 && should == NULL)
		return(NULL);
	if (sub.rm_so == -1)
		return("did not match");

	/* check for in range; rm_eo is known to be non-negative by now */
	if ((size_t)sub.rm_eo > strlen(str)) {
		snprintf(grump, sizeof(grump),
		    "start %ld end %ld, past end of string",
		    (long)sub.rm_so, (long)sub.rm_eo);
		return(grump);
	}

	len = (int)(sub.rm_eo - sub.rm_so);
	p = str + sub.rm_so;

	/*
	 * check for not supposed to match -- this must come before
	 * anything looks at the contents of `should'
	 */
	if (should == NULL) {
		snprintf(grump, sizeof(grump), "matched `%.*s'", len, p);
		return(grump);
	}
	shlen = (int)strlen(should);

	/* check for wrong match */
	if (len != shlen || strncmp(p, should, (size_t)shlen) != 0) {
		snprintf(grump, sizeof(grump), "matched `%.*s' instead",
		    len, p);
		return(grump);
	}
	if (shlen > 0)
		return(NULL);

	/* check null match in right place */
	if (at == NULL)
		return(NULL);
	shlen = (int)strlen(at);
	if (shlen == 0)
		shlen = 1;		/* force check for end-of-string */
	if (strncmp(p, at, (size_t)shlen) != 0) {
		snprintf(grump, sizeof(grump), "matched null at `%.20s'", p);
		return(grump);
	}
	return(NULL);
}
493
494 /*
495 - eprint - convert error number to name
496 == static char *eprint(int err);
497 */
498 static char *
eprint(err)499 eprint(err)
500 int err;
501 {
502 static char epbuf[100];
503 size_t len;
504
505 len = regerror(REG_ITOA|err, (regex_t *)NULL, epbuf, sizeof(epbuf));
506 assert(len <= sizeof(epbuf));
507 return(epbuf);
508 }
509
510 /*
511 - efind - convert error name to number
512 == static int efind(char *name);
513 */
514 static int
efind(name)515 efind(name)
516 char *name;
517 {
518 static char efbuf[100];
519 // size_t n;
520 regex_t re;
521
522 sprintf(efbuf, "REG_%s", name);
523 assert(strlen(efbuf) < sizeof(efbuf));
524 re.re_endp = efbuf;
525 (void) regerror(REG_ATOI, &re, efbuf, sizeof(efbuf));
526 return(atoi(efbuf));
527 }
528