1 /* ed style regular expressions */
2 /* Copyright (c) 1992, 1999, 2001, 2002 John E. Davis
3  *
4  * You may distribute under the terms of either the GNU General Public
5  * License or the Perl Artistic License.
6  */
7 
8 #include "slinclud.h"
9 
10 #include "slang.h"
11 #include "_slang.h"
12 #if SLANG_HAS_KANJI_SUPPORT
13 #include "slkanji.h"
14 #endif
15 
/* Bit-map helpers for the 32-byte (256-bit) character-class tables that a
 * RANGE opcode carries.  B is an array of unsigned char and N a character
 * code.  Arguments and expansions are fully parenthesized so the macros can
 * be used inside larger expressions and in unbraced if/else arms.  N is
 * evaluated twice, so it must not have side effects.
 */
#define SET_BIT(b, n) ((b)[(unsigned int) (n) >> 3] |= 1 << ((unsigned int) (n) % 8))
#define TEST_BIT(b, n) ((b)[(unsigned int) (n) >> 3] & (1 << ((unsigned int) (n) % 8)))

/* Element opcodes of the compiled pattern (low bits of an opcode byte). */
#define LITERAL 1
#define RANGE 2			       /* [...] */
#define ANY 3			       /* . */
#define BOL 4			       /* ^ */
#define EOL 5			       /* $ */
#define NTH_MATCH 6		       /* \1 \2 ... \9 */
#define OPAREN 7		       /* \( */
#define CPAREN 0x8		       /* \) */
#define ANY_DIGIT 0x9		       /* \d */
#define BOW	0xA		       /* \< */
#define EOW	0xB		       /* \> */
#if 0
#define NOT_LITERAL		0xC	       /* \~ */
#endif

/* Repetition flags; one of these may be OR-ed into an element opcode. */
#define STAR 0x80		       /* * */
#define LEAST_ONCE 0x40		       /* + */
#define MAYBE_ONCE 0x20		       /* ? */
#define MANY 0x10		       /* {n,m} */

/* The rest are additions.  The case-switch opcodes are built from the STAR
 * flag combined with the BOL/EOL codes.
 */
#define YES_CASE (STAR | BOL)
#define NO_CASE  (STAR | EOL)

/* Case folding helpers.  Both expand to a reference to a variable named
 * `cs' that must be in scope at the point of use: non-zero means
 * case-sensitive, in which case X is returned unchanged.
 */
#define UPPERCASE(x)  (cs ? (x) : UPPER_CASE(x))
#define LOWERCASE(x)  (cs ? (x) : LOWER_CASE(x))

/* Lookup table consulted by the \< and \> word-boundary tests. */
static unsigned char Word_Chars[256];
#define IS_WORD_CHAR(x) (Word_Chars[(unsigned int) (x)])
45 
46 #if 0
47 static int ctx->open_paren_number;
48 static char Closed_Paren_Matches[10];
49 
50 static SLRegexp_Type *This_Reg;
51 static unsigned char *This_Str;
52 #endif
53 
54 typedef struct
55 {
56    SLRegexp_Type *reg;
57    unsigned char *str;
58    unsigned int len;
59    char closed_paren_matches[10];
60    int open_paren_number;
61 }
62 Re_Context_Type;
63 
/* Match the back-reference \N at STR.
 *
 * ctx   match context; the text of group N is located through
 *       ctx->reg->beg_matches[N] (offset from ctx->str) and
 *       ctx->reg->end_matches[N] (length).
 * n     group number taken from the compiled pattern.
 * str   current position in the subject text.
 * estr  one past the end of the subject text.
 *
 * Returns the position just after the matched text, STR itself when the
 * group matched the empty string, or NULL when group N is out of range,
 * has not been closed yet, or its text does not occur at STR.
 */
static unsigned char *do_nth_match (Re_Context_Type *ctx, int n, unsigned char *str, unsigned char *estr)
{
   unsigned char *bpos;

   /* N is read from a pattern byte; never index outside the group tables. */
   if ((n < 0) || (n >= (int) sizeof (ctx->closed_paren_matches)))
     return NULL;

   if (ctx->closed_paren_matches[n] == 0)
     return NULL;

   bpos = ctx->reg->beg_matches[n] + ctx->str;
   n = ctx->reg->end_matches[n];       /* from here on N is the group length */
   if (n == 0) return str;
   if (n > (int) (estr - str)) return NULL;

   /* The subject is length-delimited and may contain NUL bytes, so compare
    * raw bytes rather than C strings.  The comparison is always exact:
    * case-insensitive back-references are not implemented.
    */
   if (0 != memcmp (str, bpos, (unsigned int) n)) return NULL;
   return str + n;
}
81 
82 /* returns pointer to the end of regexp or NULL */
regexp_looking_at(Re_Context_Type * ctx,register unsigned char * str,unsigned char * estr,unsigned char * buf,register int cs)83 static unsigned char *regexp_looking_at (Re_Context_Type *ctx, register unsigned char *str, unsigned char *estr, unsigned char *buf, register int cs)
84 {
85    register unsigned char p, p1;
86    unsigned char *save_str, *tmpstr;
87    int n, n0, n1;
88    int save_num_open;
89    char save_closed_matches[10];
90 
91    p = *buf++;
92 
93    while (p != 0)
94      {
95 	/* p1 = UPPERCASE(*buf); */
96 	/* if (str < estr) c = UPPERCASE(*str); */
97 
98 	switch((unsigned char) p)
99 	  {
100 	   case BOW:
101 	     if ((str != ctx->str)
102 		 && ((str >= estr)
103 		     || IS_WORD_CHAR(*(str - 1))
104 		     || (0 == IS_WORD_CHAR(*str)))) return NULL;
105 	     break;
106 
107 	   case EOW:
108 	     if ((str < estr)
109 		 && IS_WORD_CHAR (*str)) return NULL;
110 	     break;
111 
112 	   case YES_CASE: cs = 1; break;
113 	   case NO_CASE: cs = 0; break;
114 
115 	   case OPAREN:
116 	     ctx->open_paren_number++;
117 	     ctx->reg->beg_matches[ctx->open_paren_number] = (int) (str - ctx->str);
118 	     break;
119 	   case CPAREN:
120 	     n = ctx->open_paren_number;
121 	     while (n > 0)
122 	       {
123 		  if (ctx->closed_paren_matches[n] != 0)
124 		    {
125 		       n--;
126 		       continue;
127 		    }
128 		  ctx->closed_paren_matches[n] = 1;
129 		  ctx->reg->end_matches[n] = (unsigned int) (str - (ctx->str + ctx->reg->beg_matches[n]));
130 		  break;
131 	       }
132 	     break;
133 #ifdef NOT_LITERAL
134 	   case NOT_LITERAL:
135 #if SLANG_HAS_KANJI_SUPPORT
136 	     if ((str >= estr) || ((!iskanji(*buf) && *buf != UPPERCASE(*str)) ||
137 				   (iskanji(*buf) && (*buf == *str || *(buf+2) == *(str+1)))))
138 	       return (NULL);
139 	     if(iskanji(*str))
140 	       {
141 		  str++; buf++; buf++;
142 	       }
143 #else
144 	     if ((str >= estr) || (*buf == UPPERCASE(*str))) return (NULL);
145 #endif
146 	     str++; buf++;
147 	     break;
148 
149 	   case MAYBE_ONCE | NOT_LITERAL:
150 	     save_str = str;
151 #if SLANG_HAS_KANJI_SUPPORT
152 	     if ((str < estr) && ((!iskanji(*str) && *buf == UPPERCASE(*str)) ||
153 				  (iskanji(*str) && *buf == *str && *(buf+1) == *(str+1))))
154 	       {
155 		  if(iskanji(*str)) { str++; buf++; buf++; }
156 		  str++;
157 	       }
158 #else
159 	     if ((str < estr) && (*buf != UPPERCASE(*str))) str++;
160 #endif
161 	     buf++;
162 	     goto match_rest;
163 
164 	   case NOT_LITERAL | LEAST_ONCE:   /* match at least once */
165 	     if(str >= estr) return (NULL);
166 #if SLANG_HAS_KANJI_SUPPORT
167 	     if(iskanji(*str))
168 	       if((*str == *buf) && (*(str+1) != *(buf+2))) return (NULL);
169 	     else
170 #endif
171 	     if ((UPPERCASE(*str) == UPPERCASE(*buf))) return (NULL);
172 #if SLANG_HAS_KANJI_SUPPORT
173 	     if(iskanji(*str)) str++;
174 #endif
175 	     str++;
176 	     /* drop */
177 	   case STAR | NOT_LITERAL:
178 	     save_str = str;  p1 = *buf;
179 #if SLANG_HAS_KANJI_SUPPORT
180 	     while ((str < estr) && ((!iskanji(*str) && (UPPERCASE(*str) == p1)) ||
181 				(iskanji(*str) && *str == p1 && *(str+1) == *(buf +2))))
182 	       {
183 		  if(iskanji(*str)) str++;
184 		  str++;
185 	       }
186 	     if(iskanji(p1)) { buf++; buf++; }
187 #else
188 	     while ((str < estr) && (UPPERCASE(*str) != p1)) str++;
189 #endif
190 	     buf++;
191 	     goto match_rest;
192 
193 	     /* this type consists of the expression + two bytes that
194 	        determine number of matches to perform */
195 	   case MANY | NOT_LITERAL:
196 	     p1 = *buf; buf++;
197 	     n = n0 = (int) (unsigned char) *buf++;
198 	     /* minimum number to match--- could be 0 */
199 	     n1 = (int) (unsigned char) *buf++;
200 	     /* maximum number to match */
201 
202 	     while (n && (str < estr) && (p1 != *str))
203 	       {
204 #if SLANG_HAS_KANJI_SUPPORT
205 		  if(iskanji(*str)) { n--; str++; }
206 #endif
207 		  n--;
208 		  str++;
209 	       }
210 	     if (n) return (NULL);
211 
212 	     save_str = str;
213 	     n = n1 - n0;
214 	     while (n && (str < estr) && (p1 != *str))
215 	       {
216 #if SLANG_HAS_KANJI_SUPPORT
217 		  if(iskanji(*str)) { n--; str++; }
218 #endif
219 		  n--;
220 		  str++;
221 	       }
222 	     goto match_rest;
223 #endif				       /* NOT_LITERAL */
224 	   case LITERAL:
225 #if SLANG_HAS_KANJI_SUPPORT
226 	     if ((str >= estr) || ((!iskanji(*buf) && *buf != UPPERCASE(*str)) ||
227 			  (iskanji(*buf) && (*buf == *str || *(buf+2) == *(str+1)))))
228 		   return (NULL);
229 	     if(iskanji(*str))
230 	       {
231 		  str++; buf++; buf++;
232 	       }
233 #else
234 	     if ((str >= estr) || (*buf != UPPERCASE(*str))) return (NULL);
235 #endif
236 	     str++; buf++;
237 	     break;
238 
239 	   case MAYBE_ONCE | LITERAL:
240 	     save_str = str;
241 #if SLANG_HAS_KANJI_SUPPORT
242 	     if ((str < estr) && ((!iskanji(*str) && *buf == UPPERCASE(*str)) ||
243 				  (iskanji(*str) && *buf == *str && *(buf+1) == *(str+1))))
244 	       {
245 		  if(iskanji(*str)) { str++; buf++; buf++; }
246 		  str++;
247 	       }
248 #else
249 	     if ((str < estr) && (*buf == UPPERCASE(*str))) str++;
250 #endif
251 	     buf++;
252 	     goto match_rest;
253 
254 	   case LITERAL | LEAST_ONCE:   /* match at least once */
255 	     if ((str >= estr) || (UPPERCASE(*str) != UPPERCASE(*buf))) return (NULL);
256 #if SLANG_HAS_KANJI_SUPPORT
257 	     if(iskanji(*str) && (*str != *buf || *(str+1) != *(buf+2))) return (NULL);
258 	     if(iskanji(*str)) str++;
259 #endif
260 	     str++;
261 	     /* drop */
262 	   case STAR | LITERAL:
263 	     save_str = str;  p1 = *buf;
264 #if SLANG_HAS_KANJI_SUPPORT
265 	     while ((str < estr) && ((!iskanji(*str) && (UPPERCASE(*str) == p1)) ||
266 				(iskanji(*str) && *str == p1 && *(str+1) == *(buf +2))))
267 	       {
268 		  if(iskanji(*str)) str++;
269 		  str++;
270 	       }
271 	     if(iskanji(p1)) { buf++; buf++; }
272 #else
273 	     while ((str < estr) && (UPPERCASE(*str) == p1)) str++;
274 #endif
275 	     buf++;
276 	     goto match_rest;
277 
278 	     /* this type consists of the expression + two bytes that
279 	        determine number of matches to perform */
280 	   case MANY | LITERAL:
281 	     p1 = *buf; buf++;
282 	     n = n0 = (int) (unsigned char) *buf++;
283 	     /* minimum number to match--- could be 0 */
284 	     n1 = (int) (unsigned char) *buf++;
285 	     /* maximum number to match */
286 
287 	     while (n && (str < estr) && (p1 == *str))
288 	       {
289 #if SLANG_HAS_KANJI_SUPPORT
290 		  if(iskanji(*str)) { n--; str++; }
291 #endif
292 		  n--;
293 		  str++;
294 	       }
295 	     if (n) return (NULL);
296 
297 	     save_str = str;
298 	     n = n1 - n0;
299 	     while (n && (str < estr) && (p1 == *str))
300 	       {
301 #if SLANG_HAS_KANJI_SUPPORT
302 		  if(iskanji(*str)) { n--; str++; }
303 #endif
304 		  n--;
305 		  str++;
306 	       }
307 	     goto match_rest;
308 
309 	   case NTH_MATCH:
310 	     if ((str = do_nth_match(ctx, (int) (unsigned char) *buf, str, estr)) == NULL) return(NULL);
311 	     buf++;
312 	     break;
313 
314 	   case MAYBE_ONCE | NTH_MATCH:
315 	     save_str = str;
316 	     tmpstr = do_nth_match (ctx, (int) (unsigned char) *buf, str, estr);
317 	     buf++;
318 	     if (tmpstr != NULL)
319 	       {
320 		  str = tmpstr;
321 		  goto match_rest;
322 	       }
323 	     continue;
324 
325 	   case LEAST_ONCE | NTH_MATCH:
326 	     if ((str = do_nth_match(ctx, (int) (unsigned char) *buf, str, estr)) == NULL) return(NULL);
327 	     /* drop */
328 	   case STAR | NTH_MATCH:
329 	     save_str = str;
330 	     while (NULL != (tmpstr = do_nth_match(ctx, (int) (unsigned char) *buf, str, estr)))
331 	       {
332 		  str = tmpstr;
333 	       }
334 	     buf++;
335 	     goto match_rest;
336 
337 	   case MANY | NTH_MATCH: return(NULL);
338 	     /* needs done */
339 
340 	   case RANGE:
341 	     if (str >= estr) return (NULL);
342 	     if (TEST_BIT(buf, UPPERCASE(*str)) == 0) return (NULL);
343 #if SLANG_HAS_KANJI_SUPPORT
344 	     if(iskanji(*str)) str++;
345 #endif
346 	     buf += 32; str++;
347 	     break;
348 
349 	   case MAYBE_ONCE | RANGE:
350 	     save_str = str;
351 	     if ((str < estr) && TEST_BIT(buf, UPPERCASE(*str)))
352 	       {
353 #if SLANG_HAS_KANJI_SUPPORT
354 		  if(iskanji(*str)) str++;
355 #endif
356 		  str++;
357 	       }
358 	     buf += 32;
359 	     goto match_rest;
360 
361 	   case LEAST_ONCE | RANGE:
362 	     if ((str >= estr) || (0 == TEST_BIT(buf, UPPERCASE(*str)))) return NULL;
363 #if SLANG_HAS_KANJI_SUPPORT
364 	     if(iskanji(*str)) str++;
365 #endif
366 	     str++;
367 	     /* drop */
368 	   case STAR | RANGE:
369 	     save_str = str;
370 	     while ((str < estr) && TEST_BIT(buf, UPPERCASE(*str)))
371 	       {
372 #if SLANG_HAS_KANJI_SUPPORT
373 		  if(iskanji(*str)) str++;
374 #endif
375 		  str++;
376 	       }
377 	     buf += 32;
378 	     goto match_rest;
379 
380 	     /* The first 32 bytes correspond to the range and the two
381 	      * following bytes indicate the min and max number of matches.
382 	      */
383 	   case MANY | RANGE:
384 	     /* minimum number to match--- could be 0 */
385 	     n = n0 = (int) (unsigned char) *(buf + 32);
386 	     /* maximum number to match */
387 	     n1 = (int) (unsigned char) *(buf + 33);
388 
389 	     while (n && (str < estr) && (TEST_BIT(buf, UPPERCASE(*str))))
390 	       {
391 		  /* for Kanji */
392 #if SLANG_HAS_KANJI_SUPPORT
393 #endif
394 		  n--;
395 		  str++;
396 	       }
397 	     if (n) return (NULL);
398 	     save_str = str;
399 	     n = n1 - n0;
400 	     while (n && (str < estr) && (TEST_BIT(buf, UPPERCASE(*str))))
401 	       {
402 		  /* for kanji */
403 #if SLANG_HAS_KANJI_SUPPORT
404 #endif
405 		  n--;
406 		  str++;
407 	       }
408 	     buf += 34;		       /* 32 + 2 */
409 	     goto match_rest;
410 
411 	   case ANY_DIGIT:
412 	     if ((str >= estr) || (*str > '9') || (*str < '0')) return (NULL);
413 	     str++;
414 	     break;
415 
416 	   case MAYBE_ONCE | ANY_DIGIT:
417 	     save_str = str;
418 	     if ((str < estr) && ((*str > '9') || (*str < '0')))
419 	       {
420 #if SLANG_HAS_KANJI_SUPPORT
421 		  if(iskanji(*str)) str++;
422 #endif
423 		  str++;
424 	       }
425 	     goto match_rest;
426 
427 	   case LEAST_ONCE | ANY_DIGIT:
428 	     if ((str >= estr) || ((*str > '9') || (*str < '0'))) return NULL;
429 	     str++;
430 	     /* drop */
431 	   case STAR | ANY_DIGIT:
432 	     save_str = str;
433 	     while ((str < estr) && ((*str <= '9') && (*str >= '0'))) str++;
434 	     goto match_rest;
435 
436 	   case MANY | ANY_DIGIT:
437 	     /* needs finished */
438 	     return (NULL);
439 
440 	   case ANY:
441 	     if ((str >= estr) || (*str == '\n')) return (NULL);
442 #if SLANG_HAS_KANJI_SUPPORT
443 	     if(iskanji(*str)) str++;
444 #endif
445 	     str++;
446 	     break;
447 
448 	   case MAYBE_ONCE | ANY:
449 	     save_str = str;
450 	     if ((str < estr) && (*str != '\n'))
451 	       {
452 #if SLANG_HAS_KANJI_SUPPORT
453 		  if(iskanji(*str)) str++;
454 #endif
455 		  str++;
456 	       }
457 	     goto match_rest;
458 
459 	   case LEAST_ONCE | ANY:
460 	     if ((str >= estr) || (*str == '\n')) return (NULL);
461 #if SLANG_HAS_KANJI_SUPPORT
462 	     if(iskanji(*str)) str++;
463 #endif
464 	     str++;
465 	     /* drop */
466 	   case STAR | ANY:
467 	     save_str = str;
468 	     while ((str < estr) && (*str != '\n'))
469 	       {
470 #if SLANG_HAS_KANJI_SUPPORT
471 		  if(iskanji(*str)) str++;
472 #endif
473 		  str++;
474 	       }
475 	     goto match_rest;
476 
477 	   case MANY | ANY:
478 	     return (NULL);
479 	     /* needs finished */
480 
481 	   case EOL:
482 	     if ((str >= estr) || (*str == '\n')) return (str);
483 	     return(NULL);
484 
485 	   default: return (NULL);
486 	  }
487 	p = *buf++;
488 	continue;
489 
490 	match_rest:
491 	if (save_str == str)
492 	  {
493 	     p = *buf++;
494 	     continue;
495 	  }
496 
497 	/* if (p == EOL)
498 	 * {
499 	 * if (str < estr) return (NULL); else return (str);
500 	 * }
501 	 */
502 
503 	SLMEMCPY(save_closed_matches, ctx->closed_paren_matches, sizeof(save_closed_matches));
504 	save_num_open = ctx->open_paren_number;
505 	while (str >= save_str)
506 	  {
507 	     tmpstr = regexp_looking_at (ctx, str, estr, buf, cs);
508 	     if (tmpstr != NULL) return(tmpstr);
509 	     SLMEMCPY(ctx->closed_paren_matches, save_closed_matches, sizeof(ctx->closed_paren_matches));
510 	     ctx->open_paren_number = save_num_open;
511 	     str--;
512 #if SLANG_HAS_KANJI_SUPPORT
513 	     if(iskanji2nd(save_str, str - save_str)) str--;
514 #endif
515 	  }
516 	return NULL;
517      }
518    if ((p != 0) && (p != EOL)) return (NULL); else return (str);
519 }
520 
521 static void
fixup_beg_end_matches(Re_Context_Type * ctx,SLRegexp_Type * r,unsigned char * str,unsigned char * epos)522 fixup_beg_end_matches (Re_Context_Type *ctx, SLRegexp_Type *r, unsigned char *str, unsigned char *epos)
523 {
524    int i;
525 
526    if (str == NULL)
527      {
528 	r->beg_matches[0] = -1;
529 	r->end_matches[0] = 0;
530 	SLMEMSET(ctx->closed_paren_matches, 0, sizeof(ctx->closed_paren_matches));
531      }
532    else
533      {
534 	r->beg_matches[0] = (int) (str - ctx->str);
535 	r->end_matches[0] = (unsigned int) (epos - str);
536      }
537 
538    for (i = 1; i < 10; i++)
539      {
540 	if (ctx->closed_paren_matches [i] == 0)
541 	  {
542 	     r->beg_matches[i] = -1;
543 	     r->end_matches[i] = 0;
544 	  }
545      }
546 }
547 
/* Reset CTX to all zeros (no groups opened or closed) and bind it to the
 * compiled pattern REG and to the subject text STR of length LEN.
 */
static void init_re_context (Re_Context_Type *ctx, SLRegexp_Type *reg,
			     unsigned char *str, unsigned int len)
{
   memset ((char *) ctx, 0, sizeof (*ctx));

   ctx->len = len;
   ctx->str = str;
   ctx->reg = reg;
}
556 
/* Search the LEN bytes at STR for the first match of the compiled pattern
 * REG.
 *
 * Returns a pointer to the start of the match, or NULL if there is none.
 * Unless the text is shorter than reg->min_length (in which case NULL is
 * returned immediately), reg->beg_matches[] and reg->end_matches[] are
 * updated: on success entry 0 describes the whole match; on failure, and
 * for groups that did not take part, the entries are (-1, 0).
 */
unsigned char *SLang_regexp_match(unsigned char *str,
				  unsigned int len, SLRegexp_Type *reg)
{
   Re_Context_Type ctx;
   unsigned char *limit = str + len;
   unsigned char *code = reg->buf;
   unsigned char *match_end = NULL;
   unsigned char lead_char = 0;
   int have_lead = 0;
   int cs = reg->case_sensitive;      /* must be called `cs': UPPERCASE() reads it */

   if (reg->min_length > len)
     return NULL;

   init_re_context (&ctx, reg, str, len);

   /* A pattern anchored with ^ is tried at the start of the text only. */
   if (*code == BOL)
     {
	match_end = regexp_looking_at (&ctx, str, limit, code + 1, cs);
	if (match_end == NULL)
	  str = NULL;

	fixup_beg_end_matches (&ctx, reg, str, match_end);
	return str;
     }

   /* Consume leading case-switch opcodes. */
   if (*code == NO_CASE)
     {
	code++;
	cs = 0;
     }

   if (*code == YES_CASE)
     {
	code++;
	cs = 1;
     }

   /* When the pattern begins with a plain literal (optionally just inside
    * a group) remember it so that start positions can be skipped quickly.
    */
   if (*code == LITERAL)
     {
	have_lead = 1;
	lead_char = code[1];
     }
   else if ((*code == OPAREN) && (code[1] == LITERAL))
     {
	have_lead = 1;
	lead_char = code[2];
     }

   for (;;)
     {
	/* every start position gets fresh group bookkeeping */
	ctx.open_paren_number = 0;
	memset (ctx.closed_paren_matches, 0, sizeof(ctx.closed_paren_matches));

	if (have_lead)
	  {
	     /* skip ahead to the next occurrence of the leading literal */
	     while ((str < limit) && (lead_char != UPPERCASE(*str)))
	       {
#if SLANG_HAS_KANJI_SUPPORT
		  if(iskanji(*str)) str++;
#endif
		  str++;
	       }
	     if (str >= limit)
	       break;		       /* failed */
	  }

	match_end = regexp_looking_at(&ctx, str, limit, code, cs);
	if (match_end != NULL)
	  {
	     fixup_beg_end_matches (&ctx, reg, str, match_end);
	     return str;
	  }

	if (str >= limit)
	  break;
#if SLANG_HAS_KANJI_SUPPORT
	if(iskanji(*str))
	  {
	     if ((str +1) == limit)
	       break;
	     str++;
	  }
#endif
	str++;
     }

   fixup_beg_end_matches (&ctx, reg, NULL, match_end);
   return NULL;
}
637 
/* Parse the unsigned decimal number at the start of PAT.
 *
 * On success the value is stored in *NN and a pointer to the first
 * character after the digits is returned.  NULL is returned, and *NN is
 * left untouched, when PAT does not start with a digit or when the number
 * does not fit in an int.
 */
static unsigned char *convert_digit(unsigned char *pat, int *nn)
{
   /* Largest int, computed locally so no extra header is required. */
   const int int_max = (int) (~0U >> 1);
   unsigned char *p = pat;
   int value = 0;

   while ((*p >= '0') && (*p <= '9'))
     {
	int digit = *p - '0';

	/* 10 * value + digit would overflow: signed overflow is undefined
	 * behaviour, so reject the number instead.
	 */
	if (value > (int_max - digit) / 10)
	  return NULL;

	value = 10 * value + digit;
	p++;
     }

   if (p == pat)
     return NULL;		       /* no digits at all */

   *nn = value;
   return p;
}
655 
656 #define ERROR  return (int) (pat - reg->pat)
657 
658 /* Returns 0 if successful or offset in pattern of error */
SLang_regexp_compile(SLRegexp_Type * reg)659 int SLang_regexp_compile (SLRegexp_Type *reg)
660 {
661    register unsigned char *buf, *ebuf, *pat;
662    unsigned char *last = NULL, *tmppat;
663    register unsigned char c;
664    int i, reverse = 0, n, cs;
665    int oparen = 0, nparen = 0;
666    /* substring stuff */
667    int count, last_count, this_max_mm = 0, max_mm = 0, ordinary_search,
668      no_osearch = 0, min_length = 0;
669    unsigned char *mm_p = NULL, *this_mm_p = NULL;
670    static int already_initialized;
671 
672    reg->beg_matches[0] = reg->end_matches[0] = 0;
673    buf = reg->buf;
674    ebuf = (reg->buf + reg->buf_len) - 2; /* make some room */
675    pat = reg->pat;
676    cs = reg->case_sensitive;
677 
678    if (already_initialized == 0)
679      {
680 	SLang_init_case_tables ();
681 #if SLANG_HAS_KANJI_SUPPORT
682 	SLmake_lut (Word_Chars, (unsigned char *) "_0-9a-zA-Z", 0);
683 #else
684 # ifdef IBMPC_SYSTEM
685 	SLmake_lut (Word_Chars, (unsigned char *) "_0-9a-zA-Z\200-\232\240-\245\341-\353", 0);
686 # else
687 	SLmake_lut (Word_Chars, (unsigned char *) "_0-9a-zA-Z\277-\326\330-\336\340-\366\370-\376", 0);
688 # endif
689 #endif
690 	already_initialized = 1;
691      }
692 
693    i = 1; while (i < 10)
694      {
695 	reg->beg_matches[i] = -1;
696 	reg->end_matches[i] = 0;
697 	i++;
698      }
699 
700    if (*pat == '\\')
701      {
702 	if (pat[1] == 'c')
703 	  {
704 	     cs = 1;
705 	     pat += 2;
706 	     no_osearch = 1;
707 	  }
708 	else if (pat[1] == 'C')
709 	  {
710 	     cs = 0;
711 	     pat += 2;
712 	     no_osearch = 1;
713 	  }
714      }
715 
716    if (*pat == '^')
717      {
718 	pat++;
719 	*buf++ = BOL;
720 	reg->must_match_bol = 1;
721      }
722    else reg->must_match_bol = 0;
723 
724    if (cs != reg->case_sensitive)
725      {
726 	if (cs) *buf++ = YES_CASE;
727 	else *buf++ = NO_CASE;
728      }
729 
730    *buf = 0;
731 
732    last_count = count = 0;
733    while ((c = *pat++) != 0)
734      {
735 #if SLANG_HAS_KANJI_SUPPORT
736 	if ((buf >= ebuf - 3) || (iskanji(c) && buf >= ebuf - 5))
737 #else
738 	if (buf >= ebuf - 3)
739 #endif
740 	  {
741 	     SLang_doerror ("Pattern too large to be compiled.");
742 	     ERROR;
743 	  }
744 
745 	count++;
746 	switch (c)
747 	  {
748 	   case '$':
749 	     if (*pat != 0) goto literal_char;
750 	     *buf++ = EOL;
751 	     break;
752 
753 	   case '\\':
754 	     c = *pat++;
755 	     no_osearch = 1;
756 	     switch(c)
757 	       {
758 		case 'e': c = 033; goto literal_char;
759 		case 'n': c = '\n'; goto literal_char;
760 		case 't': c = '\t'; goto literal_char;
761 		case 'C': cs = 0; *buf++ = NO_CASE; break;
762 		case 'c': cs = 1; *buf++ = YES_CASE; break;
763 		case '1': case '2': case '3':  case '4':  case '5':
764 		case '6': case '7': case '8':  case '9':
765 		  c = c - '0';
766 		  if ((int) c > nparen) ERROR;
767 		  last = buf;
768 		  *buf++ = NTH_MATCH; *buf++ = c;
769 		  break;
770 #ifdef NOT_LITERAL
771 		case '~':	       /* slang extension */
772 		  if ((c = *pat) == 0) ERROR;
773 		  pat++;
774 		  last = buf;
775 		  *buf++ = NOT_LITERAL;
776 		  *buf++ = c;
777 #if SLANG_HAS_KANJI_SUPPORT
778 		  if(iskanji(c))
779 		    {
780 		       *buf++ = NOT_LITERAL;
781 		       *buf++ = *pat++;
782 		       min_length++;
783 		    }
784 #endif
785 		  min_length++;
786 		  break;
787 #endif
788 		case 'd':	       /* slang extension */
789 		  last = buf;
790 		  *buf++ = ANY_DIGIT;
791 		  min_length++;
792 		  break;
793 
794 		case '<':
795 		  last = NULL;
796 		  *buf++ = BOW;
797 		  break;
798 
799 		case '>':
800 		  last = NULL;
801 		  *buf++ = EOW;
802 		  break;
803 
804 		case '{':
805 		  if (last == NULL) goto literal_char;
806 		  *last |= MANY;
807 		  tmppat = convert_digit(pat, &n);
808 		  if (tmppat == NULL) ERROR;
809 		  pat = tmppat;
810 		  *buf++ = n;
811 
812 		  min_length += (n - 1);
813 
814 		  if (*pat == '\\')
815 		    {
816 		       *buf++ = n;
817 		    }
818 		  else if (*pat == ',')
819 		    {
820 		       pat++;
821 		       if (*pat == '\\')
822 			 {
823 			    n = 255;
824 			 }
825 		       else
826 			 {
827 			    tmppat = convert_digit(pat, &n);
828 			    if (tmppat == NULL) ERROR;
829 			    pat = tmppat;
830 			    if (*pat != '\\') ERROR;
831 			 }
832 		       *buf++ = n;
833 		    }
834 		  else ERROR;
835 		  last = NULL;
836 		  pat++;
837 		  if (*pat != '}') ERROR;
838 		  pat++;
839 		  break;   /* case '{' */
840 
841 		case '(':
842 		  oparen++;
843 		  if (oparen > 9) ERROR;
844 		  *buf++ = OPAREN;
845 		  break;
846 		case ')':
847 		  if (oparen == 0) ERROR;
848 		  oparen--;
849 		  nparen++;
850 		  *buf++ = CPAREN;
851 		  break;
852 
853 		case 0: ERROR;
854 		default:
855 		  goto literal_char;
856 	       }
857 	     break;
858 
859 	   case '[':
860 
861 	     *buf = RANGE;
862 	     last = buf++;
863 
864 	     if (buf + 32 >= ebuf) ERROR;
865 
866 	     for (i = 0; i < 32; i++) buf[i] = 0;
867 	     c = *pat++;
868 	     if (c == '^')
869 	       {
870 		  reverse = 1;
871 		  SET_BIT(buf, '\n');
872 		  c = *pat++;
873 	       }
874 
875 	     if (c == ']')
876 	       {
877 		  SET_BIT(buf, c);
878 		  c = *pat++;
879 	       }
880 	     while (c && (c != ']'))
881 	       {
882 		  if (c == '\\')
883 		    {
884 		       c = *pat++;
885 		       switch(c)
886 			 {
887 			    case 'n': c = '\n'; break;
888 			    case 't': c = '\t'; break;
889 			    case 0: ERROR;
890 			 }
891 		    }
892 
893 		  if (*pat == '-')
894 		    {
895 		       pat++;
896 		       while (c < *pat)
897 			 {
898 			    if (cs == 0)
899 			      {
900 				 SET_BIT(buf, UPPERCASE(c));
901 				 SET_BIT(buf, LOWERCASE(c));
902 			      }
903 			    else SET_BIT(buf, c);
904 			    c++;
905 			 }
906 		    }
907 		  if (cs == 0)
908 		    {
909 		       SET_BIT(buf, UPPERCASE(c));
910 		       SET_BIT(buf, LOWERCASE(c));
911 		    }
912 		  else SET_BIT(buf, c);
913 		  c = *pat++;
914 	       }
915 	     if (c != ']') ERROR;
916 	     if (reverse) for (i = 0; i < 32; i++) buf[i] = buf[i] ^ 0xFF;
917 	     reverse = 0;
918 	     buf += 32;
919 	     min_length++;
920 	     break;
921 
922 	   case '.':
923 	     last = buf;
924 	     *buf++ = ANY;
925 	     min_length++;
926 	     break;
927 
928 	   case '*':
929 	     if (last == NULL) goto literal_char;
930 	     *last |= STAR;
931 	     min_length--;
932 	     last = NULL;
933 	     break;
934 
935 	   case '+':
936 	     if (last == NULL) goto literal_char;
937 	     *last |= LEAST_ONCE;
938 	     last = NULL;
939 	     break;
940 
941 	   case '?':
942 	     if (last == NULL) goto literal_char;
943 	     *last |= MAYBE_ONCE;
944 	     last = NULL;
945 	     min_length--;
946 	     break;
947 
948 	   literal_char:
949 	   default:
950 	     /* This is to keep track of longest substring */
951 	     min_length++;
952 	     this_max_mm++;
953 #if SLANG_HAS_KANJI_SUPPORT
954 	     if(iskanji(c))
955 	       {
956 		  min_length++;
957 		  this_max_mm++;
958 	       }
959 #endif
960 	     if (last_count + 1 == count)
961 	       {
962 		  if (this_max_mm == 1)
963 		    {
964 		       this_mm_p = buf;
965 		    }
966 		  else if (max_mm < this_max_mm)
967 		    {
968 		       mm_p = this_mm_p;
969 		       max_mm = this_max_mm;
970 		    }
971 	       }
972 	     else
973 	       {
974 		  this_mm_p = buf;
975 		  this_max_mm = 1;
976 	       }
977 
978 	     last_count = count;
979 
980 	     last = buf;
981 	     *buf++ = LITERAL;
982 #if SLANG_HAS_KANJI_SUPPORT
983 	     if(iskanji(c))
984 	       {
985 		  *buf++ = c;
986 		  *buf++ = LITERAL;
987 		  *buf++ = *pat++;
988 	       }
989 	     else
990 #endif
991 	       *buf++ = UPPERCASE(c);
992 	  }
993      }
994    *buf = 0;
995    /* Check for ordinary search */
996    ebuf = buf;
997    buf = reg->buf;
998 
999    if (no_osearch) ordinary_search = 0;
1000    else
1001      {
1002 	ordinary_search = 1;
1003 	while (buf < ebuf)
1004 	  {
1005 	     if (*buf != LITERAL)
1006 	       {
1007 		  ordinary_search = 0;
1008 		  break;
1009 	       }
1010 	     buf += 2;
1011 	  }
1012      }
1013 
1014    reg->osearch = ordinary_search;
1015    reg->must_match_str[15] = 0;
1016    reg->min_length = (min_length > 0) ? (unsigned int) min_length : 0;
1017    if (ordinary_search)
1018      {
1019 	strncpy((char *) reg->must_match_str, (char *) reg->pat, 15);
1020 	reg->must_match = 1;
1021 	return(0);
1022      }
1023    /* check for longest substring of pattern */
1024    reg->must_match = 0;
1025    if ((mm_p == NULL) && (this_mm_p != NULL)) mm_p = this_mm_p;
1026    if (mm_p == NULL)
1027      {
1028 	return (0);
1029      }
1030    n = 15;
1031    pat = reg->must_match_str;
1032    buf = mm_p;
1033    while (n--)
1034      {
1035 	if (*buf++ != LITERAL) break;
1036 	*pat++ = *buf++;
1037      }
1038    *pat = 0;
1039    if (pat != reg->must_match_str) reg->must_match = 1;
1040    return(0);
1041 }
1042 
/* Copy the string RE into BUF, putting a backslash in front of each of the
 * characters $ \ [ ] . ^ * + ? so that the result, used as a pattern,
 * matches RE literally.
 *
 * re      0-terminated input string.
 * buf     output buffer.
 * buflen  size of BUF in bytes, including room for the terminating 0.
 *
 * Returns BUF on success.  Returns NULL when RE or BUF is NULL or when the
 * quoted string does not fit; in the latter case BUF holds an unterminated
 * partial result.
 */
char *SLregexp_quote_string (char *re, char *buf, unsigned int buflen)
{
   char ch;
   char *b, *bmax;

   if ((re == NULL) || (buf == NULL)) return NULL;

   b = buf;
   bmax = buf + buflen;

   while (b < bmax)
     {
	switch (ch = *re++)
	  {
	   case 0:
	     *b = 0;
	     return buf;

	   case '$':
	   case '\\':
	   case '[':
	   case ']':
	   case '.':
	   case '^':
	   case '*':
	   case '+':
	   case '?':
	     *b++ = '\\';
	     if (b == bmax) return NULL;   /* no room for the character itself */
	     /* drop */

	   default:
#if SLANG_HAS_KANJI_SUPPORT
	     /* Copy a two-byte character as a unit, but only when both bytes
	      * fit.  A lead byte directly followed by the terminator is
	      * copied as a single byte so the terminator is never skipped.
	      */
	     if (iskanji(ch) && (*re != 0))
	       {
		  if (b + 1 >= bmax) return NULL;
		  *b++ = ch;
		  ch = *re++;
	       }
#endif
	     *b++ = ch;
	  }
     }
   return NULL;
}
1087 
#if 0
/* Disabled stand-alone test driver: a minimal grep.
 *
 *    usage: PROGRAM REGEXP FILE
 *
 * Prints every line of FILE that matches REGEXP (case-sensitively).
 * Returns 0 on success and 1 on a usage, file or pattern error.
 */
#define MAX_EXP 4096
int main(int argc, char **argv)
{
   FILE *fp;
   char *regexp, *file;
   unsigned char expbuf[MAX_EXP];
   char buf[512];
   SLRegexp_Type reg;

   if (argc != 3)
     {
	fprintf(stderr, "Usage: %s REGEXP FILE\n", argv[0]);
	return(1);
     }

   file = argv[2];
   regexp = argv[1];

   if (NULL == (fp = fopen(file, "r")))
     {
	fprintf(stderr, "File not open\n");
	return(1);
     }

   reg.buf = expbuf;
   reg.buf_len = MAX_EXP;
   reg.pat = (unsigned char *) regexp;
   reg.case_sensitive = 1;

   /* the compiler returns 0 on success, else the offset of the error */
   if (0 != SLang_regexp_compile(&reg))
     {
	fprintf(stderr, "Bad regular expression\n");
	fclose(fp);
	return(1);
     }

   while (NULL != fgets(buf, sizeof(buf), fp))
     {
	if (reg.osearch)
	  {
	     /* plain literal pattern: a substring search is enough */
	     if (NULL == strstr(buf, regexp)) continue;
	  }
	else
	  {
	     /* cheap pre-filter on the literal the match must contain */
	     if (reg.must_match && (NULL == strstr(buf, (char *) reg.must_match_str))) continue;
	     if (NULL == SLang_regexp_match((unsigned char *) buf, (unsigned int) strlen(buf), &reg)) continue;
	  }

	fputs(buf, stdout);
     }

   fclose(fp);
   return (0);
}
#endif
1128