1 /************************************************************************
2  * This program is Copyright (C) 1986-1996 by Jonathan Payne.  JOVE is  *
3  * provided to you without charge, and with no warranty.  You may give  *
4  * away copies of JOVE, including sources, provided that this notice is *
5  * included in all the files.                                           *
6  ************************************************************************/
7 
8 /* search package */
9 
10 #include "jove.h"
11 #include "re.h"
12 #include "jctype.h"
13 #include "ask.h"
14 #include "disp.h"
15 #include "fmt.h"
16 #include "marks.h"
17 
18 private bool
19 	do_comp proto((struct RE_block *,int));
20 
21 char
22 	rep_search[128],	/* replace search string */
23 	rep_str[128];		/* contains replacement string */
24 
25 bool
26 	CaseIgnore = NO,	/* VAR: ignore case in search */
27 	WrapScan = NO;		/* VAR: make searches wrap */
28 
29 private ZXchar	REpeekc;
30 private char	*REptr;
31 
32 private ZXchar
REgetc()33 REgetc()
34 {
35 	ZXchar	c;
36 
37 	if ((c = REpeekc) != EOF)
38 		REpeekc = EOF;
39 	else if (*REptr != '\0')
40 		c = ZXC(*REptr++);
41 	else
42 		c = EOF;
43 
44 	return c;
45 }
46 
47 #define STAR	01	/* Match any number of last RE (ORed into other ops). */
48 
49 #define AT_BOL	2	/* ^ */
50 #define AT_EOL	4	/* $ */
51 #define AT_BOW	6	/* \< */
52 #define AT_EOW	8	/* \> */
53 #define OPENP	10	/* \( {chunk number} */
54 #define CLOSEP	12	/* \) {chunk number} */
55 #define CURLYB	14	/* \{ {number of alt, alts } */
56 
57 #define NOSTR	14	/* Codes <= NOSTR can't be *'d. */
58 
59 #define ANYC	(NOSTR+2)		/* . */
60 #define NORMC	(ANYC+2)		/* normal chars {len, char...} */
61 #define CINDC	(NORMC+2)		/* case independent chars {len, char...} */
62 #define ONE_OF	(CINDC+2)		/* [xxx] {bitmask} */
63 #define NONE_OF	(ONE_OF+2)	/* [^xxx] {bitmask} */
64 #define BACKREF	(NONE_OF+2)	/* \# {chunk number} */
65 #define EOP	(BACKREF+2)	/* end of pattern */
66 
67 #define CHAR_MASK	((1 << CHAR_BITS) - 1)	/* byte mask, really */
68 #define ALT_LEN_LEN	2	/* an alt starts with a two-byte length */
69 #define ALT_LEN(p)	(((p)[0] & CHAR_MASK) + (((p)[1] & CHAR_MASK) << CHAR_BITS))
70 
71 /* ONE_OF/NONE_OF is represented as a bit vector.
72  * These symbols parameterize the representation.
73  */
74 
75 #define	SETSIZE		(NCHARS / CHAR_BITS)
76 #define	SETBYTE(c)	((c) / CHAR_BITS)
77 #define	SETBIT(c)	(1 << ((c) % CHAR_BITS))
78 
79 #define NPAR	10	/* [0-9] - 0th is the entire matched string, i.e. & */
80 private char	*comp_ptr,
81 		**alt_p,
82 		**alt_endp;
83 
84 void
REcompile(pattern,re,re_blk)85 REcompile(pattern, re, re_blk)
86 char	*pattern;
87 bool	re;
88 struct RE_block	*re_blk;
89 {
90 	REptr = pattern;
91 	REpeekc = EOF;
92 	comp_ptr = re_blk->r_compbuf;
93 	alt_p = re_blk->r_alternates;
94 	alt_endp = alt_p + NALTS - 1;
95 	*alt_p++ = comp_ptr;
96 	re_blk->r_nparens = 0;
97 	(void) do_comp(re_blk, re ? OKAY_RE : NORM);
98 	*alt_p = NULL;
99 
100 	re_blk->r_anchored = NO;
101 	re_blk->r_firstc = EOF;
102 	/* do a little post processing */
103 	if (re_blk->r_alternates[1] == NULL) {
104 		char	*p;
105 
106 		p = re_blk->r_alternates[0];
107 		for (;;) {
108 			switch (*p) {
109 			case OPENP:
110 			case CLOSEP:
111 				p += 2;
112 				continue;
113 
114 			case AT_BOW:
115 			case AT_EOW:
116 				p += 1;
117 				continue;
118 
119 			case AT_BOL:
120 				re_blk->r_anchored = YES;
121 				/* don't set firstc -- won't work */
122 				break;
123 
124 			case NORMC:
125 			case CINDC:
126 				re_blk->r_firstc = CharUpcase(p[2]);
127 				break;
128 
129 			default:
130 				break;
131 			}
132 			break;
133 		}
134 	}
135 }
136 
137 /* compile the pattern into an internal code */
138 
139 private bool
do_comp(re_blk,kind)140 do_comp(re_blk, kind)
141 struct RE_block	*re_blk;
142 int	kind;
143 {
144 	char	*this_verb,
145 		*prev_verb,
146 		*start_p,
147 		*comp_endp;
148 	int	parens[NPAR],
149 		*parenp,
150 		outer_max_paren = -1;
151 	ZXchar	c;
152 	bool	done_cb = NO;
153 
154 	parenp = parens;
155 	this_verb = NULL;
156 	comp_endp = &re_blk->r_compbuf[COMPSIZE - 6];
157 
158 	/* wrap the whole expression around (implied) parens */
159 	if (kind != IN_CB) {
160 		if (re_blk->r_nparens >= NPAR)
161 				complain("Too many ('s; max is %d.", NPAR);
162 		*comp_ptr++ = OPENP;
163 		*parenp++ = *comp_ptr++ = re_blk->r_nparens++;
164 	}
165 
166 	start_p = comp_ptr;
167 
168 	while ((c = REgetc()) != EOF) {
169 		if (comp_ptr > comp_endp) {
170 toolong:
171 			complain("Search string too long/complex.");
172 		}
173 		prev_verb = this_verb;
174 		this_verb = comp_ptr;
175 
176 		/* The following test ought to be
177 		 *	kind == NORM && c != '\\'
178 		 * but Jon likes to put ^, $, and \ in i-searches.
179 		 * Don't tell him, but $ only sort of works. -- DHR
180 		 */
181 		if (kind == NORM && strchr("^$\\", c) == NULL)
182 			goto defchar;
183 		switch (c) {
184 		case '\\':
185 			switch (c = REgetc()) {
186 			case EOF:
187 				complain("[Premature end of pattern]");
188 				/*NOTREACHED*/
189 
190 			case '{':
191 			    {
192 				char	*altcntp;		/* alternate count */
193 				int
194 					init_paren = re_blk->r_nparens,
195 					max_paren = -1;
196 
197 				*comp_ptr++ = CURLYB;
198 				altcntp = comp_ptr;
199 				*comp_ptr++ = 0;	/* initialize alt-count */
200 				for (;;) {
201 					char	*comp_len = comp_ptr;
202 					bool	done;
203 					long	len;
204 
205 					comp_ptr += ALT_LEN_LEN;
206 					re_blk->r_nparens = init_paren;
207 					done = do_comp(re_blk, IN_CB);
208 
209 					/* We demand that each alternate has the same number
210 					 * of parens because we currently have no mechanism to
211 					 * set the matching strings to a meaningful default.
212 					 */
213 					if (max_paren == -1)
214 						max_paren = re_blk->r_nparens;
215 					if (max_paren != re_blk->r_nparens)
216 						complain("[each alternate must have the same number of \\( \\)]");
217 
218 					len = comp_ptr - comp_len;
219 					comp_len[0] = (char) len;	/* truncate */
220 					comp_len[1] = (char) (len >> CHAR_BITS);	/* truncate */
221 					(*altcntp) += 1;
222 					if (done)
223 						break;
224 				}
225 				break;
226 			    }
227 
228 			case '}':
229 				if (kind != IN_CB)
230 					complain("Unexpected \\}.");
231 				done_cb = YES;
232 				goto outahere;
233 
234 			case '(':
235 				if (re_blk->r_nparens >= NPAR)
236 					complain("Too many ('s; max is %d.", NPAR);
237 				*comp_ptr++ = OPENP;
238 				*parenp++ = *comp_ptr++ = re_blk->r_nparens++;
239 				break;
240 
241 			case ')':
242 				if (parenp == parens)
243 					complain("Too many )'s.");
244 				*comp_ptr++ = CLOSEP;
245 				*comp_ptr++ = *--parenp;
246 				break;
247 
248 			case '|':
249 				if (kind == IN_CB)
250 					goto outahere;
251 				if (alt_p >= alt_endp)
252 					complain("Too many alternates; max %d.", NALTS);
253 				/* close off previous alternate */
254 				*comp_ptr++ = CLOSEP;
255 				*comp_ptr++ = *--parenp;
256 				if (parenp != parens)
257 					complain("Unmatched \\(.");
258 				*comp_ptr++ = EOP;
259 
260 				/* We demand that each alternate has the same number
261 				 * of parens because we currently have no mechanism to
262 				 * set the matching strings to a meaningful default.
263 				 */
264 				if (outer_max_paren == -1)
265 					outer_max_paren = re_blk->r_nparens;
266 				if (outer_max_paren != re_blk->r_nparens)
267 					complain("[each alternate must have the same number of \\( \\)]");
268 
269 				/* start a new alt */
270 				*alt_p++ = comp_ptr;
271 				re_blk->r_nparens = 0;
272 				*comp_ptr++ = OPENP;
273 				*parenp++ = *comp_ptr++ = re_blk->r_nparens++;
274 				start_p = comp_ptr;
275 				break;
276 
277 			case '1':
278 			case '2':
279 			case '3':
280 			case '4':
281 			case '5':
282 			case '6':
283 			case '7':
284 			case '8':
285 			case '9':
286 				*comp_ptr++ = BACKREF;
287 				*comp_ptr++ = c - '0';
288 				break;
289 
290 			case '<':
291 				*comp_ptr++ = AT_BOW;
292 				break;
293 
294 			case '>':
295 				*comp_ptr++ = AT_EOW;
296 				break;
297 
298 			default:
299 				goto defchar;
300 			}
301 			break;
302 
303 		case '.':
304 			*comp_ptr++ = ANYC;
305 			break;
306 
307 		case '^':
308 			if (comp_ptr == start_p) {
309 				*comp_ptr++ = AT_BOL;
310 				break;
311 			}
312 			goto defchar;
313 
314 		case '$':
315 			if ((REpeekc = REgetc()) != EOF && REpeekc != '\\')
316 				goto defchar;
317 			*comp_ptr++ = AT_EOL;
318 			break;
319 
320 		case '[':
321 			*comp_ptr++ = ONE_OF;
322 			if (comp_ptr + SETSIZE >= comp_endp)
323 				goto toolong;
324 			byte_zero(comp_ptr, (size_t) SETSIZE);
325 			c = REgetc();
326 			if (c == '^') {
327 				*this_verb = NONE_OF;
328 				c = REgetc();
329 			}
330 			do {
331 				if (c == EOF)
332 					break;
333 				if (c == '\\') {
334 					c = REgetc();
335 					if (c == EOF)
336 						break;
337 				}
338 				if ((REpeekc = REgetc()) == '-') {
339 					/* possibly a range */
340 					ZXchar	i = c;
341 
342 					REpeekc = EOF;	/* discard '-' */
343 					c = REgetc();
344 					if (c == EOF)
345 						break;
346 					if (c == ']') {
347 						/* not a range after all */
348 						REpeekc = c;	/* push back ']' */
349 						c = '-';	/* recycle '-' */
350 						comp_ptr[SETBYTE(i)] |= SETBIT(i);	/* handle initial char */
351 					} else {
352 						/* really a range: add members up to c */
353 						if (c == '\\') {
354 							c = REgetc();
355 							if (c == EOF)
356 								break;
357 						}
358 						while (i < c) {
359 							comp_ptr[SETBYTE(i)] |= SETBIT(i);
360 							i += 1;
361 						}
362 					}
363 				}
364 				comp_ptr[SETBYTE(c)] |= SETBIT(c);
365 				c = REgetc();
366 			} while (c != ']');
367 			if (c == EOF)
368 				complain("Missing ].");
369 			comp_ptr += SETSIZE;
370 			break;
371 
372 		case '*':
373 			if (prev_verb == NULL || *prev_verb <= NOSTR || (*prev_verb&STAR)!=0)
374 				goto defchar;
375 
376 			if (*prev_verb == NORMC || *prev_verb == CINDC) {
377 				char	lastc = comp_ptr[-1];
378 
379 				/* The * operator applies only to the
380 				 * previous character.  Since we were
381 				 * building a string-matching command
382 				 * (NORMC or CINDC), we must split it
383 				 * up and work with the last character.
384 				 *
385 				 * Note that the STARed versions of these
386 				 * commands do not operate on strings, and
387 				 * so do not need or have character counts.
388 				 */
389 
390 				if (prev_verb[1] == 1) {
391 					/* Only one char in string:
392 					 * delete old command.
393 					 */
394 					this_verb = prev_verb;
395 				} else {
396 					/* Several chars in string:
397 					 * strip off the last.
398 					 * New verb is derived from old.
399 					 */
400 					prev_verb[1] -= 1;
401 					this_verb -= 1;
402 					*this_verb = *prev_verb;
403 				}
404 				comp_ptr = this_verb + 1;
405 				*comp_ptr++ = lastc;
406 			} else {
407 				/* This command is just the previous one,
408 				 * whose verb we will modify.
409 				 */
410 				this_verb = prev_verb;
411 			}
412 			*this_verb |= STAR;
413 			break;
414 		default:
415 defchar:
416 			if (prev_verb == NULL
417 			|| !(*prev_verb == NORMC || *prev_verb == CINDC))
418 			{
419 				/* create new string command */
420 				*comp_ptr++ = (CaseIgnore) ? CINDC : NORMC;
421 				*comp_ptr++ = 0;
422 			} else {
423 				/* merge this into previous string command */
424 				this_verb = prev_verb;
425 			}
426 			this_verb[1] += 1;
427 			*comp_ptr++ = c;
428 			break;
429 		}
430 	}
431 outahere:
432 
433 	/* End of pattern, let's do some error checking. */
434 	if (kind != IN_CB) {
435 		*comp_ptr++ = CLOSEP;
436 		*comp_ptr++ = *--parenp;
437 	}
438 	if (parenp != parens)
439 		complain("Unmatched \\(.");
440 	if (kind == IN_CB && c == EOF)	/* end of pattern with missing \}. */
441 		complain("Missing \\}.");
442 	*comp_ptr++ = EOP;
443 
444 	/* We demand that each alternate has the same number
445 	 * of parens because we currently have no mechanism to
446 	 * set the matching strings to a meaningful default.
447 	 */
448 	if (outer_max_paren != -1 && outer_max_paren != re_blk->r_nparens)
449 		complain("[each alternate must have the same number of \\( \\)]");
450 
451 	return done_cb;
452 }
453 
454 private char	*pstrtlst[NPAR],	/* index into re_blk->r_lbuf */
455 		*pendlst[NPAR],
456 		*REbolp,	/* begining-of-line pointer */
457 		*locrater,	/* roof of last substitution */
458 		*loc1,	/* start of matched text */
459 		*loc2;	/* roof of matched text */
460 
461 int	REbom,		/* beginning and end columns of match */
462 	REeom,
463 	REdelta;	/* increase in line length due to last re_dosub */
464 
465 private bool
backref(n,linep)466 backref(n, linep)
467 int	n;
468 register char	*linep;
469 {
470 	register char	*backsp,
471 			*backep;
472 
473 	backsp = pstrtlst[n];
474 	backep = pendlst[n];
475 	while (*backsp++ == *linep++)
476 		if (backsp >= backep)
477 			return YES;
478 	return NO;
479 }
480 
481 private bool
member(comp_ptr,c,af)482 member(comp_ptr, c, af)
483 register char	*comp_ptr;
484 register ZXchar	c;
485 bool		af;
486 {
487 	return c != '\0' && ((comp_ptr[SETBYTE(c)] & SETBIT(c))? af : !af);
488 }
489 
490 private bool
REmatch(linep,comp_ptr)491 REmatch(linep, comp_ptr)
492 register char	*linep,
493 		*comp_ptr;
494 {
495 	char	*first_p;
496 	register int	n;
497 
498 	for (;;) switch (*comp_ptr++) {
499 	case NORMC:
500 		n = *comp_ptr++;
501 		while (--n >= 0)
502 			if (*linep++ != *comp_ptr++)
503 				return NO;
504 		continue;
505 
506 	case CINDC:	/* case independent comparison */
507 		n = *comp_ptr++;
508 		while (--n >= 0)
509 			if (!cind_eq(*linep++, *comp_ptr++))
510 				return NO;
511 		continue;
512 
513 	case EOP:
514 		loc2 = linep;
515 		REeom = (loc2 - REbolp);
516 		return YES;	/* Success! */
517 
518 	case AT_BOL:
519 		if (linep == REbolp && linep != locrater)
520 			continue;
521 		return NO;
522 
523 	case AT_EOL:
524 		if (*linep == '\0')
525 			continue;
526 		return NO;
527 
528 	case ANYC:
529 		if (*linep++ != '\0')
530 			continue;
531 		return NO;
532 
533 	case AT_BOW:
534 		if (linep != locrater && jisident(*linep)
535 		&& (linep == REbolp || !jisident(linep[-1])))
536 			continue;
537 		return NO;
538 
539 	case AT_EOW:
540 		if (linep != locrater && (*linep == '\0' || !jisident(*linep))
541 		&& (linep != REbolp && jisident(linep[-1])))
542 			continue;
543 		return NO;
544 
545 	case ONE_OF:
546 	case NONE_OF:
547 		if (member(comp_ptr, ZXC(*linep++), comp_ptr[-1] == ONE_OF)) {
548 			comp_ptr += SETSIZE;
549 			continue;
550 		}
551 		return NO;
552 
553 	case OPENP:
554 		pstrtlst[*comp_ptr++] = linep;
555 		continue;
556 
557 	case CLOSEP:
558 		pendlst[*comp_ptr++] = linep;
559 		continue;
560 
561 	case BACKREF:
562 		if (pstrtlst[n = *comp_ptr++] == NULL) {
563 			s_mess("\\%d was not specified.", n + 1);
564 		} else if (backref(n, linep)) {
565 			linep += pendlst[n] - pstrtlst[n];
566 			continue;
567 		}
568 		return NO;
569 
570 	case CURLYB:
571 	    {
572 		int	altcnt = *comp_ptr++;
573 		bool	any = NO;
574 
575 		while (--altcnt >= 0) {
576 			if (!any)
577 				any = REmatch(linep, comp_ptr + ALT_LEN_LEN);
578 			comp_ptr += ALT_LEN(comp_ptr);
579 		}
580 		if (!any)
581 			return NO;
582 		linep = loc2;
583 		continue;
584 	    }
585 
586 	case ANYC | STAR:
587 		first_p = linep;
588 		do ; while (*linep++ != '\0');
589 		goto star;
590 
591 	case NORMC | STAR:
592 		first_p = linep;
593 		do ; while (*comp_ptr == *linep++);
594 		comp_ptr += 1;
595 		goto star;
596 
597 	case CINDC | STAR:
598 		first_p = linep;
599 		do ; while (cind_eq(*comp_ptr, *linep++));
600 		comp_ptr += 1;
601 		goto star;
602 
603 	case ONE_OF | STAR:
604 	case NONE_OF | STAR:
605 		first_p = linep;
606 		do ; while (member(comp_ptr, ZXC(*linep++), comp_ptr[-1] == (ONE_OF | STAR)));
607 		comp_ptr += SETSIZE;
608 		/* fall through */
609 star:
610 		/* linep points *after* first unmatched char.
611 		 * first_p points at where starred element started matching.
612 		 */
613 		while (--linep > first_p) {
614 			if ((*comp_ptr != NORMC || *linep == comp_ptr[2])
615 			&& REmatch(linep, comp_ptr))
616 				return YES;
617 		}
618 		continue;
619 
620 	case BACKREF | STAR:
621 		first_p = linep;
622 		n = *comp_ptr++;
623 		while (backref(n, linep))
624 			linep += pendlst[n] - pstrtlst[n];
625 		while (linep > first_p) {
626 			if (REmatch(linep, comp_ptr))
627 				return YES;
628 			linep -= pendlst[n] - pstrtlst[n];
629 		}
630 		continue;
631 
632 	default:
633 		complain("RE error match (%d).", comp_ptr[-1]);
634 	}
635 	/* NOTREACHED */
636 }
637 
638 private void
REreset()639 REreset()
640 {
641 	register int	i;
642 
643 	for (i = 0; i < NPAR; i++)
644 		pstrtlst[i] = pendlst[i] = NULL;
645 }
646 
647 /* Index LINE at OFFSET.  If lbuf_okay is YES it's okay to use linebuf
648    if LINE is the current line.  This should save lots of time in things
649    like paren matching in LISP mode.  Saves all that copying from linebuf
650    to a local buffer.  substitute() is the guy who calls re_lindex with
651    lbuf_okay as NO, since the substitution gets placed in linebuf ...
652    doesn't work too well when the source and destination strings are the
653    same.  I hate all these arguments!
654 
655    This code is cumbersome, repetetive for reasons of efficiency.  Fast
656    search is a must as far as I am concerned. */
657 
658 bool
re_lindex(line,offset,dir,re_blk,lbuf_okay,crater)659 re_lindex(line, offset, dir, re_blk, lbuf_okay, crater)
660 LinePtr	line;
661 int	offset;
662 int	dir;
663 struct RE_block	*re_blk;
664 bool	lbuf_okay;
665 int	crater;	/* offset of previous substitute (or -1) */
666 {
667 	register char	*p;
668 	register ZXchar	firstc = re_blk->r_firstc;
669 	register bool	anchored = re_blk->r_anchored;
670 	char		**alts = re_blk->r_alternates;
671 
672 	REreset();
673 	if (lbuf_okay) {
674 		REbolp = lbptr(line);
675 		if (offset == -1)
676 			offset = strlen(REbolp);	/* arg! */
677 	} else {
678 		REbolp = ltobuf(line, re_blk->r_lbuf);
679 		if (offset == -1) {	/* Reverse search, find end of line. */
680 			offset = Jr_Len;	/* Just Read Len. */
681 		}
682 	}
683 
684 	if (anchored) {
685 		if (dir == FORWARD) {
686 			if (offset != 0 || crater != -1)
687 				return NO;
688 		} else {
689 			offset = 0;
690 		}
691 	}
692 
693 	p = REbolp + offset;
694 	locrater = REbolp + crater;
695 
696 	if (firstc != EOF) {
697 		char	*first_alt = *alts;
698 
699 		if (dir == FORWARD) {
700 			while (CharUpcase(*p) != firstc || !REmatch(p, first_alt))
701 				if (*p++ == '\0')
702 					return NO;
703 		} else {
704 			while (CharUpcase(*p) != firstc || !REmatch(p, first_alt))
705 				if (--p < REbolp)
706 					return NO;
707 		}
708 	} else {
709 		for (;;) {
710 			register char	**altp = alts;
711 
712 			while (*altp != NULL)
713 				if (REmatch(p, *altp++))
714 					goto success;
715 			if (anchored
716 			|| (dir == FORWARD ? *p++ == '\0' : --p < REbolp))
717 				return NO;
718 		}
719 success:;
720 	}
721 	loc1 = p;
722 	REbom = loc1 - REbolp;
723 
724 	return YES;
725 }
726 
727 bool	okay_wrap = NO;	/* Do a wrap search ... not when we're
728 			   parsing errors ... */
729 
730 Bufpos *
dosearch(pattern,dir,re)731 dosearch(pattern, dir, re)
732 char	*pattern;
733 int	dir;
734 bool	re;
735 {
736 	Bufpos	*pos;
737 	struct RE_block	re_blk;		/* global re-compiled buffer */
738 
739 	if (bobp() && eobp())	/* Can't match!  There's no buffer. */
740 		return NULL;
741 
742 	REcompile(pattern, re, &re_blk);
743 
744 	pos = docompiled(dir, &re_blk);
745 	return pos;
746 }
747 
748 Bufpos *
docompiled(dir,re_blk)749 docompiled(dir, re_blk)
750 int dir;
751 register struct RE_block	*re_blk;
752 {
753 	static Bufpos	ret;
754 	register LinePtr	lp;
755 	register int	offset;
756 	bool	we_wrapped = NO;
757 
758 	lsave();
759 	/* Search now lsave()'s so it doesn't make any assumptions on
760 	   whether the the contents of curline/curchar are in linebuf.
761 	   Nowhere does search write all over linebuf.  However, we have to
762 	   be careful about what calls we make here, because many of them
763 	   assume (and rightly so) that curline is in linebuf. */
764 
765 	lp = curline;
766 	offset = curchar;
767 	if (dir == BACKWARD) {
768 		if (bobp()) {
769 			if (okay_wrap && WrapScan)
770 				goto doit;
771 			return NULL;
772 		}
773 		/* here we simulate BackChar() */
774 		if (bolp()) {
775 			lp = lp->l_prev;
776 			offset = length(lp);
777 		} else {
778 			offset -= 1;
779 		}
780 	} else if (dir==FORWARD && lbptr(lp)[offset]=='\0' && !lastp(lp)) {
781 		lp = lp->l_next;
782 		offset = 0;
783 	}
784 
785 	do {
786 		if (re_lindex(lp, offset, dir, re_blk, YES, -1))
787 			break;
788 doit:
789 		lp = (dir == FORWARD) ? lp->l_next : lp->l_prev;
790 		if (lp == NULL) {
791 			if (okay_wrap && WrapScan) {
792 				lp = (dir == FORWARD) ?
793 				     curbuf->b_first : curbuf->b_last;
794 				we_wrapped = YES;
795 			} else
796 				 break;
797 		}
798 		if (dir == FORWARD)
799 			offset = 0;
800 		else
801 			offset = -1;	/* signals re_lindex ... */
802 	} while (lp != curline);
803 
804 	if (lp == curline && we_wrapped)
805 		lp = NULL;
806 	if (lp == NULL)
807 		return NULL;
808 	ret.p_line = lp;
809 	ret.p_char = (dir == FORWARD) ? REeom : REbom;
810 	return &ret;
811 }
812 
813 private char *
insert(off,endp,which)814 insert(off, endp, which)
815 char	*off,
816 	*endp;
817 int which;
818 {
819 	register char	*pp;
820 	register int	n;
821 
822 	n = pendlst[which] - pstrtlst[which];
823 	pp = pstrtlst[which];
824 	while (--n >= 0) {
825 		*off++ = *pp++;
826 		if (off >= endp)
827 			len_error(JMP_ERROR);
828 	}
829 	return off;
830 }
831 
832 /* Perform the substitution.  If DELP is YES the matched string is
833    deleted, i.e., the substitution string is not inserted. */
834 
835 void
re_dosub(re_blk,tobuf,delp)836 re_dosub(re_blk, tobuf, delp)
837 struct RE_block	*re_blk;
838 char	*tobuf;
839 bool delp;
840 {
841 	register char	*tp,
842 			*rp;
843 	char	*endp;
844 
845 	tp = tobuf;
846 	endp = tp + LBSIZE;
847 	rp = re_blk->r_lbuf;
848 
849 	while (rp < loc1)
850 		*tp++ = *rp++;
851 
852 	if (!delp) {
853 		register char	c;
854 
855 		rp = rep_str;
856 		while ((c = *rp++) != '\0') {
857 			if (c == '\\') {
858 				c = *rp++;
859 				if (c >= '0' && c < re_blk->r_nparens + '0') {
860 					tp = insert(tp, endp, c - '0');
861 					continue;
862 				}
863 				if (c == '\0') {
864 					/* treat \ at the end as if it were \\ */
865 					c = '\\';
866 					rp--;	/* be sure to hit again */
867 				}
868 			}
869 			*tp++ = c;
870 			if (tp >= endp)
871 				len_error(JMP_ERROR);
872 		}
873 	}
874 	rp = loc2;
875 	REdelta = -REeom;
876 	REeom = tp - tobuf;
877 	REdelta += REeom;
878 	if (loc1==rp && *rp!='\0') {
879 		/* Skip an extra character if the matched text was a null
880 		 * string, but don't skip over the end of line.  This is to
881 		 * prevent an infinite number of replacements in the same
882 		 * position, e.g., replace "^" with "".
883 		 */
884 		REeom += 1;
885 	}
886 	loc2 = re_blk->r_lbuf + REeom;
887 	while ((*tp++ = *rp++) != '\0')
888 		if (tp >= endp)
889 			len_error(JMP_ERROR);
890 }
891 
892 void
putmatch(which,buf,size)893 putmatch(which, buf, size)
894 int which;
895 char	*buf;
896 size_t size;
897 {
898 	*(insert(buf, buf + size, which)) = '\0';
899 }
900 
901 void
RErecur()902 RErecur()
903 {
904 	char	repbuf[sizeof rep_str];
905 	Mark	*m = MakeMark(curline, REbom);
906 
907 	message("Type ^X ^C to continue with query replace.");
908 
909 	byte_copy(rep_str, repbuf, sizeof rep_str);
910 	Recur();
911 	byte_copy(repbuf, rep_str, sizeof rep_str);
912 	if (!is_an_arg())
913 		ToMark(m);
914 	DelMark(m);
915 }
916 
917 /* Do we match PATTERN at OFFSET in BUF? */
918 
919 bool
LookingAt(pattern,buf,offset)920 LookingAt(pattern, buf, offset)
921 char	*pattern,
922 	*buf;
923 int offset;
924 {
925 	struct RE_block	re_blk;
926 	char	**alt = re_blk.r_alternates;
927 
928 	REcompile(pattern, YES, &re_blk);
929 	REreset();
930 	locrater = NULL;
931 	REbolp = buf;
932 
933 	while (*alt)
934 		if (REmatch(buf + offset, *alt++))
935 			return YES;
936 	return NO;
937 }
938 
939 bool
look_at(expr)940 look_at(expr)
941 char	*expr;
942 {
943 	struct RE_block	re_blk;
944 
945 	REcompile(expr, NO, &re_blk);
946 	REreset();
947 	locrater = NULL;
948 	REbolp = linebuf;
949 	if (REmatch(linebuf + curchar, re_blk.r_alternates[0]))
950 		return YES;
951 	return NO;
952 }
953