1 /************************************************************************
2 * This program is Copyright (C) 1986-1996 by Jonathan Payne. JOVE is *
3 * provided to you without charge, and with no warranty. You may give *
4 * away copies of JOVE, including sources, provided that this notice is *
5 * included in all the files. *
6 ************************************************************************/
7
8 /* search package */
9
10 #include "jove.h"
11 #include "re.h"
12 #include "jctype.h"
13 #include "ask.h"
14 #include "disp.h"
15 #include "fmt.h"
16 #include "marks.h"
17
18 private bool
19 do_comp proto((struct RE_block *,int));
20
21 char
22 rep_search[128], /* replace search string */
23 rep_str[128]; /* contains replacement string */
24
25 bool
26 CaseIgnore = NO, /* VAR: ignore case in search */
27 WrapScan = NO; /* VAR: make searches wrap */
28
29 private ZXchar REpeekc;
30 private char *REptr;
31
32 private ZXchar
REgetc()33 REgetc()
34 {
35 ZXchar c;
36
37 if ((c = REpeekc) != EOF)
38 REpeekc = EOF;
39 else if (*REptr != '\0')
40 c = ZXC(*REptr++);
41 else
42 c = EOF;
43
44 return c;
45 }
46
47 #define STAR 01 /* Match any number of last RE (ORed into other ops). */
48
49 #define AT_BOL 2 /* ^ */
50 #define AT_EOL 4 /* $ */
51 #define AT_BOW 6 /* \< */
52 #define AT_EOW 8 /* \> */
53 #define OPENP 10 /* \( {chunk number} */
54 #define CLOSEP 12 /* \) {chunk number} */
55 #define CURLYB 14 /* \{ {number of alt, alts } */
56
57 #define NOSTR 14 /* Codes <= NOSTR can't be *'d. */
58
59 #define ANYC (NOSTR+2) /* . */
60 #define NORMC (ANYC+2) /* normal chars {len, char...} */
61 #define CINDC (NORMC+2) /* case independent chars {len, char...} */
62 #define ONE_OF (CINDC+2) /* [xxx] {bitmask} */
63 #define NONE_OF (ONE_OF+2) /* [^xxx] {bitmask} */
64 #define BACKREF (NONE_OF+2) /* \# {chunk number} */
65 #define EOP (BACKREF+2) /* end of pattern */
66
67 #define CHAR_MASK ((1 << CHAR_BITS) - 1) /* byte mask, really */
68 #define ALT_LEN_LEN 2 /* an alt starts with a two-byte length */
69 #define ALT_LEN(p) (((p)[0] & CHAR_MASK) + (((p)[1] & CHAR_MASK) << CHAR_BITS))
70
71 /* ONE_OF/NONE_OF is represented as a bit vector.
72 * These symbols parameterize the representation.
73 */
74
75 #define SETSIZE (NCHARS / CHAR_BITS)
76 #define SETBYTE(c) ((c) / CHAR_BITS)
77 #define SETBIT(c) (1 << ((c) % CHAR_BITS))
78
79 #define NPAR 10 /* [0-9] - 0th is the entire matched string, i.e. & */
80 private char *comp_ptr,
81 **alt_p,
82 **alt_endp;
83
84 void
REcompile(pattern,re,re_blk)85 REcompile(pattern, re, re_blk)
86 char *pattern;
87 bool re;
88 struct RE_block *re_blk;
89 {
90 REptr = pattern;
91 REpeekc = EOF;
92 comp_ptr = re_blk->r_compbuf;
93 alt_p = re_blk->r_alternates;
94 alt_endp = alt_p + NALTS - 1;
95 *alt_p++ = comp_ptr;
96 re_blk->r_nparens = 0;
97 (void) do_comp(re_blk, re ? OKAY_RE : NORM);
98 *alt_p = NULL;
99
100 re_blk->r_anchored = NO;
101 re_blk->r_firstc = EOF;
102 /* do a little post processing */
103 if (re_blk->r_alternates[1] == NULL) {
104 char *p;
105
106 p = re_blk->r_alternates[0];
107 for (;;) {
108 switch (*p) {
109 case OPENP:
110 case CLOSEP:
111 p += 2;
112 continue;
113
114 case AT_BOW:
115 case AT_EOW:
116 p += 1;
117 continue;
118
119 case AT_BOL:
120 re_blk->r_anchored = YES;
121 /* don't set firstc -- won't work */
122 break;
123
124 case NORMC:
125 case CINDC:
126 re_blk->r_firstc = CharUpcase(p[2]);
127 break;
128
129 default:
130 break;
131 }
132 break;
133 }
134 }
135 }
136
137 /* compile the pattern into an internal code */
138
139 private bool
do_comp(re_blk,kind)140 do_comp(re_blk, kind)
141 struct RE_block *re_blk;
142 int kind;
143 {
144 char *this_verb,
145 *prev_verb,
146 *start_p,
147 *comp_endp;
148 int parens[NPAR],
149 *parenp,
150 outer_max_paren = -1;
151 ZXchar c;
152 bool done_cb = NO;
153
154 parenp = parens;
155 this_verb = NULL;
156 comp_endp = &re_blk->r_compbuf[COMPSIZE - 6];
157
158 /* wrap the whole expression around (implied) parens */
159 if (kind != IN_CB) {
160 if (re_blk->r_nparens >= NPAR)
161 complain("Too many ('s; max is %d.", NPAR);
162 *comp_ptr++ = OPENP;
163 *parenp++ = *comp_ptr++ = re_blk->r_nparens++;
164 }
165
166 start_p = comp_ptr;
167
168 while ((c = REgetc()) != EOF) {
169 if (comp_ptr > comp_endp) {
170 toolong:
171 complain("Search string too long/complex.");
172 }
173 prev_verb = this_verb;
174 this_verb = comp_ptr;
175
176 /* The following test ought to be
177 * kind == NORM && c != '\\'
178 * but Jon likes to put ^, $, and \ in i-searches.
179 * Don't tell him, but $ only sort of works. -- DHR
180 */
181 if (kind == NORM && strchr("^$\\", c) == NULL)
182 goto defchar;
183 switch (c) {
184 case '\\':
185 switch (c = REgetc()) {
186 case EOF:
187 complain("[Premature end of pattern]");
188 /*NOTREACHED*/
189
190 case '{':
191 {
192 char *altcntp; /* alternate count */
193 int
194 init_paren = re_blk->r_nparens,
195 max_paren = -1;
196
197 *comp_ptr++ = CURLYB;
198 altcntp = comp_ptr;
199 *comp_ptr++ = 0; /* initialize alt-count */
200 for (;;) {
201 char *comp_len = comp_ptr;
202 bool done;
203 long len;
204
205 comp_ptr += ALT_LEN_LEN;
206 re_blk->r_nparens = init_paren;
207 done = do_comp(re_blk, IN_CB);
208
209 /* We demand that each alternate has the same number
210 * of parens because we currently have no mechanism to
211 * set the matching strings to a meaningful default.
212 */
213 if (max_paren == -1)
214 max_paren = re_blk->r_nparens;
215 if (max_paren != re_blk->r_nparens)
216 complain("[each alternate must have the same number of \\( \\)]");
217
218 len = comp_ptr - comp_len;
219 comp_len[0] = (char) len; /* truncate */
220 comp_len[1] = (char) (len >> CHAR_BITS); /* truncate */
221 (*altcntp) += 1;
222 if (done)
223 break;
224 }
225 break;
226 }
227
228 case '}':
229 if (kind != IN_CB)
230 complain("Unexpected \\}.");
231 done_cb = YES;
232 goto outahere;
233
234 case '(':
235 if (re_blk->r_nparens >= NPAR)
236 complain("Too many ('s; max is %d.", NPAR);
237 *comp_ptr++ = OPENP;
238 *parenp++ = *comp_ptr++ = re_blk->r_nparens++;
239 break;
240
241 case ')':
242 if (parenp == parens)
243 complain("Too many )'s.");
244 *comp_ptr++ = CLOSEP;
245 *comp_ptr++ = *--parenp;
246 break;
247
248 case '|':
249 if (kind == IN_CB)
250 goto outahere;
251 if (alt_p >= alt_endp)
252 complain("Too many alternates; max %d.", NALTS);
253 /* close off previous alternate */
254 *comp_ptr++ = CLOSEP;
255 *comp_ptr++ = *--parenp;
256 if (parenp != parens)
257 complain("Unmatched \\(.");
258 *comp_ptr++ = EOP;
259
260 /* We demand that each alternate has the same number
261 * of parens because we currently have no mechanism to
262 * set the matching strings to a meaningful default.
263 */
264 if (outer_max_paren == -1)
265 outer_max_paren = re_blk->r_nparens;
266 if (outer_max_paren != re_blk->r_nparens)
267 complain("[each alternate must have the same number of \\( \\)]");
268
269 /* start a new alt */
270 *alt_p++ = comp_ptr;
271 re_blk->r_nparens = 0;
272 *comp_ptr++ = OPENP;
273 *parenp++ = *comp_ptr++ = re_blk->r_nparens++;
274 start_p = comp_ptr;
275 break;
276
277 case '1':
278 case '2':
279 case '3':
280 case '4':
281 case '5':
282 case '6':
283 case '7':
284 case '8':
285 case '9':
286 *comp_ptr++ = BACKREF;
287 *comp_ptr++ = c - '0';
288 break;
289
290 case '<':
291 *comp_ptr++ = AT_BOW;
292 break;
293
294 case '>':
295 *comp_ptr++ = AT_EOW;
296 break;
297
298 default:
299 goto defchar;
300 }
301 break;
302
303 case '.':
304 *comp_ptr++ = ANYC;
305 break;
306
307 case '^':
308 if (comp_ptr == start_p) {
309 *comp_ptr++ = AT_BOL;
310 break;
311 }
312 goto defchar;
313
314 case '$':
315 if ((REpeekc = REgetc()) != EOF && REpeekc != '\\')
316 goto defchar;
317 *comp_ptr++ = AT_EOL;
318 break;
319
320 case '[':
321 *comp_ptr++ = ONE_OF;
322 if (comp_ptr + SETSIZE >= comp_endp)
323 goto toolong;
324 byte_zero(comp_ptr, (size_t) SETSIZE);
325 c = REgetc();
326 if (c == '^') {
327 *this_verb = NONE_OF;
328 c = REgetc();
329 }
330 do {
331 if (c == EOF)
332 break;
333 if (c == '\\') {
334 c = REgetc();
335 if (c == EOF)
336 break;
337 }
338 if ((REpeekc = REgetc()) == '-') {
339 /* possibly a range */
340 ZXchar i = c;
341
342 REpeekc = EOF; /* discard '-' */
343 c = REgetc();
344 if (c == EOF)
345 break;
346 if (c == ']') {
347 /* not a range after all */
348 REpeekc = c; /* push back ']' */
349 c = '-'; /* recycle '-' */
350 comp_ptr[SETBYTE(i)] |= SETBIT(i); /* handle initial char */
351 } else {
352 /* really a range: add members up to c */
353 if (c == '\\') {
354 c = REgetc();
355 if (c == EOF)
356 break;
357 }
358 while (i < c) {
359 comp_ptr[SETBYTE(i)] |= SETBIT(i);
360 i += 1;
361 }
362 }
363 }
364 comp_ptr[SETBYTE(c)] |= SETBIT(c);
365 c = REgetc();
366 } while (c != ']');
367 if (c == EOF)
368 complain("Missing ].");
369 comp_ptr += SETSIZE;
370 break;
371
372 case '*':
373 if (prev_verb == NULL || *prev_verb <= NOSTR || (*prev_verb&STAR)!=0)
374 goto defchar;
375
376 if (*prev_verb == NORMC || *prev_verb == CINDC) {
377 char lastc = comp_ptr[-1];
378
379 /* The * operator applies only to the
380 * previous character. Since we were
381 * building a string-matching command
382 * (NORMC or CINDC), we must split it
383 * up and work with the last character.
384 *
385 * Note that the STARed versions of these
386 * commands do not operate on strings, and
387 * so do not need or have character counts.
388 */
389
390 if (prev_verb[1] == 1) {
391 /* Only one char in string:
392 * delete old command.
393 */
394 this_verb = prev_verb;
395 } else {
396 /* Several chars in string:
397 * strip off the last.
398 * New verb is derived from old.
399 */
400 prev_verb[1] -= 1;
401 this_verb -= 1;
402 *this_verb = *prev_verb;
403 }
404 comp_ptr = this_verb + 1;
405 *comp_ptr++ = lastc;
406 } else {
407 /* This command is just the previous one,
408 * whose verb we will modify.
409 */
410 this_verb = prev_verb;
411 }
412 *this_verb |= STAR;
413 break;
414 default:
415 defchar:
416 if (prev_verb == NULL
417 || !(*prev_verb == NORMC || *prev_verb == CINDC))
418 {
419 /* create new string command */
420 *comp_ptr++ = (CaseIgnore) ? CINDC : NORMC;
421 *comp_ptr++ = 0;
422 } else {
423 /* merge this into previous string command */
424 this_verb = prev_verb;
425 }
426 this_verb[1] += 1;
427 *comp_ptr++ = c;
428 break;
429 }
430 }
431 outahere:
432
433 /* End of pattern, let's do some error checking. */
434 if (kind != IN_CB) {
435 *comp_ptr++ = CLOSEP;
436 *comp_ptr++ = *--parenp;
437 }
438 if (parenp != parens)
439 complain("Unmatched \\(.");
440 if (kind == IN_CB && c == EOF) /* end of pattern with missing \}. */
441 complain("Missing \\}.");
442 *comp_ptr++ = EOP;
443
444 /* We demand that each alternate has the same number
445 * of parens because we currently have no mechanism to
446 * set the matching strings to a meaningful default.
447 */
448 if (outer_max_paren != -1 && outer_max_paren != re_blk->r_nparens)
449 complain("[each alternate must have the same number of \\( \\)]");
450
451 return done_cb;
452 }
453
454 private char *pstrtlst[NPAR], /* index into re_blk->r_lbuf */
455 *pendlst[NPAR],
456 *REbolp, /* begining-of-line pointer */
457 *locrater, /* roof of last substitution */
458 *loc1, /* start of matched text */
459 *loc2; /* roof of matched text */
460
461 int REbom, /* beginning and end columns of match */
462 REeom,
463 REdelta; /* increase in line length due to last re_dosub */
464
465 private bool
backref(n,linep)466 backref(n, linep)
467 int n;
468 register char *linep;
469 {
470 register char *backsp,
471 *backep;
472
473 backsp = pstrtlst[n];
474 backep = pendlst[n];
475 while (*backsp++ == *linep++)
476 if (backsp >= backep)
477 return YES;
478 return NO;
479 }
480
481 private bool
member(comp_ptr,c,af)482 member(comp_ptr, c, af)
483 register char *comp_ptr;
484 register ZXchar c;
485 bool af;
486 {
487 return c != '\0' && ((comp_ptr[SETBYTE(c)] & SETBIT(c))? af : !af);
488 }
489
490 private bool
REmatch(linep,comp_ptr)491 REmatch(linep, comp_ptr)
492 register char *linep,
493 *comp_ptr;
494 {
495 char *first_p;
496 register int n;
497
498 for (;;) switch (*comp_ptr++) {
499 case NORMC:
500 n = *comp_ptr++;
501 while (--n >= 0)
502 if (*linep++ != *comp_ptr++)
503 return NO;
504 continue;
505
506 case CINDC: /* case independent comparison */
507 n = *comp_ptr++;
508 while (--n >= 0)
509 if (!cind_eq(*linep++, *comp_ptr++))
510 return NO;
511 continue;
512
513 case EOP:
514 loc2 = linep;
515 REeom = (loc2 - REbolp);
516 return YES; /* Success! */
517
518 case AT_BOL:
519 if (linep == REbolp && linep != locrater)
520 continue;
521 return NO;
522
523 case AT_EOL:
524 if (*linep == '\0')
525 continue;
526 return NO;
527
528 case ANYC:
529 if (*linep++ != '\0')
530 continue;
531 return NO;
532
533 case AT_BOW:
534 if (linep != locrater && jisident(*linep)
535 && (linep == REbolp || !jisident(linep[-1])))
536 continue;
537 return NO;
538
539 case AT_EOW:
540 if (linep != locrater && (*linep == '\0' || !jisident(*linep))
541 && (linep != REbolp && jisident(linep[-1])))
542 continue;
543 return NO;
544
545 case ONE_OF:
546 case NONE_OF:
547 if (member(comp_ptr, ZXC(*linep++), comp_ptr[-1] == ONE_OF)) {
548 comp_ptr += SETSIZE;
549 continue;
550 }
551 return NO;
552
553 case OPENP:
554 pstrtlst[*comp_ptr++] = linep;
555 continue;
556
557 case CLOSEP:
558 pendlst[*comp_ptr++] = linep;
559 continue;
560
561 case BACKREF:
562 if (pstrtlst[n = *comp_ptr++] == NULL) {
563 s_mess("\\%d was not specified.", n + 1);
564 } else if (backref(n, linep)) {
565 linep += pendlst[n] - pstrtlst[n];
566 continue;
567 }
568 return NO;
569
570 case CURLYB:
571 {
572 int altcnt = *comp_ptr++;
573 bool any = NO;
574
575 while (--altcnt >= 0) {
576 if (!any)
577 any = REmatch(linep, comp_ptr + ALT_LEN_LEN);
578 comp_ptr += ALT_LEN(comp_ptr);
579 }
580 if (!any)
581 return NO;
582 linep = loc2;
583 continue;
584 }
585
586 case ANYC | STAR:
587 first_p = linep;
588 do ; while (*linep++ != '\0');
589 goto star;
590
591 case NORMC | STAR:
592 first_p = linep;
593 do ; while (*comp_ptr == *linep++);
594 comp_ptr += 1;
595 goto star;
596
597 case CINDC | STAR:
598 first_p = linep;
599 do ; while (cind_eq(*comp_ptr, *linep++));
600 comp_ptr += 1;
601 goto star;
602
603 case ONE_OF | STAR:
604 case NONE_OF | STAR:
605 first_p = linep;
606 do ; while (member(comp_ptr, ZXC(*linep++), comp_ptr[-1] == (ONE_OF | STAR)));
607 comp_ptr += SETSIZE;
608 /* fall through */
609 star:
610 /* linep points *after* first unmatched char.
611 * first_p points at where starred element started matching.
612 */
613 while (--linep > first_p) {
614 if ((*comp_ptr != NORMC || *linep == comp_ptr[2])
615 && REmatch(linep, comp_ptr))
616 return YES;
617 }
618 continue;
619
620 case BACKREF | STAR:
621 first_p = linep;
622 n = *comp_ptr++;
623 while (backref(n, linep))
624 linep += pendlst[n] - pstrtlst[n];
625 while (linep > first_p) {
626 if (REmatch(linep, comp_ptr))
627 return YES;
628 linep -= pendlst[n] - pstrtlst[n];
629 }
630 continue;
631
632 default:
633 complain("RE error match (%d).", comp_ptr[-1]);
634 }
635 /* NOTREACHED */
636 }
637
638 private void
REreset()639 REreset()
640 {
641 register int i;
642
643 for (i = 0; i < NPAR; i++)
644 pstrtlst[i] = pendlst[i] = NULL;
645 }
646
647 /* Index LINE at OFFSET. If lbuf_okay is YES it's okay to use linebuf
648 if LINE is the current line. This should save lots of time in things
649 like paren matching in LISP mode. Saves all that copying from linebuf
650 to a local buffer. substitute() is the guy who calls re_lindex with
651 lbuf_okay as NO, since the substitution gets placed in linebuf ...
652 doesn't work too well when the source and destination strings are the
653 same. I hate all these arguments!
654
655 This code is cumbersome, repetetive for reasons of efficiency. Fast
656 search is a must as far as I am concerned. */
657
658 bool
re_lindex(line,offset,dir,re_blk,lbuf_okay,crater)659 re_lindex(line, offset, dir, re_blk, lbuf_okay, crater)
660 LinePtr line;
661 int offset;
662 int dir;
663 struct RE_block *re_blk;
664 bool lbuf_okay;
665 int crater; /* offset of previous substitute (or -1) */
666 {
667 register char *p;
668 register ZXchar firstc = re_blk->r_firstc;
669 register bool anchored = re_blk->r_anchored;
670 char **alts = re_blk->r_alternates;
671
672 REreset();
673 if (lbuf_okay) {
674 REbolp = lbptr(line);
675 if (offset == -1)
676 offset = strlen(REbolp); /* arg! */
677 } else {
678 REbolp = ltobuf(line, re_blk->r_lbuf);
679 if (offset == -1) { /* Reverse search, find end of line. */
680 offset = Jr_Len; /* Just Read Len. */
681 }
682 }
683
684 if (anchored) {
685 if (dir == FORWARD) {
686 if (offset != 0 || crater != -1)
687 return NO;
688 } else {
689 offset = 0;
690 }
691 }
692
693 p = REbolp + offset;
694 locrater = REbolp + crater;
695
696 if (firstc != EOF) {
697 char *first_alt = *alts;
698
699 if (dir == FORWARD) {
700 while (CharUpcase(*p) != firstc || !REmatch(p, first_alt))
701 if (*p++ == '\0')
702 return NO;
703 } else {
704 while (CharUpcase(*p) != firstc || !REmatch(p, first_alt))
705 if (--p < REbolp)
706 return NO;
707 }
708 } else {
709 for (;;) {
710 register char **altp = alts;
711
712 while (*altp != NULL)
713 if (REmatch(p, *altp++))
714 goto success;
715 if (anchored
716 || (dir == FORWARD ? *p++ == '\0' : --p < REbolp))
717 return NO;
718 }
719 success:;
720 }
721 loc1 = p;
722 REbom = loc1 - REbolp;
723
724 return YES;
725 }
726
727 bool okay_wrap = NO; /* Do a wrap search ... not when we're
728 parsing errors ... */
729
730 Bufpos *
dosearch(pattern,dir,re)731 dosearch(pattern, dir, re)
732 char *pattern;
733 int dir;
734 bool re;
735 {
736 Bufpos *pos;
737 struct RE_block re_blk; /* global re-compiled buffer */
738
739 if (bobp() && eobp()) /* Can't match! There's no buffer. */
740 return NULL;
741
742 REcompile(pattern, re, &re_blk);
743
744 pos = docompiled(dir, &re_blk);
745 return pos;
746 }
747
748 Bufpos *
docompiled(dir,re_blk)749 docompiled(dir, re_blk)
750 int dir;
751 register struct RE_block *re_blk;
752 {
753 static Bufpos ret;
754 register LinePtr lp;
755 register int offset;
756 bool we_wrapped = NO;
757
758 lsave();
759 /* Search now lsave()'s so it doesn't make any assumptions on
760 whether the the contents of curline/curchar are in linebuf.
761 Nowhere does search write all over linebuf. However, we have to
762 be careful about what calls we make here, because many of them
763 assume (and rightly so) that curline is in linebuf. */
764
765 lp = curline;
766 offset = curchar;
767 if (dir == BACKWARD) {
768 if (bobp()) {
769 if (okay_wrap && WrapScan)
770 goto doit;
771 return NULL;
772 }
773 /* here we simulate BackChar() */
774 if (bolp()) {
775 lp = lp->l_prev;
776 offset = length(lp);
777 } else {
778 offset -= 1;
779 }
780 } else if (dir==FORWARD && lbptr(lp)[offset]=='\0' && !lastp(lp)) {
781 lp = lp->l_next;
782 offset = 0;
783 }
784
785 do {
786 if (re_lindex(lp, offset, dir, re_blk, YES, -1))
787 break;
788 doit:
789 lp = (dir == FORWARD) ? lp->l_next : lp->l_prev;
790 if (lp == NULL) {
791 if (okay_wrap && WrapScan) {
792 lp = (dir == FORWARD) ?
793 curbuf->b_first : curbuf->b_last;
794 we_wrapped = YES;
795 } else
796 break;
797 }
798 if (dir == FORWARD)
799 offset = 0;
800 else
801 offset = -1; /* signals re_lindex ... */
802 } while (lp != curline);
803
804 if (lp == curline && we_wrapped)
805 lp = NULL;
806 if (lp == NULL)
807 return NULL;
808 ret.p_line = lp;
809 ret.p_char = (dir == FORWARD) ? REeom : REbom;
810 return &ret;
811 }
812
813 private char *
insert(off,endp,which)814 insert(off, endp, which)
815 char *off,
816 *endp;
817 int which;
818 {
819 register char *pp;
820 register int n;
821
822 n = pendlst[which] - pstrtlst[which];
823 pp = pstrtlst[which];
824 while (--n >= 0) {
825 *off++ = *pp++;
826 if (off >= endp)
827 len_error(JMP_ERROR);
828 }
829 return off;
830 }
831
832 /* Perform the substitution. If DELP is YES the matched string is
833 deleted, i.e., the substitution string is not inserted. */
834
835 void
re_dosub(re_blk,tobuf,delp)836 re_dosub(re_blk, tobuf, delp)
837 struct RE_block *re_blk;
838 char *tobuf;
839 bool delp;
840 {
841 register char *tp,
842 *rp;
843 char *endp;
844
845 tp = tobuf;
846 endp = tp + LBSIZE;
847 rp = re_blk->r_lbuf;
848
849 while (rp < loc1)
850 *tp++ = *rp++;
851
852 if (!delp) {
853 register char c;
854
855 rp = rep_str;
856 while ((c = *rp++) != '\0') {
857 if (c == '\\') {
858 c = *rp++;
859 if (c >= '0' && c < re_blk->r_nparens + '0') {
860 tp = insert(tp, endp, c - '0');
861 continue;
862 }
863 if (c == '\0') {
864 /* treat \ at the end as if it were \\ */
865 c = '\\';
866 rp--; /* be sure to hit again */
867 }
868 }
869 *tp++ = c;
870 if (tp >= endp)
871 len_error(JMP_ERROR);
872 }
873 }
874 rp = loc2;
875 REdelta = -REeom;
876 REeom = tp - tobuf;
877 REdelta += REeom;
878 if (loc1==rp && *rp!='\0') {
879 /* Skip an extra character if the matched text was a null
880 * string, but don't skip over the end of line. This is to
881 * prevent an infinite number of replacements in the same
882 * position, e.g., replace "^" with "".
883 */
884 REeom += 1;
885 }
886 loc2 = re_blk->r_lbuf + REeom;
887 while ((*tp++ = *rp++) != '\0')
888 if (tp >= endp)
889 len_error(JMP_ERROR);
890 }
891
892 void
putmatch(which,buf,size)893 putmatch(which, buf, size)
894 int which;
895 char *buf;
896 size_t size;
897 {
898 *(insert(buf, buf + size, which)) = '\0';
899 }
900
901 void
RErecur()902 RErecur()
903 {
904 char repbuf[sizeof rep_str];
905 Mark *m = MakeMark(curline, REbom);
906
907 message("Type ^X ^C to continue with query replace.");
908
909 byte_copy(rep_str, repbuf, sizeof rep_str);
910 Recur();
911 byte_copy(repbuf, rep_str, sizeof rep_str);
912 if (!is_an_arg())
913 ToMark(m);
914 DelMark(m);
915 }
916
917 /* Do we match PATTERN at OFFSET in BUF? */
918
919 bool
LookingAt(pattern,buf,offset)920 LookingAt(pattern, buf, offset)
921 char *pattern,
922 *buf;
923 int offset;
924 {
925 struct RE_block re_blk;
926 char **alt = re_blk.r_alternates;
927
928 REcompile(pattern, YES, &re_blk);
929 REreset();
930 locrater = NULL;
931 REbolp = buf;
932
933 while (*alt)
934 if (REmatch(buf + offset, *alt++))
935 return YES;
936 return NO;
937 }
938
939 bool
look_at(expr)940 look_at(expr)
941 char *expr;
942 {
943 struct RE_block re_blk;
944
945 REcompile(expr, NO, &re_blk);
946 REreset();
947 locrater = NULL;
948 REbolp = linebuf;
949 if (REmatch(linebuf + curchar, re_blk.r_alternates[0]))
950 return YES;
951 return NO;
952 }
953