1 /*
2 * compmatch.c - the complete module, completion matching code
3 *
4 * This file is part of zsh, the Z shell.
5 *
6 * Copyright (c) 1999 Sven Wischnowsky
7 * All rights reserved.
8 *
9 * Permission is hereby granted, without written agreement and without
10 * license or royalty fees, to use, copy, modify, and distribute this
11 * software and to distribute modified versions of this software for any
12 * purpose, provided that the above copyright notice and the following
13 * two paragraphs appear in all copies of this software.
14 *
15 * In no event shall Sven Wischnowsky or the Zsh Development Group be liable
16 * to any party for direct, indirect, special, incidental, or consequential
17 * damages arising out of the use of this software and its documentation,
18 * even if Sven Wischnowsky and the Zsh Development Group have been advised of
19 * the possibility of such damage.
20 *
21 * Sven Wischnowsky and the Zsh Development Group specifically disclaim any
22 * warranties, including, but not limited to, the implied warranties of
23 * merchantability and fitness for a particular purpose. The software
24 * provided hereunder is on an "as is" basis, and Sven Wischnowsky and the
25 * Zsh Development Group have no obligation to provide maintenance,
26 * support, updates, enhancements, or modifications.
27 *
28 */
29
30 #include "complete.mdh"
31 #include "compmatch.pro"
32
33 /*
34 * This compares two cpattern lists and returns non-zero if they are
35 * equal (N.B. opposite sense to usual *cmp()).
36 *
37 * The old version of this didn't worry about whether the lists
38 * were the same length. This one does. It's hard to see how
39 * that can be wrong even if it's unnecessary.
40 */
41
42 /**/
43 static int
cpatterns_same(Cpattern a,Cpattern b)44 cpatterns_same(Cpattern a, Cpattern b)
45 {
46 while (a) {
47 if (!b)
48 return 0;
49 if (a->tp != b->tp)
50 return 0;
51 switch (a->tp) {
52 case CPAT_CCLASS:
53 case CPAT_NCLASS:
54 case CPAT_EQUIV:
55 /*
56 * Patterns can actually match the same even if
57 * the range strings don't compare differently, but
58 * I don't think we need to handle that subtlety.
59 */
60 if (strcmp(a->u.str, b->u.str) != 0)
61 return 0;
62 break;
63
64 case CPAT_CHAR:
65 if (a->u.chr != b->u.chr)
66 return 0;
67 break;
68
69 default:
70 /* here to silence compiler */
71 break;
72 }
73
74 a = a->next;
75 b = b->next;
76 }
77 return !b;
78 }
79
80 /* This compares two cmatchers and returns non-zero if they are equal. */
81
82 /**/
83 static int
cmatchers_same(Cmatcher a,Cmatcher b)84 cmatchers_same(Cmatcher a, Cmatcher b)
85 {
86 return (a == b ||
87 (a->flags == b->flags &&
88 a->llen == b->llen && a->wlen == b->wlen &&
89 (!a->llen || cpatterns_same(a->line, b->line)) &&
90 (a->wlen <= 0 || cpatterns_same(a->word, b->word)) &&
91 (!(a->flags & (CMF_LEFT | CMF_RIGHT)) ||
92 (a->lalen == b->lalen && a->ralen == b->ralen &&
93 (!a->lalen || cpatterns_same(a->left, b->left)) &&
94 (!a->ralen || cpatterns_same(a->right, b->right))))));
95 }
96
97 /* Add the given matchers to the bmatcher list. */
98
99 /**/
100 mod_export void
add_bmatchers(Cmatcher m)101 add_bmatchers(Cmatcher m)
102 {
103 Cmlist old = bmatchers, *q = &bmatchers, n;
104
105 for (; m; m = m->next) {
106 if ((!m->flags && m->wlen > 0 && m->llen > 0) ||
107 (m->flags == CMF_RIGHT && m->wlen < 0 && !m->llen)) {
108 *q = n = (Cmlist) zhalloc(sizeof(struct cmlist));
109 n->matcher = m;
110 q = &(n->next);
111 }
112 }
113 *q = old;
114 }
115
116 /* This is called when the matchers in the mstack have changed to
117 * ensure that the bmatchers list contains no matchers not in mstack. */
118
119 /**/
120 mod_export void
update_bmatchers(void)121 update_bmatchers(void)
122 {
123 Cmlist p = bmatchers, ms;
124 Cmatcher mp;
125 int t;
126
127 while (p) {
128 t = 0;
129 for (ms = mstack; ms && !t; ms = ms->next)
130 for (mp = ms->matcher; mp && !t; mp = mp->next)
131 t = cmatchers_same(mp, p->matcher);
132
133 p = p->next;
134 if (!t) {
135 bmatchers = p;
136 }
137 }
138 }
139
140 /* This returns a new Cline structure. */
141
142 /**/
143 Cline
get_cline(char * l,int ll,char * w,int wl,char * o,int ol,int fl)144 get_cline(char *l, int ll, char *w, int wl, char *o, int ol, int fl)
145 {
146 Cline r;
147
148 /* Prefer to take it from the buffer list (freecl), if there
149 * is none, allocate a new one. */
150
151 if ((r = freecl))
152 freecl = r->next;
153 else
154 r = (Cline) zhalloc(sizeof(*r));
155
156 r->next = NULL;
157 r->line = l; r->llen = ll;
158 r->word = w; r->wlen = wl;
159 DPUTS(wl > 0 && !*w, "Bad word");
160 r->orig = o; r->olen = ol;
161 r->slen = 0;
162 r->flags = fl;
163 r->prefix = r->suffix = NULL;
164 r->min = r->max = 0;
165 return r;
166 }
167
168 /* This frees a cline list. */
169
170 /**/
171 void
free_cline(Cline l)172 free_cline(Cline l)
173 {
174 Cline n;
175
176 while (l) {
177 n = l->next;
178 l->next = freecl;
179 freecl = l;
180 free_cline(l->prefix);
181 free_cline(l->suffix);
182 l = n;
183 }
184 }
185
186 /* Copy a cline list. */
187
188 /**/
189 Cline
cp_cline(Cline l,int deep)190 cp_cline(Cline l, int deep)
191 {
192 Cline r = NULL, *p = &r, t, lp = NULL;
193
194 while (l) {
195 if ((t = freecl))
196 freecl = t->next;
197 else
198 t = (Cline) zhalloc(sizeof(*t));
199 memcpy(t, l, sizeof(*t));
200 if (deep) {
201 if (t->prefix)
202 t->prefix = cp_cline(t->prefix, 0);
203 if (t->suffix)
204 t->suffix = cp_cline(t->suffix, 0);
205 }
206 *p = lp = t;
207 p = &(t->next);
208 l = l->next;
209 }
210 *p = NULL;
211
212 return r;
213 }
214
215 /* Calculate the length of a cline and its sub-lists. */
216
217 /**/
218 int
cline_sublen(Cline l)219 cline_sublen(Cline l)
220 {
221 int len = ((l->flags & CLF_LINE) ? l->llen : l->wlen);
222
223 if (l->olen && !((l->flags & CLF_SUF) ? l->suffix : l->prefix))
224 len += l->olen;
225 else {
226 Cline p;
227
228 for (p = l->prefix; p; p = p->next)
229 len += ((p->flags & CLF_LINE) ? p->llen : p->wlen);
230 for (p = l->suffix; p; p = p->next)
231 len += ((p->flags & CLF_LINE) ? p->llen : p->wlen);
232 }
233 return len;
234 }
235
236 /* Set the lengths in the cline lists. */
237
238 /**/
239 void
cline_setlens(Cline l,int both)240 cline_setlens(Cline l, int both)
241 {
242 while (l) {
243 l->min = cline_sublen(l);
244 if (both)
245 l->max = l->min;
246 l = l->next;
247 }
248 }
249
250 /* This sets the CLF_MATCHED flag in the given clines. */
251
252 /**/
253 void
cline_matched(Cline p)254 cline_matched(Cline p)
255 {
256 while (p) {
257 p->flags |= CLF_MATCHED;
258 cline_matched(p->prefix);
259 cline_matched(p->suffix);
260
261 p = p->next;
262 }
263 }
264
265 /* This reverts the order of the elements of the given cline list and
266 * returns a pointer to the new head. */
267
268 /**/
269 Cline
revert_cline(Cline p)270 revert_cline(Cline p)
271 {
272 Cline r = NULL, n;
273
274 while (p) {
275 n = p->next;
276 p->next = r;
277 r = p;
278 p = n;
279 }
280 return r;
281 }
282
283 /* Global variables used during matching: a char-buffer for the string to
284 * use for the match, and two cline lists for the two levels we use. */
285
286 /**/
287 char *matchbuf = NULL;
288 /**/
289 int matchbuflen = 0, matchbufadded;
290
291 /**/
292 Cline matchparts, matchlastpart;
293 /**/
294 Cline matchsubs, matchlastsub;
295
296 /* This initialises the variables above. */
297
298 /**/
299 static void
start_match(void)300 start_match(void)
301 {
302 if (matchbuf)
303 *matchbuf = '\0';
304 matchbufadded = 0;
305 matchparts = matchlastpart = matchsubs = matchlastsub = NULL;
306 }
307
308 /* This aborts a matching, freeing the cline lists build. */
309
310 /**/
311 static void
abort_match(void)312 abort_match(void)
313 {
314 free_cline(matchparts);
315 free_cline(matchsubs);
316 matchparts = matchsubs = NULL;
317 }
318
319 /* This adds a new string in the static char buffer. The arguments are
320 * the matcher used (if any), the strings from the line and the word
321 * and the length of the string from the word. The last argument is
322 * non-zero if we are matching a suffix (where the given string has to
323 * be prepended to the contents of the buffer). */
324
325 /**/
326 static void
add_match_str(Cmatcher m,char * l,char * w,int wl,int sfx)327 add_match_str(Cmatcher m, char *l, char *w, int wl, int sfx)
328 {
329 /* Get the string and length to insert: either from the line
330 * or from the match. */
331 if (m && (m->flags & CMF_LINE)) {
332 wl = m->llen; w = l;
333 }
334 if (wl) {
335 /* Probably resize the buffer. */
336 if (matchbuflen - matchbufadded <= wl) {
337 int blen = matchbuflen + wl + 20;
338 char *buf;
339
340 buf = (char *) zalloc(blen);
341 if (matchbuf) {
342 memcpy(buf, matchbuf, matchbuflen);
343 zfree(matchbuf, matchbuflen);
344 }
345 #ifdef DEBUG
346 else {
347 DPUTS(matchbuflen, "matchbuflen with no matchbuf");
348 }
349 #endif
350 matchbuf = buf;
351 matchbuflen = blen;
352 }
353 /* Insert the string. */
354 if (sfx) {
355 memmove(matchbuf + wl, matchbuf, matchbufadded + 1);
356 memcpy(matchbuf, w, wl);
357 } else
358 memcpy(matchbuf + matchbufadded, w, wl);
359 matchbufadded += wl;
360 matchbuf[matchbufadded] = '\0';
361 }
362 }
363
364 /* This adds a cline for a word-part during matching. Arguments are the
365 * matcher used, pointers to the line and word strings for the anchor,
366 * a pointer to the original line string for the whole part, the string
367 * before (or after) the anchor that has not yet been added, the length
368 * of the line-string for that, and a flag saying if we are matching a
369 * suffix. */
370
371 /**/
372 static void
add_match_part(Cmatcher m,char * l,char * w,int wl,char * o,int ol,char * s,int sl,int osl,int sfx)373 add_match_part(Cmatcher m, char *l, char *w, int wl,
374 char *o, int ol, char *s, int sl, int osl, int sfx)
375 {
376 Cline p, lp, lprem;
377
378 /* If the anchors are equal, we keep only one. */
379
380 if (l && !strncmp(l, w, wl))
381 l = NULL;
382
383 /*
384 * Split the new part into parts and turn the last one into a
385 * `suffix' if we have a left anchor---don't do this if the last one
386 * came from a right anchor before the end of the part we're
387 * splitting.
388 */
389
390 p = bld_parts(s, sl, osl, &lp, &lprem);
391
392 if (lprem && m && (m->flags & CMF_LEFT)) {
393 lprem->flags |= CLF_SUF;
394 lprem->suffix = lprem->prefix;
395 lprem->prefix = NULL;
396 }
397 /* cline lists for suffixes are sorted from back to front, so we have
398 * to revert the list we got. */
399 if (sfx)
400 p = revert_cline(lp = p);
401 /* Now add the sub-clines we already had. */
402 if (matchsubs) {
403 if (sfx) {
404 Cline q;
405
406 if ((q = lp->prefix)) {
407 while (q->next)
408 q = q->next;
409 q->next = matchsubs;
410 } else
411 lp->prefix = matchsubs;
412
413 matchlastsub->next = NULL;
414 } else {
415 matchlastsub->next = p->prefix;
416 p->prefix = matchsubs;
417 }
418 matchsubs = matchlastsub = NULL;
419 }
420 /* Store the arguments in the last part-cline. */
421 if (lp->llen || lp->wlen) {
422 lp->next = get_cline(l, wl, w, wl, o, ol, CLF_NEW);
423 lp = lp->next;
424 } else {
425 lp->line = l; lp->llen = wl;
426 lp->word = w; lp->wlen = wl;
427 DPUTS(wl > 0 && !*w, "Bad word");
428 lp->orig = o; lp->olen = ol;
429 }
430 if (o || ol)
431 lp->flags &= ~CLF_NEW;
432
433 /* Finally, put the new parts on the list. */
434 if (matchlastpart)
435 matchlastpart->next = p;
436 else
437 matchparts = p;
438 matchlastpart = lp;
439 }
440
441 /* This adds a new sub-cline. Arguments are the matcher and the strings from
442 * the line and the word. */
443
444 /**/
445 static void
add_match_sub(Cmatcher m,char * l,int ll,char * w,int wl)446 add_match_sub(Cmatcher m, char *l, int ll, char *w, int wl)
447 {
448 int flags;
449 Cline n;
450
451 /* Check if we are interested only in the string from the line. */
452 if (m && (m->flags & CMF_LINE)) {
453 w = NULL; wl = 0;
454 flags = CLF_LINE;
455 } else
456 flags = 0;
457
458 /* And add the cline. */
459 if (wl || ll) {
460 Cline p, lp;
461
462 if ((p = n = bld_parts(w, wl, ll, &lp, NULL)) && n != lp) {
463 for (; p->next != lp; p = p->next);
464
465 if (matchsubs) {
466 matchlastsub->next = n->prefix;
467 n->prefix = matchsubs;
468 }
469 matchsubs = matchlastsub = lp;
470
471 if (matchlastpart)
472 matchlastpart->next = n;
473 else
474 matchparts = n;
475 p->next = 0;
476 matchlastpart = p;
477 } else {
478 n = get_cline(l, ll, w, wl, NULL, 0,
479 flags | ((m && m->wlen == -2) ? CLF_SKIP : 0));
480 if (matchlastsub)
481 matchlastsub->next = n;
482 else
483 matchsubs = n;
484 matchlastsub = n;
485 }
486 }
487 }
488
489 /* This tests if the string from the line l matches the word w. In *bpp
490 * the offset for the brace is returned, in rwlp the length of the
491 * matched prefix or suffix, not including the stuff before or after
492 * the last anchor is given. When sfx is non-zero matching is done from
493 * the ends of the strings backward, if test is zero, the global variables
494 * above are used to build the string for the match and the cline. If
495 * part is non-zero, we are satisfied if only a part of the line-string
496 * is used (and return the length used). */
497
498 /**/
499 int
match_str(char * l,char * w,Brinfo * bpp,int bc,int * rwlp,const int sfx,int test,int part)500 match_str(char *l, char *w, Brinfo *bpp, int bc, int *rwlp,
501 const int sfx, int test, int part)
502 {
503 /* How many characters from the line string and from the word string are
504 * yet to be matched. */
505 int ll = strlen(l), lw = strlen(w);
506 /* Number of characters from the line string and word string matched. */
507 int il = 0, iw = 0;
508 /* How many characters were matched exactly in the line and in the word. */
509 int exact = 0, wexact = 0;
510 int he = 0;
511 int bslash;
512 char *ow;
513 Cmlist ms; /* loop variable */
514 Cmatcher mp, lm = NULL;
515 Brinfo bp = NULL;
516 const int obc = bc;
517 const int ind = (sfx ? -1 : 0);
518 const int add = (sfx ? -1 : 1);
519 const int original_ll = ll, original_lw = lw;
520
521 /* INVARIANT: il+ll == original_ll; iw+lw == original_lw */
522
523 if (!test) {
524 start_match();
525 bp = *bpp;
526 }
527 /* Adjust the pointers and get the values for subscripting and
528 * incrementing. */
529
530 if (sfx) {
531 l += ll; w += lw;
532 }
533 /* ow will always point to the beginning (or end) of that sub-string
534 * in w that wasn't put in the match-variables yet. */
535
536 ow = w;
537
538 /* If the brace is at the beginning, we have to treat it now. */
539
540 if (!test && bp && bc >= bp->pos) {
541 bp->curpos = bc;
542 bp = bp->next;
543 }
544 /*** This once was: `while (ll && lw)', but then ignored characters at
545 * the end were not, well, ignored. */
546 while (ll) {
547
548 /* Hm, we unconditionally first tried the matchers for the cases
549 * where the beginnings of the line and word patterns match the
550 * same character(s).
551 * But first testing if the characters are the same is sooo
552 * much faster...
553 * Maybe it's OK to make same-character matching be preferred in
554 * recursive calls. At least, it /seems/ to work.
555 *
556 * Let's try.
557 *
558 * Update: this once tested `test && ...' to check for exact
559 * character matches only in recursive calls. But then one
560 * can't complete `nom<TAB>' to `nomatch' with a match spec
561 * of `B:[nN][oO]=' because that will eat the `no'.
562 * But that would break completion of strings like `nonomatch'
563 * because the `B:[nN][oO]=' doesn't match the second `no'.
564 * For this we added the code below that can remove already
565 * accepted exact characters and try again, preferring match
566 * specs.
567 */
568
569 bslash = 0;
570 if (!sfx && lw && (!part || test) &&
571 (l[ind] == w[ind] ||
572 (bslash = (lw > 1 && w[ind] == '\\' && w[ind+1] == l[0])))) {
573 /* No matcher could be used, but the strings have the same
574 * character here, skip over it. */
575 l += add; w += (bslash ? (add + add) : add);
576 il++; iw += 1 + bslash;
577 ll--; lw -= 1 + bslash;
578 bc++;
579 exact++;
580 wexact += 1 + bslash;
581 if (!test)
582 while (bp && bc >= (useqbr ? bp->qpos : bp->pos)) {
583 bp->curpos = matchbufadded + (w - ow) + obc;
584 bp = bp->next;
585 }
586 lm = NULL;
587 he = 0;
588
589 continue;
590 }
591 retry:
592 /* First try the matchers. Err... see above. */
593 for (mp = NULL, ms = mstack; !mp && ms; ms = ms->next) {
594 for (mp = ms->matcher; mp; mp = mp->next) {
595 if ((lm && lm == mp) ||
596 ((original_ll == ll || original_lw == lw) &&
597 (test == 1 || (test && !mp->left && !mp->right)) &&
598 mp->wlen < 0))
599 /* If we were called recursively, don't use `*' patterns
600 * at the beginning (avoiding infinite recursion). */
601 continue;
602
603 if (mp->wlen < 0) {
604 /* `*'-pattern. */
605 /*
606 * Similar to the identically-named variable in the 'else'
607 * block.
608 */
609 int t;
610 /*
611 * 1 iff the anchor and the word are on the same side of
612 * the line pattern; that is: if either
613 * - the anchor is on the left and we are matching
614 * a prefix; or
615 * - the anchor is on the right and we are matching
616 * a suffix.
617 */
618 int both;
619 /*
620 * Offset from the line pattern pointer ('l') to the start
621 * of the line pattern.
622 */
623 int loff;
624 /*
625 * Offset from the line pattern pointer ('l') to the start
626 * of the anchor.
627 */
628 int aoff;
629 /*
630 * The length of the line pattern.
631 */
632 int llen;
633 /*
634 * The length of the anchor.
635 *
636 * SEE: ap; aol, aop
637 */
638 int alen;
639 /*
640 * ### Related to 'zoff', which was removed in 2016.
641 */
642 int moff;
643 /*
644 * ### These two are related.
645 *
646 * ### They may have a relation similar to that of lw/iw
647 * ### (q.v.), at least during the 'for' loop. They may be
648 * ### overloaded/repurposed after it.
649 */
650 int ct, ict;
651 /*
652 * The length of the OTHER anchor: the left anchor when
653 * we're anchored on the right, and of the right anchor
654 * when we're anchored on the left.
655 */
656 int aol;
657 /*
658 * LOST: Documentation comment. Last seen 10 years ago in
659 * the temporal lobe. Reward promised for its safe return.
660 * Contact zsh-workers@zsh.org.
661 */
662 char *tp;
663 /*
664 * Temporary variable. Used as temporary storage for a
665 *
666 * {
667 * () {
668 * local foo="$foo"
669 * foo[1]=bar
670 * ...
671 * }
672 * (use original $foo here)
673 * }
674 *
675 * operation. Similar to savw.
676 */
677 char savl = 0;
678 /*
679 * The anchor on this end.
680 */
681 Cpattern ap;
682 /*
683 * The anchor on the other end.
684 */
685 Cpattern aop;
686
687 /* This is for `*' patterns, first initialise some
688 * local variables. */
689 llen = mp->llen;
690 if (mp->flags & CMF_LEFT) {
691 alen = mp->lalen; aol = mp->ralen;
692 } else {
693 alen = mp->ralen; aol = mp->lalen;
694 }
695 /* Give up if we don't have enough characters for the
696 * line-string and the anchor. */
697 if (ll < llen + alen || lw < alen)
698 continue;
699
700 if (mp->flags & CMF_LEFT) {
701 ap = mp->left; moff = alen; aop = mp->right;
702 if (sfx) {
703 both = 0; loff = -llen; aoff = -(llen + alen);
704 } else {
705 both = 1; loff = alen; aoff = 0;
706 }
707 } else {
708 ap = mp->right; moff = 0; aop = mp->left;
709 if (sfx) {
710 both = 1; loff = -(llen + alen); aoff = -alen;
711 } else {
712 both = 0; loff = 0; aoff = llen;
713 }
714 }
715 /* Try to match the line pattern and the anchor. */
716 if (!pattern_match(mp->line, l + loff, NULL, NULL))
717 continue;
718 if (ap) {
719 if (!pattern_match(ap, l + aoff, NULL, NULL) ||
720 (both &&
721 (!pattern_match(ap, w + aoff, NULL, NULL) ||
722 (aol && aol <= aoff + iw &&
723 !pattern_match(aop, w + aoff - aol,
724 NULL, NULL)) ||
725 !match_parts(l + aoff, w + aoff, alen, part))))
726 continue;
727 } else if (!both || ((mp->flags & CMF_INTER) ?
728 ((mp->flags & CMF_LINE) ? iw : il) :
729 (il || iw)))
730 continue;
731
732 /* Fine, now we call ourselves recursively to find the
733 * string matched by the `*'. */
734 if (sfx && (savl = l[-(llen + alen)]))
735 l[-(llen + alen)] = '\0';
736 for (t = 0, tp = w, ct = 0, ict = lw - alen + 1;
737 ict;
738 tp += add, ct++, ict--) {
739 if ((both &&
740 (!ap || !test ||
741 !pattern_match(ap, tp + aoff, NULL, NULL) ||
742 (aol && aol <= aoff + ct + iw &&
743 !pattern_match(aop, tp + aoff - aol,
744 NULL, NULL)))) ||
745 (!both &&
746 pattern_match(ap, tp - moff, NULL, NULL) &&
747 (!aol || (aol <= iw + ct - moff &&
748 pattern_match(aop, tp - moff - aol,
749 NULL, NULL))) &&
750 (mp->wlen == -1 ||
751 match_parts(l + aoff , tp - moff,
752 alen, part)))) {
753 if (!both && mp->wlen == -1 &&
754 !match_parts(l + aoff , tp - moff, alen, part))
755 break;
756 if (sfx) {
757 /* Call ourselves recursively with the
758 * anchor removed. */
759 char savw;
760 if ((savw = tp[-alen]))
761 tp[-alen] = '\0';
762 t = match_str(l - ll, w - lw,
763 NULL, 0, NULL, sfx, 2, part);
764 if (savw)
765 tp[-alen] = savw;
766 } else
767 t = match_str(l + llen + moff, tp + moff,
768 NULL, 0, NULL, sfx, 1, part);
769 if (t || (mp->wlen == -1 && !both))
770 break;
771 }
772 }
773 ict = ct;
774 if (sfx && savl)
775 l[-(llen + alen)] = savl;
776
777 /* Have we found a position in w where the rest of l
778 * matches? */
779 if (!t)
780 continue;
781
782 /* Yes, add the strings and clines if this is a
783 * top-level call. */
784 if (!test && (!he || (llen + alen))) {
785 char *op, *lp, *map, *wap, *wmp;
786 int ol;
787
788 if (sfx) {
789 op = w; ol = ow - w; lp = l - (llen + alen);
790 map = tp - alen;
791 if (mp->flags & CMF_LEFT) {
792 wap = tp - alen; wmp = tp;
793 } else {
794 wap = w - alen; wmp = tp - alen;
795 }
796 } else {
797 op = ow; ol = w - ow; lp = l;
798 map = ow;
799 if (mp->flags & CMF_LEFT) {
800 wap = w; wmp = w + alen;
801 } else {
802 wap = tp; wmp = ow;
803 }
804 }
805 /* If the matcher says that we are only interested
806 * in the line pattern, we just add that and the
807 * anchor and the string not added yet. Otherwise
808 * we add a new part. */
809 if (mp->flags & CMF_LINE) {
810 add_match_str(NULL, NULL, op, ol, sfx);
811 add_match_str(NULL, NULL, lp, llen + alen, sfx);
812 add_match_sub(NULL, NULL, ol, op, ol);
813 add_match_sub(NULL, NULL, llen + alen,
814 lp, llen + alen);
815 } else if (sfx) {
816 add_match_str(NULL, NULL,
817 map, ct + ol + alen, sfx);
818 add_match_part(mp, l + aoff, wap, alen,
819 l + loff, llen, op, ol, ol, sfx);
820 add_match_sub(NULL, NULL, 0, wmp, ct);
821 } else {
822 add_match_str(NULL, NULL,
823 map, ct + ol + alen, sfx);
824 if (both) {
825 add_match_sub(NULL, NULL, ol, op, ol);
826 ol = -1;
827 } else
828 ct += ol;
829 add_match_part(mp, l + aoff, wap, alen,
830 l + loff, llen, wmp, ct, ol, sfx);
831 }
832 }
833 /* Now skip over the matched portion and the anchor. */
834 llen += alen; alen += ict;
835 if (sfx) {
836 l -= llen; w -= alen;
837 } else {
838 l += llen; w += alen;
839 }
840 ll -= llen; il += llen;
841 lw -= alen; iw += alen;
842 bc += llen;
843 exact = 0;
844
845 if (!test) {
846 int bpc;
847 while (bp &&
848 bc >= (bpc = (useqbr ? bp->qpos : bp->pos))) {
849 bp->curpos = matchbufadded + bpc - bc + obc;
850 bp = bp->next;
851 }
852 }
853 ow = w;
854
855 if (!llen && !alen) {
856 lm = mp;
857 if (he) {
858 /* Signal the outer for loop to continue. */
859 mp = NULL;
860 }
861 else
862 he = 1;
863 } else {
864 lm = NULL; he = 0;
865 }
866 break;
867 } else if (ll >= mp->llen && lw >= mp->wlen) {
868 /* Non-`*'-pattern. */
869 /*
870 * Similar to the identically-named variable in the 'if'
871 * block.
872 */
873 int t = 1;
874 char *tl, *tw;
875 int tll, tlw, til, tiw;
876
877 /* We do this only if the line- and word-substrings
878 * are not equal. */
879 if (!(mp->flags & (CMF_LEFT | CMF_RIGHT)) &&
880 mp->llen == mp->wlen &&
881 !(sfx ? strncmp(l - mp->llen, w - mp->wlen, mp->llen) :
882 strncmp(l, w, mp->llen)))
883 continue;
884
885 /* Using local variables to make the following
886 * independent of whether we match a prefix or a
887 * suffix. */
888 if (sfx) {
889 tl = l - mp->llen; tw = w - mp->wlen;
890 til = ll - mp->llen; tiw = lw - mp->wlen;
891 tll = il + mp->llen; tlw = iw + mp->wlen;
892 } else {
893 tl = l; tw = w;
894 til = il; tiw = iw;
895 tll = ll; tlw = lw;
896 }
897 if (mp->flags & CMF_LEFT) {
898 /* Try to match the left anchor, if any. */
899 if (til < mp->lalen || tiw < mp->lalen + mp->ralen)
900 continue;
901 else if (mp->left)
902 t = pattern_match(mp->left, tl - mp->lalen,
903 NULL, NULL) &&
904 pattern_match(mp->left, tw - mp->lalen,
905 NULL, NULL) &&
906 (!mp->ralen ||
907 pattern_match(mp->right,
908 tw - mp->lalen - mp->ralen,
909 NULL, NULL));
910 else
911 t = (!sfx && !((mp->flags & CMF_INTER) ?
912 ((mp->flags & CMF_LINE) ? iw : il) :
913 (il || iw)));
914 }
915 if (mp->flags & CMF_RIGHT) {
916 /* Try to match the right anchor, if any. */
917 if (tll < mp->llen + mp->ralen ||
918 tlw < mp->wlen + mp->ralen + mp->lalen)
919 continue;
920 else if (mp->right)
921 t = pattern_match(mp->right,
922 /* tl + mp->llen - mp->ralen, */
923 tl + mp->llen,
924 NULL, NULL) &&
925 pattern_match(mp->right,
926 /* tw + mp->wlen - mp->ralen, */
927 tw + mp->wlen,
928 NULL, NULL) &&
929 (!mp->lalen ||
930 pattern_match(mp->left, tw + mp->wlen -
931 mp->ralen - mp->lalen,
932 NULL, NULL));
933 else
934 t = (sfx && !((mp->flags & CMF_INTER) ?
935 ((mp->flags & CMF_LINE) ? iw : il) :
936 (il || iw)));
937 }
938 /* Now try to match the line and word patterns. */
939 if (!t || !pattern_match(mp->line, tl, mp->word, tw))
940 continue;
941
942 /* Probably add the matched strings. */
943 if (!test) {
944 if (sfx)
945 {
946 if (ow >= w)
947 add_match_str(NULL, NULL, w, ow - w, sfx);
948 }
949 else
950 {
951 if (w >= ow)
952 add_match_str(NULL, NULL, ow, w - ow, sfx);
953 }
954 add_match_str(mp, tl, tw, mp->wlen, sfx);
955 if (sfx)
956 {
957 if (ow >= w)
958 add_match_sub(NULL, NULL, 0, w, ow - w);
959 }
960 else
961 {
962 if (w >= ow)
963 add_match_sub(NULL, NULL, 0, ow, w - ow);
964 }
965 add_match_sub(mp, tl, mp->llen, tw, mp->wlen);
966 }
967 if (sfx) {
968 l = tl; w = tw;
969 } else {
970 l += mp->llen; w += mp->wlen;
971 }
972 il += mp->llen; iw += mp->wlen;
973 ll -= mp->llen; lw -= mp->wlen;
974 bc += mp->llen;
975 exact = 0;
976
977 if (!test) {
978 int bpc;
979 while (bp &&
980 bc >= (bpc = (useqbr ? bp->qpos : bp->pos))) {
981 bp->curpos = matchbufadded + bpc - bc + obc;
982 bp = bp->next;
983 }
984 }
985 ow = w;
986 lm = NULL;
987 he = 0;
988 break;
989 }
990 }
991 }
992 if (mp)
993 continue;
994
995 /* Same code as at the beginning, used in top-level calls. */
996
997 bslash = 0;
998 if ((!test || sfx) && lw &&
999 (l[ind] == w[ind] ||
1000 (bslash = (lw > 1 && w[ind] == '\\' && w[ind+1] == l[0])))) {
1001 /* No matcher could be used, but the strings have the same
1002 * character here, skip over it. */
1003 l += add; w += (bslash ? (add + add ) : add);
1004 il++; iw += 1 + bslash;
1005 ll--; lw -= 1 + bslash;
1006 bc++;
1007 if (!test)
1008 while (bp && bc >= (useqbr ? bp->qpos : bp->pos)) {
1009 bp->curpos = matchbufadded + (sfx ? (ow - w) : (w - ow)) + obc;
1010 bp = bp->next;
1011 }
1012 lm = NULL;
1013 he = 0;
1014 } else {
1015
1016 if (!lw)
1017 break;
1018
1019 if (exact && !part) {
1020 /* If we just accepted some characters directly (at the
1021 * beginning of the loop) and now can't match any further,
1022 * we go back to before those characters and try again,
1023 * preferring match specs this time. */
1024
1025 il -= exact; iw -= wexact;
1026 ll += exact; lw += wexact;
1027 bc -= exact;
1028 l -= add * exact; w -= add * wexact;
1029
1030 exact = wexact = 0;
1031
1032 goto retry;
1033 }
1034 /* No matcher and different characters: l does not match w. */
1035 if (test)
1036 return 0;
1037
1038 abort_match();
1039
1040 return -1;
1041 }
1042 }
1043 /* If this is a recursive call, we just return if l matched w or not. */
1044 if (test)
1045 return (part || !ll);
1046
1047 /* In top-level calls, if ll is non-zero (unmatched portion in l),
1048 * we have to free the collected clines. */
1049 if (!part && ll) {
1050 abort_match();
1051
1052 return -1;
1053 }
1054 if (rwlp)
1055 *rwlp = iw - (sfx ? ow - w : w - ow);
1056
1057 /* If we matched a suffix, the anchors stored in the top-clines
1058 * will be in the wrong clines: shifted by one. Adjust this. */
1059 if (sfx && matchparts) {
1060 Cline t, tn, s;
1061
1062 if (matchparts->prefix || matchparts->suffix) {
1063 t = get_cline(NULL, 0, NULL, 0, NULL, 0, 0);
1064 t->next = matchparts;
1065 if (matchparts->prefix)
1066 t->prefix = (Cline) 1;
1067 else
1068 t->suffix = (Cline) 1;
1069 matchparts = t;
1070 }
1071 for (t = matchparts; (tn = t->next); t = tn) {
1072 s = (tn->prefix ? tn->prefix : tn->suffix);
1073 if (t->suffix)
1074 t->suffix = s;
1075 else
1076 t->prefix = s;
1077 }
1078 t->prefix = t->suffix = NULL;
1079 }
1080 /* Finally, return the number of matched characters. */
1081
1082 *bpp = bp;
1083 return (part ? il : iw);
1084 }
1085
1086 /* Wrapper for match_str(), only for a certain length and only doing
1087 * the test. */
1088
1089 /**/
1090 static int
match_parts(char * l,char * w,int n,int part)1091 match_parts(char *l, char *w, int n, int part)
1092 {
1093 char lsav = l[n], wsav = w[n];
1094 int ret;
1095
1096 if (lsav)
1097 l[n] = '\0';
1098 if (wsav)
1099 w[n] = '\0';
1100 ret = match_str(l, w, NULL, 0, NULL, 0, 1, part);
1101 if (lsav)
1102 l[n] = lsav;
1103 if (wsav)
1104 w[n] = wsav;
1105
1106 return ret;
1107 }
1108
1109 /* Check if the word w is matched by the strings in pfx and sfx (the prefix
1110 * and the suffix from the line) or the pattern cp. In clp a cline list for
1111 * w is returned.
1112 * qu is non-zero if the words has to be quoted before processed any
1113 * further: the value 2 indicates file quoting.
1114 * bpl and bsl are used to report the positions where the brace-strings in
1115 * the prefix and the suffix have to be re-inserted if this match is inserted
1116 * in the line.
1117 * The return value is the string to use as a completion or NULL if the prefix
1118 * and the suffix don't match the word w. */
1119
1120 /**/
1121 mod_export char *
comp_match(char * pfx,char * sfx,char * w,Patprog cp,Cline * clp,int qu,Brinfo * bpl,int bcp,Brinfo * bsl,int bcs,int * exact)1122 comp_match(char *pfx, char *sfx, char *w, Patprog cp, Cline *clp, int qu,
1123 Brinfo *bpl, int bcp, Brinfo *bsl, int bcs, int *exact)
1124 {
1125 char *r = NULL;
1126 int onoerrs = noerrs;
1127
1128 if (cp) {
1129 /* We have a globcomplete-like pattern, just use that. */
1130 int wl;
1131 char *teststr;
1132
1133 r = w;
1134 if (!qu) {
1135 /*
1136 * If we're not quoting the strings, that means they're
1137 * already quoted (?) and so when we test them against
1138 * a pattern we have to remove the quotes else we will
1139 * end up trying to match against the quote characters.
1140 *
1141 * Almost certainly this fails in some complicated cases
1142 * but it should catch the basic ones.
1143 */
1144 teststr = dupstring(r);
1145 tokenize(teststr);
1146 noerrs = 1;
1147 if (parse_subst_string(teststr))
1148 teststr = r;
1149 else {
1150 remnulargs(teststr);
1151 untokenize(teststr);
1152 }
1153 noerrs = onoerrs;
1154 } else
1155 teststr = r;
1156 if (!pattry(cp, teststr))
1157 return NULL;
1158
1159 r = (qu == 2 ? tildequote(r, 0) : multiquote(r, !qu));
1160
1161 /* We still break it into parts here, trying to build a sensible
1162 * cline list for these matches, too. */
1163 w = dupstring(w);
1164 wl = strlen(w);
1165 *clp = bld_parts(w, wl, wl, NULL, NULL);
1166 *exact = 0;
1167 } else {
1168 Cline pli, plil;
1169 int mpl, rpl, wl;
1170
1171 w = (qu == 2 ? tildequote(w, 0) : multiquote(w, !qu));
1172 wl = strlen(w);
1173
1174 /* Always try to match the prefix. */
1175
1176 useqbr = qu;
1177 if ((mpl = match_str(pfx, w, bpl, bcp, &rpl, 0, 0, 0)) < 0)
1178 return NULL;
1179
1180 if (sfx && *sfx) {
1181 int wpl = matchbufadded, msl, rsl;
1182 VARARR(char, wpfx, wpl);
1183 Cline mli, mlil;
1184
1185 /* We also have a suffix to match, so first save the
1186 * contents of the global matching variables. */
1187 memcpy(wpfx, matchbuf, wpl);
1188 if (matchsubs) {
1189 Cline tmp = get_cline(NULL, 0, NULL, 0, NULL, 0, 0);
1190
1191 tmp->prefix = matchsubs;
1192 if (matchlastpart)
1193 matchlastpart->next = tmp;
1194 else
1195 matchparts = tmp;
1196 }
1197 pli = matchparts;
1198 plil = matchlastpart;
1199
1200 /* The try to match the suffix. */
1201
1202 if ((msl = match_str(sfx, w + mpl, bsl, bcs, &rsl, 1, 0, 0)) < 0) {
1203 free_cline(pli);
1204
1205 return NULL;
1206 }
1207 /* Matched, so add the string in the middle and the saved
1208 * string for the prefix, and build a combined cline list
1209 * for the prefix and the suffix. */
1210 if (matchsubs) {
1211 Cline tmp = get_cline(NULL, 0, NULL, 0, NULL, 0, CLF_SUF);
1212
1213 tmp->suffix = matchsubs;
1214 if (matchlastpart)
1215 matchlastpart->next = tmp;
1216 else
1217 matchparts = tmp;
1218 }
1219 add_match_str(NULL, NULL, w + rpl, wl - rpl - rsl, 1);
1220 add_match_str(NULL, NULL, wpfx, wpl, 1);
1221
1222 mli = bld_parts(w + rpl, wl - rpl - rsl,
1223 (mpl - rpl) + (msl - rsl), &mlil, NULL);
1224 mlil->flags |= CLF_MID;
1225 mlil->slen = msl - rsl;
1226 mlil->next = revert_cline(matchparts);
1227
1228 if (plil)
1229 plil->next = mli;
1230 else
1231 pli = mli;
1232 } else {
1233 /* Only a prefix, add the string and a part-cline for it. */
1234 add_match_str(NULL, NULL, w + rpl, wl - rpl, 0);
1235
1236 add_match_part(NULL, NULL, NULL, 0, NULL, 0, w + rpl, wl - rpl,
1237 mpl - rpl, 0);
1238 pli = matchparts;
1239 }
1240 r = dupstring(matchbuf ? matchbuf : "");
1241
1242 *clp = pli;
1243
1244 /* Test if the string built is equal to the one from the line. */
1245 if (sfx && *sfx) {
1246 int pl = strlen(pfx);
1247
1248 *exact = (!strncmp(pfx, w, pl) && !strcmp(sfx, w + pl));
1249 } else
1250 *exact = !strcmp(pfx, w);
1251 }
1252 if (!qu)
1253 hasunqu = 1;
1254
1255 return r;
1256 }
1257
1258
1259 /*
1260 * Guts of a single pattern for pattern_match().
1261 * Return non-zero if match successful.
1262 * If the class was an equivalence, return 1 + the index into
1263 * the equivalence class (see pattern.c for how this is calculated).
1264 */
1265
1266 /**/
1267 mod_export convchar_t
pattern_match1(Cpattern p,convchar_t c,int * mtp)1268 pattern_match1(Cpattern p, convchar_t c, int *mtp)
1269 {
1270 convchar_t ind;
1271
1272 *mtp = 0;
1273 switch (p->tp) {
1274 case CPAT_CCLASS:
1275 return PATMATCHRANGE(p->u.str, CONVCAST(c), NULL, NULL);
1276
1277 case CPAT_NCLASS:
1278 return !PATMATCHRANGE(p->u.str, CONVCAST(c), NULL, NULL);
1279
1280 case CPAT_EQUIV:
1281 if (PATMATCHRANGE(p->u.str, CONVCAST(c), &ind, mtp))
1282 return ind + 1;
1283 else
1284 return 0;
1285
1286 case CPAT_ANY:
1287 return 1;
1288
1289 case CPAT_CHAR:
1290 return (p->u.chr == c);
1291
1292 default:
1293 DPUTS(1, "bad matcher pattern type");
1294 return 0;
1295 }
1296 }
1297
1298
1299 /*
1300 * Use an equivalence to deduce the line character from the word, or
1301 * vice versa. (If vice versa, then "line" and "word" are reversed
1302 * in what follows. The logic is symmetric.)
1303 * lp is the line pattern.
1304 * wind is the index returned by a pattern match on the word pattern,
1305 * with type wmtp.
1306 * wchr is the word character.
1307 * Return CHR_INVALID if no matching character, else the character.
1308 *
1309 * Only makes sense if lp->tp == CPAT_EQUIV and the (unseen) word
1310 * pattern also has that type.
1311 */
1312
1313 /**/
1314 mod_export convchar_t
pattern_match_equivalence(Cpattern lp,convchar_t wind,int wmtp,convchar_t wchr)1315 pattern_match_equivalence(Cpattern lp, convchar_t wind, int wmtp,
1316 convchar_t wchr)
1317 {
1318 convchar_t lchr;
1319 int lmtp;
1320
1321 if (!PATMATCHINDEX(lp->u.str, wind-1, &lchr, &lmtp)) {
1322 /*
1323 * No equivalent. No possible match; give up.
1324 */
1325 return CHR_INVALID;
1326 }
1327 /*
1328 * If we matched an exact character rather than a range
1329 * type, return it.
1330 */
1331 if (lchr != CHR_INVALID)
1332 return lchr;
1333
1334 /*
1335 * Check the match types. We may want a case-changed
1336 * version of the word character.
1337 */
1338 if (wmtp == PP_UPPER && lmtp == PP_LOWER)
1339 return ZC_tolower(wchr);
1340 else if (wmtp == PP_LOWER && lmtp == PP_UPPER)
1341 return ZC_toupper(wchr);
1342 else if (wmtp == lmtp) {
1343 /*
1344 * Be lenient and allow identical replacements
1345 * for character classes, although in fact this
1346 * doesn't give special functionality for equivalence
1347 * classes.
1348 */
1349 return wchr;
1350 } else {
1351 /*
1352 * Non-matching generic types; this can't work.
1353 */
1354 return CHR_INVALID;
1355 }
1356 }
1357
1358 /*
1359 * Check if the given pattern matches the given string.
1360 * p is either an anchor or line pattern and string;
1361 * wp and wsc are word (candidate) pattern and string
1362 *
1363 * Check that characters match for {...} classes by comparing positions in the
1364 * strings.
1365 *
1366 * prestrict is a chain of patterns at least as long
1367 * as the line string. In this case we are still assembling the line at
1368 * newline (which has been allocated but doesn't yet contain anything useful)
1369 * and must continue to do so as we go along; prestrict gives
1370 * restrictions on the line character to be applied along side the other
1371 * patterns. In the simple case a restriction is a character to be put
1372 * in place; otherwise it is a set of possible characters and we have to
1373 * deduce an actual matching character. Note prestrict is never an
1374 * equivalence class. In extreme cases we can't deduce a unique
1375 * character; then the match fails.
1376 *
1377 * If prestrict is not NULL, s will be NULL.
1378 */
1379
1380 /**/
1381 static int
pattern_match_restrict(Cpattern p,Cpattern wp,convchar_t * wsc,int wsclen,Cpattern prestrict,ZLE_STRING_T new_line)1382 pattern_match_restrict(Cpattern p, Cpattern wp, convchar_t *wsc, int wsclen,
1383 Cpattern prestrict, ZLE_STRING_T new_line)
1384 {
1385 convchar_t c;
1386 convchar_t ind, wind;
1387 int mt, wmt;
1388
1389 while (p && wp && wsclen && prestrict) {
1390 /* First test the word character */
1391 wind = pattern_match1(wp, *wsc, &wmt);
1392 if (!wind)
1393 return 0;
1394
1395 /*
1396 * Now the line character; deal with the case where
1397 * we don't yet have it, only a restriction on it.
1398 */
1399 if (prestrict->tp == CPAT_CHAR) {
1400 /*
1401 * Easy case: restricted to an exact character on
1402 * the line. Proceed as normal.
1403 */
1404 c = prestrict->u.chr;
1405 } else {
1406 if (p->tp == CPAT_CHAR) {
1407 /*
1408 * Normal line pattern is an exact character: as
1409 * long as this matches prestrict, we can proceed
1410 * as usual.
1411 */
1412 c = p->u.chr;
1413 } else if (p->tp == CPAT_EQUIV) {
1414 /*
1415 * An equivalence, so we can deduce the character
1416 * backwards from the word pattern and see if it
1417 * matches prestrict.
1418 */
1419 if ((c = pattern_match_equivalence(p, wind, wmt, *wsc)) ==
1420 CHR_INVALID)
1421 return 0;
1422 } else {
1423 /*
1424 * Not an equivalence, so that means we must match
1425 * the word (not just the word pattern), so grab it
1426 * and make sure it fulfills our needs. I think.
1427 * Not 100% sure about that, but what else can
1428 * we do? We haven't actually been passed a string
1429 * from the command line.
1430 */
1431 c = *wsc;
1432 }
1433 /* Character so deduced must match the restriction. */
1434 if (!pattern_match1(prestrict, c, &mt))
1435 return 0;
1436 }
1437
1438 /*
1439 * If either is "?", they match each other; no further tests.
1440 * Apply this even if the character wasn't convertable;
1441 * there's no point trying to be clever in that case.
1442 */
1443 if (p->tp != CPAT_ANY || wp->tp != CPAT_ANY)
1444 {
1445 ind = pattern_match1(p, c, &mt);
1446 if (!ind)
1447 return 0;
1448 if (ind != wind)
1449 return 0;
1450 if (mt != wmt) {
1451 /*
1452 * Special case if matching lower vs. upper or
1453 * vice versa. The transformed characters must match.
1454 * We don't need to check the transformation is
1455 * the appropriate one for each character separately,
1456 * since that was done in pattern_match1(), so just
1457 * compare lower-cased versions of both.
1458 */
1459 if ((mt == PP_LOWER || mt == PP_UPPER) &&
1460 (wmt == PP_LOWER || wmt == PP_UPPER)) {
1461 if (ZC_tolower(c) != ZC_tolower(*wsc))
1462 return 0;
1463 } else {
1464 /* Other different classes can't match. */
1465 return 0;
1466 }
1467 }
1468 }
1469
1470 /* We need to assemble the line */
1471 *new_line++ = (ZLE_CHAR_T)c;
1472 prestrict = prestrict->next;
1473 wsc++;
1474 wsclen--;
1475 p = p->next;
1476 wp = wp->next;
1477 }
1478
1479 while (p && prestrict) {
1480 /*
1481 * As above, but with even less info to go on.
1482 * (Can this happen?) At least handle the cases where
1483 * one of our patterns has given us a specific character.
1484 */
1485 if (prestrict->tp == CPAT_CHAR) {
1486 c = prestrict->u.chr;
1487 } else {
1488 if (p->tp == CPAT_CHAR) {
1489 c = p->u.chr;
1490 } else {
1491 /*
1492 * OK. Here we are in a function with just a line
1493 * pattern and another pattern to restrict the
1494 * characters that can go on the line, and no actual
1495 * characters. We're matching two patterns against
1496 * one another to generate a character to insert.
1497 * This is a bit too psychedelic, so I'm going to
1498 * bale out now. See you on the ground.
1499 */
1500 return 0;
1501 }
1502 if (!pattern_match1(prestrict, c, &mt))
1503 return 0;
1504 }
1505 if (!pattern_match1(p, c, &mt))
1506 return 0;
1507 p = p->next;
1508 *new_line++ = (ZLE_CHAR_T)c;
1509 prestrict = prestrict->next;
1510 }
1511
1512 if (prestrict) {
1513 /* Restriction with nothing to match */
1514 return 0;
1515 }
1516
1517 while (wp && wsclen) {
1518 /* No funny business when we only have the word pattern. */
1519 if (!pattern_match1(wp, *wsc, &wmt))
1520 return 0;
1521 wp = wp->next;
1522 wsc++;
1523 wsclen--;
1524 }
1525
1526 return 1;
1527 }
1528
1529
1530 /*
1531 * The usual version of pattern matching, without the line string
1532 * being handled by restriction.
1533 *
1534 * Check if the given pattern matches the given string.
1535 * p and s are either anchor or line pattern and string;
1536 * wp and ws are word (candidate) pattern and string
1537 *
1538 * If only one pattern is given, we just check if characters match.
1539 * If both line and word are given, we check that characters match
1540 * for {...} classes by comparing positions in the strings.
1541 *
1542 * Patterns and strings are always passed in pairs, so it is enough
1543 * to check for non-NULL wp. p should always be present.
1544 */
1545 /**/
1546 mod_export int
pattern_match(Cpattern p,char * s,Cpattern wp,char * ws)1547 pattern_match(Cpattern p, char *s, Cpattern wp, char *ws)
1548 {
1549 convchar_t c, wc;
1550 convchar_t ind, wind;
1551 int len = 0, wlen = 0, mt, wmt;
1552
1553 while (p && wp && *s && *ws) {
1554 /* First test the word character */
1555 wc = unmeta_one(ws, &wlen);
1556 wind = pattern_match1(wp, wc, &wmt);
1557 if (!wind)
1558 return 0;
1559
1560 /*
1561 * Now the line character.
1562 */
1563 c = unmeta_one(s, &len);
1564 /*
1565 * If either is "?", they match each other; no further tests.
1566 * Apply this even if the character wasn't convertable;
1567 * there's no point trying to be clever in that case.
1568 */
1569 if (p->tp != CPAT_ANY || wp->tp != CPAT_ANY)
1570 {
1571 ind = pattern_match1(p, c, &mt);
1572 if (!ind)
1573 return 0;
1574 if (ind != wind)
1575 return 0;
1576 if (mt != wmt) {
1577 /*
1578 * Special case if matching lower vs. upper or
1579 * vice versa. The transformed characters must match.
1580 * We don't need to check the transformation is
1581 * the appropriate one for each character separately,
1582 * since that was done in pattern_match1(), so just
1583 * compare lower-cased versions of both.
1584 */
1585 if ((mt == PP_LOWER || mt == PP_UPPER) &&
1586 (wmt == PP_LOWER || wmt == PP_UPPER)) {
1587 if (ZC_tolower(c) != ZC_tolower(wc))
1588 return 0;
1589 } else {
1590 /* Other different classes can't match. */
1591 return 0;
1592 }
1593 }
1594 }
1595
1596 s += len;
1597 ws += wlen;
1598 p = p->next;
1599 wp = wp->next;
1600 }
1601
1602 while (p && *s) {
1603 c = unmeta_one(s, &len);
1604 if (!pattern_match1(p, c, &mt))
1605 return 0;
1606 p = p->next;
1607 s += len;
1608 }
1609
1610 while (wp && *ws) {
1611 wc = unmeta_one(ws, &wlen);
1612 if (!pattern_match1(wp, wc, &wmt))
1613 return 0;
1614 wp = wp->next;
1615 ws += wlen;
1616 }
1617
1618 return 1;
1619 }
1620
1621
1622 /* This splits the given string into a list of cline structs, separated
1623 * at those places where one of the anchors of an `*' pattern was found.
1624 * plen gives the number of characters on the line that matched this
1625 * string.
1626 *
1627 * In *lp, if lp is not NULL, we return a pointer to the last cline struct we
1628 * build.
1629 *
1630 * In *lprem, if lprem is not NULL, we return a pointer to the last
1631 * cline struct we build if it came from the remainder of the
1632 * line rather than from a right anchor match, else NULL.
1633 */
1634
1635 /**/
1636 Cline
bld_parts(char * str,int len,int plen,Cline * lp,Cline * lprem)1637 bld_parts(char *str, int len, int plen, Cline *lp, Cline *lprem)
1638 {
1639 Cline ret = NULL, *q = &ret, n = NULL;
1640 Cmlist ms;
1641 Cmatcher mp;
1642 int t, op = plen;
1643 char *p = str, *os = str;
1644
1645 while (len) {
1646 for (t = 0, ms = bmatchers; ms && !t; ms = ms->next) {
1647 mp = ms->matcher;
1648 if (mp && mp->flags == CMF_RIGHT && mp->wlen < 0 && mp->ralen &&
1649 !mp->llen && len >= mp->ralen && (str - os) >= mp->lalen &&
1650 pattern_match(mp->right, str, NULL, NULL) &&
1651 (!mp->lalen ||
1652 ((str - os) >= mp->lalen &&
1653 pattern_match(mp->left, str - mp->lalen, NULL, NULL)))) {
1654 int olen = str - p, llen;
1655
1656 /* We found an anchor, create a new cline. The NEW flag
1657 * is set if the characters before the anchor were not
1658 * on the line. */
1659 *q = n = get_cline(NULL, mp->ralen, str, mp->ralen, NULL, 0,
1660 ((plen <= 0) ? CLF_NEW : 0));
1661
1662 /* If there were any characters before the anchor, add
1663 * them as a cline struct. */
1664
1665 if (p != str) {
1666 llen = (op < 0 ? 0 : op);
1667
1668 if (llen > olen)
1669 llen = olen;
1670 n->prefix = get_cline(NULL, llen, p, olen, NULL, 0, 0);
1671 }
1672 q = &(n->next);
1673 str += mp->ralen; len -= mp->ralen;
1674 plen -= mp->ralen;
1675 op -= olen;
1676 p = str;
1677 t = 1;
1678 }
1679 }
1680 if (!t) {
1681 /* No anchor was found here, skip. */
1682 str++; len--;
1683 plen--;
1684 }
1685 }
1686 /* This is the cline struct for the remaining string at the end. */
1687
1688 if (p != str) {
1689 int olen = str - p, llen = (op < 0 ? 0 : op);
1690
1691 *q = n = get_cline(NULL, 0, NULL, 0, NULL, 0, (plen <= 0 ? CLF_NEW : 0));
1692
1693 if (llen > olen)
1694 llen = olen;
1695 n->prefix = get_cline(NULL, llen, p, olen, NULL, 0, 0);
1696 if (lprem)
1697 *lprem = n;
1698 }
1699 else if (!ret) {
1700 *q = n =
1701 get_cline(NULL, 0, NULL, 0, NULL, 0, (plen <= 0 ? CLF_NEW : 0));
1702 if (lprem)
1703 *lprem = n;
1704 } else if (lprem) {
1705 *lprem = NULL;
1706 }
1707
1708 if (n)
1709 n->next = NULL;
1710
1711 if (lp)
1712 *lp = n;
1713
1714 return ret;
1715 }
1716
1717
1718 /*
1719 * This builds all the possible line patterns for the pattern pat in the
1720 * buffer line. Then we test if this line matches the string given by
1721 * wlen and word.
1722 *
1723 * The matcher ) wpat, containing pattern that matched previously
1724 * mp gives ) lpat, containing the pattern for line we build
1725 * line is the line we are assembling; it is initially empty
1726 * mword is a string that matched wpat before
1727 * word is string that we try to match now
1728 *
1729 * The return value is the length of the string matched in the word, it
1730 * is zero if we couldn't build a line that matches the word.
1731 */
1732
1733 /**/
1734 static int
bld_line(Cmatcher mp,ZLE_STRING_T line,char * mword,char * word,int wlen,int sfx)1735 bld_line(Cmatcher mp, ZLE_STRING_T line, char *mword, char *word,
1736 int wlen, int sfx)
1737 {
1738 Cpattern lpat = mp->line;
1739 Cpattern wpat = mp->word;
1740 Cpattern curgenpat;
1741 Cmlist ms;
1742 int llen, rl, l;
1743 convchar_t convchr, *wordcp;
1744 VARARR(convchar_t, wordchars, wlen);
1745 VARARR(struct cpattern, genpatarr, mp->llen);
1746
1747 /*
1748 * We may need to start the "word" array from the end. This
1749 * is much easier if we convert it to an array of (possibly wide)
1750 * characters.
1751 */
1752 MB_METACHARINIT();
1753 for (l = wlen, wordcp = wordchars; l; l--) {
1754 int charlen = MB_METACHARLENCONV(word, &convchr);
1755 #ifdef MULTIBYTE_SUPPORT
1756 if (convchr == WEOF)
1757 convchr = (*word == Meta) ? word[1] ^ 32 : *word;
1758 #endif
1759 *wordcp++ = convchr;
1760 word += charlen;
1761 }
1762
1763 /*
1764 * Loop over all characters. At this stage, line is an empty
1765 * space of length llen (not counting the null byte) which we assemble as
1766 * we go along.
1767 *
1768 * However, first we need to know what characters can appear at each
1769 * point in the line. For this we assemble an list genpatarr of the
1770 * same length as the line. (It's convenient to store this as an
1771 * array but it's linked as a list, too.) If there are equivalences
1772 * we use mword to derive the equivalent character; when we've
1773 * reached the end of mword, equivalences are treated just like
1774 * ordinary character classes. For character classes we just attach
1775 * the class to the genpatarr list and apply it as a restriction
1776 * when we finally match the line against the set of matchers.
1777 */
1778 curgenpat = genpatarr;
1779 MB_METACHARINIT();
1780 while (lpat) {
1781 convchar_t wchr, wind;
1782 int wmtp, mwordlen;
1783 /*
1784 * If the line pattern is an equivalence, query wpat to find the
1785 * word part of the equivalence. If we don't find one we don't try
1786 * equivalencing but use lpat as an ordinary match. (It's not
1787 * entirely clear to me this is the correct behaviour on a
1788 * failed character match within the equivalence, but that was
1789 * the behaviour of the old logic that this replaces.)
1790 */
1791 if (lpat->tp == CPAT_EQUIV && wpat && *mword) {
1792 mwordlen = MB_METACHARLENCONV(mword, &wchr);
1793 wind = pattern_match1(wpat, wchr, &wmtp);
1794 wpat = wpat->next;
1795 mword += mwordlen;
1796 } else
1797 wind = 0;
1798 if (wind) {
1799 /*
1800 * Successful match for word side of equivalence.
1801 * Find the line equivalent.
1802 */
1803 convchar_t lchr;
1804 if ((lchr = pattern_match_equivalence(lpat, wind, wmtp, wchr))
1805 == CHR_INVALID) {
1806 /*
1807 * No equivalent. No possible match; give up.
1808 */
1809 return 0;
1810 }
1811 /*
1812 * We now have an exact character to match,
1813 * so make up a pattern element for it.
1814 */
1815 curgenpat->tp = CPAT_CHAR;
1816 curgenpat->u.chr = lchr;
1817 } else {
1818 /*
1819 * Not an equivalence class, so we just keep the
1820 * test in the lpat as it is.
1821 */
1822 curgenpat->tp = lpat->tp;
1823 if (lpat->tp == CPAT_CHAR)
1824 curgenpat->u.chr = lpat->u.chr;
1825 else if (lpat->tp != CPAT_ANY) {
1826 /*
1827 * The string isn't modified and is only needed in calls from
1828 * this function, so we don't even need to copy it.
1829 */
1830 curgenpat->u.str = lpat->u.str;
1831 }
1832 }
1833 lpat = lpat->next;
1834 /*
1835 * This linked list is defined above as an array.
1836 * We could get away with just keeping it as an array
1837 * and passing it down as such, but that's a bit icky
1838 * since the generic linkage of Cpatterns is as a linked
1839 * list and we should keep our local memory management
1840 * problems to ourselvess.
1841 */
1842 if (lpat)
1843 curgenpat->next = curgenpat+1;
1844 else
1845 curgenpat->next = NULL;
1846 curgenpat++;
1847 }
1848
1849 /*
1850 * We now know how to match the word with the line patterns; let's
1851 * see if it does. We will use the information in curgenpat if we
1852 * are successful to work out what character goes on the line. This
1853 * is a bit hairy, as in "the Yeti is a creature that is a bit
1854 * hairy".
1855 */
1856 llen = mp->llen;
1857 rl = 0;
1858
1859 if (sfx)
1860 {
1861 /*
1862 * We need to work backwards from the end of both the
1863 * word and the line strings.
1864 */
1865 wordcp = wordchars + wlen;
1866
1867 /*
1868 * We construct the line from the end.
1869 */
1870 line += llen;
1871 curgenpat = genpatarr + llen;
1872 } else {
1873 wordcp = wordchars;
1874 curgenpat = genpatarr;
1875 }
1876
1877 /* we now reuse mp, lpat, wpat for the global matchers */
1878 MB_METACHARINIT();
1879 while (llen && wlen) {
1880 int wmtp;
1881 convchar_t *wp;
1882 Cpattern tmpgenpat;
1883
1884 if (sfx) {
1885 wp = wordcp - 1;
1886 tmpgenpat = curgenpat - 1;
1887 } else {
1888 tmpgenpat = curgenpat;
1889 wp = wordcp;
1890 }
1891 if (pattern_match1(tmpgenpat, *wp, &wmtp))
1892 {
1893 convchar_t lchr;
1894 /*
1895 * We can match the line character directly with the word
1896 * character. If the line character is a fixed one,
1897 * keep it, since we went to all that trouble above,
1898 * else if it's generic, keep the word character,
1899 * since we have no choice.
1900 */
1901 if (tmpgenpat->tp == CPAT_CHAR)
1902 lchr = tmpgenpat->u.chr;
1903 else
1904 lchr = *wp;
1905
1906 if (sfx)
1907 *--line = lchr;
1908 else
1909 *line++ = lchr;
1910
1911 llen--;
1912 wlen--;
1913 rl++;
1914
1915 if (sfx) {
1916 wordcp = wp;
1917 curgenpat = tmpgenpat;
1918 } else {
1919 if (llen)
1920 curgenpat++;
1921 wordcp++;
1922 }
1923 }
1924 else
1925 {
1926 ZLE_CHAR_T *lp;
1927 /*
1928 * Need to loop over pattern matchers.
1929 */
1930 for (ms = bmatchers; ms; ms = ms->next) {
1931 mp = ms->matcher;
1932 /*
1933 * This is the nightmare case: we have line and
1934 * and word matchers and some pattern which restricts
1935 * the value on the line without us knowing exactly
1936 * what it is. Despatch to the special function
1937 * for that.
1938 */
1939 if (mp && !mp->flags && mp->wlen <= wlen &&
1940 mp->llen <= llen)
1941 {
1942 lp = line;
1943 wp = wordcp;
1944 tmpgenpat = curgenpat;
1945
1946 if (sfx) {
1947 lp -= mp->llen;
1948 wp -= mp->wlen;
1949 tmpgenpat -= mp->llen;
1950 }
1951
1952 if (pattern_match_restrict(mp->line, mp->word, wp,
1953 wlen - (wp - wordchars),
1954 tmpgenpat, lp)) {
1955 /*
1956 * Matched: advance over as many characters
1957 * of the patterns and strings as
1958 * we've done matches.
1959 */
1960 if (sfx) {
1961 line = lp;
1962 wordcp = wp;
1963 curgenpat = tmpgenpat;
1964 } else {
1965 line += mp->llen;
1966 wordcp += mp->wlen;
1967 curgenpat += mp->llen;
1968 }
1969 llen -= mp->llen;
1970 wlen -= mp->wlen;
1971 rl += mp->wlen;
1972 break;
1973 }
1974 }
1975 }
1976 if (!ms)
1977 return 0; /* Didn't match, give up */
1978 }
1979 }
1980 if (!llen) {
1981 /* Unmatched portion in the line built, return matched length. */
1982 return rl;
1983 }
1984 return 0;
1985 }
1986
1987 /* This builds a string that may be put on the line that fully matches the
1988 * given strings. The return value is NULL if no such string could be built
1989 * or that string in local static memory, dup it. */
1990
1991 /**/
1992 static char *
join_strs(int la,char * sa,int lb,char * sb)1993 join_strs(int la, char *sa, int lb, char *sb)
1994 {
1995 static char *rs = NULL;
1996 static int rl = 0;
1997
1998 Cmlist ms;
1999 Cmatcher mp;
2000 int t, bl;
2001 /** rr is the remaining length already allocated in rs */
2002 int rr = rl;
2003 /*
2004 * convlen is the length we need for the string converted to
2005 * char * (possibly multibyte).
2006 */
2007 int convlen;
2008 char *rp = rs;
2009
2010 while (la && lb) {
2011 if (*sa != *sb) {
2012 /* Different characters, try the matchers. */
2013 for (t = 0, ms = bmatchers; ms && !t; ms = ms->next) {
2014 mp = ms->matcher;
2015 if (mp && !mp->flags && mp->wlen > 0 && mp->llen > 0 &&
2016 mp->wlen <= la && mp->wlen <= lb) {
2017 /* The pattern has no anchors and the word
2018 * pattern fits, try it. */
2019 if ((t = pattern_match(mp->word, sa, NULL, NULL)) ||
2020 pattern_match(mp->word, sb, NULL, NULL)) {
2021 /* It matched one of the strings, t says which one. */
2022 VARARR(ZLE_CHAR_T, line, mp->llen);
2023 char **ap, **bp;
2024 int *alp, *blp;
2025
2026 if (t) {
2027 ap = &sa;
2028 alp = &la;
2029
2030 bp = &sb;
2031 blp = &lb;
2032 } else {
2033 ap = &sb;
2034 alp = &lb;
2035
2036 bp = &sa;
2037 blp = &la;
2038 }
2039 /* Now try to build a string that matches the other
2040 * string. */
2041 if ((bl = bld_line(mp, line, *ap, *bp, *blp, 0))) {
2042 /* Found one, put it into the return string. */
2043 char *convstr =
2044 zlelineasstring(line, mp->llen, 0, &convlen,
2045 NULL, 0);
2046 if (rr <= convlen) {
2047 char *or = rs;
2048 int alloclen = (convlen > 20) ? convlen : 20;
2049
2050 rs = realloc(rs, (rl += alloclen));
2051 rr += alloclen;
2052 rp += rs - or;
2053 }
2054 memcpy(rp, convstr, convlen);
2055 rp += convlen;
2056 rr -= convlen;
2057 /* HERE: multibyte chars */
2058 *ap += mp->wlen;
2059 *alp -= mp->wlen;
2060
2061 *bp += bl;
2062 *blp -= bl;
2063 t = 1;
2064 free(convstr);
2065 } else
2066 t = 0;
2067 }
2068 }
2069 }
2070 if (!t)
2071 break;
2072 } else {
2073 /* Same character, just take it. */
2074 if (rr <= 1 /* HERE charlen */) {
2075 char *or = rs;
2076
2077 rs = realloc(rs, (rl += 20));
2078 rr += 20;
2079 rp += rs - or;
2080 }
2081 /* HERE: multibyte char */
2082 *rp++ = *sa;
2083 rr--;
2084 sa++;
2085 sb++;
2086 la--;
2087 lb--;
2088 }
2089 }
2090 if (la || lb)
2091 return NULL;
2092
2093 *rp = '\0';
2094
2095 return rs;
2096 }
2097
2098 /*
2099 * This compares the anchors stored in two top-level clines.
2100 * It returns 1 if the anchors are the same, 2 if they are
2101 * compatible (and have been combined in "o"), 0 otherwise.
2102 */
2103
2104 /**/
2105 static int
cmp_anchors(Cline o,Cline n,int join)2106 cmp_anchors(Cline o, Cline n, int join)
2107 {
2108 int line = 0;
2109 char *j;
2110
2111 /* First try the exact strings. */
2112 if ((!(o->flags & CLF_LINE) && o->wlen == n->wlen &&
2113 (!o->word || !strncmp(o->word, n->word, o->wlen))) ||
2114 (line = ((!o->line && !n->line && !o->wlen && !n->wlen) ||
2115 (o->llen == n->llen && o->line && n->line &&
2116 !strncmp(o->line, n->line, o->llen))))) {
2117 if (line) {
2118 o->flags |= CLF_LINE;
2119 o->word = NULL;
2120 o->wlen = 0;
2121 }
2122 return 1;
2123 }
2124 /* Didn't work, try to build a string matching both anchors. */
2125 if (join && !(o->flags & CLF_JOIN) && o->word && n->word &&
2126 (j = join_strs(o->wlen, o->word, n->wlen, n->word))) {
2127 o->flags |= CLF_JOIN;
2128 o->wlen = strlen(j);
2129 o->word = dupstring(j);
2130
2131 return 2;
2132 }
2133 return 0;
2134 }
2135
2136 /* Below is the code to join two cline lists. This struct is used to walk
2137 * through a sub-list. */
2138
2139 typedef struct cmdata *Cmdata;
2140
2141 struct cmdata {
2142 Cline cl, pcl;
2143 char *str, *astr;
2144 int len, alen, olen, line;
2145 };
2146
2147 /* This is used to ensure that a cmdata struct contains usable data.
2148 * The return value is non-zero if we reached the end. */
2149
2150 static int
check_cmdata(Cmdata md,int sfx)2151 check_cmdata(Cmdata md, int sfx)
2152 {
2153 /* We will use the str and len fields to contain the next sub-string
2154 * in the list. If len is zero, we have to use the next cline. */
2155 if (!md->len) {
2156 /* If there is none, we reached the end. */
2157 if (!md->cl)
2158 return 1;
2159
2160 /* Otherwise, get the string. Only the line-string or both.
2161 * We also have to adjust the pointer if this is for a suffix. */
2162 if (md->cl->flags & CLF_LINE) {
2163 md->line = 1;
2164 md->len = md->cl->llen;
2165 md->str = md->cl->line;
2166 } else {
2167 md->line = 0;
2168 md->len = md->olen = md->cl->wlen;
2169 /* HERE: multibyte */
2170 if ((md->str = md->cl->word) && sfx)
2171 md->str += md->len;
2172 md->alen = md->cl->llen;
2173 /* HERE: multibyte */
2174 if ((md->astr = md->cl->line) && sfx)
2175 md->astr += md->alen;
2176 }
2177 md->pcl = md->cl;
2178 md->cl = md->cl->next;
2179 }
2180 return 0;
2181 }
2182
2183 /* This puts the not-yet-matched portion back into the last cline and
2184 * returns that. */
2185
2186 static Cline
undo_cmdata(Cmdata md,int sfx)2187 undo_cmdata(Cmdata md, int sfx)
2188 {
2189 Cline r = md->pcl;
2190
2191 if (md->line) {
2192 r->word = NULL;
2193 r->wlen = 0;
2194 r->flags |= CLF_LINE;
2195 r->llen = md->len;
2196 /* HERE: multibyte */
2197 r->line = md->str - (sfx ? md->len : 0);
2198 } else if (md->len != md->olen) {
2199 r->wlen = md->len;
2200 /* HERE: multibyte */
2201 r->word = md->str - (sfx ? md->len : 0);
2202 DPUTS(r->wlen > 0 && !*r->word, "Bad word");
2203 }
2204 return r;
2205 }
2206
2207 /* This tries to build a string matching a sub-string in a sub-cline
2208 * that could not be matched otherwise. */
2209
2210 static Cline
join_sub(Cmdata md,char * str,int len,int * mlen,int sfx,int join)2211 join_sub(Cmdata md, char *str, int len, int *mlen, int sfx, int join)
2212 {
2213 if (!check_cmdata(md, sfx)) {
2214 char *ow = str, *nw = md->str;
2215 int ol = len, nl = md->len;
2216 Cmlist ms;
2217 Cmatcher mp;
2218 int t;
2219
2220 if (sfx) {
2221 ow += ol; nw += nl;
2222 }
2223 for (t = 0, ms = bmatchers; ms && !t; ms = ms->next) {
2224 mp = ms->matcher;
2225 /* We use only those patterns that match a non-empty
2226 * string in both the line and the word and that have
2227 * no anchors. */
2228 if (mp && !mp->flags && mp->wlen > 0 && mp->llen > 0) {
2229 /* We first test, if the old string matches already the
2230 * new one. */
2231 if (mp->llen <= ol && mp->wlen <= nl &&
2232 pattern_match(mp->line, ow - (sfx ? mp->llen : 0),
2233 mp->word, nw - (sfx ? mp->wlen : 0))) {
2234 /* It did, update the contents of the cmdata struct
2235 * and return a cline for the matched part. */
2236 if (sfx)
2237 md->str -= mp->wlen;
2238 else
2239 md->str += mp->wlen;
2240 md->len -= mp->wlen;
2241 *mlen = mp->llen;
2242
2243 return get_cline(NULL, 0, ow - (sfx ? mp->llen : 0),
2244 mp->llen, NULL, 0, 0);
2245 }
2246 /* Otherwise we will try to build a string that matches
2247 * both strings. But try the pattern only if the word-
2248 * pattern matches one of the strings. */
2249 if (join && mp->wlen <= ol && mp->wlen <= nl &&
2250 ((t = pattern_match(mp->word, ow - (sfx ? mp->wlen : 0),
2251 NULL, NULL)) ||
2252 pattern_match(mp->word, nw - (sfx ? mp->wlen : 0),
2253 NULL, NULL))) {
2254 VARARR(ZLE_CHAR_T, line, mp->llen);
2255 int bl;
2256 char *mw;
2257
2258 /* Then build all the possible lines and see
2259 * if one of them matches the other string. */
2260 /* HERE: they're multibyte */
2261 if (t)
2262 mw = ow - (sfx ? mp->wlen : 0);
2263 else
2264 mw = nw - (sfx ? mp->wlen : 0);
2265
2266 if ((bl = bld_line(mp, line, mw, (t ? nw : ow),
2267 (t ? nl : ol), sfx))) {
2268 /* Yep, one of the lines matched the other
2269 * string. */
2270
2271 /* HERE: multibyte characters */
2272 if (t) {
2273 ol = mp->wlen; nl = bl;
2274 } else {
2275 ol = bl; nl = mp->wlen;
2276 }
2277 if (sfx)
2278 md->str -= nl;
2279 else
2280 md->str += nl;
2281 md->len -= nl;
2282 *mlen = ol;
2283
2284 return get_cline(NULL, 0,
2285 zlelineasstring(line, mp->llen,
2286 0, NULL, NULL, 1),
2287 mp->llen, NULL, 0, CLF_JOIN);
2288 }
2289 }
2290 }
2291 }
2292 }
2293 return NULL;
2294 }
2295
2296 /* This is used to match a sub-string in a sub-cline. The length of the
2297 * matched portion is returned. This tests only for exact equality. */
2298
2299 static int
sub_match(Cmdata md,char * str,int len,int sfx)2300 sub_match(Cmdata md, char *str, int len, int sfx)
2301 {
2302 int ret = 0, l, ind, add;
2303 char *p, *q;
2304 #ifdef MULTIBYTE_SUPPORT
2305 int fulllen = len;
2306 char *fullstr = str;
2307 mbstate_t mbs;
2308 #endif
2309
2310 if (sfx) {
2311 str += len;
2312 ind = -1; add = -1;
2313 } else {
2314 ind = 0; add = 1;
2315 }
2316 /* str and len describe the old string, in md we have the new one. */
2317 while (len) {
2318 if (check_cmdata(md, sfx))
2319 return ret;
2320
2321 /*
2322 * Look for a common prefix. If we do include metafied
2323 * characters, at this stage we still need the overall length
2324 * including Meta's as separate characters.
2325 */
2326 for (l = 0, p = str, q = md->str;
2327 l < len && l < md->len && p[ind] == q[ind];
2328 l++, p += add, q += add) {}
2329
2330 /* Make sure we don't end in the middle of a Meta sequence. */
2331 if (add == 1) {
2332 if (l && p[-1] == Meta)
2333 l--;
2334 } else {
2335 if (l && ((l < len && p[-1] == Meta)
2336 || (l < md->len && q[-1] == Meta)))
2337 l--;
2338 }
2339 #ifdef MULTIBYTE_SUPPORT
2340 /*
2341 * Make sure we don't end in the middle of a multibyte character.
2342 * Don't need to do this if the match ended at the start
2343 * of the original string.
2344 *
2345 * Let q be the match point we've found.
2346 */
2347 q = sfx ? str - l : str + l;
2348 if (q != fullstr) {
2349 memset(&mbs, 0, sizeof mbs);
2350 /*
2351 * Otherwise read characters from the start of the original
2352 * string until we reach or pass the match point. This
2353 * is rather inefficient, but in general only reading
2354 * the full string can keep track of where we are in
2355 * a character. With a prefix we could be more efficient,
2356 * but it's difficult with a suffix where the match point
2357 * moves backwards.
2358 */
2359 for (p = fullstr; p < fullstr + fulllen; ) {
2360 wchar_t wc;
2361 /*
2362 * ret must, in fact, be set by the current logic,
2363 * but gcc doesn't realise (at least some versions don't).
2364 */
2365 size_t cnt = MB_INVALID;
2366 int diff;
2367 char *p2;
2368
2369 /*
2370 * Because the string is metafied, we need to
2371 * assembled wide characters a byte at a time.
2372 */
2373 for (p2 = p; p2 < fullstr + fulllen; p2++) {
2374 char curchar = (*p2 == Meta) ? (*++p2 ^ 32) : *p2;
2375 cnt = mbrtowc(&wc, &curchar, 1, &mbs);
2376 /* Continue while character is incomplete. */
2377 if (cnt != MB_INCOMPLETE)
2378 break;
2379 }
2380 if (cnt == MB_INVALID || cnt == MB_INCOMPLETE) {
2381 /* not a valid character, give up test */
2382 break;
2383 }
2384 /* increment p2 for last byte read */
2385 diff = ++p2 - q;
2386 if (diff == 0) {
2387 /*
2388 * Prefix or suffix matches at end of multbyte character,
2389 * so OK.
2390 */
2391 break;
2392 } else if (diff > 0) {
2393 /*
2394 * The prefix or suffix finishes in the middle
2395 * of a character. Shorten it until it doesn't.
2396 */
2397 if (sfx) {
2398 /*
2399 * We need to remove the trailing part of
2400 * the character from the suffix.
2401 */
2402 l -= diff;
2403 } else {
2404 /*
2405 * We need to remove the initial part of
2406 * the character from the prefix.
2407 */
2408 l -= (q - p);
2409 }
2410 break;
2411 }
2412 /* Advance over full character */
2413 p = p2;
2414 }
2415 }
2416 #endif
2417 if (l) {
2418 /* There was a common prefix, use it. */
2419 md->len -= l; len -= l;
2420 if (sfx) {
2421 md->str -= l; str -= l;
2422 } else {
2423 md->str += l; str += l;
2424 }
2425 ret += l;
2426 } else if (md->line || md->len != md->olen || !md->astr)
2427 return ret;
2428 else {
2429 /* We still have the line string to try. */
2430 md->line = 1;
2431 md->len = md->alen;
2432 md->str = md->astr;
2433 }
2434 }
2435 return ret;
2436 }
2437
2438 /* This is used to build a common prefix or suffix sub-list. If requested
2439 * it returns the unmatched cline lists in orest and nrest. */
2440
2441 /**/
2442 static void
join_psfx(Cline ot,Cline nt,Cline * orest,Cline * nrest,int sfx)2443 join_psfx(Cline ot, Cline nt, Cline *orest, Cline *nrest, int sfx)
2444 {
2445 Cline p = NULL, o, n;
2446 struct cmdata md, omd;
2447 char **sstr = NULL;
2448 int len, join = 0, line = 0, *slen = NULL;
2449
2450 if (sfx) {
2451 o = ot->suffix; n = nt->suffix;
2452 } else {
2453 o = ot->prefix; n = nt->prefix;
2454 }
2455 if (!o) {
2456 if (orest)
2457 *orest = NULL;
2458 if (nrest)
2459 *nrest = n;
2460 if (n && n->wlen)
2461 ot->flags |= CLF_MISS;
2462
2463 return;
2464 }
2465 if (!n) {
2466 if (sfx)
2467 ot->suffix = NULL;
2468 else
2469 ot->prefix = NULL;
2470
2471 if (orest)
2472 *orest = o;
2473 else
2474 free_cline(o);
2475 if (nrest)
2476 *nrest = NULL;
2477 return;
2478 }
2479 md.cl = n;
2480 md.len = 0;
2481
2482 /* Walk through the old list. */
2483 while (o) {
2484 join = 0;
2485 memcpy(&omd, &md, sizeof(struct cmdata));
2486
2487 /* We first get the length of the prefix equal in both strings. */
2488 if (o->flags & CLF_LINE) {
2489 if ((len = sub_match(&md, o->line, o->llen, sfx)) != o->llen) {
2490 join = 1; line = 1; slen = &(o->llen); sstr = &(o->line);
2491 }
2492 } else if ((len = sub_match(&md, o->word, o->wlen, sfx)) != o->wlen) {
2493 if (o->line) {
2494 memcpy(&md, &omd, sizeof(struct cmdata));
2495 o->flags |= CLF_LINE | CLF_DIFF;
2496
2497 continue;
2498 }
2499 o->llen = o->llen - ot->slen;
2500 join = 1; line = 0; slen = &(o->wlen); sstr = &(o->word);
2501 }
2502 if (join) {
2503 /* There is a rest that is different in the two lists,
2504 * we try to build a new cline matching both strings. */
2505 Cline joinl;
2506 int jlen;
2507
2508 if ((joinl = join_sub(&md, *sstr + len, *slen - len,
2509 &jlen, sfx, !(o->flags & CLF_JOIN)))) {
2510 /* We have one, insert it into the list. */
2511 joinl->flags |= CLF_DIFF;
2512 if (len + jlen != *slen) {
2513 Cline rest;
2514
2515 rest = get_cline(NULL, 0, *sstr + (sfx ? 0 : len + jlen),
2516 *slen - len - jlen, NULL, 0, 0);
2517
2518 rest->next = o->next;
2519 joinl->next = rest;
2520 } else
2521 joinl->next = o->next;
2522
2523 if (len) {
2524 if (sfx)
2525 *sstr += *slen - len;
2526 *slen = len;
2527 o->next = joinl;
2528 } else {
2529 o->next = NULL;
2530 free_cline(o);
2531 if (p)
2532 p->next = joinl;
2533 else if (sfx)
2534 ot->suffix = joinl;
2535 else
2536 ot->prefix = joinl;
2537 }
2538 o = joinl;
2539 join = 0;
2540 }
2541 }
2542 if (join) {
2543 /* We couldn't build a cline for a common string, so we
2544 * cut the list here. */
2545 if (len) {
2546 Cline r;
2547
2548 if (orest) {
2549 if (line)
2550 r = get_cline(o->line + len, *slen - len,
2551 NULL, 0, NULL, 0, o->flags);
2552 else
2553 r = get_cline(NULL, 0, o->word + len, *slen - len,
2554 NULL, 0, o->flags);
2555
2556 r->next = o->next;
2557 *orest = r;
2558
2559 *slen = len;
2560 o->next = NULL;
2561 } else {
2562 if (sfx)
2563 *sstr += *slen - len;
2564 *slen = len;
2565 free_cline(o->next);
2566 o->next = NULL;
2567 }
2568 } else {
2569 if (p)
2570 p->next = NULL;
2571 else if (sfx)
2572 ot->suffix = NULL;
2573 else
2574 ot->prefix = NULL;
2575
2576 if (orest)
2577 *orest = o;
2578 else
2579 free_cline(o);
2580 }
2581 if (!orest || !nrest)
2582 ot->flags |= CLF_MISS;
2583
2584 if (nrest)
2585 *nrest = undo_cmdata(&md, sfx);
2586
2587 return;
2588 }
2589 p = o;
2590 o = o->next;
2591 }
2592 if (md.len || md.cl)
2593 ot->flags |= CLF_MISS;
2594 if (orest)
2595 *orest = NULL;
2596 if (nrest)
2597 *nrest = undo_cmdata(&md, sfx);
2598 }
2599
2600 /* This builds the common prefix and suffix for a mid-cline -- the one
2601 * describing the place where the prefix and the suffix meet. */
2602
2603 /**/
2604 static void
join_mid(Cline o,Cline n)2605 join_mid(Cline o, Cline n)
2606 {
2607 if (o->flags & CLF_JOIN) {
2608 /* The JOIN flag is set in the old cline struct if it was
2609 * already joined with another one. In this case the suffix
2610 * field contains the suffix from previous calls. */
2611 Cline nr;
2612
2613 join_psfx(o, n, NULL, &nr, 0);
2614
2615 n->suffix = revert_cline(nr);
2616
2617 join_psfx(o, n, NULL, NULL, 1);
2618 } else {
2619 /* This is the first time for both structs, so the prefix field
2620 * contains the whole sub-list. */
2621 Cline or, nr;
2622
2623 o->flags |= CLF_JOIN;
2624
2625 /* We let us give both rests and use them as the suffixes. */
2626 join_psfx(o, n, &or, &nr, 0);
2627
2628 if (or)
2629 or->llen = (o->slen > or->wlen ? or->wlen : o->slen);
2630 o->suffix = revert_cline(or);
2631 n->suffix = revert_cline(nr);
2632
2633 join_psfx(o, n, NULL, NULL, 1);
2634 }
2635 n->suffix = NULL;
2636 }
2637
2638 /* This turns the sequence of anchor cline structs from b to e into a
2639 * prefix sequence, puts it before the prefix of e and then tries to
2640 * join that with the prefix of a.
2641 * This is needed if some matches had a anchor match spec and others
2642 * didn't. */
2643
2644 /**/
2645 static int
sub_join(Cline a,Cline b,Cline e,int anew)2646 sub_join(Cline a, Cline b, Cline e, int anew)
2647 {
2648 if (!e->suffix && a->prefix) {
2649 Cline op = e->prefix, n = NULL, *p = &n, t, ca;
2650 int min = 0, max = 0;
2651
2652 for (; b != e; b = b->next) {
2653 if ((*p = t = b->prefix)) {
2654 while (t->next)
2655 t = t->next;
2656 p = &(t->next);
2657 }
2658 b->suffix = b->prefix = NULL;
2659 b->flags &= ~CLF_SUF;
2660 min += b->min;
2661 max += b->max;
2662 *p = b;
2663 p = &(b->next);
2664 }
2665 *p = e->prefix;
2666 ca = a->prefix;
2667
2668 while (n) {
2669 e->prefix = cp_cline(n, 1);
2670 a->prefix = cp_cline(ca, 1);
2671
2672 if (anew) {
2673 int f = e->flags;
2674
2675 join_psfx(e, a, NULL, NULL, 0);
2676 e->flags = f;
2677 if (e->prefix)
2678 return max - min;
2679 } else {
2680 int f = e->flags;
2681
2682 join_psfx(a, e, NULL, NULL, 0);
2683 e->flags = f;
2684 if (a->prefix)
2685 return max - min;
2686 }
2687 min -= n->min;
2688
2689 if (n == op)
2690 break;
2691 n = n->next;
2692 }
2693 return max - min;
2694 }
2695 return 0;
2696 }
2697
2698 /* This simplifies the cline list given as the first argument so that
2699 * it also matches the second list. */
2700
2701 /**/
2702 Cline
join_clines(Cline o,Cline n)2703 join_clines(Cline o, Cline n)
2704 {
2705 cline_setlens(n, 1);
2706
2707 /* First time called, just return the new list. On further invocations
2708 * we will get it as the first argument. */
2709 if (!o)
2710 return n;
2711 else {
2712 Cline oo = o, nn = n, po = NULL, pn = NULL, x;
2713 int diff;
2714
2715 /* Walk through the lists. */
2716 while (o && n) {
2717 /* If one of them describes a new part and the other one does
2718 * not, synchronise them by searching an old part in the
2719 * other list. */
2720 if ((o->flags & CLF_NEW) && !(n->flags & CLF_NEW)) {
2721 Cline t, tn;
2722
2723 for (t = o; (tn = t->next) &&
2724 ((tn->flags & CLF_NEW) || !cmp_anchors(tn, n, 0));
2725 t = tn);
2726 if (tn) {
2727 diff = sub_join(n, o, tn, 1);
2728
2729 if (po)
2730 po->next = tn;
2731 else
2732 oo = tn;
2733
2734 t->next = NULL;
2735 free_cline(o);
2736 x = o;
2737 o = tn;
2738
2739 if (po && po->prefix && cmp_anchors(x, po, 0)) {
2740 po->flags |= CLF_MISS;
2741 po->max += diff;
2742 } else {
2743 o->flags |= CLF_MISS;
2744 o->max += diff;
2745 }
2746 continue;
2747 }
2748 }
2749 if (!(o->flags & CLF_NEW) && (n->flags & CLF_NEW)) {
2750 Cline t, tn;
2751
2752 for (t = n; (tn = t->next) &&
2753 ((tn->flags & CLF_NEW) || !cmp_anchors(o, tn, 0));
2754 t = tn);
2755 if (tn) {
2756 int of = o->flags & CLF_MISS;
2757
2758 diff = sub_join(o, n, tn, 0);
2759 o->flags = (o->flags & ~CLF_MISS) | of;
2760
2761 if (po && po->prefix && cmp_anchors(n, pn, 0)) {
2762 po->flags |= CLF_MISS;
2763 po->max += diff;
2764 } else {
2765 o->flags |= CLF_MISS;
2766 o->max += diff;
2767 }
2768 n = tn;
2769 continue;
2770 }
2771 }
2772 /* Almost the same as above, but for the case that they
2773 * describe different types of parts (prefix, suffix, or mid). */
2774 if ((o->flags & (CLF_SUF | CLF_MID)) !=
2775 (n->flags & (CLF_SUF | CLF_MID))) {
2776 Cline t, tn;
2777
2778 for (t = n;
2779 (tn = t->next) &&
2780 (tn->flags & (CLF_SUF | CLF_MID)) !=
2781 (o->flags & (CLF_SUF | CLF_MID));
2782 t = tn);
2783 if (tn && cmp_anchors(o, tn, 1)) {
2784 sub_join(o, n, tn, 0);
2785
2786 n = tn;
2787 continue;
2788 }
2789 for (t = o;
2790 (tn = t->next) &&
2791 (tn->flags & (CLF_SUF | CLF_MID)) !=
2792 (n->flags & (CLF_SUF | CLF_MID));
2793 t = tn);
2794 if (tn && cmp_anchors(tn, n, 1)) {
2795 sub_join(n, o, tn, 1);
2796
2797 if (po)
2798 po->next = tn;
2799 else
2800 oo = tn;
2801 t->next = NULL;
2802 free_cline(o);
2803 o = tn;
2804 continue;
2805 }
2806 if (o->flags & CLF_MID) {
2807 o->flags = (o->flags & ~CLF_MID) | (n->flags & CLF_SUF);
2808 if (n->flags & CLF_SUF) {
2809 free_cline(o->prefix);
2810 o->prefix = NULL;
2811 } else {
2812 free_cline(o->suffix);
2813 o->suffix = NULL;
2814 }
2815 }
2816 break;
2817 }
2818 /* Now see if they have matching anchors. If not, cut the list. */
2819 if (!(o->flags & CLF_MID) && !cmp_anchors(o, n, 1)) {
2820 Cline t, tn, tt, to = NULL;
2821
2822 for (t = n; (tn = t->next); t = tn)
2823 if (!(tn->flags & CLF_NEW) && (tn->flags & CLF_SKIP)) {
2824 for (tt = o; (to = tt->next); tt = to)
2825 if (!(to->flags & CLF_NEW) && (to->flags & CLF_SKIP) &&
2826 cmp_anchors(tn, to, 1))
2827 break;
2828 if (to)
2829 break;
2830 }
2831 if (tn) {
2832 if (po)
2833 po->next = to;
2834 else
2835 oo = to;
2836 o = to;
2837
2838 diff = sub_join(o, n, tn, 0);
2839
2840 o->flags |= CLF_MISS;
2841 o->max += diff;
2842
2843 n = tn;
2844 po = o;
2845 o = o->next;
2846 pn = n;
2847 n = n->next;
2848 continue;
2849 } else {
2850 for (t = o; (to = t->next); t = to)
2851 if ((to->flags & CLF_SKIP) && cmp_anchors(n, to, 1))
2852 break;
2853
2854 if (to) {
2855 diff = sub_join(n, o, to, 1);
2856
2857 if (po)
2858 po->next = to;
2859 else
2860 oo = to;
2861 x = o;
2862 o = to;
2863 if (po && po->prefix && cmp_anchors(x, po, 0)) {
2864 po->flags |= CLF_MISS;
2865 po->max += diff;
2866 } else {
2867 o->flags |= CLF_MISS;
2868 o->max += diff;
2869 }
2870 continue;
2871 } else {
2872 for (tt = NULL, t = n; (tn = t->next); t = tn) {
2873 if (tn->flags & CLF_SKIP)
2874 for (tt = o; (to = tt->next); tt = to)
2875 if ((to->flags & CLF_SKIP) &&
2876 cmp_anchors(tn, to, 1))
2877 break;
2878 if (to)
2879 break;
2880 }
2881 if (to) {
2882 diff = sub_join(n, o, to, 1);
2883
2884 if (po)
2885 po->next = to;
2886 else
2887 oo = to;
2888 x = o;
2889 o = to;
2890 if (po && po->prefix && cmp_anchors(x, po, 0)) {
2891 po->flags |= CLF_MISS;
2892 po->max += diff;
2893 } else {
2894 o->flags |= CLF_MISS;
2895 o->max += diff;
2896 }
2897 continue;
2898 } else {
2899 for (tn = n; tn; tn = tn->next)
2900 if ((tn->flags & CLF_NEW) ==
2901 (o->flags & CLF_NEW) &&
2902 cmp_anchors(tn, o, 1)) break;
2903
2904 if (tn) {
2905 int of = o->flags & CLF_MISS;
2906
2907 if ((diff = sub_join(o, n, tn, 0))) {
2908 o->flags = (o->flags & ~CLF_MISS) | of;
2909 if (po && po->prefix) {
2910 po->flags |= CLF_MISS;
2911 po->max += diff;
2912 }
2913 else {
2914 o->flags |= CLF_MISS;
2915 o->max += diff;
2916 }
2917 }
2918 n = tn;
2919 po = o;
2920 o = o->next;
2921 pn = n;
2922 n = n->next;
2923 continue;
2924 }
2925 if (o->flags & CLF_SUF)
2926 break;
2927
2928 o->word = o->line = o->orig = NULL;
2929 o->wlen = 0;
2930 free_cline(o->next);
2931 o->next = NULL;
2932 o->flags |= CLF_MISS;
2933 }
2934 }
2935 }
2936 }
2937 /* Ok, they are equal, now copy the information about the
2938 * original string if needed, calculate minimum and maximum
2939 * lengths, and join the sub-lists. */
2940 if (!o->orig && !o->olen) {
2941 o->orig = n->orig;
2942 o->olen = n->olen;
2943 }
2944 if (n->min < o->min)
2945 o->min = n->min;
2946 if (n->max > o->max)
2947 o->max = n->max;
2948 if (o->flags & CLF_MID)
2949 join_mid(o, n);
2950 else
2951 join_psfx(o, n, NULL, NULL, (o->flags & CLF_SUF));
2952
2953 po = o;
2954 o = o->next;
2955 pn = n;
2956 n = n->next;
2957 }
2958 /* Free the rest of the old list. */
2959 if (o) {
2960 if (po)
2961 po->next = NULL;
2962 else
2963 oo = NULL;
2964
2965 free_cline(o);
2966 }
2967 free_cline(nn);
2968
2969 return oo;
2970 }
2971 }
2972