1 /*
2 * pattern.c - pattern matching
3 *
4 * This file is part of zsh, the Z shell.
5 *
6 * Copyright (c) 1999 Peter Stephenson
7 * All rights reserved.
8 *
9 * Permission is hereby granted, without written agreement and without
10 * license or royalty fees, to use, copy, modify, and distribute this
11 * software and to distribute modified versions of this software for any
12 * purpose, provided that the above copyright notice and the following
13 * two paragraphs appear in all copies of this software.
14 *
15 * In no event shall Peter Stephenson or the Zsh Development Group be liable
16 * to any party for direct, indirect, special, incidental, or consequential
17 * damages arising out of the use of this software and its documentation,
18 * even if Peter Stephenson and the Zsh Development Group have been advised of
19 * the possibility of such damage.
20 *
21 * Peter Stephenson and the Zsh Development Group specifically disclaim any
22 * warranties, including, but not limited to, the implied warranties of
23 * merchantability and fitness for a particular purpose. The software
24 * provided hereunder is on an "as is" basis, and Peter Stephenson and the
25 * Zsh Development Group have no obligation to provide maintenance,
26 * support, updates, enhancements, or modifications.
27 *
28 * Pattern matching code derived from the regexp library by Henry
29 * Spencer, which has the following copyright.
30 *
31 * Copyright (c) 1986 by University of Toronto.
32 * Written by Henry Spencer. Not derived from licensed software.
33 *
34 * Permission is granted to anyone to use this software for any
35 * purpose on any computer system, and to redistribute it freely,
36 * subject to the following restrictions:
37 *
38 * 1. The author is not responsible for the consequences of use of
39 * this software, no matter how awful, even if they arise
40 * from defects in it.
41 *
42 * 2. The origin of this software must not be misrepresented, either
43 * by explicit claim or by omission.
44 *
45 * 3. Altered versions must be plainly marked as such, and must not
46 * be misrepresented as being the original software.
47 *
48 * Eagle-eyed readers will notice this is an altered version. Incredibly
49 * sharp-eyed readers might even find bits that weren't altered.
50 *
51 *
52 * And I experienced a sense that, like certain regular
53 * expressions, seemed to match the day from beginning to end, so
54 * that I did not need to identify the parenthesised subexpression
55 * that told of dawn, nor the group of characters that indicated
56 * the moment when my grandfather returned home with news of
57 * Swann's departure for Paris; and the whole length of the month
58 * of May, as if matched by a closure, fitted into the buffer of my
59 * life with no sign of overflowing, turning the days, like a
60 * procession of insects that could consist of this or that
61 * species, into a random and unstructured repetition of different
62 * sequences, anchored from the first day of the month to the last
63 * in the same fashion as the weeks when I knew I would not see
64 * Gilberte and would search in vain for any occurrences of the
65 * string in the avenue of hawthorns by Tansonville, without my
66 * having to delimit explicitly the start or finish of the pattern.
67 *
68 * M. Proust, "In Search of Lost Files",
69 * bk I, "The Walk by Bourne's Place".
70 */
71
72 #include "zsh.mdh"
73
74 /*
75 * The following union is used mostly for alignment purposes.
76 * Normal nodes are longs, while certain nodes take a char * as an argument;
77 * here we make sure that they both work out to the same length.
78 * The compiled regexp we construct consists of upats stuck together;
79 * anything else to be added (strings, numbers) is stuck after and
80 * then aligned to a whole number of upat units.
81 *
82 * Note also that offsets are in terms of the sizes of these things.
83 */
84 union upat {
85 long l;
86 unsigned char *p;
87 };
88
89 typedef union upat *Upat;
90
91 #include "pattern.pro"
92
93 /* Number of active parenthesized expressions allowed in backreferencing */
94 #define NSUBEXP 9
95
96 /* definition number opnd? meaning */
97 #define P_END 0x00 /* no End of program. */
98 #define P_EXCSYNC 0x01 /* no Test if following exclude already failed */
99 #define P_EXCEND 0x02 /* no Test if exclude matched orig branch */
100 #define P_BACK 0x03 /* no Match "", "next" ptr points backward. */
101 #define P_EXACTLY 0x04 /* lstr Match this string. */
102 #define P_NOTHING 0x05 /* no Match empty string. */
103 #define P_ONEHASH 0x06 /* node Match this (simple) thing 0 or more times. */
104 #define P_TWOHASH 0x07 /* node Match this (simple) thing 1 or more times. */
105 #define P_GFLAGS 0x08 /* long Match nothing and set globbing flags */
106 #define P_ISSTART 0x09 /* no Match start of string. */
107 #define P_ISEND 0x0a /* no Match end of string. */
108 #define P_COUNTSTART 0x0b /* no Initialise P_COUNT */
109 #define P_COUNT 0x0c /* 3*long uc* node Match a number of repetitions */
110 /* numbered so we can test bit 5 for a branch */
111 #define P_BRANCH 0x20 /* node Match this alternative, or the next... */
112 #define P_WBRANCH 0x21 /* uc* node P_BRANCH, but match at least 1 char */
113 /* excludes are also branches, but have bit 4 set, too */
114 #define P_EXCLUDE 0x30 /* uc* node Exclude this from previous branch */
115 #define P_EXCLUDP 0x31 /* uc* node Exclude, using full file path so far */
116 /* numbered so we can test bit 6 so as not to match initial '.' */
117 #define P_ANY 0x40 /* no Match any one character. */
118 #define P_ANYOF 0x41 /* str Match any character in this string. */
119 #define P_ANYBUT 0x42 /* str Match any character not in this string. */
120 #define P_STAR 0x43 /* no Match any set of characters. */
121 #define P_NUMRNG 0x44 /* zr, zr Match a numeric range. */
122 #define P_NUMFROM 0x45 /* zr Match a number >= X */
123 #define P_NUMTO 0x46 /* zr Match a number <= X */
124 #define P_NUMANY 0x47 /* no Match any set of decimal digits */
125 /* spaces left for P_OPEN+n,... for backreferences */
126 #define P_OPEN 0x80 /* no Mark this point in input as start of n. */
127 #define P_CLOSE 0x90 /* no Analogous to OPEN. */
128 /*
129 * no no argument
130 * zr the range type zrange_t: may be zlong or unsigned long
131 * char a single char
132 * uc* a pointer to unsigned char, used at run time and initialised
133 * to NULL.
134 * str null-terminated, metafied string
135 * lstr length as long then string, not null-terminated, unmetafied.
136 */
137
138 /*
139 * Notes on usage:
140 * P_WBRANCH: This works like a branch and is used in complex closures,
141 * to ensure we don't succeed on a zero-length match of the pattern,
142 * since that would cause an infinite loop. We do this by recording
143 * the positions where we have already tried to match. See the
144 * P_WBRANCH test in patmatch().
145 *
146 * P_ANY, P_ANYOF: the operand is a null terminated
147 * string. Normal characters match as expected. Characters
148 * in the range Meta+PP_ALPHA..Meta+PP_UNKWN do the appropriate
149 * Posix range tests. This relies on imeta returning true for these
150 * characters. We treat unknown POSIX ranges as never matching.
151 * PP_RANGE means the next two (possibly metafied) characters form
152 * the limits of a range to test; it's too much like hard work to
153 * expand the range.
154 *
155 * P_EXCLUDE, P_EXCSYNC, PEXCEND: P_EXCLUDE appears in the pattern like
156 * P_BRANCH, but applies to the immediately preceding branch. The code in
157 * the corresponding branch is followed by a P_EXCSYNC, which simply
158 * acts as a marker that a P_EXCLUDE comes next. The P_EXCLUDE
159 * has a pointer to char embedded in it, which works
160 * like P_WBRANCH: if we get to the P_EXCSYNC, and we already matched
161 * up to the same position, fail. Thus we are forced to backtrack
162 * on closures in the P_BRANCH if the first attempt was excluded.
163 * Corresponding to P_EXCSYNC in the original branch, there is a
164 * P_EXCEND in the exclusion. If we get to this point, and we did
165 * *not* match in the original branch, the exclusion itself fails,
166 * otherwise it succeeds since we know the tail already matches,
167 * so P_EXCEND is the end of the exclusion test.
168 * The whole sorry mess looks like this, where the upper lines
169 * show the linkage of the branches, and the lower shows the linkage
170 * of their pattern arguments.
171 *
172 * --------------------- ----------------------
173 * ^ v ^ v
174 * ( <BRANCH>:apat-><EXCSYNC> <EXCLUDE>:excpat-><EXCEND> ) tail
175 * ^
176 * | |
177 * --------------------------------------
178 *
179 * P_EXCLUDP: this behaves exactly like P_EXCLUDE, with the sole exception
180 * that we prepend the path so far to the exclude pattern. This is
181 * for top level file globs, e.g. ** / *.c~*foo.c
182 * ^ I had to leave this space
183 * P_NUM*: zl is a zlong if that is 64-bit, else an unsigned long.
184 *
185 * P_COUNTSTART, P_COUNT: a P_COUNTSTART flags the start of a quantified
186 * closure (#cN,M) and is used to initialise the count. Executing
187 * the pattern leads back to the P_COUNT, while the next links of the
188 * P_COUNTSTART and P_COUNT lead to the tail of the pattern:
189 *
190 * ----------------
191 * v ^
192 * <COUNTSTART><COUNT>pattern<BACK> tail
193 * v v ^
194 * ------------------------
195 */
196
197 #define P_OP(p) ((p)->l & 0xff)
198 #define P_NEXT(p) ((p)->l >> 8)
199 #define P_OPERAND(p) ((p) + 1)
200 #define P_ISBRANCH(p) ((p)->l & 0x20)
201 #define P_ISEXCLUDE(p) (((p)->l & 0x30) == 0x30)
202 #define P_NOTDOT(p) ((p)->l & 0x40)
203
204 /* Specific to lstr type, i.e. P_EXACTLY. */
205 #define P_LS_LEN(p) ((p)[1].l) /* can be used as lvalue */
206 #define P_LS_STR(p) ((char *)((p) + 2))
207
208 /* Specific to P_COUNT: arguments as offset in nodes from operator */
209 #define P_CT_CURRENT (1) /* Current count */
210 #define P_CT_MIN (2) /* Minimum count */
211 #define P_CT_MAX (3) /* Maximum count, -1 for none */
212 #define P_CT_PTR (4) /* Pointer to last match start */
213 #define P_CT_OPERAND (5) /* Operand of P_COUNT */
214
215 /* Flags needed when pattern is executed */
216 #define P_SIMPLE 0x01 /* Simple enough to be #/## operand. */
217 #define P_HSTART 0x02 /* Starts with # or ##'d pattern. */
218 #define P_PURESTR 0x04 /* Can be matched with a strcmp */
219
220 #if defined(ZSH_64_BIT_TYPE) || defined(LONG_IS_64_BIT)
221 typedef zlong zrange_t;
222 #define ZRANGE_T_IS_SIGNED (1)
223 #define ZRANGE_MAX ZLONG_MAX
224 #else
225 typedef unsigned long zrange_t;
226 #define ZRANGE_MAX ULONG_MAX
227 #endif
228
229 #ifdef MULTIBYTE_SUPPORT
230 /*
231 * Handle a byte that's not part of a valid character.
232 *
233 * This range in Unicode is recommended for purposes of this
234 * kind as it corresponds to invalid characters.
235 *
236 * Note that this strictly only works if wchar_t represents
237 * Unicode code points, which isn't necessarily true; however,
238 * converting an invalid character into an unknown format is
239 * a bit tricky...
240 */
241 #define WCHAR_INVALID(ch) \
242 ((wchar_t) (0xDC00 + STOUC(ch)))
243 #endif /* MULTIBYTE_SUPPORT */
244
245 /*
246 * Array of characters corresponding to zpc_chars enum, which it must match.
247 */
248 static const char zpc_chars[ZPC_COUNT] = {
249 '/', '\0', Bar, Outpar, Tilde, Inpar, Quest, Star, Inbrack, Inang,
250 Hat, Pound, Bnullkeep, Quest, Star, '+', Bang, '!', '@'
251 };
252
253 /*
254 * Corresponding strings used in enable/disable -p.
255 * NULL means no way of turning this on or off.
256 */
257 /**/
258 mod_export const char *zpc_strings[ZPC_COUNT] = {
259 NULL, NULL, "|", NULL, "~", "(", "?", "*", "[", "<",
260 "^", "#", NULL, "?(", "*(", "+(", "!(", "\\!(", "@("
261 };
262
263 /*
264 * Corresponding array of pattern disables as set by the user
265 * using "disable -p".
266 */
267 /**/
268 mod_export char zpc_disables[ZPC_COUNT];
269
270 /*
271 * Stack of saved (compressed) zpc_disables for function scope.
272 */
273
274 static struct zpc_disables_save *zpc_disables_stack;
275
276 /*
277 * Characters which terminate a simple string (ZPC_COUNT) or
278 * an entire pattern segment (the first ZPC_SEG_COUNT).
279 * Each entry is either the corresponding character in zpc_chars
280 * or Marker which is guaranteed not to match a character in a
281 * pattern we are compiling.
282 *
283 * The complete list indicates characters that are special, so e.g.
284 * (testchar == special[ZPC_TILDE]) succeeds only if testchar is a Tilde
285 * *and* Tilde is currently special.
286 */
287
288 /**/
289 char zpc_special[ZPC_COUNT];
290
291 /* Default size for pattern buffer */
292 #define P_DEF_ALLOC 256
293
294 /* Flags used in compilation */
295 static char *patstart, *patparse; /* input pointers */
296 static int patnpar; /* () count */
297 static char *patcode; /* point of code emission */
298 static long patsize; /* size of code */
299 static char *patout; /* start of code emission string */
300 static long patalloc; /* size allocated for same */
301
302 /* Flags used in both compilation and execution */
303 static int patflags; /* flags passed down to patcompile */
304 static int patglobflags; /* globbing flags & approx */
305
306 /*
307 * Increment pointer to metafied multibyte string.
308 */
309 #ifdef MULTIBYTE_SUPPORT
310 typedef wint_t patint_t;
311
312 #define PEOF WEOF
313
314 #define METACHARINC(x) ((void)metacharinc(&x))
315
316 /*
317 * TODO: the shiftstate isn't well handled; we don't guarantee
318 * to maintain it properly between characters. If we don't
319 * need it we should use mbtowc() instead.
320 */
321 static mbstate_t shiftstate;
322
323 /*
324 * Multibyte version: it's (almost) as easy to return the
325 * value as not, so do so since we sometimes need it..
326 */
327 static wchar_t
metacharinc(char ** x)328 metacharinc(char **x)
329 {
330 char *inptr = *x;
331 char inchar;
332 size_t ret = MB_INVALID;
333 wchar_t wc;
334
335 /*
336 * Cheat if the top bit isn't set. This is second-guessing
337 * the library, but we know for sure that if the character
338 * set doesn't have the property that all bytes with the 8th
339 * bit clear are single characters then we are stuffed.
340 */
341 if (!(patglobflags & GF_MULTIBYTE) || !(STOUC(*inptr) & 0x80))
342 {
343 if (itok(*inptr))
344 inchar = ztokens[*inptr++ - Pound];
345 else if (*inptr == Meta) {
346 inptr++;
347 inchar = *inptr++ ^ 32;
348 } else {
349 inchar = *inptr++;
350 }
351 *x = inptr;
352 return (wchar_t)STOUC(inchar);
353 }
354
355 while (*inptr) {
356 if (itok(*inptr))
357 inchar = ztokens[*inptr++ - Pound];
358 else if (*inptr == Meta) {
359 inptr++;
360 inchar = *inptr++ ^ 32;
361 } else {
362 inchar = *inptr++;
363 }
364 ret = mbrtowc(&wc, &inchar, 1, &shiftstate);
365
366 if (ret == MB_INVALID)
367 break;
368 if (ret == MB_INCOMPLETE)
369 continue;
370 *x = inptr;
371 return wc;
372 }
373
374 /* Error. */
375 /* Reset the shift state for next time. */
376 memset(&shiftstate, 0, sizeof(shiftstate));
377 return WCHAR_INVALID(*(*x)++);
378 }
379
380 #else
381 typedef int patint_t;
382
383 #define PEOF EOF
384
385 #define METACHARINC(x) ((void)((x) += (*(x) == Meta) ? 2 : 1))
386 #endif
387
388 /*
389 * Return unmetafied char from string (x is any char *).
390 * Used with MULTIBYTE_SUPPORT if the GF_MULTIBYTE is not
391 * in effect.
392 */
393 #define UNMETA(x) (*(x) == Meta ? (x)[1] ^ 32 : *(x))
394
395 /* Add n more characters, ensuring there is enough space. */
396
397 enum {
398 PA_NOALIGN = 1,
399 PA_UNMETA = 2
400 };
401
402 /**/
403 static void
patadd(char * add,int ch,long n,int paflags)404 patadd(char *add, int ch, long n, int paflags)
405 {
406 /* Make sure everything gets aligned unless we get PA_NOALIGN. */
407 long newpatsize = patsize + n;
408 if (!(paflags & PA_NOALIGN))
409 newpatsize = (newpatsize + sizeof(union upat) - 1) &
410 ~(sizeof(union upat) - 1);
411 if (patalloc < newpatsize) {
412 long newpatalloc =
413 2*(newpatsize > patalloc ? newpatsize : patalloc);
414 patout = (char *)zrealloc((char *)patout, newpatalloc);
415 patcode = patout + patsize;
416 patalloc = newpatalloc;
417 }
418 patsize = newpatsize;
419 if (add) {
420 if (paflags & PA_UNMETA) {
421 /*
422 * Unmetafy and untokenize the string as we go.
423 * The Meta characters in add aren't counted in n.
424 */
425 while (n--) {
426 if (itok(*add))
427 *patcode++ = ztokens[*add++ - Pound];
428 else if (*add == Meta) {
429 add++;
430 *patcode++ = *add++ ^ 32;
431 } else {
432 *patcode++ = *add++;
433 }
434 }
435 } else {
436 while (n--)
437 *patcode++ = *add++;
438 }
439 } else
440 *patcode++ = ch;
441 patcode = patout + patsize;
442 }
443
444 static long rn_offs;
445 /* operates on pointers to union upat, returns a pointer */
446 #define PATNEXT(p) ((rn_offs = P_NEXT(p)) ? \
447 (P_OP(p) == P_BACK) ? \
448 ((p)-rn_offs) : ((p)+rn_offs) : NULL)
449
450 /*
451 * Set up zpc_special with characters that end a string segment.
452 * "Marker" cannot occur in the pattern we are compiling so
453 * is used to mark "invalid".
454 */
455 static void
patcompcharsset(void)456 patcompcharsset(void)
457 {
458 char *spp, *disp;
459 int i;
460
461 /* Initialise enabled special characters */
462 memcpy(zpc_special, zpc_chars, ZPC_COUNT);
463 /* Apply user disables from disable -p */
464 for (i = 0, spp = zpc_special, disp = zpc_disables;
465 i < ZPC_COUNT;
466 i++, spp++, disp++) {
467 if (*disp)
468 *spp = Marker;
469 }
470
471 if (!isset(EXTENDEDGLOB)) {
472 /* Extended glob characters are not active */
473 zpc_special[ZPC_TILDE] = zpc_special[ZPC_HAT] =
474 zpc_special[ZPC_HASH] = Marker;
475 }
476 if (!isset(KSHGLOB)) {
477 /*
478 * Ksh glob characters are not active.
479 * * and ? are shared with normal globbing, but for their
480 * use here we are looking for a following Inpar.
481 */
482 zpc_special[ZPC_KSH_QUEST] = zpc_special[ZPC_KSH_STAR] =
483 zpc_special[ZPC_KSH_PLUS] = zpc_special[ZPC_KSH_BANG] =
484 zpc_special[ZPC_KSH_BANG2] = zpc_special[ZPC_KSH_AT] = Marker;
485 }
486 /*
487 * Note that if we are using KSHGLOB, then we test for a following
488 * Inpar, not zpc_special[ZPC_INPAR]: the latter makes an Inpar on
489 * its own active. The zpc_special[ZPC_KSH_*] followed by any old Inpar
490 * discriminate ksh globbing.
491 */
492 if (isset(SHGLOB)) {
493 /*
494 * Grouping and numeric ranges are not valid.
495 * We do allow alternation, however; it's needed for
496 * "case". This may not be entirely consistent.
497 *
498 * Don't disable Outpar: we may need to match the end of KSHGLOB
499 * parentheses and it would be difficult to tell them apart.
500 */
501 zpc_special[ZPC_INPAR] = zpc_special[ZPC_INANG] = Marker;
502 }
503 }
504
505 /* Called before parsing a set of file matches to initialize flags */
506
507 /**/
508 void
patcompstart(void)509 patcompstart(void)
510 {
511 patcompcharsset();
512 if (isset(CASEGLOB))
513 patglobflags = 0;
514 else
515 patglobflags = GF_IGNCASE;
516 if (isset(MULTIBYTE))
517 patglobflags |= GF_MULTIBYTE;
518 }
519
520 /*
521 * Top level pattern compilation subroutine
522 * exp is a null-terminated, metafied string.
523 * inflags is an or of some PAT_* flags.
524 * endexp, if non-null, is set to a pointer to the end of the
525 * part of exp which was compiled. This is used when
526 * compiling patterns for directories which must be
527 * matched recursively.
528 */
529
530 /**/
531 mod_export Patprog
patcompile(char * exp,int inflags,char ** endexp)532 patcompile(char *exp, int inflags, char **endexp)
533 {
534 int flags = 0;
535 long len = 0;
536 long startoff;
537 Upat pscan;
538 char *lng, *strp = NULL;
539 Patprog p;
540
541 queue_signals();
542
543 startoff = sizeof(struct patprog);
544 /* Ensure alignment of start of program string */
545 startoff = (startoff + sizeof(union upat) - 1) & ~(sizeof(union upat) - 1);
546
547 /* Allocate reasonable sized chunk if none, reduce size if too big */
548 if (patalloc != P_DEF_ALLOC)
549 patout = (char *)zrealloc(patout, patalloc = P_DEF_ALLOC);
550 patcode = patout + startoff;
551 patsize = patcode - patout;
552 patstart = patparse = exp;
553 /*
554 * Note global patnpar numbers parentheses 1..9, while patnpar
555 * in struct is actual count of parentheses.
556 */
557 patnpar = 1;
558 patflags = inflags & ~(PAT_PURES|PAT_HAS_EXCLUDP);
559
560 if (!(patflags & PAT_FILE)) {
561 patcompcharsset();
562 zpc_special[ZPC_SLASH] = Marker;
563 remnulargs(patparse);
564 if (isset(MULTIBYTE))
565 patglobflags = GF_MULTIBYTE;
566 else
567 patglobflags = 0;
568 }
569 if (patflags & PAT_LCMATCHUC)
570 patglobflags |= GF_LCMATCHUC;
571 /*
572 * Have to be set now, since they get updated during compilation.
573 */
574 ((Patprog)patout)->globflags = patglobflags;
575
576 if (!(patflags & PAT_ANY)) {
577 /* Look for a really pure string, with no tokens at all. */
578 if (!(patglobflags & ~GF_MULTIBYTE)
579 #ifdef __CYGWIN__
580 /*
581 * If the OS treats files case-insensitively and we
582 * are looking at files, we don't need to use pattern
583 * matching to find the file.
584 */
585 || (!(patglobflags & ~GF_IGNCASE) && (patflags & PAT_FILE))
586 #endif
587 )
588 {
589 /*
590 * Waah! I wish I understood this.
591 * Empty metafied strings have an initial Nularg.
592 * This never corresponds to a real character in
593 * a glob pattern or string, so skip it.
594 */
595 if (*exp == Nularg)
596 exp++;
597 for (strp = exp; *strp &&
598 (!(patflags & PAT_FILE) || *strp != '/') && !itok(*strp);
599 strp++)
600 ;
601 }
602 if (!strp || (*strp && *strp != '/')) {
603 /* No, do normal compilation. */
604 strp = NULL;
605 if (patcompswitch(0, &flags) == 0) {
606 unqueue_signals();
607 return NULL;
608 }
609 } else {
610 /*
611 * Yes, copy the string, and skip compilation altogether.
612 * Null terminate for the benefit of globbing.
613 * Leave metafied both for globbing and for our own
614 * efficiency.
615 */
616 patparse = strp;
617 len = strp - exp;
618 patadd(exp, 0, len + 1, 0);
619 patout[startoff + len] = '\0';
620 patflags |= PAT_PURES;
621 }
622 }
623
624 /* end of compilation: safe to use pointers */
625 p = (Patprog)patout;
626 p->startoff = startoff;
627 p->patstartch = '\0';
628 p->globend = patglobflags;
629 p->flags = patflags;
630 p->mustoff = 0;
631 p->size = patsize;
632 p->patmlen = len;
633 p->patnpar = patnpar-1;
634
635 if (!strp) {
636 pscan = (Upat)(patout + startoff);
637
638 if (!(patflags & PAT_ANY) && P_OP(PATNEXT(pscan)) == P_END) {
639 /* only one top level choice */
640 pscan = P_OPERAND(pscan);
641
642 if (flags & P_PURESTR) {
643 /*
644 * The pattern can be matched with a simple strncmp/strcmp.
645 * Careful in case we've overwritten the node for the next ptr.
646 */
647 char *dst = patout + startoff;
648 Upat next;
649 p->flags |= PAT_PURES;
650 for (; pscan; pscan = next) {
651 next = PATNEXT(pscan);
652 if (P_OP(pscan) == P_EXACTLY) {
653 char *opnd = P_LS_STR(pscan), *mtest;
654 long oplen = P_LS_LEN(pscan), ilen;
655 int nmeta = 0;
656 /*
657 * Unfortunately we unmetafied the string
658 * and we need to put any metacharacters
659 * back now we know it's a pure string.
660 * This shouldn't happen too often, it's
661 * just that there are some cases such
662 * as . and .. in files where we really
663 * need a pure string even if there are
664 * pattern characters flying around.
665 */
666 for (mtest = opnd, ilen = oplen; ilen;
667 mtest++, ilen--)
668 if (imeta(*mtest))
669 nmeta++;
670 if (nmeta) {
671 patadd(NULL, 0, nmeta, 0);
672 p = (Patprog)patout;
673 opnd = dupstring_wlen(opnd, oplen);
674 dst = patout + startoff;
675 }
676
677 while (oplen--) {
678 if (imeta(*opnd)) {
679 *dst++ = Meta;
680 *dst++ = *opnd++ ^ 32;
681 } else {
682 *dst++ = *opnd++;
683 }
684 }
685 /* Only one string in a PAT_PURES, so now done. */
686 break;
687 }
688 }
689 p->size = dst - patout;
690 /* patmlen is really strlen. We don't need a null. */
691 p->patmlen = p->size - startoff;
692 } else {
693 /* starting point info */
694 if (P_OP(pscan) == P_EXACTLY && !p->globflags &&
695 P_LS_LEN(pscan))
696 p->patstartch = *P_LS_STR(pscan);
697 /*
698 * Find the longest literal string in something expensive.
699 * This is itself not all that cheap if we have
700 * case-insensitive matching or approximation, so don't.
701 */
702 if ((flags & P_HSTART) && !p->globflags) {
703 lng = NULL;
704 len = 0;
705 for (; pscan; pscan = PATNEXT(pscan))
706 if (P_OP(pscan) == P_EXACTLY &&
707 P_LS_LEN(pscan) >= len) {
708 lng = P_LS_STR(pscan);
709 len = P_LS_LEN(pscan);
710 }
711 if (lng) {
712 p->mustoff = lng - patout;
713 p->patmlen = len;
714 }
715 }
716 }
717 }
718 }
719
720 /*
721 * The pattern was compiled in a fixed buffer: unless told otherwise,
722 * we stick the compiled pattern on the heap. This is necessary
723 * for files where we will often be compiling multiple segments at once.
724 * But if we get the ZDUP flag we always put it in zalloc()ed memory.
725 */
726 if (patflags & PAT_ZDUP) {
727 Patprog newp = (Patprog)zalloc(patsize);
728 memcpy((char *)newp, (char *)p, patsize);
729 p = newp;
730 } else if (!(patflags & PAT_STATIC)) {
731 Patprog newp = (Patprog)zhalloc(patsize);
732 memcpy((char *)newp, (char *)p, patsize);
733 p = newp;
734 }
735
736 if (endexp)
737 *endexp = patparse;
738
739 unqueue_signals();
740 return p;
741 }
742
743 /*
744 * Main body or parenthesized subexpression in pattern
745 * Parenthesis (and any ksh_glob gubbins) will have been removed.
746 */
747
748 /**/
749 static long
patcompswitch(int paren,int * flagp)750 patcompswitch(int paren, int *flagp)
751 {
752 long starter, br, ender, excsync = 0;
753 int parno = 0;
754 int flags, gfchanged = 0;
755 long savglobflags = (long)patglobflags;
756 Upat ptr;
757
758 *flagp = 0;
759
760 if (paren && (patglobflags & GF_BACKREF) && patnpar <= NSUBEXP) {
761 /*
762 * parenthesized: make an open node.
763 * We can only refer to the first nine parentheses.
764 * For any others, we just use P_OPEN on its own; there's
765 * no gain in arbitrarily limiting the number of parentheses.
766 */
767 parno = patnpar++;
768 starter = patnode(P_OPEN + parno);
769 } else
770 starter = 0;
771
772 br = patnode(P_BRANCH);
773 if (!patcompbranch(&flags, paren))
774 return 0;
775 if (patglobflags != (int)savglobflags)
776 gfchanged++;
777 if (starter)
778 pattail(starter, br);
779 else
780 starter = br;
781
782 *flagp |= flags & (P_HSTART|P_PURESTR);
783
784 while (*patparse == zpc_chars[ZPC_BAR] ||
785 (*patparse == zpc_special[ZPC_TILDE] &&
786 (patparse[1] == '/' ||
787 !memchr(zpc_special, patparse[1], ZPC_SEG_COUNT)))) {
788 int tilde = *patparse++ == zpc_special[ZPC_TILDE];
789 long gfnode = 0, newbr;
790
791 *flagp &= ~P_PURESTR;
792
793 if (tilde) {
794 union upat up;
795 /* excsync remembers the P_EXCSYNC node before a chain of
796 * exclusions: all point back to this. only the
797 * original (non-excluded) branch gets a trailing P_EXCSYNC.
798 */
799 if (!excsync) {
800 excsync = patnode(P_EXCSYNC);
801 patoptail(br, excsync);
802 }
803 /*
804 * By default, approximations are turned off in exclusions:
805 * we need to do this here as otherwise the code compiling
806 * the exclusion doesn't know if the flags have really
807 * changed if the error count gets restored.
808 */
809 patglobflags &= ~0xff;
810 if (!(patflags & PAT_FILET) || paren) {
811 br = patnode(P_EXCLUDE);
812 } else {
813 /*
814 * At top level (paren == 0) in a file glob !(patflags
815 * &PAT_FILET) do the exclusion prepending the file path
816 * so far. We need to flag this to avoid unnecessarily
817 * copying the path.
818 */
819 br = patnode(P_EXCLUDP);
820 patflags |= PAT_HAS_EXCLUDP;
821 }
822 up.p = NULL;
823 patadd((char *)&up, 0, sizeof(up), 0);
824 /* / is not treated as special if we are at top level */
825 if (!paren && zpc_special[ZPC_SLASH] == '/') {
826 tilde++;
827 zpc_special[ZPC_SLASH] = Marker;
828 }
829 } else {
830 excsync = 0;
831 br = patnode(P_BRANCH);
832 /*
833 * The position of the following statements means globflags
834 * set in the main branch carry over to the exclusion.
835 */
836 if (!paren) {
837 patglobflags = 0;
838 if (((Patprog)patout)->globflags) {
839 /*
840 * If at top level, we need to reinitialize flags to zero,
841 * since (#i)foo|bar only applies to foo and we stuck
842 * the #i into the global flags.
843 * We could have done it so that they only got set in the
844 * first branch, but it's quite convenient having any
845 * global flags set in the header and not buried in the
846 * pattern. (Or maybe it isn't and we should
847 * forget this bit and always stick in an explicit GFLAGS
848 * statement instead of using the header.)
849 * Also, this can't happen for file globs where there are
850 * no top-level |'s.
851 *
852 * No gfchanged, as nothing to follow branch at top
853 * level.
854 */
855 union upat up;
856 gfnode = patnode(P_GFLAGS);
857 up.l = patglobflags;
858 patadd((char *)&up, 0, sizeof(union upat), 0);
859 }
860 } else {
861 patglobflags = (int)savglobflags;
862 }
863 }
864 newbr = patcompbranch(&flags, paren);
865 if (tilde == 2) {
866 /* restore special treatment of / */
867 zpc_special[ZPC_SLASH] = '/';
868 }
869 if (!newbr)
870 return 0;
871 if (gfnode)
872 pattail(gfnode, newbr);
873 if (!tilde && patglobflags != (int)savglobflags)
874 gfchanged++;
875 pattail(starter, br);
876 if (excsync)
877 patoptail(br, patnode(P_EXCEND));
878 *flagp |= flags & P_HSTART;
879 }
880
881 /*
882 * Make a closing node, hooking it to the end.
883 * Note that we can't optimize P_NOTHING out here, since another
884 * branch at that point would indicate the current choices continue,
885 * which they don't.
886 */
887 ender = patnode(paren ? parno ? P_CLOSE+parno : P_NOTHING : P_END);
888 pattail(starter, ender);
889
890 /*
891 * Hook the tails of the branches to the closing node,
892 * except for exclusions which terminate where they are.
893 */
894 for (ptr = (Upat)patout + starter; ptr; ptr = PATNEXT(ptr))
895 if (!P_ISEXCLUDE(ptr))
896 patoptail(ptr-(Upat)patout, ender);
897
898 /* check for proper termination */
899 if ((paren && *patparse++ != Outpar) ||
900 (!paren && *patparse &&
901 !((patflags & PAT_FILE) && *patparse == '/')))
902 return 0;
903
904 if (paren && gfchanged) {
905 /*
906 * Restore old values of flags when leaving parentheses.
907 * gfchanged detects a change in any branch (except exclusions
908 * which are separate), since we need to emit this even if
909 * a later branch happened to put the flags back.
910 */
911 pattail(ender, patnode(P_GFLAGS));
912 patglobflags = (int)savglobflags;
913 patadd((char *)&savglobflags, 0, sizeof(long), 0);
914 }
915
916 return starter;
917 }
918
919 /*
920 * Compile something ended by Bar, Outpar, Tilde, or end of string.
921 * Note the BRANCH or EXCLUDE tag must already have been omitted:
922 * this returns the position of the operand of that.
923 */
924
925 /**/
926 static long
patcompbranch(int * flagp,int paren)927 patcompbranch(int *flagp, int paren)
928 {
929 long chain, latest = 0, starter;
930 int flags = 0;
931
932 *flagp = P_PURESTR;
933
934 starter = chain = 0;
935 while (!memchr(zpc_special, *patparse, ZPC_SEG_COUNT) ||
936 (*patparse == zpc_special[ZPC_TILDE] && patparse[1] != '/' &&
937 memchr(zpc_special, patparse[1], ZPC_SEG_COUNT))) {
938 if ((*patparse == zpc_special[ZPC_INPAR] &&
939 patparse[1] == zpc_special[ZPC_HASH]) ||
940 (*patparse == zpc_special[ZPC_KSH_AT] && patparse[1] == Inpar &&
941 patparse[2] == zpc_special[ZPC_HASH])) {
942 /* Globbing flags. */
943 char *pp1 = patparse;
944 int oldglobflags = patglobflags, ignore;
945 long assert;
946 patparse += (*patparse == '@') ? 3 : 2;
947 if (!patgetglobflags(&patparse, &assert, &ignore))
948 return 0;
949 if (!ignore) {
950 if (assert) {
951 /*
952 * Start/end assertion looking like flags, but
953 * actually handled as a normal node
954 */
955 latest = patnode(assert);
956 flags = 0;
957 } else {
958 if (pp1 == patstart) {
959 /* Right at start of pattern, the simplest case.
960 * Put them into the flags and don't emit anything.
961 */
962 ((Patprog)patout)->globflags = patglobflags;
963 continue;
964 } else if (!*patparse) {
965 /* Right at the end, so just leave the flags for
966 * the next Patprog in the chain to pick up.
967 */
968 break;
969 }
970 /*
971 * Otherwise, we have to stick them in as a pattern
972 * matching nothing.
973 */
974 if (oldglobflags != patglobflags) {
975 /* Flags changed */
976 union upat up;
977 latest = patnode(P_GFLAGS);
978 up.l = patglobflags;
979 patadd((char *)&up, 0, sizeof(union upat), 0);
980 } else {
981 /* No effect. */
982 continue;
983 }
984 }
985 } else if (!*patparse)
986 break;
987 else
988 continue;
989 } else if (*patparse == zpc_special[ZPC_HAT]) {
990 /*
991 * ^pat: anything but pat. For proper backtracking,
992 * etc., we turn this into (*~pat), except without the
993 * parentheses.
994 */
995 patparse++;
996 latest = patcompnot(0, &flags);
997 } else
998 latest = patcomppiece(&flags, paren);
999 if (!latest)
1000 return 0;
1001 if (!starter)
1002 starter = latest;
1003 if (!(flags & P_PURESTR))
1004 *flagp &= ~P_PURESTR;
1005 if (!chain)
1006 *flagp |= flags & P_HSTART;
1007 else
1008 pattail(chain, latest);
1009 chain = latest;
1010 }
1011 /* check if there was nothing in the loop, i.e. () */
1012 if (!chain)
1013 starter = patnode(P_NOTHING);
1014
1015 return starter;
1016 }
1017
1018 /* get glob flags, return 1 for success, 0 for failure */
1019
1020 /**/
1021 int
patgetglobflags(char ** strp,long * assertp,int * ignore)1022 patgetglobflags(char **strp, long *assertp, int *ignore)
1023 {
1024 char *nptr, *ptr = *strp;
1025 zlong ret;
1026
1027 *assertp = 0;
1028 *ignore = 1;
1029 /* (#X): assumes we are still positioned on the first X */
1030 for (; *ptr && *ptr != Outpar; ptr++) {
1031 if (*ptr == 'q') {
1032 /* Glob qualifiers, ignored in pattern code */
1033 while (*ptr && *ptr != Outpar)
1034 ptr++;
1035 break;
1036 } else {
1037 *ignore = 0;
1038 switch (*ptr) {
1039 case 'a':
1040 /* Approximate matching, max no. of errors follows */
1041 ret = zstrtol(++ptr, &nptr, 10);
1042 /*
1043 * We can't have more than 254, because we need 255 to
1044 * mark 254 errors in wbranch and exclude sync strings
1045 * (hypothetically --- hope no-one tries it).
1046 */
1047 if (ret < 0 || ret > 254 || ptr == nptr)
1048 return 0;
1049 patglobflags = (patglobflags & ~0xff) | (ret & 0xff);
1050 ptr = nptr-1;
1051 break;
1052
1053 case 'l':
1054 /* Lowercase in pattern matches lower or upper in target */
1055 patglobflags = (patglobflags & ~GF_IGNCASE) | GF_LCMATCHUC;
1056 break;
1057
1058 case 'i':
1059 /* Fully case insensitive */
1060 patglobflags = (patglobflags & ~GF_LCMATCHUC) | GF_IGNCASE;
1061 break;
1062
1063 case 'I':
1064 /* Restore case sensitivity */
1065 patglobflags &= ~(GF_LCMATCHUC|GF_IGNCASE);
1066 break;
1067
1068 case 'b':
1069 /* Make backreferences */
1070 patglobflags |= GF_BACKREF;
1071 break;
1072
1073 case 'B':
1074 /* Don't make backreferences */
1075 patglobflags &= ~GF_BACKREF;
1076 break;
1077
1078 case 'm':
1079 /* Make references to complete match */
1080 patglobflags |= GF_MATCHREF;
1081 break;
1082
1083 case 'M':
1084 /* Don't */
1085 patglobflags &= ~GF_MATCHREF;
1086 break;
1087
1088 case 's':
1089 *assertp = P_ISSTART;
1090 break;
1091
1092 case 'e':
1093 *assertp = P_ISEND;
1094 break;
1095
1096 case 'u':
1097 patglobflags |= GF_MULTIBYTE;
1098 break;
1099
1100 case 'U':
1101 patglobflags &= ~GF_MULTIBYTE;
1102 break;
1103
1104 default:
1105 return 0;
1106 }
1107 }
1108 }
1109 if (*ptr != Outpar)
1110 return 0;
1111 /* Start/end assertions must appear on their own. */
1112 if (*assertp && (*strp)[1] != Outpar)
1113 return 0;
1114 *strp = ptr + 1;
1115 return 1;
1116 }
1117
1118
1119 static const char *colon_stuffs[] = {
1120 "alpha", "alnum", "ascii", "blank", "cntrl", "digit", "graph",
1121 "lower", "print", "punct", "space", "upper", "xdigit", "IDENT",
1122 "IFS", "IFSSPACE", "WORD", "INCOMPLETE", "INVALID", NULL
1123 };
1124
1125 /*
1126 * Handle the guts of a [:stuff:] character class element.
1127 * start is the beginning of "stuff" and len is its length.
1128 * This code is exported for the benefit of completion matching.
1129 */
1130
1131 /**/
1132 mod_export int
range_type(char * start,int len)1133 range_type(char *start, int len)
1134 {
1135 const char **csp;
1136
1137 for (csp = colon_stuffs; *csp; csp++) {
1138 if (strlen(*csp) == len && !strncmp(start, *csp, len))
1139 return (csp - colon_stuffs) + PP_FIRST;
1140 }
1141
1142 return PP_UNKWN;
1143 }
1144
1145
1146 /*
1147 * Convert the contents of a [...] or [^...] expression (just the
1148 * ... part) back into a string. This is used by compfiles -p/-P
1149 * for some reason. The compiled form (a metafied string) is
1150 * passed in rangestr.
1151 *
1152 * If outstr is non-NULL the compiled form is placed there. It
1153 * must be sufficiently long. A terminating NULL is appended.
1154 *
1155 * Return the length required, not including the terminating NULL.
1156 *
1157 * TODO: this is non-multibyte for now. It will need to be defined
1158 * appropriately with MULTIBYTE_SUPPORT when the completion matching
1159 * code catches up.
1160 */
1161
1162 /**/
1163 mod_export int
pattern_range_to_string(char * rangestr,char * outstr)1164 pattern_range_to_string(char *rangestr, char *outstr)
1165 {
1166 int len = 0;
1167
1168 while (*rangestr) {
1169 if (imeta(STOUC(*rangestr))) {
1170 int swtype = STOUC(*rangestr) - STOUC(Meta);
1171
1172 if (swtype == 0) {
1173 /* Ordindary metafied character */
1174 if (outstr)
1175 {
1176 *outstr++ = Meta;
1177 *outstr++ = rangestr[1] ^ 32;
1178 }
1179 len += 2;
1180 rangestr += 2;
1181 } else if (swtype == PP_RANGE) {
1182 /* X-Y range */
1183 int i;
1184
1185 for (i = 0; i < 2; i++) {
1186 if (*rangestr == Meta) {
1187 if (outstr) {
1188 *outstr++ = Meta;
1189 *outstr++ = rangestr[1];
1190 }
1191 len += 2;
1192 rangestr += 2;
1193 } else {
1194 if (outstr)
1195 *outstr++ = *rangestr;
1196 len++;
1197 rangestr++;
1198 }
1199
1200 if (i == 0) {
1201 if (outstr)
1202 *outstr++ = '-';
1203 len++;
1204 }
1205 }
1206 } else if (swtype >= PP_FIRST && swtype <= PP_LAST) {
1207 /* [:stuff:]; we need to output [: and :] */
1208 const char *found = colon_stuffs[swtype - PP_FIRST];
1209 int newlen = strlen(found);
1210 if (outstr) {
1211 strcpy(outstr, "[:");
1212 outstr += 2;
1213 memcpy(outstr, found, newlen);
1214 outstr += newlen;
1215 strcpy(outstr, ":]");
1216 outstr += 2;
1217 }
1218 len += newlen + 4;
1219 rangestr++;
1220 } else {
1221 /* shouldn't happen */
1222 DPUTS(1, "BUG: unknown PP_ code in pattern range");
1223 rangestr++;
1224 }
1225 } else {
1226 /* ordinary character, guaranteed no Meta handling needed */
1227 if (outstr)
1228 *outstr++ = *rangestr;
1229 len++;
1230 rangestr++;
1231 }
1232 }
1233
1234 if (outstr)
1235 *outstr = '\0';
1236 return len;
1237 }
1238
1239 /*
1240 * compile a chunk such as a literal string or a [...] followed
1241 * by a possible hash operator
1242 */
1243
1244 /**/
1245 static long
patcomppiece(int * flagp,int paren)1246 patcomppiece(int *flagp, int paren)
1247 {
1248 long starter = 0, next, op, opnd;
1249 int flags, flags2, kshchar, len, ch, patch, nmeta;
1250 int hash, count;
1251 union upat up;
1252 char *nptr, *str0, *ptr, *patprev;
1253 zrange_t from, to;
1254 char *charstart;
1255
1256 flags = 0;
1257 str0 = patprev = patparse;
1258 for (;;) {
1259 /*
1260 * Check if we have a string. First, we need to make sure
1261 * the string doesn't introduce a ksh-like parenthesized expression.
1262 */
1263 kshchar = '\0';
1264 if (*patparse && patparse[1] == Inpar) {
1265 if (*patparse == zpc_special[ZPC_KSH_PLUS])
1266 kshchar = STOUC('+');
1267 else if (*patparse == zpc_special[ZPC_KSH_BANG])
1268 kshchar = STOUC('!');
1269 else if (*patparse == zpc_special[ZPC_KSH_BANG2])
1270 kshchar = STOUC('!');
1271 else if (*patparse == zpc_special[ZPC_KSH_AT])
1272 kshchar = STOUC('@');
1273 else if (*patparse == zpc_special[ZPC_KSH_STAR])
1274 kshchar = STOUC('*');
1275 else if (*patparse == zpc_special[ZPC_KSH_QUEST])
1276 kshchar = STOUC('?');
1277 }
1278
1279 /*
1280 * If '(' is disabled as a pattern char, allow ')' as
1281 * an ordinary string character if there are no parentheses to
1282 * close. Don't allow it otherwise, it changes the syntax.
1283 */
1284 if (zpc_special[ZPC_INPAR] != Marker || *patparse != Outpar ||
1285 paren) {
1286 /*
1287 * End of string (or no string at all) if ksh-type parentheses,
1288 * or special character, unless that character is a tilde and
1289 * the character following is an end-of-segment character. Thus
1290 * tildes are not special if there is nothing following to
1291 * be excluded.
1292 *
1293 * Don't look for X()-style kshglobs at this point; we've
1294 * checked above for the case with parentheses and we don't
1295 * want to match without parentheses.
1296 */
1297 if (kshchar ||
1298 (memchr(zpc_special, *patparse, ZPC_NO_KSH_GLOB) &&
1299 (*patparse != zpc_special[ZPC_TILDE] ||
1300 patparse[1] == '/' ||
1301 !memchr(zpc_special, patparse[1], ZPC_SEG_COUNT)))) {
1302 break;
1303 }
1304 }
1305
1306 /* Remember the previous character for backtracking */
1307 patprev = patparse;
1308 METACHARINC(patparse);
1309 }
1310
1311 if (patparse > str0) {
1312 long slen = patparse - str0;
1313 int morelen;
1314
1315 /* Ordinary string: cancel kshchar lookahead */
1316 kshchar = '\0';
1317 /*
1318 * Assume it matches a simple string until we find otherwise.
1319 */
1320 flags |= P_PURESTR;
1321 DPUTS(patparse == str0, "BUG: matched nothing in patcomppiece.");
1322 /* more than one character matched? */
1323 morelen = (patprev > str0);
1324 /*
1325 * If we have more than one character, a following hash
1326 * or (#c...) only applies to the last, so backtrack one character.
1327 */
1328 if ((*patparse == zpc_special[ZPC_HASH] ||
1329 (*patparse == zpc_special[ZPC_INPAR] &&
1330 patparse[1] == zpc_special[ZPC_HASH] &&
1331 patparse[2] == 'c') ||
1332 (*patparse == zpc_special[ZPC_KSH_AT] &&
1333 patparse[1] == Inpar &&
1334 patparse[2] == zpc_special[ZPC_HASH] &&
1335 patparse[3] == 'c')) && morelen)
1336 patparse = patprev;
1337 /*
1338 * If len is 1, we can't have an active # following, so doesn't
1339 * matter that we don't make X in `XX#' simple.
1340 */
1341 if (!morelen)
1342 flags |= P_SIMPLE;
1343 starter = patnode(P_EXACTLY);
1344
1345 /* Get length of string without metafication. */
1346 nmeta = 0;
1347 /* inherited from domatch, but why, exactly? */
1348 if (*str0 == Nularg)
1349 str0++;
1350 for (ptr = str0; ptr < patparse; ptr++) {
1351 if (*ptr == Meta) {
1352 nmeta++;
1353 ptr++;
1354 }
1355 }
1356 slen = (patparse - str0) - nmeta;
1357 /* First add length, which is a long */
1358 patadd((char *)&slen, 0, sizeof(long), 0);
1359 /*
1360 * Then the string, not null terminated.
1361 * Unmetafy and untokenize; pass the final length,
1362 * which is what we need to allocate, i.e. not including
1363 * a count for each Meta in the string.
1364 */
1365 patadd(str0, 0, slen, PA_UNMETA);
1366 nptr = P_LS_STR((Upat)patout + starter);
1367 /*
1368 * It's much simpler to turn off pure string mode for
1369 * any case-insensitive or approximate matching; usually,
1370 * that is correct, or they wouldn't have been turned on.
1371 * However, we need to make sure we match a "." or ".."
1372 * in a file name as a pure string. There's a minor bug
1373 * that this will also apply to something like
1374 * ..(#a1).. (i.e. the (#a1) has no effect), but if you're
1375 * going to write funny patterns, you get no sympathy from me.
1376 */
1377 if (patglobflags &
1378 #ifdef __CYGWIN__
1379 /*
1380 * As above: don't use pattern matching for files
1381 * just because of case insensitivity if file system
1382 * is known to be case insensitive.
1383 *
1384 * This is known to be necessary in at least one case:
1385 * if "mount -c /" is in effect, so that drives appear
1386 * directly under / instead of the usual /cygdrive, they
1387 * aren't shown by readdir(). So it's vital we don't use
1388 * globbing to find "/c", since that'll fail.
1389 */
1390 ((patflags & PAT_FILE) ?
1391 (0xFF|GF_LCMATCHUC) :
1392 (0xFF|GF_LCMATCHUC|GF_IGNCASE))
1393 #else
1394 (0xFF|GF_LCMATCHUC|GF_IGNCASE)
1395 #endif
1396 ) {
1397 if (!(patflags & PAT_FILE))
1398 flags &= ~P_PURESTR;
1399 else if (!(nptr[0] == '.' &&
1400 (slen == 1 || (nptr[1] == '.' && slen == 2))))
1401 flags &= ~P_PURESTR;
1402 }
1403 } else {
1404 if (kshchar)
1405 patparse++;
1406
1407 patch = *patparse;
1408 METACHARINC(patparse);
1409 switch(patch) {
1410 case Quest:
1411 DPUTS(zpc_special[ZPC_QUEST] == Marker,
1412 "Treating '?' as pattern character although disabled");
1413 flags |= P_SIMPLE;
1414 starter = patnode(P_ANY);
1415 break;
1416 case Star:
1417 DPUTS(zpc_special[ZPC_STAR] == Marker,
1418 "Treating '*' as pattern character although disabled");
1419 /* kshchar is used as a sign that we can't have #'s. */
1420 kshchar = -1;
1421 starter = patnode(P_STAR);
1422 break;
1423 case Inbrack:
1424 DPUTS(zpc_special[ZPC_INBRACK] == Marker,
1425 "Treating '[' as pattern character although disabled");
1426 flags |= P_SIMPLE;
1427 if (*patparse == Hat || *patparse == Bang) {
1428 patparse++;
1429 starter = patnode(P_ANYBUT);
1430 } else
1431 starter = patnode(P_ANYOF);
1432 /*
1433 * []...] means match a "]" or other included characters.
1434 * However, to be a bit helpful and for compatibility
1435 * with other shells, don't take in that sense if
1436 * there's no further "]". That's still imperfect,
1437 * but it's all we can do --- we're required to
1438 * treat [$var]*[$var]with empty var as [ ... ]
1439 * containing "]*[".
1440 */
1441 if (*patparse == Outbrack && strchr(patparse+1, Outbrack)) {
1442 patparse++;
1443 patadd(NULL, ']', 1, PA_NOALIGN);
1444 }
1445 while (*patparse && *patparse != Outbrack) {
1446 /* Meta is not a token */
1447 if (*patparse == Inbrack && patparse[1] == ':' &&
1448 (nptr = strchr(patparse+2, ':')) &&
1449 nptr[1] == Outbrack) {
1450 /* Posix range. */
1451 patparse += 2;
1452 len = nptr - patparse;
1453 ch = range_type(patparse, len);
1454 patparse = nptr + 2;
1455 if (ch != PP_UNKWN)
1456 patadd(NULL, STOUC(Meta) + ch, 1, PA_NOALIGN);
1457 continue;
1458 }
1459 charstart = patparse;
1460 METACHARINC(patparse);
1461
1462 if (*patparse == Dash && patparse[1] &&
1463 patparse[1] != Outbrack) {
1464 patadd(NULL, STOUC(Meta)+PP_RANGE, 1, PA_NOALIGN);
1465 if (itok(*charstart)) {
1466 patadd(0, STOUC(ztokens[*charstart - Pound]), 1,
1467 PA_NOALIGN);
1468 } else {
1469 patadd(charstart, 0, patparse-charstart, PA_NOALIGN);
1470 }
1471 charstart = ++patparse; /* skip Dash token */
1472 METACHARINC(patparse);
1473 }
1474 if (itok(*charstart)) {
1475 patadd(0, STOUC(ztokens[*charstart - Pound]), 1,
1476 PA_NOALIGN);
1477 } else {
1478 patadd(charstart, 0, patparse-charstart, PA_NOALIGN);
1479 }
1480 }
1481 if (*patparse != Outbrack)
1482 return 0;
1483 patparse++;
1484 /* terminate null string and fix alignment */
1485 patadd(NULL, 0, 1, 0);
1486 break;
1487 case Inpar:
1488 DPUTS(!kshchar && zpc_special[ZPC_INPAR] == Marker,
1489 "Treating '(' as pattern character although disabled");
1490 DPUTS(isset(SHGLOB) && !kshchar,
1491 "Treating bare '(' as pattern character with SHGLOB");
1492 if (kshchar == '!') {
1493 /* This is nasty, we should really either handle all
1494 * kshglobbing below or here. But most of the
1495 * others look like non-ksh patterns, while this one
1496 * doesn't, so we handle it here and leave the rest.
1497 * We treat it like an extendedglob ^, except that
1498 * it goes into parentheses.
1499 *
1500 * If we did do kshglob here, we could support
1501 * the old behaviour that things like !(foo)##
1502 * work, but it makes the code more complicated at
1503 * the expense of allowing the user to do things
1504 * they shouldn't.
1505 */
1506 if (!(starter = patcompnot(1, &flags2)))
1507 return 0;
1508 } else if (!(starter = patcompswitch(1, &flags2)))
1509 return 0;
1510 flags |= flags2 & P_HSTART;
1511 break;
1512 case Inang:
1513 /* Numeric glob */
1514 DPUTS(zpc_special[ZPC_INANG] == Marker,
1515 "Treating '<' as pattern character although disabled");
1516 DPUTS(isset(SHGLOB), "Treating <..> as numeric range with SHGLOB");
1517 len = 0; /* beginning present 1, end present 2 */
1518 if (idigit(*patparse)) {
1519 from = (zrange_t) zstrtol((char *)patparse,
1520 (char **)&nptr, 10);
1521 patparse = nptr;
1522 len |= 1;
1523 }
1524 DPUTS(!IS_DASH(*patparse), "BUG: - missing from numeric glob");
1525 patparse++;
1526 if (idigit(*patparse)) {
1527 to = (zrange_t) zstrtol((char *)patparse,
1528 (char **)&nptr, 10);
1529 patparse = nptr;
1530 len |= 2;
1531 }
1532 if (*patparse != Outang)
1533 return 0;
1534 patparse++;
1535 switch(len) {
1536 case 3:
1537 starter = patnode(P_NUMRNG);
1538 patadd((char *)&from, 0, sizeof(from), 0);
1539 patadd((char *)&to, 0, sizeof(to), 0);
1540 break;
1541 case 2:
1542 starter = patnode(P_NUMTO);
1543 patadd((char *)&to, 0, sizeof(to), 0);
1544 break;
1545 case 1:
1546 starter = patnode(P_NUMFROM);
1547 patadd((char *)&from, 0, sizeof(from), 0);
1548 break;
1549 case 0:
1550 starter = patnode(P_NUMANY);
1551 break;
1552 }
1553 /* This can't be simple, because it isn't.
1554 * Mention in manual that matching digits with [...]
1555 * is more efficient.
1556 */
1557 break;
1558 case Pound:
1559 DPUTS(zpc_special[ZPC_HASH] == Marker,
1560 "Treating '#' as pattern character although disabled");
1561 DPUTS(!isset(EXTENDEDGLOB), "BUG: # not treated as string");
1562 /*
1563 * A hash here is an error; it should follow something
1564 * repeatable.
1565 */
1566 return 0;
1567 break;
1568 case Bnullkeep:
1569 /*
1570 * Marker for restoring a backslash in output:
1571 * does not match a character.
1572 */
1573 next = patcomppiece(flagp, paren);
1574 /*
1575 * Can't match a pure string since we need to do this
1576 * as multiple chunks.
1577 */
1578 *flagp &= ~P_PURESTR;
1579 return next;
1580 break;
1581 #ifdef DEBUG
1582 default:
1583 dputs("BUG: character not handled in patcomppiece");
1584 return 0;
1585 break;
1586 #endif
1587 }
1588 }
1589
1590 count = 0;
1591 if (!(hash = (*patparse == zpc_special[ZPC_HASH])) &&
1592 !(count = ((*patparse == zpc_special[ZPC_INPAR] &&
1593 patparse[1] == zpc_special[ZPC_HASH] &&
1594 patparse[2] == 'c') ||
1595 (*patparse == zpc_special[ZPC_KSH_AT] &&
1596 patparse[1] == Inpar &&
1597 patparse[2] == zpc_special[ZPC_HASH] &&
1598 patparse[3] == 'c'))) &&
1599 (kshchar <= 0 || kshchar == '@' || kshchar == '!')) {
1600 *flagp = flags;
1601 return starter;
1602 }
1603
1604 /* too much at once doesn't currently work */
1605 if (kshchar && (hash || count))
1606 return 0;
1607
1608 if (kshchar == '*') {
1609 op = P_ONEHASH;
1610 *flagp = P_HSTART;
1611 } else if (kshchar == '+') {
1612 op = P_TWOHASH;
1613 *flagp = P_HSTART;
1614 } else if (kshchar == '?') {
1615 op = 0;
1616 *flagp = 0;
1617 } else if (count) {
1618 op = P_COUNT;
1619 patparse += 3;
1620 *flagp = P_HSTART;
1621 } else if (*++patparse == zpc_special[ZPC_HASH]) {
1622 op = P_TWOHASH;
1623 patparse++;
1624 *flagp = P_HSTART;
1625 } else {
1626 op = P_ONEHASH;
1627 *flagp = P_HSTART;
1628 }
1629
1630 /*
1631 * Note optimizations with pointers into P_NOTHING branches: some
1632 * should logically point to next node after current piece.
1633 *
1634 * Backtracking is also encoded in a slightly obscure way: the
1635 * code emitted ensures we test the non-empty branch of complex
1636 * patterns before the empty branch on each repetition. Hence
1637 * each time we fail on a non-empty branch, we try the empty branch,
1638 * which is equivalent to backtracking.
1639 */
1640 if (op == P_COUNT) {
1641 /* (#cN,M) */
1642 union upat countargs[P_CT_OPERAND];
1643 char *opp = patparse;
1644
1645 countargs[0].l = P_COUNT;
1646 countargs[P_CT_CURRENT].l = 0L;
1647 countargs[P_CT_MIN].l = (long)zstrtol(patparse, &patparse, 10);
1648 if (patparse == opp) {
1649 /* missing number treated as zero */
1650 countargs[P_CT_MIN].l = 0L;
1651 }
1652 if (*patparse != ',' && *patparse != Comma) {
1653 /* either max = min or error */
1654 if (*patparse != Outpar)
1655 return 0;
1656 countargs[P_CT_MAX].l = countargs[P_CT_MIN].l;
1657 } else {
1658 opp = ++patparse;
1659 countargs[P_CT_MAX].l = (long)zstrtol(patparse, &patparse, 10);
1660 if (*patparse != Outpar)
1661 return 0;
1662 if (patparse == opp) {
1663 /* missing number treated as infinity: record as -1 */
1664 countargs[P_CT_MAX].l = -1L;
1665 }
1666 }
1667 patparse++;
1668 countargs[P_CT_PTR].p = NULL;
1669 /* Mark this chain as a min/max count... */
1670 patinsert(P_COUNTSTART, starter, (char *)countargs, sizeof(countargs));
1671 /*
1672 * The next of the operand is a loop back to the P_COUNT. This is
1673 * how we get recursion for the count. We don't loop back to
1674 * the P_COUNTSTART; that's used for initialising the count
1675 * and saving and restoring the count for any enclosing use
1676 * of the match.
1677 */
1678 opnd = P_OPERAND(starter) + P_CT_OPERAND;
1679 pattail(opnd, patnode(P_BACK));
1680 pattail(opnd, P_OPERAND(starter));
1681 /*
1682 * The next of the counter operators is what follows the
1683 * closure.
1684 * This handles matching of the tail.
1685 */
1686 next = patnode(P_NOTHING);
1687 pattail(starter, next);
1688 pattail(P_OPERAND(starter), next);
1689 } else if ((flags & P_SIMPLE) && (op == P_ONEHASH || op == P_TWOHASH) &&
1690 P_OP((Upat)patout+starter) == P_ANY) {
1691 /* Optimize ?# to *. Silly thing to do, since who would use
1692 * use ?# ? But it makes the later code shorter.
1693 */
1694 Upat uptr = (Upat)patout + starter;
1695 if (op == P_TWOHASH) {
1696 /* ?## becomes ?* */
1697 uptr->l = (uptr->l & ~0xff) | P_ANY;
1698 pattail(starter, patnode(P_STAR));
1699 } else {
1700 uptr->l = (uptr->l & ~0xff) | P_STAR;
1701 }
1702 } else if ((flags & P_SIMPLE) && op && !(patglobflags & 0xff)) {
1703 /* Simplify, but not if we need to look for approximations. */
1704 patinsert(op, starter, NULL, 0);
1705 } else if (op == P_ONEHASH) {
1706 /* Emit x# as (x&|), where & means "self". */
1707 up.p = NULL;
1708 patinsert(P_WBRANCH, starter, (char *)&up, sizeof(up));
1709 /* Either x */
1710 patoptail(starter, patnode(P_BACK)); /* and loop */
1711 patoptail(starter, starter); /* back */
1712 pattail(starter, patnode(P_BRANCH)); /* or */
1713 pattail(starter, patnode(P_NOTHING)); /* null. */
1714 } else if (op == P_TWOHASH) {
1715 /* Emit x## as x(&|) where & means "self". */
1716 next = patnode(P_WBRANCH); /* Either */
1717 up.p = NULL;
1718 patadd((char *)&up, 0, sizeof(up), 0);
1719 pattail(starter, next);
1720 pattail(patnode(P_BACK), starter); /* loop back */
1721 pattail(next, patnode(P_BRANCH)); /* or */
1722 pattail(starter, patnode(P_NOTHING)); /* null. */
1723 } else if (kshchar == '?') {
1724 /* Emit ?(x) as (x|) */
1725 patinsert(P_BRANCH, starter, NULL, 0); /* Either x */
1726 pattail(starter, patnode(P_BRANCH)); /* or */
1727 next = patnode(P_NOTHING); /* null */
1728 pattail(starter, next);
1729 patoptail(starter, next);
1730 }
1731 if (*patparse == zpc_special[ZPC_HASH])
1732 return 0;
1733
1734 return starter;
1735 }
1736
1737 /*
1738 * Turn a ^foo (paren = 0) or !(foo) (paren = 1) into *~foo with
1739 * parentheses if necessary. As you see, that's really quite easy.
1740 */
1741
1742 /**/
1743 static long
patcompnot(int paren,int * flagsp)1744 patcompnot(int paren, int *flagsp)
1745 {
1746 union upat up;
1747 long excsync, br, excl, n, starter;
1748 int dummy;
1749
1750 /* Here, we're matching a star at the start. */
1751 *flagsp = P_HSTART;
1752
1753 starter = patnode(P_BRANCH);
1754 br = patnode(P_STAR);
1755 excsync = patnode(P_EXCSYNC);
1756 pattail(br, excsync);
1757 pattail(starter, excl = patnode(P_EXCLUDE));
1758 up.p = NULL;
1759 patadd((char *)&up, 0, sizeof(up), 0);
1760 if (!(br = (paren ? patcompswitch(1, &dummy) : patcompbranch(&dummy, 0))))
1761 return 0;
1762 pattail(br, patnode(P_EXCEND));
1763 n = patnode(P_NOTHING); /* just so much easier */
1764 pattail(excsync, n);
1765 pattail(excl, n);
1766
1767 return starter;
1768 }
1769
1770 /* Emit a node */
1771
1772 /**/
1773 static long
patnode(long op)1774 patnode(long op)
1775 {
1776 long starter = (Upat)patcode - (Upat)patout;
1777 union upat up;
1778
1779 up.l = op;
1780 patadd((char *)&up, 0, sizeof(union upat), 0);
1781 return starter;
1782 }
1783
1784 /*
1785 * insert an operator in front of an already emitted operand:
1786 * we relocate the operand. there had better be nothing else after.
1787 */
1788
1789 /**/
1790 static void
patinsert(long op,int opnd,char * xtra,int sz)1791 patinsert(long op, int opnd, char *xtra, int sz)
1792 {
1793 char *src, *dst, *opdst;
1794 union upat buf, *lptr;
1795
1796 buf.l = 0;
1797 patadd((char *)&buf, 0, sizeof(buf), 0);
1798 if (sz)
1799 patadd(xtra, 0, sz, 0);
1800 src = patcode - sizeof(union upat) - sz;
1801 dst = patcode;
1802 opdst = patout + opnd * sizeof(union upat);
1803 while (src > opdst)
1804 *--dst = *--src;
1805
1806 /* A cast can't be an lvalue */
1807 lptr = (Upat)opdst;
1808 lptr->l = op;
1809 opdst += sizeof(union upat);
1810 while (sz--)
1811 *opdst++ = *xtra++;
1812 }
1813
1814 /* set the 'next' pointer at the end of a node chain */
1815
1816 /**/
1817 static void
pattail(long p,long val)1818 pattail(long p, long val)
1819 {
1820 Upat scan, temp;
1821 long offset;
1822
1823 scan = (Upat)patout + p;
1824 for (;;) {
1825 if (!(temp = PATNEXT(scan)))
1826 break;
1827 scan = temp;
1828 }
1829
1830 offset = (P_OP(scan) == P_BACK)
1831 ? (scan - (Upat)patout) - val : val - (scan - (Upat)patout);
1832
1833 scan->l |= offset << 8;
1834 }
1835
1836 /* do pattail, but on operand of first argument; nop if operandless */
1837
1838 /**/
1839 static void
patoptail(long p,long val)1840 patoptail(long p, long val)
1841 {
1842 Upat ptr = (Upat)patout + p;
1843 int op = P_OP(ptr);
1844 if (!p || !P_ISBRANCH(ptr))
1845 return;
1846 if (op == P_BRANCH)
1847 pattail(P_OPERAND(p), val);
1848 else
1849 pattail(P_OPERAND(p) + 1, val);
1850 }
1851
1852
1853 /*
1854 * Run a pattern.
1855 */
1856 struct rpat {
1857 char *patinstart; /* Start of input string */
1858 char *patinend; /* End of input string */
1859 char *patinput; /* String input pointer */
1860 char *patinpath; /* Full path for use with ~ exclusions */
1861 int patinlen; /* Length of last successful match.
1862 * Includes count of Meta characters.
1863 */
1864
1865 char *patbeginp[NSUBEXP]; /* Pointer to backref beginnings */
1866 char *patendp[NSUBEXP]; /* Pointer to backref ends */
1867 int parsfound; /* parentheses (with backrefs) found */
1868
1869 int globdots; /* Glob initial dots? */
1870 };
1871
1872 static struct rpat pattrystate;
1873
1874 #define patinstart (pattrystate.patinstart)
1875 #define patinend (pattrystate.patinend)
1876 #define patinput (pattrystate.patinput)
1877 #define patinpath (pattrystate.patinpath)
1878 #define patinlen (pattrystate.patinlen)
1879 #define patbeginp (pattrystate.patbeginp)
1880 #define patendp (pattrystate.patendp)
1881 #define parsfound (pattrystate.parsfound)
1882 #define globdots (pattrystate.globdots)
1883
1884
1885 /*
1886 * Character functions operating on unmetafied strings.
1887 */
1888 #ifdef MULTIBYTE_SUPPORT
1889
1890 /* Get a character from the start point in a string */
1891 #define CHARREF(x, y) charref((x), (y), (int *)NULL)
1892 static wchar_t
charref(char * x,char * y,int * zmb_ind)1893 charref(char *x, char *y, int *zmb_ind)
1894 {
1895 wchar_t wc;
1896 size_t ret;
1897
1898 if (!(patglobflags & GF_MULTIBYTE) || !(STOUC(*x) & 0x80))
1899 return (wchar_t) STOUC(*x);
1900
1901 ret = mbrtowc(&wc, x, y-x, &shiftstate);
1902
1903 if (ret == MB_INVALID || ret == MB_INCOMPLETE) {
1904 /* Error. */
1905 /* Reset the shift state for next time. */
1906 memset(&shiftstate, 0, sizeof(shiftstate));
1907 if (zmb_ind)
1908 *zmb_ind = (ret == MB_INVALID) ? ZMB_INVALID : ZMB_INCOMPLETE;
1909 return WCHAR_INVALID(*x);
1910 }
1911
1912 if (zmb_ind)
1913 *zmb_ind = ZMB_VALID;
1914 return wc;
1915 }
1916
1917 /* Get a pointer to the next character */
1918 #define CHARNEXT(x, y) charnext((x), (y))
1919 static char *
charnext(char * x,char * y)1920 charnext(char *x, char *y)
1921 {
1922 wchar_t wc;
1923 size_t ret;
1924
1925 if (!(patglobflags & GF_MULTIBYTE) || !(STOUC(*x) & 0x80))
1926 return x + 1;
1927
1928 ret = mbrtowc(&wc, x, y-x, &shiftstate);
1929
1930 if (ret == MB_INVALID || ret == MB_INCOMPLETE) {
1931 /* Error. Treat as single byte. */
1932 /* Reset the shift state for next time. */
1933 memset(&shiftstate, 0, sizeof(shiftstate));
1934 return x + 1;
1935 }
1936
1937 /* Nulls here are normal characters */
1938 return x + (ret ? ret : 1);
1939 }
1940
1941 /* Increment a pointer past the current character. */
1942 #define CHARINC(x, y) ((x) = charnext((x), (y)))
1943
1944
1945 /* Get a character and increment */
1946 #define CHARREFINC(x, y, z) charrefinc(&(x), (y), (z))
1947 static wchar_t
charrefinc(char ** x,char * y,int * z)1948 charrefinc(char **x, char *y, int *z)
1949 {
1950 wchar_t wc;
1951 size_t ret;
1952
1953 if (!(patglobflags & GF_MULTIBYTE) || !(STOUC(**x) & 0x80))
1954 return (wchar_t) STOUC(*(*x)++);
1955
1956 ret = mbrtowc(&wc, *x, y-*x, &shiftstate);
1957
1958 if (ret == MB_INVALID || ret == MB_INCOMPLETE) {
1959 /* Error. Treat as single byte, but flag. */
1960 *z = 1;
1961 /* Reset the shift state for next time. */
1962 memset(&shiftstate, 0, sizeof(shiftstate));
1963 return WCHAR_INVALID(*(*x)++);
1964 }
1965
1966 /* Nulls here are normal characters */
1967 *x += ret ? ret : 1;
1968
1969 return wc;
1970 }
1971
1972
1973 /*
1974 * Counter the number of characters between two pointers, smaller first
1975 *
1976 * This is used when setting values in parameters, so we obey
1977 * the MULTIBYTE option (even if it's been overridden locally).
1978 */
1979 #define CHARSUB(x,y) charsub(x, y)
1980 static ptrdiff_t
charsub(char * x,char * y)1981 charsub(char *x, char *y)
1982 {
1983 ptrdiff_t res = 0;
1984 size_t ret;
1985 wchar_t wc;
1986
1987 if (!isset(MULTIBYTE))
1988 return y - x;
1989
1990 while (x < y) {
1991 ret = mbrtowc(&wc, x, y-x, &shiftstate);
1992
1993 if (ret == MB_INVALID || ret == MB_INCOMPLETE) {
1994 /* Error. Treat remainder as single characters */
1995 return res + (y - x);
1996 }
1997
1998 /* Treat nulls as normal characters */
1999 if (!ret)
2000 ret = 1;
2001 res++;
2002 x += ret;
2003 }
2004
2005 return res;
2006 }
2007
2008 #else /* no MULTIBYTE_SUPPORT */
2009
2010 /* Get a character from the start point in a string */
2011 #define CHARREF(x, y) (STOUC(*(x)))
2012 /* Get a pointer to the next character */
2013 #define CHARNEXT(x, y) ((x)+1)
2014 /* Increment a pointer past the current character. */
2015 #define CHARINC(x, y) ((x)++)
2016 /* Get a character and increment */
2017 #define CHARREFINC(x, y, z) (STOUC(*(x)++))
2018 /* Counter the number of characters between two pointers, smaller first */
2019 #define CHARSUB(x,y) ((y) - (x))
2020
2021 #endif /* MULTIBYTE_SUPPORT */
2022
2023 /*
2024 * The following need to be accessed in the globbing scanner for
2025 * a multi-component file path. See horror story in glob.c.
2026 */
2027 /**/
2028 int errsfound; /* Total error count so far */
2029
2030 /**/
2031 int forceerrs; /* Forced maximum error count */
2032
2033 /*
2034 * exactpos is used to remember how far down an exact string we have
2035 * matched, if we are doing approximation and can therefore redo from
2036 * the same point; we never need to otherwise.
2037 *
2038 * exactend is a pointer to the end of the string, which isn't
2039 * null-terminated.
2040 */
2041 static char *exactpos, *exactend;
2042
2043 /**/
2044 void
pattrystart(void)2045 pattrystart(void)
2046 {
2047 forceerrs = -1;
2048 errsfound = 0;
2049 }
2050
2051 /*
2052 * Fix up string length stuff.
2053 *
2054 * If we call patallocstr() with "force" to set things up early, it's
2055 * done there, else it's done in pattryrefs(). The reason for the
2056 * difference is in the latter case we may not be relying on
2057 * patallocstr() having an effect.
2058 */
2059
2060 /**/
2061 static void
patmungestring(char ** string,int * stringlen,int * unmetalenin)2062 patmungestring(char **string, int *stringlen, int *unmetalenin)
2063 {
2064 /*
2065 * Special signalling of empty tokenised string.
2066 */
2067 if (*stringlen > 0 && **string == Nularg) {
2068 (*string)++;
2069 /*
2070 * If we don't have an unmetafied length
2071 * and need it (we may not) we'll get it later.
2072 */
2073 if (*unmetalenin > 0)
2074 (*unmetalenin)--;
2075 if (*stringlen > 0)
2076 (*stringlen)--;
2077 }
2078
2079 /* Ensure we have a metafied length */
2080 if (*stringlen < 0)
2081 *stringlen = strlen(*string);
2082 }
2083
2084 /*
2085 * Allocate memory for pattern match. Note this is specific to use
2086 * of pattern *and* trial string.
2087 *
2088 * Unmetafy a trial string for use in pattern matching, if needed.
2089 *
2090 * If it is needed, returns a heap allocated string; if not needed,
2091 * returns NULL.
2092 *
2093 * prog is the pattern to be executed.
2094 * string is the metafied trial string.
2095 * stringlen is it's length; it will be calculated if it's negative
2096 * (this is a simple strlen()).
2097 * unmetalen is the unmetafied length of the string, may be -1.
2098 * force is 1 if we always unmetafy: this is useful if we are going
2099 * to try again with different versions of the string. If this is
2100 * called from pattryrefs() we don't force unmetafication as it won't
2101 * be optimal. This option should be used if the resulting
2102 * patstralloc is going to be passed to pattrylen() / pattryrefs().
2103 * In patstralloc (supplied by caller, must last until last pattry is done)
2104 * unmetalen is the unmetafied length of the string; it will be
2105 * calculated if the input value is negative.
2106 * unmetalenp is the umetafied length of a path segment preceding
2107 * the trial string needed for file mananagement; it is calculated as
2108 * needed so does not need to be initialised.
2109 * alloced is the memory allocated on the heap --- same as return value from
2110 * function.
2111 */
2112 /**/
2113 mod_export
patallocstr(Patprog prog,char * string,int stringlen,int unmetalen,int force,Patstralloc patstralloc)2114 char *patallocstr(Patprog prog, char *string, int stringlen, int unmetalen,
2115 int force, Patstralloc patstralloc)
2116 {
2117 int needfullpath;
2118
2119 if (force)
2120 patmungestring(&string, &stringlen, &unmetalen);
2121
2122 /*
2123 * For a top-level ~-exclusion, we will need the full
2124 * path to exclude, so copy the path so far and append the
2125 * current test string.
2126 */
2127 needfullpath = (prog->flags & PAT_HAS_EXCLUDP) && pathpos;
2128
2129 /* Get the length of the full string when unmetafied. */
2130 if (unmetalen < 0)
2131 patstralloc->unmetalen = ztrsub(string + stringlen, string);
2132 else
2133 patstralloc->unmetalen = unmetalen;
2134 if (needfullpath) {
2135 patstralloc->unmetalenp = ztrsub(pathbuf + pathpos, pathbuf);
2136 if (!patstralloc->unmetalenp)
2137 needfullpath = 0;
2138 } else
2139 patstralloc->unmetalenp = 0;
2140 /* Initialise cache area */
2141 patstralloc->progstrunmeta = NULL;
2142 patstralloc->progstrunmetalen = 0;
2143
2144 DPUTS(needfullpath && (prog->flags & (PAT_PURES|PAT_ANY)),
2145 "rum sort of file exclusion");
2146 /*
2147 * Partly for efficiency, and partly for the convenience of
2148 * globbing, we don't unmetafy pure string patterns, and
2149 * there's no reason to if the pattern is just a *.
2150 */
2151 if (force ||
2152 (!(prog->flags & (PAT_PURES|PAT_ANY))
2153 && (needfullpath || patstralloc->unmetalen != stringlen))) {
2154 /*
2155 * We need to copy if we need to prepend the path so far
2156 * (in which case we copy both chunks), or if we have
2157 * Meta characters.
2158 */
2159 char *dst, *ptr;
2160 int i, icopy, ncopy;
2161
2162 dst = patstralloc->alloced =
2163 zhalloc(patstralloc->unmetalen + patstralloc->unmetalenp);
2164
2165 if (needfullpath) {
2166 /* loop twice, copy path buffer first time */
2167 ptr = pathbuf;
2168 ncopy = patstralloc->unmetalenp;
2169 } else {
2170 /* just loop once, copy string with unmetafication */
2171 ptr = string;
2172 ncopy = patstralloc->unmetalen;
2173 }
2174 for (icopy = 0; icopy < 2; icopy++) {
2175 for (i = 0; i < ncopy; i++) {
2176 if (*ptr == Meta) {
2177 ptr++;
2178 *dst++ = *ptr++ ^ 32;
2179 } else {
2180 *dst++ = *ptr++;
2181 }
2182 }
2183 if (!needfullpath)
2184 break;
2185 /* next time append test string to path so far */
2186 ptr = string;
2187 ncopy = patstralloc->unmetalen;
2188 }
2189 }
2190 else
2191 {
2192 patstralloc->alloced = NULL;
2193 }
2194
2195 return patstralloc->alloced;
2196 }
2197
2198
2199 /*
2200 * Test prog against null-terminated, metafied string.
2201 */
2202
2203 /**/
2204 mod_export int
pattry(Patprog prog,char * string)2205 pattry(Patprog prog, char *string)
2206 {
2207 return pattryrefs(prog, string, -1, -1, NULL, 0, NULL, NULL, NULL);
2208 }
2209
2210 /*
2211 * Test prog against string of given length, no null termination
2212 * but still metafied at this point. offset gives an offset
2213 * to include in reported match indices
2214 */
2215
2216 /**/
2217 mod_export int
pattrylen(Patprog prog,char * string,int len,int unmetalen,Patstralloc patstralloc,int offset)2218 pattrylen(Patprog prog, char *string, int len, int unmetalen,
2219 Patstralloc patstralloc, int offset)
2220 {
2221 return pattryrefs(prog, string, len, unmetalen, patstralloc, offset,
2222 NULL, NULL, NULL);
2223 }
2224
2225 /*
2226 * Test prog against string with given lengths. The input
2227 * string is metafied; stringlen is the raw string length, and
2228 * unmetalen the number of characters in the original string (some
2229 * of which may now be metafied). Either value may be -1
2230 * to indicate a null-terminated string which will be counted. Note
2231 * there may be a severe penalty for this if a lot of matching is done
2232 * on one string.
2233 *
2234 * If patstralloc is not NULL it is used to optimise unmetafication
2235 * of a trial string that may be passed (or any substring may be passed) to
2236 * pattryrefs multiple times or the same pattern (N.B. so patstralloc
2237 * depends on both prog *and* the trial string). This should only be
2238 * done if there is no path prefix (pathpos == 0) as otherwise the path
2239 * buffer and unmetafied string may not match. To do this,
2240 * patallocstr() is called (use force = 1 to ensure it is always
2241 * unmetafied); paststralloc points to existing storage. Memory is
2242 * on the heap.
2243 *
2244 * patstralloc->alloced and patstralloc->unmetalen contain the
2245 * unmetafied string and its length. In that case, the rules for the
2246 * earlier arguments change:
2247 * - string is an unmetafied string
2248 * - stringlen is its unmetafied (i.e. actual) length
2249 * - unmetalenin is not used.
2250 * string and stringlen may refer to arbitrary substrings of
2251 * patstralloc->alloced without any internal modification to patstralloc.
2252 *
2253 * patoffset is the position in the original string (not seen by
2254 * the pattern module) at which we are trying to match.
2255 * This is added in to the positions recorded in patbeginp and patendp
2256 * when we are looking for substrings. Currently this only happens
2257 * in the parameter substitution code. It refers to a real character
2258 * offset, i.e. is already in the form ready for presentation to the
2259 * general public --- this is necessary as we don't have the
2260 * information to convert it down here.
2261 *
2262 * Note this is a character offset, i.e. a single possibly metafied and
2263 * possibly multibyte character counts as 1.
2264 *
2265 * The last three arguments are used to report the positions for the
2266 * backreferences. On entry, *nump should contain the maximum number
2267 * of positions to report. In this case the match, mbegin, mend
2268 * arrays are not altered.
2269 *
2270 * If nump is NULL but endp is not NULL, then *endp is set to the
2271 * end position of the match, taking into account patinstart.
2272 */
2273
2274 /**/
2275 mod_export int
pattryrefs(Patprog prog,char * string,int stringlen,int unmetalenin,Patstralloc patstralloc,int patoffset,int * nump,int * begp,int * endp)2276 pattryrefs(Patprog prog, char *string, int stringlen, int unmetalenin,
2277 Patstralloc patstralloc, int patoffset,
2278 int *nump, int *begp, int *endp)
2279 {
2280 int i, maxnpos = 0, ret;
2281 int origlen;
2282 char **sp, **ep, *ptr;
2283 char *progstr = (char *)prog + prog->startoff;
2284 struct patstralloc patstralloc_struct;
2285
2286 if (nump) {
2287 maxnpos = *nump;
2288 *nump = 0;
2289 }
2290
2291 if (!patstralloc)
2292 patmungestring(&string, &stringlen, &unmetalenin);
2293 origlen = stringlen;
2294
2295 if (patstralloc) {
2296 DPUTS(!patstralloc->alloced,
2297 "External unmetafy didn't actually unmetafy.");
2298 DPUTS(patstralloc->unmetalenp,
2299 "Ooh-err: pathpos with external unmetafy. I have bad vibes.");
2300 patinpath = NULL;
2301 patinstart = string;
2302 /* stringlen is unmetafied length; unmetalenin is ignored */
2303 } else {
2304 patstralloc = &patstralloc_struct;
2305 if (patallocstr(prog, string, stringlen, unmetalenin, 0, patstralloc)) {
2306 patinstart = patstralloc->alloced + patstralloc->unmetalenp;
2307 stringlen = patstralloc->unmetalen;
2308 } else
2309 patinstart = string;
2310 if (patstralloc->unmetalenp)
2311 patinpath = patstralloc->alloced;
2312 else
2313 patinpath = NULL;
2314 }
2315
2316 patflags = prog->flags;
2317 patinend = patinstart + stringlen;
2318 /*
2319 * From now on we do not require NULL termination of
2320 * the test string. There should also be no more references
2321 * to the variable string.
2322 */
2323
2324 if (prog->flags & (PAT_PURES|PAT_ANY)) {
2325 /*
2326 * Either we are testing against a pure string,
2327 * or we can match anything at all.
2328 */
2329 int pstrlen;
2330 char *pstr;
2331 if (patstralloc->alloced)
2332 {
2333 /*
2334 * Unmetafied; we need pattern string that's also unmetafied.
2335 * We'll cache it in the patstralloc structure.
2336 * Note it's on the heap.
2337 */
2338 if (!patstralloc->progstrunmeta)
2339 {
2340 patstralloc->progstrunmeta =
2341 dupstrpfx(progstr, (int)prog->patmlen);
2342 unmetafy(patstralloc->progstrunmeta,
2343 &patstralloc->progstrunmetalen);
2344 }
2345 pstr = patstralloc->progstrunmeta;
2346 pstrlen = patstralloc->progstrunmetalen;
2347 }
2348 else
2349 {
2350 /* Metafied. */
2351 pstr = progstr;
2352 pstrlen = (int)prog->patmlen;
2353 }
2354 if (prog->flags & PAT_ANY) {
2355 /*
2356 * Optimisation for a single "*": always matches
2357 * (except for no_glob_dots, see below).
2358 */
2359 ret = 1;
2360 } else {
2361 /*
2362 * Testing a pure string. See if initial
2363 * components match.
2364 */
2365 int lendiff = stringlen - pstrlen;
2366 if (lendiff < 0) {
2367 /* No, the pattern string is too long. */
2368 ret = 0;
2369 } else if (!memcmp(pstr, patinstart, pstrlen)) {
2370 /*
2371 * Initial component matches. Matches either
2372 * if lengths are the same or we are not anchored
2373 * to the end of the string.
2374 */
2375 ret = !lendiff || (prog->flags & PAT_NOANCH);
2376 } else {
2377 /* No match. */
2378 ret = 0;
2379 }
2380 }
2381 if (ret) {
2382 /*
2383 * For files, we won't match initial "."s unless
2384 * glob_dots is set.
2385 */
2386 if ((prog->flags & PAT_NOGLD) && *patinstart == '.') {
2387 ret = 0;
2388 } else {
2389 /*
2390 * Remember the length in case used for ${..#..} etc.
2391 * In this case, we didn't unmetafy the pattern string
2392 * in the original structure, but it might be unmetafied
2393 * for use with an unmetafied test string.
2394 */
2395 patinlen = pstrlen;
2396 /* if matching files, must update globbing flags */
2397 patglobflags = prog->globend;
2398
2399 if ((patglobflags & GF_MATCHREF) &&
2400 !(patflags & PAT_FILE)) {
2401 char *str;
2402 int mlen;
2403
2404 if (patstralloc->alloced) {
2405 /*
2406 * Unmetafied: pstrlen contains unmetafied
2407 * length in bytes.
2408 */
2409 str = metafy(patinstart, pstrlen, META_DUP);
2410 mlen = CHARSUB(patinstart, patinstart + pstrlen);
2411 } else {
2412 str = ztrduppfx(patinstart, patinlen);
2413 /*
2414 * Count the characters. We're not using CHARSUB()
2415 * because the string is still metafied.
2416 */
2417 MB_METACHARINIT();
2418 mlen = MB_METASTRLEN2END(patinstart, 0,
2419 patinstart + patinlen);
2420 }
2421
2422 setsparam("MATCH", str);
2423 setiparam("MBEGIN",
2424 (zlong)(patoffset + !isset(KSHARRAYS)));
2425 setiparam("MEND",
2426 (zlong)(mlen + patoffset +
2427 !isset(KSHARRAYS) - 1));
2428 }
2429 }
2430 }
2431 } else {
2432 /*
2433 * Test for a `must match' string, unless we're scanning for a match
2434 * in which case we don't need to do this each time.
2435 */
2436 ret = 1;
2437 if (!(prog->flags & PAT_SCAN) && prog->mustoff)
2438 {
2439 char *testptr; /* start pointer into test string */
2440 char *teststop; /* last point from which we can match */
2441 char *patptr = (char *)prog + prog->mustoff;
2442 int patlen = prog->patmlen;
2443 int found = 0;
2444
2445 if (patlen > stringlen) {
2446 /* Too long, can't match. */
2447 ret = 0;
2448 } else {
2449 teststop = patinend - patlen;
2450
2451 for (testptr = patinstart; testptr <= teststop; testptr++)
2452 {
2453 if (!memcmp(testptr, patptr, patlen)) {
2454 found = 1;
2455 break;
2456 }
2457 }
2458
2459 if (!found)
2460 ret = 0;
2461 }
2462 }
2463 if (!ret)
2464 return 0;
2465
2466 patglobflags = prog->globflags;
2467 if (!(patflags & PAT_FILE)) {
2468 forceerrs = -1;
2469 errsfound = 0;
2470 }
2471 globdots = !(patflags & PAT_NOGLD);
2472 parsfound = 0;
2473
2474 patinput = patinstart;
2475
2476 exactpos = exactend = NULL;
2477 /* The only external call to patmatch --- all others are recursive */
2478 if (patmatch((Upat)progstr)) {
2479 /*
2480 * we were lazy and didn't save the globflags if an exclusion
2481 * failed, so set it now
2482 */
2483 patglobflags = prog->globend;
2484
2485 /*
2486 * Record length of successful match, including Meta
2487 * characters. Do it here so that patmatchlen() can return
2488 * it even if we delete the pattern strings.
2489 */
2490 patinlen = patinput - patinstart;
2491 /*
2492 * Optimization: if we didn't find any Meta characters
2493 * to begin with, we don't need to look for them now.
2494 *
2495 * For patstralloc passed in, we want the unmetafied length.
2496 */
2497 if (patstralloc == &patstralloc_struct &&
2498 patstralloc->unmetalen != origlen) {
2499 for (ptr = patinstart; ptr < patinput; ptr++)
2500 if (imeta(*ptr))
2501 patinlen++;
2502 }
2503
2504 /*
2505 * Should we clear backreferences and matches on a failed
2506 * match?
2507 */
2508 if ((patglobflags & GF_MATCHREF) && !(patflags & PAT_FILE)) {
2509 /*
2510 * m flag: for global match. This carries no overhead
2511 * in the pattern matching part.
2512 *
2513 * Remember the test pattern is already unmetafied.
2514 */
2515 char *str;
2516 int mlen = CHARSUB(patinstart, patinput);
2517
2518 str = metafy(patinstart, patinput - patinstart, META_DUP);
2519 setsparam("MATCH", str);
2520 setiparam("MBEGIN", (zlong)(patoffset + !isset(KSHARRAYS)));
2521 setiparam("MEND",
2522 (zlong)(mlen + patoffset +
2523 !isset(KSHARRAYS) - 1));
2524 }
2525 if (prog->patnpar && nump) {
2526 /*
2527 * b flag: for backreferences using parentheses. Reported
2528 * directly.
2529 */
2530 *nump = prog->patnpar;
2531
2532 sp = patbeginp;
2533 ep = patendp;
2534
2535 for (i = 0; i < prog->patnpar && i < maxnpos; i++) {
2536 if (parsfound & (1 << i)) {
2537 if (begp)
2538 *begp++ = CHARSUB(patinstart, *sp) + patoffset;
2539 if (endp)
2540 *endp++ = CHARSUB(patinstart, *ep) + patoffset
2541 - 1;
2542 } else {
2543 if (begp)
2544 *begp++ = -1;
2545 if (endp)
2546 *endp++ = -1;
2547 }
2548
2549 sp++;
2550 ep++;
2551 }
2552 } else if (prog->patnpar && !(patflags & PAT_FILE)) {
2553 /*
2554 * b flag: for backreferences using parentheses.
2555 */
2556 int palen = prog->patnpar+1;
2557 char **matcharr, **mbeginarr, **mendarr;
2558 char numbuf[DIGBUFSIZE];
2559
2560 matcharr = zshcalloc(palen*sizeof(char *));
2561 mbeginarr = zshcalloc(palen*sizeof(char *));
2562 mendarr = zshcalloc(palen*sizeof(char *));
2563
2564 sp = patbeginp;
2565 ep = patendp;
2566
2567 for (i = 0; i < prog->patnpar; i++) {
2568 if (parsfound & (1 << i)) {
2569 matcharr[i] = metafy(*sp, *ep - *sp, META_DUP);
2570 /*
2571 * mbegin and mend give indexes into the string
2572 * in the standard notation, i.e. respecting
2573 * KSHARRAYS, and with the end index giving
2574 * the last character, not one beyond.
2575 * For example, foo=foo; [[ $foo = (f)oo ]] gives
2576 * (without KSHARRAYS) indexes 1 and 1, which
2577 * corresponds to indexing as ${foo[1,1]}.
2578 */
2579 sprintf(numbuf, "%ld",
2580 (long)(CHARSUB(patinstart, *sp) +
2581 patoffset +
2582 !isset(KSHARRAYS)));
2583 mbeginarr[i] = ztrdup(numbuf);
2584 sprintf(numbuf, "%ld",
2585 (long)(CHARSUB(patinstart, *ep) +
2586 patoffset +
2587 !isset(KSHARRAYS) - 1));
2588 mendarr[i] = ztrdup(numbuf);
2589 } else {
2590 /* Pattern wasn't set: either it was in an
2591 * unmatched branch, or a hashed parenthesis
2592 * that didn't match at all.
2593 */
2594 matcharr[i] = ztrdup("");
2595 mbeginarr[i] = ztrdup("-1");
2596 mendarr[i] = ztrdup("-1");
2597 }
2598 sp++;
2599 ep++;
2600 }
2601 setaparam("match", matcharr);
2602 setaparam("mbegin", mbeginarr);
2603 setaparam("mend", mendarr);
2604 }
2605
2606 if (!nump && endp) {
2607 /*
2608 * We just need the overall end position.
2609 */
2610 *endp = CHARSUB(patinstart, patinput) + patoffset;
2611 }
2612
2613 ret = 1;
2614 } else
2615 ret = 0;
2616 }
2617
2618 return ret;
2619 }
2620
2621 /*
2622 * Return length of previous successful match. This is
2623 * in metafied bytes, i.e. includes a count of Meta characters,
2624 * unless the match was done on an unmetafied string using
2625 * a patstralloc struct, in which case it too is unmetafied.
2626 * Unusual and futile attempt at modular encapsulation.
2627 */
2628
2629 /**/
2630 int
patmatchlen(void)2631 patmatchlen(void)
2632 {
2633 return patinlen;
2634 }
2635
2636 /*
2637 * Match literal characters with case insensitivity test: the first
2638 * comes from the input string, the second the current pattern.
2639 */
2640 #ifdef MULTIBYTE_SUPPORT
2641 #define ISUPPER(x) iswupper(x)
2642 #define ISLOWER(x) iswlower(x)
2643 #define TOUPPER(x) towupper(x)
2644 #define TOLOWER(x) towlower(x)
2645 #define ISDIGIT(x) iswdigit(x)
2646 #else
2647 #define ISUPPER(x) isupper(x)
2648 #define ISLOWER(x) islower(x)
2649 #define TOUPPER(x) toupper(x)
2650 #define TOLOWER(x) tolower(x)
2651 #define ISDIGIT(x) idigit(x)
2652 #endif
2653 #define CHARMATCH(chin, chpa) (chin == chpa || \
2654 ((patglobflags & GF_IGNCASE) ? \
2655 ((ISUPPER(chin) ? TOLOWER(chin) : chin) == \
2656 (ISUPPER(chpa) ? TOLOWER(chpa) : chpa)) : \
2657 (patglobflags & GF_LCMATCHUC) ? \
2658 (ISLOWER(chpa) && TOUPPER(chpa) == chin) : 0))
2659
2660 /*
2661 * The same but caching an expression from the first argument,
2662 * Requires local charmatch_cache definition.
2663 */
2664 #define CHARMATCH_EXPR(expr, chpa) \
2665 (charmatch_cache = (expr), CHARMATCH(charmatch_cache, chpa))
2666
2667 /*
2668 * Main matching routine.
2669 *
2670 * Testing the tail end of a match is usually done by recursion, but
2671 * we try to eliminate that in favour of looping for simple cases.
2672 */
2673
2674 /**/
2675 static int
patmatch(Upat prog)2676 patmatch(Upat prog)
2677 {
2678 /* Current and next nodes */
2679 Upat scan = prog, next, opnd;
2680 char *start, *save, *chrop, *chrend, *compend;
2681 int savglobflags, op, no, min, fail = 0, saverrsfound;
2682 zrange_t from, to, comp;
2683 patint_t nextch;
2684 int q = queue_signal_level();
2685
2686 /*
2687 * To avoid overhead of saving state if there are no queued signals
2688 * waiting, we pierce the signals.h veil and examine queue state.
2689 */
2690 #define check_for_signals() do if (queue_front != queue_rear) { \
2691 int savpatflags = patflags, savpatglobflags = patglobflags; \
2692 char *savexactpos = exactpos, *savexactend = exactend; \
2693 struct rpat savpattrystate = pattrystate; \
2694 dont_queue_signals(); \
2695 restore_queue_signals(q); \
2696 exactpos = savexactpos; \
2697 exactend = savexactend; \
2698 patflags = savpatflags; \
2699 patglobflags = savpatglobflags; \
2700 pattrystate = savpattrystate; \
2701 } while (0)
2702
2703 check_for_signals();
2704
2705 while (scan && !errflag) {
2706 next = PATNEXT(scan);
2707
2708 if (!globdots && P_NOTDOT(scan) && patinput == patinstart &&
2709 patinput < patinend && *patinput == '.')
2710 return 0;
2711
2712 switch (P_OP(scan)) {
2713 case P_ANY:
2714 if (patinput == patinend)
2715 fail = 1;
2716 else
2717 CHARINC(patinput, patinend);
2718 break;
2719 case P_EXACTLY:
2720 /*
2721 * acts as nothing if *chrop is null: this is used by
2722 * approx code.
2723 */
2724 if (exactpos) {
2725 chrop = exactpos;
2726 chrend = exactend;
2727 } else {
2728 chrop = P_LS_STR(scan);
2729 chrend = chrop + P_LS_LEN(scan);
2730 }
2731 exactpos = NULL;
2732 while (chrop < chrend && patinput < patinend) {
2733 char *savpatinput = patinput;
2734 char *savchrop = chrop;
2735 int badin = 0, badpa = 0;
2736 /*
2737 * Care with character matching:
2738 * We do need to convert the character to wide
2739 * representation if possible, because we may need
2740 * to do case transformation. However, we should
2741 * be careful in case one, but not the other, wasn't
2742 * representable in the current locale---in that
2743 * case they don't match even if the returned
2744 * values (one properly converted, one raw) are
2745 * the same.
2746 */
2747 patint_t chin = CHARREFINC(patinput, patinend, &badin);
2748 patint_t chpa = CHARREFINC(chrop, chrend, &badpa);
2749 if (!CHARMATCH(chin, chpa) || badin != badpa) {
2750 fail = 1;
2751 patinput = savpatinput;
2752 chrop = savchrop;
2753 break;
2754 }
2755 }
2756 if (chrop < chrend) {
2757 exactpos = chrop;
2758 exactend = chrend;
2759 fail = 1;
2760 }
2761 break;
2762 case P_ANYOF:
2763 case P_ANYBUT:
2764 if (patinput == patinend)
2765 fail = 1;
2766 else {
2767 #ifdef MULTIBYTE_SUPPORT
2768 int zmb_ind;
2769 wchar_t cr = charref(patinput, patinend, &zmb_ind);
2770 char *scanop = (char *)P_OPERAND(scan);
2771 if (patglobflags & GF_MULTIBYTE) {
2772 if (mb_patmatchrange(scanop, cr, zmb_ind, NULL, NULL) ^
2773 (P_OP(scan) == P_ANYOF))
2774 fail = 1;
2775 else
2776 CHARINC(patinput, patinend);
2777 } else if (patmatchrange(scanop, (int)cr, NULL, NULL) ^
2778 (P_OP(scan) == P_ANYOF))
2779 fail = 1;
2780 else
2781 CHARINC(patinput, patinend);
2782 #else
2783 if (patmatchrange((char *)P_OPERAND(scan),
2784 CHARREF(patinput, patinend), NULL, NULL) ^
2785 (P_OP(scan) == P_ANYOF))
2786 fail = 1;
2787 else
2788 CHARINC(patinput, patinend);
2789 #endif
2790 }
2791 break;
2792 case P_NUMRNG:
2793 case P_NUMFROM:
2794 case P_NUMTO:
2795 /*
2796 * To do this properly, we really have to treat numbers as
2797 * closures: that's so things like <1-1000>33 will
2798 * match 633 (they didn't up to 3.1.6). To avoid making this
2799 * too inefficient, we see if there's an exact match next:
2800 * if there is, and it's not a digit, we return 1 after
2801 * the first attempt.
2802 */
2803 op = P_OP(scan);
2804 start = (char *)P_OPERAND(scan);
2805 from = to = 0;
2806 if (op != P_NUMTO) {
2807 #ifdef ZSH_64_BIT_TYPE
2808 /* We can't rely on pointer alignment being good enough. */
2809 memcpy((char *)&from, start, sizeof(zrange_t));
2810 #else
2811 from = *((zrange_t *) start);
2812 #endif
2813 start += sizeof(zrange_t);
2814 }
2815 if (op != P_NUMFROM) {
2816 #ifdef ZSH_64_BIT_TYPE
2817 memcpy((char *)&to, start, sizeof(zrange_t));
2818 #else
2819 to = *((zrange_t *) start);
2820 #endif
2821 }
2822 start = compend = patinput;
2823 comp = 0;
2824 while (patinput < patinend && idigit(*patinput)) {
2825 int out_of_range = 0;
2826 int digit = *patinput - '0';
2827 if (comp > ZRANGE_MAX / (zlong)10) {
2828 out_of_range = 1;
2829 } else {
2830 zrange_t c10 = comp ? comp * 10 : 0;
2831 if (ZRANGE_MAX - c10 < digit) {
2832 out_of_range = 1;
2833 } else {
2834 comp = c10;
2835 comp += digit;
2836 }
2837 }
2838 patinput++;
2839 compend++;
2840
2841 if (out_of_range ||
2842 (comp & ((zrange_t)1 << (sizeof(comp)*8 -
2843 #ifdef ZRANGE_T_IS_SIGNED
2844 2
2845 #else
2846 1
2847 #endif
2848 )))) {
2849 /*
2850 * Out of range (allowing for signedness, which
2851 * we need if we are using zlongs).
2852 * This is as far as we can go.
2853 * If we're doing a range "from", skip all the
2854 * remaining numbers. Otherwise, we can't
2855 * match beyond the previous point anyway.
2856 * Leave the pointer to the last calculated
2857 * position (compend) where it was before.
2858 */
2859 if (op == P_NUMFROM) {
2860 while (patinput < patinend && idigit(*patinput))
2861 patinput++;
2862 }
2863 }
2864 }
2865 save = patinput;
2866 no = 0;
2867 while (patinput > start) {
2868 /* if already too small, no power on earth can save it */
2869 if (comp < from && patinput <= compend)
2870 break;
2871 if ((op == P_NUMFROM || comp <= to) && patmatch(next)) {
2872 return 1;
2873 }
2874 if (!no && P_OP(next) == P_EXACTLY &&
2875 (!P_LS_LEN(next) ||
2876 !idigit(STOUC(*P_LS_STR(next)))) &&
2877 !(patglobflags & 0xff))
2878 return 0;
2879 patinput = --save;
2880 no++;
2881 /*
2882 * With a range start and an unrepresentable test
2883 * number, we just back down the test string without
2884 * changing the number until we get to a representable
2885 * one.
2886 */
2887 if (patinput < compend)
2888 comp /= 10;
2889 }
2890 patinput = start;
2891 fail = 1;
2892 break;
2893 case P_NUMANY:
2894 /* This is <->: any old set of digits, don't bother comparing */
2895 start = patinput;
2896 while (patinput < patinend && idigit(*patinput))
2897 patinput++;
2898 save = patinput;
2899 no = 0;
2900 while (patinput > start) {
2901 if (patmatch(next))
2902 return 1;
2903 if (!no && P_OP(next) == P_EXACTLY &&
2904 (!P_LS_LEN(next) ||
2905 !idigit(*P_LS_STR(next))) &&
2906 !(patglobflags & 0xff))
2907 return 0;
2908 patinput = --save;
2909 no++;
2910 }
2911 patinput = start;
2912 fail = 1;
2913 break;
2914 case P_NOTHING:
2915 break;
2916 case P_BACK:
2917 break;
2918 case P_GFLAGS:
2919 patglobflags = P_OPERAND(scan)->l;
2920 break;
2921 case P_OPEN:
2922 case P_OPEN+1:
2923 case P_OPEN+2:
2924 case P_OPEN+3:
2925 case P_OPEN+4:
2926 case P_OPEN+5:
2927 case P_OPEN+6:
2928 case P_OPEN+7:
2929 case P_OPEN+8:
2930 case P_OPEN+9:
2931 no = P_OP(scan) - P_OPEN;
2932 save = patinput;
2933
2934 if (patmatch(next)) {
2935 /*
2936 * Don't set patbeginp if some later invocation of
2937 * the same parentheses already has.
2938 */
2939 if (no && !(parsfound & (1 << (no - 1)))) {
2940 patbeginp[no-1] = save;
2941 parsfound |= 1 << (no - 1);
2942 }
2943 return 1;
2944 } else
2945 return 0;
2946 break;
2947 case P_CLOSE:
2948 case P_CLOSE+1:
2949 case P_CLOSE+2:
2950 case P_CLOSE+3:
2951 case P_CLOSE+4:
2952 case P_CLOSE+5:
2953 case P_CLOSE+6:
2954 case P_CLOSE+7:
2955 case P_CLOSE+8:
2956 case P_CLOSE+9:
2957 no = P_OP(scan) - P_CLOSE;
2958 save = patinput;
2959
2960 if (patmatch(next)) {
2961 if (no && !(parsfound & (1 << (no + 15)))) {
2962 patendp[no-1] = save;
2963 parsfound |= 1 << (no + 15);
2964 }
2965 return 1;
2966 } else
2967 return 0;
2968 break;
2969 case P_EXCSYNC:
2970 /* See the P_EXCLUDE code below for where syncptr comes from */
2971 {
2972 unsigned char *syncptr;
2973 Upat after;
2974 after = P_OPERAND(scan);
2975 DPUTS(!P_ISEXCLUDE(after),
2976 "BUG: EXCSYNC not followed by EXCLUDE.");
2977 DPUTS(!P_OPERAND(after)->p,
2978 "BUG: EXCSYNC not handled by EXCLUDE");
2979 syncptr = P_OPERAND(after)->p + (patinput - patinstart);
2980 /*
2981 * If we already matched from here, this time we fail.
2982 * See WBRANCH code for story about error count.
2983 */
2984 if (*syncptr && errsfound + 1 >= *syncptr)
2985 return 0;
2986 /*
2987 * Else record that we (possibly) matched this time.
2988 * No harm if we don't: then the previous test will just
2989 * short cut the attempted match that is bound to fail.
2990 * We never try to exclude something that has already
2991 * failed anyway.
2992 */
2993 *syncptr = errsfound + 1;
2994 }
2995 break;
2996 case P_EXCEND:
2997 /*
2998 * This is followed by a P_EXCSYNC, but only in the P_EXCLUDE
2999 * branch. Actually, we don't bother following it: all we
3000 * need to know is that we successfully matched so far up
3001 * to the end of the asserted pattern; the endpoint
3002 * in the target string is nulled out.
3003 */
3004 if (!(fail = (patinput < patinend)))
3005 return 1;
3006 break;
3007 case P_BRANCH:
3008 case P_WBRANCH:
3009 /* P_EXCLUDE shouldn't occur without a P_BRANCH */
3010 if (!P_ISBRANCH(next)) {
3011 /* no choice, avoid recursion */
3012 DPUTS(P_OP(scan) == P_WBRANCH,
3013 "BUG: WBRANCH with no alternative.");
3014 next = P_OPERAND(scan);
3015 } else {
3016 do {
3017 save = patinput;
3018 savglobflags = patglobflags;
3019 saverrsfound = errsfound;
3020 if (P_ISEXCLUDE(next)) {
3021 /*
3022 * The strategy is to test the asserted pattern,
3023 * recording via P_EXCSYNC how far the part to
3024 * be excluded matched. We then set the
3025 * length of the test string to that
3026 * point and see if the exclusion as far as
3027 * P_EXCEND also matches that string.
3028 * We need to keep testing the asserted pattern
3029 * by backtracking, since the first attempt
3030 * may be excluded while a later attempt may not.
3031 * For this we keep a pointer just after
3032 * the P_EXCLUDE which is tested by the P_EXCSYNC
3033 * to see if we matched there last time, in which
3034 * case we fail. If there is nothing to backtrack
3035 * over, that doesn't matter: we should fail anyway.
3036 * The pointer also tells us where the asserted
3037 * pattern matched for use by the exclusion.
3038 *
3039 * It's hard to allocate space for this
3040 * beforehand since we may need to do it
3041 * recursively.
3042 *
3043 * P.S. in case you were wondering, this code
3044 * is horrible.
3045 */
3046 Upat syncstrp;
3047 char *origpatinend;
3048 unsigned char *oldsyncstr;
3049 char *matchpt = NULL;
3050 int ret, savglobdots, matchederrs = 0;
3051 int savparsfound = parsfound;
3052 DPUTS(P_OP(scan) == P_WBRANCH,
3053 "BUG: excluded WBRANCH");
3054 syncstrp = P_OPERAND(next);
3055 /*
3056 * Unlike WBRANCH, each test at the same exclude
3057 * sync point (due to an external loop) is separate,
3058 * i.e testing (foo~bar)# is no different from
3059 * (foo~bar)(foo~bar)... from the exclusion point
3060 * of view, so we use a different sync string.
3061 */
3062 oldsyncstr = syncstrp->p;
3063 syncstrp->p = (unsigned char *)
3064 zshcalloc((patinend - patinstart) + 1);
3065 origpatinend = patinend;
3066 while ((ret = patmatch(P_OPERAND(scan)))) {
3067 unsigned char *syncpt;
3068 char *savpatinstart;
3069 int savforce = forceerrs;
3070 int savpatflags = patflags, synclen;
3071 forceerrs = -1;
3072 savglobdots = globdots;
3073 matchederrs = errsfound;
3074 matchpt = patinput; /* may not be end */
3075 globdots = 1; /* OK to match . first */
3076 /* Find the point where the scan
3077 * matched the part to be excluded: because
3078 * of backtracking, the one
3079 * most recently matched will be the first.
3080 * (Luckily, backtracking is done after all
3081 * possibilities for approximation have been
3082 * checked.)
3083 */
3084 for (syncpt = syncstrp->p; !*syncpt; syncpt++)
3085 ;
3086 synclen = syncpt - syncstrp->p;
3087 if (patinstart + synclen != patinend) {
3088 /*
3089 * Temporarily mark the string as
3090 * ending at this point.
3091 */
3092 DPUTS(patinstart + synclen > matchpt,
3093 "BUG: EXCSYNC failed");
3094
3095 patinend = patinstart + synclen;
3096 /*
3097 * If this isn't really the end of the string,
3098 * remember this for the (#e) assertion.
3099 */
3100 patflags |= PAT_NOTEND;
3101 }
3102 savpatinstart = patinstart;
3103 next = PATNEXT(scan);
3104 while (next && P_ISEXCLUDE(next)) {
3105 patinput = save;
3106 /*
3107 * turn off approximations in exclusions:
3108 * note we keep remaining patglobflags
3109 * set by asserted branch (or previous
3110 * excluded branches, for consistency).
3111 */
3112 patglobflags &= ~0xff;
3113 errsfound = 0;
3114 opnd = P_OPERAND(next) + 1;
3115 if (P_OP(next) == P_EXCLUDP && patinpath) {
3116 /*
3117 * Top level exclusion with a file,
3118 * applies to whole path so add the
3119 * segments already matched.
3120 * We copied these in front of the
3121 * test pattern, so patinend doesn't
3122 * need moving.
3123 */
3124 DPUTS(patinput != patinstart,
3125 "BUG: not at start excluding path");
3126 patinput = patinstart = patinpath;
3127 }
3128 if (patmatch(opnd)) {
3129 ret = 0;
3130 /*
3131 * Another subtlety: if we exclude the
3132 * match, any parentheses just found
3133 * become invalidated.
3134 */
3135 parsfound = savparsfound;
3136 }
3137 if (patinpath) {
3138 patinput = savpatinstart +
3139 (patinput - patinstart);
3140 patinstart = savpatinstart;
3141 }
3142 if (!ret)
3143 break;
3144 next = PATNEXT(next);
3145 }
3146 /*
3147 * Restore original end position.
3148 */
3149 patinend = origpatinend;
3150 patflags = savpatflags;
3151 globdots = savglobdots;
3152 forceerrs = savforce;
3153 if (ret)
3154 break;
3155 patinput = save;
3156 patglobflags = savglobflags;
3157 errsfound = saverrsfound;
3158 }
3159 zfree((char *)syncstrp->p,
3160 (patinend - patinstart) + 1);
3161 syncstrp->p = oldsyncstr;
3162 if (ret) {
3163 patinput = matchpt;
3164 errsfound = matchederrs;
3165 return 1;
3166 }
3167 while ((scan = PATNEXT(scan)) &&
3168 P_ISEXCLUDE(scan))
3169 ;
3170 } else {
3171 int ret = 1, pfree = 0;
3172 Upat ptrp = NULL;
3173 unsigned char *ptr;
3174 if (P_OP(scan) == P_WBRANCH) {
3175 /*
3176 * This is where we make sure that we are not
3177 * repeatedly matching zero-length strings in
3178 * a closure, which would cause an infinite loop,
3179 * and also remove exponential behaviour in
3180 * backtracking nested closures.
3181 * The P_WBRANCH operator leaves a space for a
3182 * uchar *, initialized to NULL, which is
3183 * turned into a string the same length as the
3184 * target string. Every time we match from a
3185 * particular point in the target string, we
3186 * stick a 1 at the corresponding point here.
3187 * If we come round to the same branch again, and
3188 * there is already a 1, then the test fails.
3189 */
3190 opnd = P_OPERAND(scan);
3191 ptrp = opnd++;
3192 if (!ptrp->p) {
3193 ptrp->p = (unsigned char *)
3194 zshcalloc((patinend - patinstart) + 1);
3195 pfree = 1;
3196 }
3197 ptr = ptrp->p + (patinput - patinstart);
3198
3199 /*
3200 * Without approximation, this is just a
3201 * single bit test. With approximation, we
3202 * need to know how many errors there were
3203 * last time we made the test. If errsfound
3204 * is now smaller than it was, hence we can
3205 * make more approximations in the remaining
3206 * code, we continue with the test.
3207 * (This is why the max number of errors is
3208 * 254, not 255.)
3209 */
3210 if (*ptr && errsfound + 1 >= *ptr)
3211 ret = 0;
3212 *ptr = errsfound + 1;
3213 } else
3214 opnd = P_OPERAND(scan);
3215 if (ret)
3216 ret = patmatch(opnd);
3217 if (pfree) {
3218 zfree((char *)ptrp->p,
3219 (patinend - patinstart) + 1);
3220 ptrp->p = NULL;
3221 }
3222 if (ret)
3223 return 1;
3224 scan = PATNEXT(scan);
3225 }
3226 patinput = save;
3227 patglobflags = savglobflags;
3228 errsfound = saverrsfound;
3229 DPUTS(P_OP(scan) == P_WBRANCH,
3230 "BUG: WBRANCH not first choice.");
3231 next = PATNEXT(scan);
3232 } while (scan && P_ISBRANCH(scan));
3233 return 0;
3234 }
3235 break;
3236 case P_STAR:
3237 /* Handle specially for speed, although really P_ONEHASH+P_ANY */
3238 while (P_OP(next) == P_STAR) {
3239 /*
3240 * If there's another * following we can optimise it
3241 * out. Chains of *'s can give pathologically bad
3242 * performance.
3243 */
3244 scan = next;
3245 next = PATNEXT(scan);
3246 }
3247 /*FALLTHROUGH*/
3248 case P_ONEHASH:
3249 case P_TWOHASH:
3250 /*
3251 * This is just simple cases, matching one character.
3252 * With approximations, we still handle * this way, since
3253 * no approximation is ever necessary, but other closures
3254 * are handled by the more complicated branching method
3255 */
3256 op = P_OP(scan);
3257 /* Note that no counts possibly metafied characters */
3258 start = patinput;
3259 {
3260 char *lastcharstart;
3261 /*
3262 * Array to record the start of characters for
3263 * backtracking.
3264 */
3265 VARARR(char, charstart, patinend-patinput);
3266 memset(charstart, 0, patinend-patinput);
3267
3268 if (op == P_STAR) {
3269 for (no = 0; patinput < patinend;
3270 CHARINC(patinput, patinend))
3271 {
3272 charstart[patinput-start] = 1;
3273 no++;
3274 }
3275 /* simple optimization for reasonably common case */
3276 if (P_OP(next) == P_END)
3277 return 1;
3278 } else {
3279 DPUTS(patglobflags & 0xff,
3280 "BUG: wrong backtracking with approximation.");
3281 if (!globdots && P_NOTDOT(P_OPERAND(scan)) &&
3282 patinput == patinstart && patinput < patinend &&
3283 CHARREF(patinput, patinend) == ZWC('.'))
3284 return 0;
3285 no = patrepeat(P_OPERAND(scan), charstart);
3286 }
3287 min = (op == P_TWOHASH) ? 1 : 0;
3288 /*
3289 * Lookahead to avoid useless matches. This is not possible
3290 * with approximation.
3291 */
3292 if (P_OP(next) == P_EXACTLY && P_LS_LEN(next) &&
3293 !(patglobflags & 0xff)) {
3294 char *nextop = P_LS_STR(next);
3295 #ifdef MULTIBYTE_SUPPORT
3296 /* else second argument of CHARREF isn't used */
3297 int nextlen = P_LS_LEN(next);
3298 #endif
3299 /*
3300 * If that P_EXACTLY is last (common in simple patterns,
3301 * such as *.c), then it can be only be matched at one
3302 * point in the test string, so record that.
3303 */
3304 if (P_OP(PATNEXT(next)) == P_END &&
3305 !(patflags & PAT_NOANCH)) {
3306 int ptlen = patinend - patinput;
3307 int lenmatch = patinend -
3308 (min ? CHARNEXT(start, patinend) : start);
3309 /* Are we in the right range? */
3310 if (P_LS_LEN(next) > lenmatch ||
3311 P_LS_LEN(next) < ptlen)
3312 return 0;
3313 /* Yes, just position appropriately and test. */
3314 patinput += ptlen - P_LS_LEN(next);
3315 /*
3316 * Here we will need to be careful that patinput is not
3317 * in the middle of a multibyte character.
3318 */
3319 /* Continue loop with P_EXACTLY test. */
3320 break;
3321 }
3322 nextch = CHARREF(nextop, nextop + nextlen);
3323 } else
3324 nextch = PEOF;
3325 savglobflags = patglobflags;
3326 saverrsfound = errsfound;
3327 lastcharstart = charstart + (patinput - start);
3328 if (no >= min) {
3329 for (;;) {
3330 patint_t charmatch_cache;
3331 if (nextch == PEOF ||
3332 (patinput < patinend &&
3333 CHARMATCH_EXPR(CHARREF(patinput, patinend),
3334 nextch))) {
3335 if (patmatch(next))
3336 return 1;
3337 }
3338 if (--no < min)
3339 break;
3340 /* find start of previous full character */
3341 while (!*--lastcharstart)
3342 DPUTS(lastcharstart < charstart,
3343 "lastcharstart invalid");
3344 patinput = start + (lastcharstart-charstart);
3345 patglobflags = savglobflags;
3346 errsfound = saverrsfound;
3347 }
3348 }
3349 }
3350 /*
3351 * As with branches, the patmatch(next) stuff for *
3352 * handles approximation, so we don't need to try
3353 * anything here.
3354 */
3355 return 0;
3356 case P_ISSTART:
3357 if (patinput != patinstart || (patflags & PAT_NOTSTART))
3358 fail = 1;
3359 break;
3360 case P_ISEND:
3361 if (patinput < patinend || (patflags & PAT_NOTEND))
3362 fail = 1;
3363 break;
3364 case P_COUNTSTART:
3365 {
3366 /*
3367 * Save and restore the current count and the
3368 * start pointer in case the pattern has been
3369 * executed by a previous repetition of a
3370 * closure.
3371 */
3372 long *curptr = &P_OPERAND(scan)[P_CT_CURRENT].l;
3373 long savecount = *curptr;
3374 unsigned char *saveptr = scan[P_CT_PTR].p;
3375 int ret;
3376
3377 *curptr = 0L;
3378 ret = patmatch(P_OPERAND(scan));
3379 *curptr = savecount;
3380 scan[P_CT_PTR].p = saveptr;
3381 return ret;
3382 }
3383 case P_COUNT:
3384 {
3385 /* (#cN,M): execution is relatively straightforward */
3386 long cur = scan[P_CT_CURRENT].l;
3387 long min = scan[P_CT_MIN].l;
3388 long max = scan[P_CT_MAX].l;
3389
3390 if (cur && cur >= min &&
3391 (unsigned char *)patinput == scan[P_CT_PTR].p) {
3392 /*
3393 * Not at the first attempt to match so
3394 * the previous attempt managed zero length.
3395 * We can do this indefinitely so there's
3396 * no point in going on. Simply try to
3397 * match the remainder of the pattern.
3398 */
3399 return patmatch(next);
3400 }
3401 scan[P_CT_PTR].p = (unsigned char *)patinput;
3402
3403 if (max < 0 || cur < max) {
3404 char *patinput_thistime = patinput;
3405 scan[P_CT_CURRENT].l = cur + 1;
3406 if (patmatch(scan + P_CT_OPERAND))
3407 return 1;
3408 scan[P_CT_CURRENT].l = cur;
3409 patinput = patinput_thistime;
3410 }
3411 if (cur < min)
3412 return 0;
3413 return patmatch(next);
3414 }
3415 case P_END:
3416 if (!(fail = (patinput < patinend && !(patflags & PAT_NOANCH))))
3417 return 1;
3418 break;
3419 #ifdef DEBUG
3420 default:
3421 dputs("BUG: bad operand in patmatch.");
3422 return 0;
3423 break;
3424 #endif
3425 }
3426
3427 if (fail) {
3428 if (errsfound < (patglobflags & 0xff) &&
3429 (forceerrs == -1 || errsfound < forceerrs)) {
3430 /*
3431 * Approximation code. There are four possibilities
3432 *
3433 * 1. omit character from input string
3434 * 2. transpose characters in input and pattern strings
3435 * 3. omit character in both input and pattern strings
3436 * 4. omit character from pattern string.
3437 *
3438 * which we try in that order.
3439 *
3440 * Of these, 2, 3 and 4 require an exact match string
3441 * (P_EXACTLY) while 1, 2 and 3 require that we not
3442 * have reached the end of the input string.
3443 *
3444 * Note in each case after making the approximation we
3445 * need to retry the *same* pattern; this is what
3446 * requires exactpos, a slightly doleful way of
3447 * communicating with the exact character matcher.
3448 */
3449 char *savexact = exactpos;
3450 save = patinput;
3451 savglobflags = patglobflags;
3452 saverrsfound = ++errsfound;
3453 fail = 0;
3454
3455 DPUTS(P_OP(scan) != P_EXACTLY && exactpos,
3456 "BUG: non-exact match has set exactpos");
3457
3458 /* Try omitting a character from the input string */
3459 if (patinput < patinend) {
3460 CHARINC(patinput, patinend);
3461 /* If we are not on an exact match, then this is
3462 * our last gasp effort, so we can optimize out
3463 * the recursive call.
3464 */
3465 if (P_OP(scan) != P_EXACTLY)
3466 continue;
3467 if (patmatch(scan))
3468 return 1;
3469 }
3470
3471 if (P_OP(scan) == P_EXACTLY) {
3472 char *nextexact = savexact;
3473 DPUTS(!savexact,
3474 "BUG: exact match has not set exactpos");
3475 CHARINC(nextexact, exactend);
3476
3477 if (save < patinend) {
3478 char *nextin = save;
3479 CHARINC(nextin, patinend);
3480 patglobflags = savglobflags;
3481 errsfound = saverrsfound;
3482 exactpos = savexact;
3483
3484 /*
3485 * Try swapping two characters in patinput and
3486 * exactpos
3487 */
3488 if (save < patinend && nextin < patinend &&
3489 nextexact < exactend) {
3490 patint_t cin0 = CHARREF(save, patinend);
3491 patint_t cpa0 = CHARREF(exactpos, exactend);
3492 patint_t cin1 = CHARREF(nextin, patinend);
3493 patint_t cpa1 = CHARREF(nextexact, exactend);
3494
3495 if (CHARMATCH(cin0, cpa1) &&
3496 CHARMATCH(cin1, cpa0)) {
3497 patinput = nextin;
3498 CHARINC(patinput, patinend);
3499 exactpos = nextexact;
3500 CHARINC(exactpos, exactend);
3501 if (patmatch(scan))
3502 return 1;
3503
3504 patglobflags = savglobflags;
3505 errsfound = saverrsfound;
3506 }
3507 }
3508
3509 /*
3510 * Try moving up both strings.
3511 */
3512 patinput = nextin;
3513 exactpos = nextexact;
3514 if (patmatch(scan))
3515 return 1;
3516
3517 patinput = save;
3518 patglobflags = savglobflags;
3519 errsfound = saverrsfound;
3520 exactpos = savexact;
3521 }
3522
3523 DPUTS(exactpos == exactend, "approximating too far");
3524 /*
3525 * Try moving up the exact match pattern.
3526 * This must be the last attempt, so just loop
3527 * instead of calling recursively.
3528 */
3529 CHARINC(exactpos, exactend);
3530 continue;
3531 }
3532 }
3533 exactpos = NULL;
3534 return 0;
3535 }
3536
3537 scan = next;
3538
3539 /* Allow handlers to run once per loop */
3540 check_for_signals();
3541 }
3542
3543 return 0;
3544 }
3545
3546
3547 /**/
3548 #ifdef MULTIBYTE_SUPPORT
3549
3550 /*
3551 * See if character ch matches a pattern range specification.
3552 * The null-terminated specification is in range; the test
3553 * character is in ch.
3554 *
3555 * zmb is one of the enum defined above charref(), for indicating
3556 * incomplete or invalid multibyte characters.
3557 *
3558 * indptr is used by completion matching, which is why this
3559 * function is exported. If indptr is not NULL we set *indptr
3560 * to the index of the character in the range string, adjusted
3561 * in the case of "A-B" ranges such that A would count as its
3562 * normal index (say IA), B would count as IA + (B-A), and any
3563 * character within the range as appropriate. We're not strictly
3564 * guaranteed this fits within a wint_t, but if this is Unicode
3565 * in 32 bits we have a fair amount of distance left over.
3566 *
3567 * mtp is used in the same circumstances. *mtp returns the match type:
3568 * 0 for a standard character, else the PP_ index. It's not
3569 * useful if the match failed.
3570 */
3571
3572 /**/
3573 mod_export int
mb_patmatchrange(char * range,wchar_t ch,int zmb_ind,wint_t * indptr,int * mtp)3574 mb_patmatchrange(char *range, wchar_t ch, int zmb_ind, wint_t *indptr, int *mtp)
3575 {
3576 wchar_t r1, r2;
3577
3578 if (indptr)
3579 *indptr = 0;
3580 /*
3581 * Careful here: unlike other strings, range is a NULL-terminated,
3582 * metafied string, because we need to treat the Posix and hyphenated
3583 * ranges specially.
3584 */
3585 while (*range) {
3586 if (imeta(STOUC(*range))) {
3587 int swtype = STOUC(*range++) - STOUC(Meta);
3588 if (mtp)
3589 *mtp = swtype;
3590 switch (swtype) {
3591 case 0:
3592 /* ordinary metafied character */
3593 range--;
3594 if (metacharinc(&range) == ch)
3595 return 1;
3596 break;
3597 case PP_ALPHA:
3598 if (iswalpha(ch))
3599 return 1;
3600 break;
3601 case PP_ALNUM:
3602 if (iswalnum(ch))
3603 return 1;
3604 break;
3605 case PP_ASCII:
3606 if ((ch & ~0x7f) == 0)
3607 return 1;
3608 break;
3609 case PP_BLANK:
3610 #if !defined(HAVE_ISWBLANK) && !defined(iswblank)
3611 /*
3612 * iswblank() is GNU and C99. There's a remote chance that some
3613 * systems still don't support it (but would support the other ones
3614 * if MULTIBYTE_SUPPORT is enabled).
3615 */
3616 #define iswblank(c) (c == L' ' || c == L'\t')
3617 #endif
3618 if (iswblank(ch))
3619 return 1;
3620 break;
3621 case PP_CNTRL:
3622 if (iswcntrl(ch))
3623 return 1;
3624 break;
3625 case PP_DIGIT:
3626 if (iswdigit(ch))
3627 return 1;
3628 break;
3629 case PP_GRAPH:
3630 if (iswgraph(ch))
3631 return 1;
3632 break;
3633 case PP_LOWER:
3634 if (iswlower(ch))
3635 return 1;
3636 break;
3637 case PP_PRINT:
3638 if (WC_ISPRINT(ch))
3639 return 1;
3640 break;
3641 case PP_PUNCT:
3642 if (iswpunct(ch))
3643 return 1;
3644 break;
3645 case PP_SPACE:
3646 if (iswspace(ch))
3647 return 1;
3648 break;
3649 case PP_UPPER:
3650 if (iswupper(ch))
3651 return 1;
3652 break;
3653 case PP_XDIGIT:
3654 if (iswxdigit(ch))
3655 return 1;
3656 break;
3657 case PP_IDENT:
3658 if (wcsitype(ch, IIDENT))
3659 return 1;
3660 break;
3661 case PP_IFS:
3662 if (wcsitype(ch, ISEP))
3663 return 1;
3664 break;
3665 case PP_IFSSPACE:
3666 /* must be ASCII space character */
3667 if (ch < 128 && iwsep((int)ch))
3668 return 1;
3669 break;
3670 case PP_WORD:
3671 if (wcsitype(ch, IWORD))
3672 return 1;
3673 break;
3674 case PP_RANGE:
3675 r1 = metacharinc(&range);
3676 r2 = metacharinc(&range);
3677 if (r1 <= ch && ch <= r2) {
3678 if (indptr)
3679 *indptr += ch - r1;
3680 return 1;
3681 }
3682 /* Careful not to screw up counting with bogus range */
3683 if (indptr && r1 < r2) {
3684 /*
3685 * This gets incremented again below to get
3686 * us past the range end. This is correct.
3687 */
3688 *indptr += r2 - r1;
3689 }
3690 break;
3691 case PP_INCOMPLETE:
3692 if (zmb_ind == ZMB_INCOMPLETE)
3693 return 1;
3694 break;
3695 case PP_INVALID:
3696 if (zmb_ind == ZMB_INVALID)
3697 return 1;
3698 break;
3699 case PP_UNKWN:
3700 DPUTS(1, "BUG: unknown posix range passed through.\n");
3701 break;
3702 default:
3703 DPUTS(1, "BUG: unknown metacharacter in range.");
3704 break;
3705 }
3706 } else if (metacharinc(&range) == ch) {
3707 if (mtp)
3708 *mtp = 0;
3709 return 1;
3710 }
3711 if (indptr)
3712 (*indptr)++;
3713 }
3714 return 0;
3715 }
3716
3717
3718 /*
3719 * This is effectively the reverse of mb_patmatchrange().
3720 * Given a range descriptor of the same form, and an index into it,
3721 * try to determine the character that is matched. If the index
3722 * points to a [:...:] generic style match, set chr to WEOF and
3723 * return the type in mtp instead. Return 1 if successful, 0 if
3724 * there was no corresponding index. Note all pointer arguments
3725 * must be non-null.
3726 */
3727
3728 /**/
3729 mod_export int
mb_patmatchindex(char * range,wint_t ind,wint_t * chr,int * mtp)3730 mb_patmatchindex(char *range, wint_t ind, wint_t *chr, int *mtp)
3731 {
3732 wchar_t r1, r2, rchr;
3733 wint_t rdiff;
3734
3735 *chr = WEOF;
3736 *mtp = 0;
3737
3738 while (*range) {
3739 if (imeta(STOUC(*range))) {
3740 int swtype = STOUC(*range++) - STOUC(Meta);
3741 switch (swtype) {
3742 case 0:
3743 range--;
3744 rchr = metacharinc(&range);
3745 if (!ind) {
3746 *chr = (wint_t) rchr;
3747 return 1;
3748 }
3749 break;
3750
3751 case PP_ALPHA:
3752 case PP_ALNUM:
3753 case PP_ASCII:
3754 case PP_BLANK:
3755 case PP_CNTRL:
3756 case PP_DIGIT:
3757 case PP_GRAPH:
3758 case PP_LOWER:
3759 case PP_PRINT:
3760 case PP_PUNCT:
3761 case PP_SPACE:
3762 case PP_UPPER:
3763 case PP_XDIGIT:
3764 case PP_IDENT:
3765 case PP_IFS:
3766 case PP_IFSSPACE:
3767 case PP_WORD:
3768 case PP_INCOMPLETE:
3769 case PP_INVALID:
3770 if (!ind) {
3771 *mtp = swtype;
3772 return 1;
3773 }
3774 break;
3775
3776 case PP_RANGE:
3777 r1 = metacharinc(&range);
3778 r2 = metacharinc(&range);
3779 rdiff = (wint_t)r2 - (wint_t)r1;
3780 if (rdiff >= ind) {
3781 *chr = (wint_t)r1 + ind;
3782 return 1;
3783 }
3784 /* note the extra decrement to ind below */
3785 ind -= rdiff;
3786 break;
3787 case PP_UNKWN:
3788 DPUTS(1, "BUG: unknown posix range passed through.\n");
3789 break;
3790 default:
3791 DPUTS(1, "BUG: unknown metacharacter in range.");
3792 break;
3793 }
3794 } else {
3795 rchr = metacharinc(&range);
3796 if (!ind) {
3797 *chr = (wint_t)rchr;
3798 return 1;
3799 }
3800 }
3801 if (!ind--)
3802 break;
3803 }
3804
3805 /* No corresponding index. */
3806 return 0;
3807 }
3808
3809 /**/
3810 #endif /* MULTIBYTE_SUPPORT */
3811
3812 /*
3813 * Identical function to mb_patmatchrange() above for single-byte
3814 * characters.
3815 */
3816
3817 /**/
3818 mod_export int
patmatchrange(char * range,int ch,int * indptr,int * mtp)3819 patmatchrange(char *range, int ch, int *indptr, int *mtp)
3820 {
3821 int r1, r2;
3822
3823 if (indptr)
3824 *indptr = 0;
3825 /*
3826 * Careful here: unlike other strings, range is a NULL-terminated,
3827 * metafied string, because we need to treat the Posix and hyphenated
3828 * ranges specially.
3829 */
3830 for (; *range; range++) {
3831 if (imeta(STOUC(*range))) {
3832 int swtype = STOUC(*range) - STOUC(Meta);
3833 if (mtp)
3834 *mtp = swtype;
3835 switch (swtype) {
3836 case 0:
3837 if (STOUC(*++range ^ 32) == ch)
3838 return 1;
3839 break;
3840 case PP_ALPHA:
3841 if (isalpha(ch))
3842 return 1;
3843 break;
3844 case PP_ALNUM:
3845 if (isalnum(ch))
3846 return 1;
3847 break;
3848 case PP_ASCII:
3849 if ((ch & ~0x7f) == 0)
3850 return 1;
3851 break;
3852 case PP_BLANK:
3853 #if !defined(HAVE_ISBLANK) && !defined(isblank)
3854 /*
3855 * isblank() is GNU and C99. There's a remote chance that some
3856 * systems still don't support it.
3857 */
3858 #define isblank(c) (c == ' ' || c == '\t')
3859 #endif
3860 if (isblank(ch))
3861 return 1;
3862 break;
3863 case PP_CNTRL:
3864 if (iscntrl(ch))
3865 return 1;
3866 break;
3867 case PP_DIGIT:
3868 if (isdigit(ch))
3869 return 1;
3870 break;
3871 case PP_GRAPH:
3872 if (isgraph(ch))
3873 return 1;
3874 break;
3875 case PP_LOWER:
3876 if (islower(ch))
3877 return 1;
3878 break;
3879 case PP_PRINT:
3880 if (ZISPRINT(ch))
3881 return 1;
3882 break;
3883 case PP_PUNCT:
3884 if (ispunct(ch))
3885 return 1;
3886 break;
3887 case PP_SPACE:
3888 if (isspace(ch))
3889 return 1;
3890 break;
3891 case PP_UPPER:
3892 if (isupper(ch))
3893 return 1;
3894 break;
3895 case PP_XDIGIT:
3896 if (isxdigit(ch))
3897 return 1;
3898 break;
3899 case PP_IDENT:
3900 if (iident(ch))
3901 return 1;
3902 break;
3903 case PP_IFS:
3904 if (isep(ch))
3905 return 1;
3906 break;
3907 case PP_IFSSPACE:
3908 if (iwsep(ch))
3909 return 1;
3910 break;
3911 case PP_WORD:
3912 if (iword(ch))
3913 return 1;
3914 break;
3915 case PP_RANGE:
3916 range++;
3917 r1 = STOUC(UNMETA(range));
3918 METACHARINC(range);
3919 r2 = STOUC(UNMETA(range));
3920 if (*range == Meta)
3921 range++;
3922 if (r1 <= ch && ch <= r2) {
3923 if (indptr)
3924 *indptr += ch - r1;
3925 return 1;
3926 }
3927 if (indptr && r1 < r2)
3928 *indptr += r2 - r1;
3929 break;
3930 case PP_INCOMPLETE:
3931 case PP_INVALID:
3932 /* Never true if not in multibyte mode */
3933 break;
3934 case PP_UNKWN:
3935 DPUTS(1, "BUG: unknown posix range passed through.\n");
3936 break;
3937 default:
3938 DPUTS(1, "BUG: unknown metacharacter in range.");
3939 break;
3940 }
3941 } else if (STOUC(*range) == ch) {
3942 if (mtp)
3943 *mtp = 0;
3944 return 1;
3945 }
3946 if (indptr)
3947 (*indptr)++;
3948 }
3949 return 0;
3950 }
3951
3952
3953 /**/
3954 #ifndef MULTIBYTE_SUPPORT
3955
3956 /*
3957 * Identical function to mb_patmatchindex() above for single-byte
3958 * characters. Here -1 represents a character that needs a special type.
3959 *
3960 * Unlike patmatchrange, we only need this in ZLE, which always
3961 * uses MULTIBYTE_SUPPORT if compiled in; hence we don't use
3962 * this function in that case.
3963 */
3964
3965 /**/
3966 mod_export int
patmatchindex(char * range,int ind,int * chr,int * mtp)3967 patmatchindex(char *range, int ind, int *chr, int *mtp)
3968 {
3969 int r1, r2, rdiff, rchr;
3970
3971 *chr = -1;
3972 *mtp = 0;
3973
3974 for (; *range; range++) {
3975 if (imeta(STOUC(*range))) {
3976 int swtype = STOUC(*range) - STOUC(Meta);
3977 switch (swtype) {
3978 case 0:
3979 /* ordinary metafied character */
3980 rchr = STOUC(*++range) ^ 32;
3981 if (!ind) {
3982 *chr = rchr;
3983 return 1;
3984 }
3985 break;
3986
3987 case PP_ALPHA:
3988 case PP_ALNUM:
3989 case PP_ASCII:
3990 case PP_BLANK:
3991 case PP_CNTRL:
3992 case PP_DIGIT:
3993 case PP_GRAPH:
3994 case PP_LOWER:
3995 case PP_PRINT:
3996 case PP_PUNCT:
3997 case PP_SPACE:
3998 case PP_UPPER:
3999 case PP_XDIGIT:
4000 case PP_IDENT:
4001 case PP_IFS:
4002 case PP_IFSSPACE:
4003 case PP_WORD:
4004 case PP_INCOMPLETE:
4005 case PP_INVALID:
4006 if (!ind) {
4007 *mtp = swtype;
4008 return 1;
4009 }
4010 break;
4011
4012 case PP_RANGE:
4013 range++;
4014 r1 = STOUC(UNMETA(range));
4015 METACHARINC(range);
4016 r2 = STOUC(UNMETA(range));
4017 if (*range == Meta)
4018 range++;
4019 rdiff = r2 - r1;
4020 if (rdiff >= ind) {
4021 *chr = r1 + ind;
4022 return 1;
4023 }
4024 /* note the extra decrement to ind below */
4025 ind -= rdiff;
4026 break;
4027 case PP_UNKWN:
4028 DPUTS(1, "BUG: unknown posix range passed through.\n");
4029 break;
4030 default:
4031 DPUTS(1, "BUG: unknown metacharacter in range.");
4032 break;
4033 }
4034 } else {
4035 if (!ind) {
4036 *chr = STOUC(*range);
4037 return 1;
4038 }
4039 }
4040 if (!ind--)
4041 break;
4042 }
4043
4044 /* No corresponding index. */
4045 return 0;
4046 }
4047
4048 /**/
4049 #endif /* MULTIBYTE_SUPPORT */
4050
4051 /*
4052 * Repeatedly match something simple and say how many times.
4053 * charstart is an array parallel to that starting at patinput
4054 * and records the start of (possibly multibyte) characters
4055 * to aid in later backtracking.
4056 */
4057
4058 /**/
patrepeat(Upat p,char * charstart)4059 static int patrepeat(Upat p, char *charstart)
4060 {
4061 int count = 0;
4062 patint_t tch, charmatch_cache;
4063 char *scan, *opnd;
4064
4065 scan = patinput;
4066 opnd = (char *)P_OPERAND(p);
4067
4068 switch(P_OP(p)) {
4069 #ifdef DEBUG
4070 case P_ANY:
4071 dputs("BUG: ?# did not get optimized to *");
4072 return 0;
4073 break;
4074 #endif
4075 case P_EXACTLY:
4076 DPUTS(P_LS_LEN(p) != 1, "closure following more than one character");
4077 tch = CHARREF(P_LS_STR(p), P_LS_STR(p) + P_LS_LEN(p));
4078 while (scan < patinend &&
4079 CHARMATCH_EXPR(CHARREF(scan, patinend), tch)) {
4080 charstart[scan-patinput] = 1;
4081 count++;
4082 CHARINC(scan, patinend);
4083 }
4084 break;
4085 case P_ANYOF:
4086 case P_ANYBUT:
4087 while (scan < patinend) {
4088 #ifdef MULTIBYTE_SUPPORT
4089 int zmb_ind;
4090 wchar_t cr = charref(scan, patinend, &zmb_ind);
4091 if (patglobflags & GF_MULTIBYTE) {
4092 if (mb_patmatchrange(opnd, cr, zmb_ind, NULL, NULL) ^
4093 (P_OP(p) == P_ANYOF))
4094 break;
4095 } else if (patmatchrange(opnd, (int)cr, NULL, NULL) ^
4096 (P_OP(p) == P_ANYOF))
4097 break;
4098 #else
4099 if (patmatchrange(opnd, CHARREF(scan, patinend), NULL, NULL) ^
4100 (P_OP(p) == P_ANYOF))
4101 break;
4102 #endif
4103 charstart[scan-patinput] = 1;
4104 count++;
4105 CHARINC(scan, patinend);
4106 }
4107 break;
4108 #ifdef DEBUG
4109 default:
4110 dputs("BUG: something very strange is happening in patrepeat");
4111 return 0;
4112 break;
4113 #endif
4114 }
4115
4116 patinput = scan;
4117 return count;
4118 }
4119
4120 /* Free a patprog. */
4121
4122 /**/
4123 mod_export void
freepatprog(Patprog prog)4124 freepatprog(Patprog prog)
4125 {
4126 if (prog && prog != dummy_patprog1 && prog != dummy_patprog2)
4127 zfree(prog, prog->size);
4128 }
4129
4130 /* Disable or reenable a pattern character */
4131
4132 /**/
4133 int
pat_enables(const char * cmd,char ** patp,int enable)4134 pat_enables(const char *cmd, char **patp, int enable)
4135 {
4136 int ret = 0;
4137 const char **stringp;
4138 char *disp;
4139
4140 if (!*patp) {
4141 int done = 0;
4142 for (stringp = zpc_strings, disp = zpc_disables;
4143 stringp < zpc_strings + ZPC_COUNT;
4144 stringp++, disp++) {
4145 if (!*stringp)
4146 continue;
4147 if (enable ? *disp : !*disp)
4148 continue;
4149 if (done)
4150 putc(' ', stdout);
4151 printf("'%s'", *stringp);
4152 done = 1;
4153 }
4154 if (done)
4155 putc('\n', stdout);
4156 return 0;
4157 }
4158
4159 for (; *patp; patp++) {
4160 for (stringp = zpc_strings, disp = zpc_disables;
4161 stringp < zpc_strings + ZPC_COUNT;
4162 stringp++, disp++) {
4163 if (*stringp && !strcmp(*stringp, *patp)) {
4164 *disp = (char)!enable;
4165 break;
4166 }
4167 }
4168 if (stringp == zpc_strings + ZPC_COUNT) {
4169 zerrnam(cmd, "invalid pattern: %s", *patp);
4170 ret = 1;
4171 }
4172 }
4173
4174 return ret;
4175 }
4176
4177 /*
4178 * Save the current state of pattern disables, returning the saved value.
4179 */
4180
4181 /**/
4182 unsigned int
savepatterndisables(void)4183 savepatterndisables(void)
4184 {
4185 unsigned int disables, bit;
4186 char *disp;
4187
4188 disables = 0;
4189 for (bit = 1, disp = zpc_disables;
4190 disp < zpc_disables + ZPC_COUNT;
4191 bit <<= 1, disp++) {
4192 if (*disp)
4193 disables |= bit;
4194 }
4195 return disables;
4196 }
4197
4198 /*
4199 * Function scope saving pattern enables.
4200 */
4201
4202 /**/
4203 void
startpatternscope(void)4204 startpatternscope(void)
4205 {
4206 Zpc_disables_save newdis;
4207
4208 newdis = (Zpc_disables_save)zalloc(sizeof(*newdis));
4209 newdis->next = zpc_disables_stack;
4210 newdis->disables = savepatterndisables();
4211
4212 zpc_disables_stack = newdis;
4213 }
4214
4215 /*
4216 * Restore completely the state of pattern disables.
4217 */
4218
4219 /**/
4220 void
restorepatterndisables(unsigned int disables)4221 restorepatterndisables(unsigned int disables)
4222 {
4223 char *disp;
4224 unsigned int bit;
4225
4226 for (bit = 1, disp = zpc_disables;
4227 disp < zpc_disables + ZPC_COUNT;
4228 bit <<= 1, disp++) {
4229 if (disables & bit)
4230 *disp = 1;
4231 else
4232 *disp = 0;
4233 }
4234 }
4235
4236 /*
4237 * Function scope to restore pattern enables if localpatterns is turned on.
4238 */
4239
4240 /**/
4241 void
endpatternscope(void)4242 endpatternscope(void)
4243 {
4244 Zpc_disables_save olddis;
4245
4246 olddis = zpc_disables_stack;
4247 zpc_disables_stack = olddis->next;
4248
4249 if (isset(LOCALPATTERNS))
4250 restorepatterndisables(olddis->disables);
4251
4252 zfree(olddis, sizeof(*olddis));
4253 }
4254
4255 /* Reinitialise pattern disables */
4256
4257 /**/
4258 void
clearpatterndisables(void)4259 clearpatterndisables(void)
4260 {
4261 memset(zpc_disables, 0, ZPC_COUNT);
4262 }
4263
4264
4265 /* Check to see if str is eligible for filename generation. */
4266
4267 /**/
4268 mod_export int
haswilds(char * str)4269 haswilds(char *str)
4270 {
4271 char *start;
4272
4273 /* `[' and `]' are legal even if bad patterns are usually not. */
4274 if ((*str == Inbrack || *str == Outbrack) && !str[1])
4275 return 0;
4276
4277 /* If % is immediately followed by ?, then that ? is *
4278 * not treated as a wildcard. This is so you don't have *
4279 * to escape job references such as %?foo. */
4280 if (str[0] == '%' && str[1] == Quest)
4281 str[1] = '?';
4282
4283 /*
4284 * Note that at this point zpc_special has not been set up.
4285 */
4286 start = str;
4287 for (; *str; str++) {
4288 switch (*str) {
4289 case Inpar:
4290 if ((!isset(SHGLOB) && !zpc_disables[ZPC_INPAR]) ||
4291 (str > start && isset(KSHGLOB) &&
4292 ((str[-1] == Quest && !zpc_disables[ZPC_KSH_QUEST]) ||
4293 (str[-1] == Star && !zpc_disables[ZPC_KSH_STAR]) ||
4294 (str[-1] == '+' && !zpc_disables[ZPC_KSH_PLUS]) ||
4295 (str[-1] == Bang && !zpc_disables[ZPC_KSH_BANG]) ||
4296 (str[-1] == '!' && !zpc_disables[ZPC_KSH_BANG2]) ||
4297 (str[-1] == '@' && !zpc_disables[ZPC_KSH_AT]))))
4298 return 1;
4299 break;
4300
4301 case Bar:
4302 if (!zpc_disables[ZPC_BAR])
4303 return 1;
4304 break;
4305
4306 case Star:
4307 if (!zpc_disables[ZPC_STAR])
4308 return 1;
4309 break;
4310
4311 case Inbrack:
4312 if (!zpc_disables[ZPC_INBRACK])
4313 return 1;
4314 break;
4315
4316 case Inang:
4317 if (!zpc_disables[ZPC_INANG])
4318 return 1;
4319 break;
4320
4321 case Quest:
4322 if (!zpc_disables[ZPC_QUEST])
4323 return 1;
4324 break;
4325
4326 case Pound:
4327 if (isset(EXTENDEDGLOB) && !zpc_disables[ZPC_HASH])
4328 return 1;
4329 break;
4330
4331 case Hat:
4332 if (isset(EXTENDEDGLOB) && !zpc_disables[ZPC_HAT])
4333 return 1;
4334 break;
4335 }
4336 }
4337 return 0;
4338 }
4339