1 /*
2  * glob.c - filename generation
3  *
4  * This file is part of zsh, the Z shell.
5  *
6  * Copyright (c) 1992-1997 Paul Falstad
7  * All rights reserved.
8  *
9  * Permission is hereby granted, without written agreement and without
10  * license or royalty fees, to use, copy, modify, and distribute this
11  * software and to distribute modified versions of this software for any
12  * purpose, provided that the above copyright notice and the following
13  * two paragraphs appear in all copies of this software.
14  *
15  * In no event shall Paul Falstad or the Zsh Development Group be liable
16  * to any party for direct, indirect, special, incidental, or consequential
17  * damages arising out of the use of this software and its documentation,
18  * even if Paul Falstad and the Zsh Development Group have been advised of
19  * the possibility of such damage.
20  *
21  * Paul Falstad and the Zsh Development Group specifically disclaim any
22  * warranties, including, but not limited to, the implied warranties of
23  * merchantability and fitness for a particular purpose.  The software
24  * provided hereunder is on an "as is" basis, and Paul Falstad and the
25  * Zsh Development Group have no obligation to provide maintenance,
26  * support, updates, enhancements, or modifications.
27  *
28  */
29 
30 #include "zsh.mdh"
31 #include "glob.pro"
32 
33 #if defined(OFF_T_IS_64_BIT) && defined(__GNUC__)
34 # define ALIGN64 __attribute__((aligned(8)))
35 #else
36 # define ALIGN64
37 #endif
38 
39 /* flag for CSHNULLGLOB */
40 
41 typedef struct gmatch *Gmatch;
42 
43 struct gmatch {
44     /* Metafied file name */
45     char *name;
46     /* Unmetafied file name; embedded nulls can't occur in file names */
47     char *uname;
48     /*
49      * Array of sort strings:  one for each GS_EXEC sort type in
50      * the glob qualifiers.
51      */
52     char **sortstrs;
53     off_t size ALIGN64;
54     long atime;
55     long mtime;
56     long ctime;
57     long links;
58     off_t _size ALIGN64;
59     long _atime;
60     long _mtime;
61     long _ctime;
62     long _links;
63 #ifdef GET_ST_ATIME_NSEC
64     long ansec;
65     long _ansec;
66 #endif
67 #ifdef GET_ST_MTIME_NSEC
68     long mnsec;
69     long _mnsec;
70 #endif
71 #ifdef GET_ST_CTIME_NSEC
72     long cnsec;
73     long _cnsec;
74 #endif
75 };
76 
77 #define GS_NAME   1
78 #define GS_DEPTH  2
79 #define GS_EXEC	  4
80 
81 #define GS_SHIFT_BASE	8
82 
83 #define GS_SIZE  (GS_SHIFT_BASE)
84 #define GS_ATIME (GS_SHIFT_BASE << 1)
85 #define GS_MTIME (GS_SHIFT_BASE << 2)
86 #define GS_CTIME (GS_SHIFT_BASE << 3)
87 #define GS_LINKS (GS_SHIFT_BASE << 4)
88 
89 #define GS_SHIFT  5
90 #define GS__SIZE  (GS_SIZE << GS_SHIFT)
91 #define GS__ATIME (GS_ATIME << GS_SHIFT)
92 #define GS__MTIME (GS_MTIME << GS_SHIFT)
93 #define GS__CTIME (GS_CTIME << GS_SHIFT)
94 #define GS__LINKS (GS_LINKS << GS_SHIFT)
95 
96 #define GS_DESC  (GS_SHIFT_BASE << (2*GS_SHIFT))
97 #define GS_NONE  (GS_SHIFT_BASE << (2*GS_SHIFT+1))
98 
99 #define GS_NORMAL (GS_SIZE | GS_ATIME | GS_MTIME | GS_CTIME | GS_LINKS)
100 #define GS_LINKED (GS_NORMAL << GS_SHIFT)
101 
102 /**/
103 int badcshglob;
104 
105 /**/
106 int pathpos;		/* position in pathbuf (needed by pattern code) */
107 
108 /*
109  * pathname buffer (needed by pattern code).
110  * It is currently believed the string in here is stored metafied and is
111  * unmetafied temporarily as needed by system calls.
112  */
113 
114 /**/
115 char *pathbuf;
116 
117 typedef struct stat *Statptr;	 /* This makes the Ultrix compiler happy.  Go figure. */
118 
119 /* modifier for unit conversions */
120 
121 #define TT_DAYS 0
122 #define TT_HOURS 1
123 #define TT_MINS 2
124 #define TT_WEEKS 3
125 #define TT_MONTHS 4
126 #define TT_SECONDS 5
127 
128 #define TT_BYTES 0
129 #define TT_POSIX_BLOCKS 1
130 #define TT_KILOBYTES 2
131 #define TT_MEGABYTES 3
132 #define TT_GIGABYTES 4
133 #define TT_TERABYTES 5
134 
135 
136 typedef int (*TestMatchFunc) _((char *, struct stat *, off_t, char *));
137 
138 struct qual {
139     struct qual *next;		/* Next qualifier, must match                */
140     struct qual *or;		/* Alternative set of qualifiers to match    */
141     TestMatchFunc func;		/* Function to call to test match            */
142     off_t data ALIGN64;		/* Argument passed to function               */
143     int sense;			/* Whether asserting or negating             */
144     int amc;			/* Flag for which time to test (a, m, c)     */
145     int range;			/* Whether to test <, > or = (as per signum) */
146     int units;			/* Multiplier for time or size, respectively */
147     char *sdata;		/* currently only: expression to eval        */
148 };
149 
150 /* Prefix, suffix for doing zle trickery */
151 
152 /**/
153 mod_export char *glob_pre, *glob_suf;
154 
155 /* Element of a glob sort */
156 struct globsort {
157     /* Sort type */
158     int tp;
159     /* Sort code to eval, if type is GS_EXEC */
160     char *exec;
161 };
162 
163 /* Maximum entries in sort array */
164 #define MAX_SORTS	(12)
165 
166 /* struct to easily save/restore current state */
167 
168 struct globdata {
169     int gd_pathpos;
170     char *gd_pathbuf;
171 
172     int gd_matchsz;		/* size of matchbuf                     */
173     int gd_matchct;		/* number of matches found              */
174     int gd_pathbufsz;		/* size of pathbuf			*/
175     int gd_pathbufcwd;		/* where did we chdir()'ed		*/
176     Gmatch gd_matchbuf;		/* array of matches                     */
177     Gmatch gd_matchptr;		/* &matchbuf[matchct]                   */
178     char *gd_colonmod;		/* colon modifiers in qualifier list    */
179 
180     /* Qualifiers pertaining to current pattern */
181     struct qual *gd_quals;
182 
183     /* Other state values for current pattern */
184     int gd_qualct, gd_qualorct;
185     int gd_range, gd_amc, gd_units;
186     int gd_gf_nullglob, gd_gf_markdirs, gd_gf_noglobdots, gd_gf_listtypes;
187     int gd_gf_numsort;
188     int gd_gf_follow, gd_gf_sorts, gd_gf_nsorts;
189     struct globsort gd_gf_sortlist[MAX_SORTS];
190     LinkList gd_gf_pre_words, gd_gf_post_words;
191 
192     char *gd_glob_pre, *gd_glob_suf;
193 };
194 
195 /* The variable with the current globbing state and convenience macros */
196 
197 static struct globdata curglobdata;
198 
199 #define matchsz       (curglobdata.gd_matchsz)
200 #define matchct       (curglobdata.gd_matchct)
201 #define pathbufsz     (curglobdata.gd_pathbufsz)
202 #define pathbufcwd    (curglobdata.gd_pathbufcwd)
203 #define matchbuf      (curglobdata.gd_matchbuf)
204 #define matchptr      (curglobdata.gd_matchptr)
205 #define colonmod      (curglobdata.gd_colonmod)
206 #define quals         (curglobdata.gd_quals)
207 #define qualct        (curglobdata.gd_qualct)
208 #define qualorct      (curglobdata.gd_qualorct)
209 #define g_range       (curglobdata.gd_range)
210 #define g_amc         (curglobdata.gd_amc)
211 #define g_units       (curglobdata.gd_units)
212 #define gf_nullglob   (curglobdata.gd_gf_nullglob)
213 #define gf_markdirs   (curglobdata.gd_gf_markdirs)
214 #define gf_noglobdots (curglobdata.gd_gf_noglobdots)
215 #define gf_listtypes  (curglobdata.gd_gf_listtypes)
216 #define gf_numsort    (curglobdata.gd_gf_numsort)
217 #define gf_follow     (curglobdata.gd_gf_follow)
218 #define gf_sorts      (curglobdata.gd_gf_sorts)
219 #define gf_nsorts     (curglobdata.gd_gf_nsorts)
220 #define gf_sortlist   (curglobdata.gd_gf_sortlist)
221 #define gf_pre_words  (curglobdata.gd_gf_pre_words)
222 #define gf_post_words (curglobdata.gd_gf_post_words)
223 
224 /* and macros for save/restore */
225 
226 #define save_globstate(N) \
227   do { \
228     queue_signals(); \
229     memcpy(&(N), &curglobdata, sizeof(struct globdata)); \
230     (N).gd_pathpos = pathpos; \
231     (N).gd_pathbuf = pathbuf; \
232     (N).gd_glob_pre = glob_pre; \
233     (N).gd_glob_suf = glob_suf; \
234     pathbuf = NULL; \
235     unqueue_signals(); \
236   } while (0)
237 
238 #define restore_globstate(N) \
239   do { \
240     queue_signals(); \
241     zfree(pathbuf, pathbufsz); \
242     memcpy(&curglobdata, &(N), sizeof(struct globdata)); \
243     pathpos = (N).gd_pathpos; \
244     pathbuf = (N).gd_pathbuf; \
245     glob_pre = (N).gd_glob_pre; \
246     glob_suf = (N).gd_glob_suf; \
247     unqueue_signals(); \
248   } while (0)
249 
250 /* pathname component in filename patterns */
251 
252 struct complist {
253     Complist next;
254     Patprog pat;
255     int closure;		/* 1 if this is a (foo/)# */
256     int follow; 		/* 1 to go thru symlinks */
257 };
258 
259 /* Add a component to pathbuf: This keeps track of how    *
260  * far we are into a file name, since each path component *
261  * must be matched separately.                            */
262 
263 /**/
264 static void
addpath(char * s,int l)265 addpath(char *s, int l)
266 {
267     DPUTS(!pathbuf, "BUG: pathbuf not initialised");
268     while (pathpos + l + 1 >= pathbufsz)
269 	pathbuf = zrealloc(pathbuf, pathbufsz *= 2);
270     while (l--)
271 	pathbuf[pathpos++] = *s++;
272     pathbuf[pathpos++] = '/';
273     pathbuf[pathpos] = '\0';
274 }
275 
276 /* stat the filename s appended to pathbuf.  l should be true for lstat,    *
277  * false for stat.  If st is NULL, the file is only checked for existence.  *
278  * s == "" is treated as s == ".".  This is necessary since on most systems *
279  * foo/ can be used to reference a non-directory foo.  Returns nonzero if   *
280  * the file does not exists.                                                */
281 
282 /**/
283 static int
statfullpath(const char * s,struct stat * st,int l)284 statfullpath(const char *s, struct stat *st, int l)
285 {
286     char buf[PATH_MAX+1];
287 
288     DPUTS(strlen(s) + !*s + pathpos - pathbufcwd >= PATH_MAX,
289 	  "BUG: statfullpath(): pathname too long");
290     strcpy(buf, pathbuf + pathbufcwd);
291     strcpy(buf + pathpos - pathbufcwd, s);
292     if (!*s && *buf) {
293 	/*
294 	 * Don't add the '.' if the path so far is empty, since
295 	 * then we get bogus empty strings inserted as files.
296 	 */
297 	buf[pathpos - pathbufcwd] = '.';
298 	buf[pathpos - pathbufcwd + 1] = '\0';
299 	l = 0;
300     }
301     unmetafy(buf, NULL);
302     if (!st) {
303 	char lbuf[1];
304 	return access(buf, F_OK) && (!l || readlink(buf, lbuf, 1) < 0);
305     }
306     return l ? lstat(buf, st) : stat(buf, st);
307 }
308 
309 /* This may be set by qualifier functions to an array of strings to insert
310  * into the list instead of the original string. */
311 
312 static char **inserts;
313 
314 /* add a match to the list */
315 
316 /**/
317 static void
insert(char * s,int checked)318 insert(char *s, int checked)
319 {
320     struct stat buf, buf2, *bp;
321     char *news = s;
322     int statted = 0;
323 
324     queue_signals();
325     inserts = NULL;
326 
327     if (gf_listtypes || gf_markdirs) {
328 	/* Add the type marker to the end of the filename */
329 	mode_t mode;
330 	checked = statted = 1;
331 	if (statfullpath(s, &buf, 1)) {
332 	    unqueue_signals();
333 	    return;
334 	}
335 	mode = buf.st_mode;
336 	if (gf_follow) {
337 	    if (!S_ISLNK(mode) || statfullpath(s, &buf2, 0))
338 		memcpy(&buf2, &buf, sizeof(buf));
339 	    statted |= 2;
340 	    mode = buf2.st_mode;
341 	}
342 	if (gf_listtypes || S_ISDIR(mode)) {
343 	    int ll = strlen(s);
344 
345 	    news = (char *) hcalloc(ll + 2);
346 	    strcpy(news, s);
347 	    news[ll] = file_type(mode);
348 	    news[ll + 1] = '\0';
349 	}
350     }
351     if (qualct || qualorct) {
352 	/* Go through the qualifiers, rejecting the file if appropriate */
353 	struct qual *qo, *qn;
354 
355 	if (!statted && statfullpath(s, &buf, 1)) {
356 	    unqueue_signals();
357 	    return;
358 	}
359 	news = dyncat(pathbuf, news);
360 
361 	statted = 1;
362 	qo = quals;
363 	for (qn = qo; qn && qn->func;) {
364 	    g_range = qn->range;
365 	    g_amc = qn->amc;
366 	    g_units = qn->units;
367 	    if ((qn->sense & 2) && !(statted & 2)) {
368 		/* If (sense & 2), we're following links */
369 		if (!S_ISLNK(buf.st_mode) || statfullpath(s, &buf2, 0))
370 		    memcpy(&buf2, &buf, sizeof(buf));
371 		statted |= 2;
372 	    }
373 	    bp = (qn->sense & 2) ? &buf2 : &buf;
374 	    /* Reject the file if the function returned zero *
375 	     * and the sense was positive (sense&1 == 0), or *
376 	     * vice versa.                                   */
377 	    if ((!((qn->func) (news, bp, qn->data, qn->sdata))
378 		 ^ qn->sense) & 1) {
379 		/* Try next alternative, or return if there are no more */
380 		if (!(qo = qo->or)) {
381 		    unqueue_signals();
382 		    return;
383 		}
384 		qn = qo;
385 		continue;
386 	    }
387 	    qn = qn->next;
388 	}
389     } else if (!checked) {
390 	if (statfullpath(s, &buf, 1)) {
391 	    unqueue_signals();
392 	    return;
393 	}
394 	statted = 1;
395 	news = dyncat(pathbuf, news);
396     } else
397 	news = dyncat(pathbuf, news);
398 
399     while (!inserts || (news = dupstring(*inserts++))) {
400 	if (colonmod) {
401 	    /* Handle the remainder of the qualifier:  e.g. (:r:s/foo/bar/). */
402 	    char *mod = colonmod;
403 	    modify(&news, &mod, 1);
404 	}
405 	if (!statted && (gf_sorts & GS_NORMAL)) {
406 	    statfullpath(s, &buf, 1);
407 	    statted = 1;
408 	}
409 	if (!(statted & 2) && (gf_sorts & GS_LINKED)) {
410 	    if (statted) {
411 		if (!S_ISLNK(buf.st_mode) || statfullpath(s, &buf2, 0))
412 		    memcpy(&buf2, &buf, sizeof(buf));
413 	    } else if (statfullpath(s, &buf2, 0))
414 		statfullpath(s, &buf2, 1);
415 	    statted |= 2;
416 	}
417 	matchptr->name = news;
418 	if (statted & 1) {
419 	    matchptr->size = buf.st_size;
420 	    matchptr->atime = buf.st_atime;
421 	    matchptr->mtime = buf.st_mtime;
422 	    matchptr->ctime = buf.st_ctime;
423 	    matchptr->links = buf.st_nlink;
424 #ifdef GET_ST_ATIME_NSEC
425 	    matchptr->ansec = GET_ST_ATIME_NSEC(buf);
426 #endif
427 #ifdef GET_ST_MTIME_NSEC
428 	    matchptr->mnsec = GET_ST_MTIME_NSEC(buf);
429 #endif
430 #ifdef GET_ST_CTIME_NSEC
431 	    matchptr->cnsec = GET_ST_CTIME_NSEC(buf);
432 #endif
433 	}
434 	if (statted & 2) {
435 	    matchptr->_size = buf2.st_size;
436 	    matchptr->_atime = buf2.st_atime;
437 	    matchptr->_mtime = buf2.st_mtime;
438 	    matchptr->_ctime = buf2.st_ctime;
439 	    matchptr->_links = buf2.st_nlink;
440 #ifdef GET_ST_ATIME_NSEC
441 	    matchptr->_ansec = GET_ST_ATIME_NSEC(buf2);
442 #endif
443 #ifdef GET_ST_MTIME_NSEC
444 	    matchptr->_mnsec = GET_ST_MTIME_NSEC(buf2);
445 #endif
446 #ifdef GET_ST_CTIME_NSEC
447 	    matchptr->_cnsec = GET_ST_CTIME_NSEC(buf2);
448 #endif
449 	}
450 	matchptr++;
451 
452 	if (++matchct == matchsz) {
453 	    matchbuf = (Gmatch)zrealloc((char *)matchbuf,
454 					sizeof(struct gmatch) * (matchsz *= 2));
455 
456 	    matchptr = matchbuf + matchct;
457 	}
458 	if (!inserts)
459 	    break;
460     }
461     unqueue_signals();
462     return;
463 }
464 
465 /* Do the globbing:  scanner is called recursively *
466  * with successive bits of the path until we've    *
467  * tried all of it.                                */
468 
469 /**/
470 static void
scanner(Complist q,int shortcircuit)471 scanner(Complist q, int shortcircuit)
472 {
473     Patprog p;
474     int closure;
475     int pbcwdsav = pathbufcwd;
476     int errssofar = errsfound;
477     struct dirsav ds;
478 
479     if (!q || errflag)
480 	return;
481     init_dirsav(&ds);
482 
483     if ((closure = q->closure)) {
484 	/* (foo/)# - match zero or more dirs */
485 	if (q->closure == 2)	/* (foo/)## - match one or more dirs */
486 	    q->closure = 1;
487 	else {
488 	    scanner(q->next, shortcircuit);
489 	    if (shortcircuit && shortcircuit == matchct)
490 		return;
491 	}
492     }
493     p = q->pat;
494     /* Now the actual matching for the current path section. */
495     if (p->flags & PAT_PURES) {
496 	/*
497 	 * It's a straight string to the end of the path section.
498 	 */
499 	char *str = (char *)p + p->startoff;
500 	int l = p->patmlen;
501 
502 	if (l + !l + pathpos - pathbufcwd >= PATH_MAX) {
503 	    int err;
504 
505 	    if (l >= PATH_MAX)
506 		return;
507 	    err = lchdir(unmeta(pathbuf + pathbufcwd), &ds, 0);
508 	    if (err == -1)
509 		return;
510 	    if (err) {
511 		zerr("current directory lost during glob");
512 		return;
513 	    }
514 	    pathbufcwd = pathpos;
515 	}
516 	if (q->next) {
517 	    /* Not the last path section. Just add it to the path. */
518 	    int oppos = pathpos;
519 
520 	    if (!errflag) {
521 		int add = 1;
522 
523 		if (q->closure && *pathbuf) {
524 		    if (!strcmp(str, "."))
525 			add = 0;
526 		    else if (!strcmp(str, "..")) {
527 			struct stat sc, sr;
528 
529 			add = (stat("/", &sr) || stat(unmeta(pathbuf), &sc) ||
530 			       sr.st_ino != sc.st_ino ||
531 			       sr.st_dev != sc.st_dev);
532 		    }
533 		}
534 		if (add) {
535 		    addpath(str, l);
536 		    if (!closure || !statfullpath("", NULL, 1)) {
537 			scanner((q->closure) ? q : q->next, shortcircuit);
538 			if (shortcircuit && shortcircuit == matchct)
539 			    return;
540 		    }
541 		    pathbuf[pathpos = oppos] = '\0';
542 		}
543 	    }
544 	} else {
545 	    if (str[l])
546 		str = dupstrpfx(str, l);
547 	    insert(str, 0);
548 	    if (shortcircuit && shortcircuit == matchct)
549 		return;
550 	}
551     } else {
552 	/* Do pattern matching on current path section. */
553 	char *fn = pathbuf[pathbufcwd] ? unmeta(pathbuf + pathbufcwd) : ".";
554 	int dirs = !!q->next;
555 	DIR *lock = opendir(fn);
556 	char *subdirs = NULL;
557 	int subdirlen = 0;
558 
559 	if (lock == NULL)
560 	    return;
561 	while ((fn = zreaddir(lock, 1)) && !errflag) {
562 	    /* prefix and suffix are zle trickery */
563 	    if (!dirs && !colonmod &&
564 		((glob_pre && !strpfx(glob_pre, fn))
565 		 || (glob_suf && !strsfx(glob_suf, fn))))
566 		continue;
567 	    errsfound = errssofar;
568 	    if (pattry(p, fn)) {
569 		/* if this name matches the pattern... */
570 		if (pbcwdsav == pathbufcwd &&
571 		    strlen(fn) + pathpos - pathbufcwd >= PATH_MAX) {
572 		    int err;
573 
574 		    DPUTS(pathpos == pathbufcwd,
575 			  "BUG: filename longer than PATH_MAX");
576 		    err = lchdir(unmeta(pathbuf + pathbufcwd), &ds, 0);
577 		    if (err == -1)
578 			break;
579 		    if (err) {
580 			zerr("current directory lost during glob");
581 			break;
582 		    }
583 		    pathbufcwd = pathpos;
584 		}
585 		if (dirs) {
586 		    int l;
587 
588 		    /*
589 		     * If not the last component in the path:
590 		     *
591 		     * If we made an approximation in the new path segment,
592 		     * then it is possible we made too many errors.  For
593 		     * example, (ab)#(cb)# will match the directory abcb
594 		     * with one error if allowed to, even though it can
595 		     * match with none.  This will stop later parts of the
596 		     * path matching, so we need to check by reducing the
597 		     * maximum number of errors and seeing if the directory
598 		     * still matches.  Luckily, this is not a terribly
599 		     * common case, since complex patterns typically occur
600 		     * in the last part of the path which is not affected
601 		     * by this problem.
602 		     */
603 		    if (errsfound > errssofar) {
604 			forceerrs = errsfound - 1;
605 			while (forceerrs >= errssofar) {
606 			    errsfound = errssofar;
607 			    if (!pattry(p, fn))
608 				break;
609 			    forceerrs = errsfound - 1;
610 			}
611 			errsfound = forceerrs + 1;
612 			forceerrs = -1;
613 		    }
614 		    if (closure) {
615 			/* if matching multiple directories */
616 			struct stat buf;
617 
618 			if (statfullpath(fn, &buf, !q->follow)) {
619 			    if (errno != ENOENT && errno != EINTR &&
620 				errno != ENOTDIR && !errflag) {
621 				zwarn("%e: %s", errno, fn);
622 			    }
623 			    continue;
624 			}
625 			if (!S_ISDIR(buf.st_mode))
626 			    continue;
627 		    }
628 		    l = strlen(fn) + 1;
629 		    subdirs = hrealloc(subdirs, subdirlen, subdirlen + l
630 				       + sizeof(int));
631 		    strcpy(subdirs + subdirlen, fn);
632 		    subdirlen += l;
633 		    /* store the count of errors made so far, too */
634 		    memcpy(subdirs + subdirlen, (char *)&errsfound,
635 			   sizeof(int));
636 		    subdirlen += sizeof(int);
637 		} else {
638 		    /* if the last filename component, just add it */
639 		    insert(fn, 1);
640 		    if (shortcircuit && shortcircuit == matchct) {
641 			closedir(lock);
642 			return;
643 		    }
644 		}
645 	    }
646 	}
647 	closedir(lock);
648 	if (subdirs) {
649 	    int oppos = pathpos;
650 
651 	    for (fn = subdirs; fn < subdirs+subdirlen; ) {
652 		int l = strlen(fn);
653 		addpath(fn, l);
654 		fn += l + 1;
655 		memcpy((char *)&errsfound, fn, sizeof(int));
656 		fn += sizeof(int);
657 		/* scan next level */
658 		scanner((q->closure) ? q : q->next, shortcircuit);
659 		if (shortcircuit && shortcircuit == matchct)
660 		    return;
661 		pathbuf[pathpos = oppos] = '\0';
662 	    }
663 	    hrealloc(subdirs, subdirlen, 0);
664 	}
665     }
666     if (pbcwdsav < pathbufcwd) {
667 	if (restoredir(&ds))
668 	    zerr("current directory lost during glob");
669 	zsfree(ds.dirname);
670 	if (ds.dirfd >= 0)
671 	    close(ds.dirfd);
672 	pathbufcwd = pbcwdsav;
673     }
674     return;
675 }
676 
677 /* This function tokenizes a zsh glob pattern */
678 
679 /**/
680 static Complist
parsecomplist(char * instr)681 parsecomplist(char *instr)
682 {
683     Patprog p1;
684     Complist l1;
685     char *str;
686     int compflags = gf_noglobdots ? (PAT_FILE|PAT_NOGLD) : PAT_FILE;
687 
688     if (instr[0] == Star && instr[1] == Star) {
689 	int shortglob = 0;
690         if (instr[2] == '/' || (instr[2] == Star && instr[3] == '/')
691 	    || (shortglob = isset(GLOBSTARSHORT))) {
692 	    /* Match any number of directories. */
693 	    int follow;
694 
695 	    /* with three stars, follow symbolic links */
696 	    follow = (instr[2] == Star);
697 	    /*
698 	     * With GLOBSTARSHORT, leave a star in place for the
699 	     * pattern inside the directory.
700 	     */
701 	    instr += ((shortglob ? 1 : 3) + follow);
702 
703 	    /* Now get the next path component if there is one. */
704 	    l1 = (Complist) zhalloc(sizeof *l1);
705 	    if ((l1->next = parsecomplist(instr)) == NULL) {
706 		errflag |= ERRFLAG_ERROR;
707 		return NULL;
708 	    }
709 	    l1->pat = patcompile(NULL, compflags | PAT_ANY, NULL);
710 	    l1->closure = 1;		/* ...zero or more times. */
711 	    l1->follow = follow;
712 	    return l1;
713 	}
714     }
715 
716     /* Parse repeated directories such as (dir/)# and (dir/)## */
717     if (*(str = instr) == zpc_special[ZPC_INPAR] &&
718 	!skipparens(Inpar, Outpar, (char **)&str) &&
719         *str == zpc_special[ZPC_HASH] && str[-2] == '/') {
720 	instr++;
721 	if (!(p1 = patcompile(instr, compflags, &instr)))
722 	    return NULL;
723 	if (instr[0] == '/' && instr[1] == Outpar && instr[2] == Pound) {
724 	    int pdflag = 0;
725 
726 	    instr += 3;
727 	    if (*instr == Pound) {
728 		pdflag = 1;
729 		instr++;
730 	    }
731 	    l1 = (Complist) zhalloc(sizeof *l1);
732 	    l1->pat = p1;
733 	    /* special case (/)# to avoid infinite recursion */
734 	    l1->closure = (*((char *)p1 + p1->startoff)) ? 1 + pdflag : 0;
735 	    l1->follow = 0;
736 	    l1->next = parsecomplist(instr);
737 	    return (l1->pat) ? l1 : NULL;
738 	}
739     } else {
740 	/* parse single path component */
741 	if (!(p1 = patcompile(instr, compflags|PAT_FILET, &instr)))
742 	    return NULL;
743 	/* then do the remaining path components */
744 	if (*instr == '/' || !*instr) {
745 	    int ef = *instr == '/';
746 
747 	    l1 = (Complist) zhalloc(sizeof *l1);
748 	    l1->pat = p1;
749 	    l1->closure = 0;
750 	    l1->next = ef ? parsecomplist(instr+1) : NULL;
751 	    return (ef && !l1->next) ? NULL : l1;
752 	}
753     }
754     errflag |= ERRFLAG_ERROR;
755     return NULL;
756 }
757 
758 /* turn a string into a Complist struct:  this has path components */
759 
760 /**/
761 static Complist
parsepat(char * str)762 parsepat(char *str)
763 {
764     long assert;
765     int ignore;
766 
767     patcompstart();
768     /*
769      * Check for initial globbing flags, so that they don't form
770      * a bogus path component.
771      */
772     if ((*str == zpc_special[ZPC_INPAR] && str[1] == zpc_special[ZPC_HASH]) ||
773 	(*str == zpc_special[ZPC_KSH_AT] && str[1] == Inpar &&
774 	 str[2] == zpc_special[ZPC_HASH])) {
775 	str += (*str == Inpar) ? 2 : 3;
776 	if (!patgetglobflags(&str, &assert, &ignore))
777 	    return NULL;
778     }
779 
780     /* Now there is no (#X) in front, we can check the path. */
781     if (!pathbuf)
782 	pathbuf = zalloc(pathbufsz = PATH_MAX+1);
783     DPUTS(pathbufcwd, "BUG: glob changed directory");
784     if (*str == '/') {		/* pattern has absolute path */
785 	str++;
786 	pathbuf[0] = '/';
787 	pathbuf[pathpos = 1] = '\0';
788     } else			/* pattern is relative to pwd */
789 	pathbuf[pathpos = 0] = '\0';
790 
791     return parsecomplist(str);
792 }
793 
794 /* get number after qualifier */
795 
796 /**/
797 static off_t
qgetnum(char ** s)798 qgetnum(char **s)
799 {
800     off_t v = 0;
801 
802     if (!idigit(**s)) {
803 	zerr("number expected");
804 	return 0;
805     }
806     while (idigit(**s))
807 	v = v * 10 + *(*s)++ - '0';
808     return v;
809 }
810 
811 /* get mode spec after qualifier */
812 
813 /**/
814 static zlong
qgetmodespec(char ** s)815 qgetmodespec(char **s)
816 {
817     zlong yes = 0, no = 0, val, mask, t;
818     char *p = *s, c, how, end;
819 
820     if ((c = *p) == '=' || c == Equals || c == '+' || c == '-' ||
821 	c == '?' || c == Quest || (c >= '0' && c <= '7')) {
822 	end = 0;
823 	c = 0;
824     } else {
825 	end = (c == '<' ? '>' :
826 	       (c == '[' ? ']' :
827 		(c == '{' ? '}' :
828 		 (c == Inang ? Outang :
829 		  (c == Inbrack ? Outbrack :
830 		   (c == Inbrace ? Outbrace : c))))));
831 	p++;
832     }
833     do {
834 	mask = 0;
835 	while (((c = *p) == 'u' || c == 'g' || c == 'o' || c == 'a') && end) {
836 	    switch (c) {
837 	    case 'o': mask |= 01007; break;
838 	    case 'g': mask |= 02070; break;
839 	    case 'u': mask |= 04700; break;
840 	    case 'a': mask |= 07777; break;
841 	    }
842 	    p++;
843 	}
844 	how = ((c == '+' || c == '-') ? c : '=');
845 	if (c == '+' || c == '-' || c == '=' || c == Equals)
846 	    p++;
847 	val = 0;
848 	if (mask) {
849 	    while ((c = *p++) != ',' && c != end) {
850 		switch (c) {
851 		case 'x': val |= 00111; break;
852 		case 'w': val |= 00222; break;
853 		case 'r': val |= 00444; break;
854 		case 's': val |= 06000; break;
855 		case 't': val |= 01000; break;
856 		case '0': case '1': case '2': case '3':
857 		case '4': case '5': case '6': case '7':
858 		    t = ((zlong) c - '0');
859 		    val |= t | (t << 3) | (t << 6);
860 		    break;
861 		default:
862 		    zerr("invalid mode specification");
863 		    return 0;
864 		}
865 	    }
866 	    if (how == '=' || how == '+') {
867 		yes |= val & mask;
868 		val = ~val;
869 	    }
870 	    if (how == '=' || how == '-')
871 		no |= val & mask;
872 	} else if (!(end && c == end) && c != ',' && c) {
873 	    t = 07777;
874 	    while ((c = *p) == '?' || c == Quest ||
875 		   (c >= '0' && c <= '7')) {
876 		if (c == '?' || c == Quest) {
877 		    t = (t << 3) | 7;
878 		    val <<= 3;
879 		} else {
880 		    t <<= 3;
881 		    val = (val << 3) | ((zlong) c - '0');
882 		}
883 		p++;
884 	    }
885 	    if (end && c != end && c != ',') {
886 		zerr("invalid mode specification");
887 		return 0;
888 	    }
889 	    if (how == '=') {
890 		yes = (yes & ~t) | val;
891 		no = (no & ~t) | (~val & ~t);
892 	    } else if (how == '+')
893 		yes |= val;
894 	    else
895 		no |= val;
896 	} else {
897 	    zerr("invalid mode specification");
898 	    return 0;
899         }
900     } while (end && c != end);
901 
902     *s = p;
903     return ((yes & 07777) | ((no & 07777) << 12));
904 }
905 
906 static int
gmatchcmp(Gmatch a,Gmatch b)907 gmatchcmp(Gmatch a, Gmatch b)
908 {
909     int i;
910     off_t r = 0L;
911     struct globsort *s;
912     char **asortstrp = NULL, **bsortstrp = NULL;
913 
914     for (i = gf_nsorts, s = gf_sortlist; i; i--, s++) {
915 	switch (s->tp & ~GS_DESC) {
916 	case GS_NAME:
917 	    r = zstrcmp(b->uname, a->uname,
918 			gf_numsort ? SORTIT_NUMERICALLY : 0);
919 	    break;
920 	case GS_DEPTH:
921 	    {
922 		char *aptr = a->name, *bptr = b->name;
923 		int slasha = 0, slashb = 0;
924 		/* Count slashes.  Trailing slashes don't count. */
925 		while (*aptr && *aptr == *bptr)
926 		    aptr++, bptr++;
927 		/* Like I just said... */
928 		if ((!*aptr || !*bptr) && aptr > a->name && aptr[-1] == '/')
929 		    aptr--, bptr--;
930 		if (*aptr)
931 		    for (; aptr[1]; aptr++)
932 			if (*aptr == '/') {
933 			    slasha = 1;
934 			    break;
935 			}
936 		if (*bptr)
937 		    for (; bptr[1]; bptr++)
938 			if (*bptr == '/') {
939 			    slashb = 1;
940 			    break;
941 			}
942 		r = slasha - slashb;
943 	    }
944 	    break;
945 	case GS_EXEC:
946 	    if (!asortstrp) {
947 		asortstrp = a->sortstrs;
948 		bsortstrp = b->sortstrs;
949 	    } else {
950 		asortstrp++;
951 		bsortstrp++;
952 	    }
953 	    r = zstrcmp(*bsortstrp, *asortstrp,
954 			gf_numsort ? SORTIT_NUMERICALLY : 0);
955 	    break;
956 	case GS_SIZE:
957 	    r = b->size - a->size;
958 	    break;
959 	case GS_ATIME:
960 	    r = a->atime - b->atime;
961 #ifdef GET_ST_ATIME_NSEC
962             if (!r)
963               r = a->ansec - b->ansec;
964 #endif
965 	    break;
966 	case GS_MTIME:
967 	    r = a->mtime - b->mtime;
968 #ifdef GET_ST_MTIME_NSEC
969             if (!r)
970               r = a->mnsec - b->mnsec;
971 #endif
972 	    break;
973 	case GS_CTIME:
974 	    r = a->ctime - b->ctime;
975 #ifdef GET_ST_CTIME_NSEC
976             if (!r)
977               r = a->cnsec - b->cnsec;
978 #endif
979 	    break;
980 	case GS_LINKS:
981 	    r = b->links - a->links;
982 	    break;
983 	case GS__SIZE:
984 	    r = b->_size - a->_size;
985 	    break;
986 	case GS__ATIME:
987 	    r = a->_atime - b->_atime;
988 #ifdef GET_ST_ATIME_NSEC
989             if (!r)
990               r = a->_ansec - b->_ansec;
991 #endif
992 	    break;
993 	case GS__MTIME:
994 	    r = a->_mtime - b->_mtime;
995 #ifdef GET_ST_MTIME_NSEC
996             if (!r)
997               r = a->_mnsec - b->_mnsec;
998 #endif
999 	    break;
1000 	case GS__CTIME:
1001 	    r = a->_ctime - b->_ctime;
1002 #ifdef GET_ST_CTIME_NSEC
1003             if (!r)
1004               r = a->_cnsec - b->_cnsec;
1005 #endif
1006 	    break;
1007 	case GS__LINKS:
1008 	    r = b->_links - a->_links;
1009 	    break;
1010 	}
1011 	if (r)
1012 	    return (s->tp & GS_DESC) ?
1013 	      (r < 0L ? 1 : -1) :
1014 	      (r > 0L ? 1 : -1);
1015     }
1016     return 0;
1017 }
1018 
1019 /*
1020  * Duplicate a list of qualifiers using the `next' linkage (not the
1021  * `or' linkage).  Return the head element and set *last (if last non-NULL)
1022  * to point to the last element of the new list.  All allocation is on the
1023  * heap (or off the heap?)
1024  */
dup_qual_list(struct qual * orig,struct qual ** lastp)1025 static struct qual *dup_qual_list(struct qual *orig, struct qual **lastp)
1026 {
1027     struct qual *qfirst = NULL, *qlast = NULL;
1028 
1029     while (orig) {
1030 	struct qual *qnew = (struct qual *)zhalloc(sizeof(struct qual));
1031 	*qnew = *orig;
1032 	qnew->next = qnew->or = NULL;
1033 
1034 	if (!qfirst)
1035 	    qfirst = qnew;
1036 	if (qlast)
1037 	    qlast->next = qnew;
1038 	qlast = qnew;
1039 
1040 	orig = orig->next;
1041     }
1042 
1043     if (lastp)
1044 	*lastp = qlast;
1045     return qfirst;
1046 }
1047 
1048 
1049 /*
1050  * Get a glob string for execution, following e, P or + qualifiers.
1051  * Pointer is character after the e, P or +.
1052  */
1053 
1054 /**/
1055 static char *
glob_exec_string(char ** sp)1056 glob_exec_string(char **sp)
1057 {
1058     char sav, *tt, *sdata, *s = *sp;
1059     int plus;
1060 
1061     if (s[-1] == '+') {
1062 	plus = 0;
1063 	tt = itype_end(s, IIDENT, 0);
1064 	if (tt == s)
1065 	{
1066 	    zerr("missing identifier after `+'");
1067 	    return NULL;
1068 	}
1069     } else {
1070 	tt = get_strarg(s, &plus);
1071 	if (!*tt)
1072 	{
1073 	    zerr("missing end of string");
1074 	    return NULL;
1075 	}
1076     }
1077 
1078     sav = *tt;
1079     *tt = '\0';
1080     sdata = dupstring(s + plus);
1081     untokenize(sdata);
1082     *tt = sav;
1083     if (sav)
1084 	*sp = tt + plus;
1085     else
1086 	*sp = tt;
1087 
1088     return sdata;
1089 }
1090 
1091 /*
1092  * Insert a glob match.
1093  * If there were words to prepend given by the P glob qualifier, do so.
1094  */
1095 static void
insert_glob_match(LinkList list,LinkNode next,char * data)1096 insert_glob_match(LinkList list, LinkNode next, char *data)
1097 {
1098     if (gf_pre_words) {
1099 	LinkNode added;
1100 	for (added = firstnode(gf_pre_words); added; incnode(added)) {
1101 	    next = insertlinknode(list, next, dupstring(getdata(added)));
1102 	}
1103     }
1104 
1105     next = insertlinknode(list, next, data);
1106 
1107     if (gf_post_words) {
1108 	LinkNode added;
1109 	for (added = firstnode(gf_post_words); added; incnode(added)) {
1110 	    next = insertlinknode(list, next, dupstring(getdata(added)));
1111 	}
1112     }
1113 }
1114 
1115 /*
1116  * Return
1117  *   1 if str ends in bare glob qualifiers
1118  *   2 if str ends in non-bare glob qualifiers (#q)
1119  *   0 otherwise.
1120  *
1121  * str is the string to check.
1122  * sl is its length (to avoid recalculation).
1123  * nobareglob is 1 if bare glob qualifiers are not allowed.
1124  * *sp, if sp is not null, will be a pointer to the opening parenthesis.
1125  */
1126 
1127 /**/
1128 int
checkglobqual(char * str,int sl,int nobareglob,char ** sp)1129 checkglobqual(char *str, int sl, int nobareglob, char **sp)
1130 {
1131     char *s;
1132     int paren, ret = 1;
1133 
1134     if (str[sl - 1] != Outpar)
1135 	return 0;
1136 
1137     /* Check these are really qualifiers, not a set of *
1138      * alternatives or exclusions.  We can be more     *
1139      * lenient with an explicit (#q) than with a bare  *
1140      * set of qualifiers.                              */
1141     paren = 0;
1142     for (s = str + sl - 2; *s && (*s != Inpar || paren); s--) {
1143 	switch (*s) {
1144 	case Outpar:
1145 	    paren++; /*FALLTHROUGH*/
1146 	case Bar:
1147 	    if (!zpc_disables[ZPC_BAR])
1148 		nobareglob = 1;
1149 	    break;
1150 	case Tilde:
1151 	    if (isset(EXTENDEDGLOB) && !zpc_disables[ZPC_TILDE])
1152 		nobareglob = 1;
1153 	    break;
1154 	case Inpar:
1155 	    paren--;
1156 	    break;
1157 	}
1158 	if (s == str)
1159 	    break;
1160     }
1161     if (*s != Inpar)
1162 	return 0;
1163     if (isset(EXTENDEDGLOB) && !zpc_disables[ZPC_HASH] && s[1] == Pound) {
1164 	if (s[2] != 'q')
1165 	    return 0;
1166 	ret = 2;
1167     } else if (nobareglob)
1168 	return 0;
1169 
1170     if (sp)
1171 	*sp = s;
1172 
1173     return ret;
1174 }
1175 
1176 /* Main entry point to the globbing code for filename globbing. *
1177  * np points to a node in the list which will be expanded  *
1178  * into a series of nodes.                                      */
1179 
1180 /**/
1181 void
zglob(LinkList list,LinkNode np,int nountok)1182 zglob(LinkList list, LinkNode np, int nountok)
1183 {
1184     struct qual *qo, *qn, *ql;
1185     LinkNode node = prevnode(np);
1186     char *str;				/* the pattern                   */
1187     int sl;				/* length of the pattern         */
1188     Complist q;				/* pattern after parsing         */
1189     char *ostr = (char *)getdata(np);	/* the pattern before the parser */
1190 					/* chops it up                   */
1191     int first = 0, end = -1;		/* index of first match to return */
1192 					/* and index+1 of the last match */
1193     struct globdata saved;		/* saved glob state              */
1194     int nobareglob = !isset(BAREGLOBQUAL);
1195     int shortcircuit = 0;		/* How many files to match;      */
1196 					/* 0 means no limit              */
1197 
1198     if (unset(GLOBOPT) || !haswilds(ostr) || unset(EXECOPT)) {
1199 	if (!nountok)
1200 	    untokenize(ostr);
1201 	return;
1202     }
1203     save_globstate(saved);
1204 
1205     str = dupstring(ostr);
1206     uremnode(list, np);
1207 
1208     /* quals will hold the complete list of qualifiers (file static). */
1209     quals = NULL;
1210     /*
1211      * qualct and qualorct indicate we have qualifiers in the last
1212      * alternative, or a set of alternatives, respectively.  They
1213      * are not necessarily an accurate count, however.
1214      */
1215     qualct = qualorct = 0;
1216     /*
1217      * colonmod is a concatenated list of all colon modifiers found in
1218      * all sets of qualifiers.
1219      */
1220     colonmod = NULL;
1221     /* The gf_* flags are qualifiers which are applied globally. */
1222     gf_nullglob = isset(NULLGLOB);
1223     gf_markdirs = isset(MARKDIRS);
1224     gf_listtypes = gf_follow = 0;
1225     gf_noglobdots = unset(GLOBDOTS);
1226     gf_numsort = isset(NUMERICGLOBSORT);
1227     gf_sorts = gf_nsorts = 0;
1228     gf_pre_words = gf_post_words = NULL;
1229 
1230     /* Check for qualifiers */
1231     while (!nobareglob ||
1232 	   (isset(EXTENDEDGLOB) && !zpc_disables[ZPC_HASH])) {
1233 	struct qual *newquals;
1234 	char *s;
1235 	int sense, qualsfound;
1236 	off_t data;
1237 	char *sdata, *newcolonmod, *ptr;
1238 	int (*func) _((char *, Statptr, off_t, char *));
1239 
1240 	/*
1241 	 * Initialise state variables for current file pattern.
1242 	 * newquals is the root for the linked list of all qualifiers.
1243 	 * qo is the root of the current list of alternatives.
1244 	 * ql is the end of the current alternative where the `next' will go.
1245 	 * qn is the current qualifier node to be added.
1246 	 *
1247 	 * Here is an attempt at a diagram.  An `or' is added horizontally
1248 	 * to the top line, a `next' at the bottom of the right hand line.
1249 	 * `qn' is usually NULL unless a new `or' has just been added.
1250 	 *
1251 	 * quals -> x  -> x -> qo
1252 	 *          |     |    |
1253 	 *          x     x    x
1254 	 *          |          |
1255 	 *          x          ql
1256 	 *
1257 	 * In fact, after each loop the complete set is in the file static
1258 	 * `quals'.  Then, if we have a second set of qualifiers, we merge
1259 	 * the lists together.  This is only tricky if one or both have an
1260 	 * `or' in them; then we need to distribute over all alternatives.
1261 	 */
1262 	newquals = qo = qn = ql = NULL;
1263 
1264 	sl = strlen(str);
1265 	if (!(qualsfound = checkglobqual(str, sl, nobareglob, &s)))
1266 	    break;
1267 
1268 	/* Real qualifiers found. */
1269 	nobareglob = 1;
1270 	sense = 0;	   /* bit 0 for match (0)/don't match (1)   */
1271 			   /* bit 1 for follow links (2), don't (0) */
1272 	data = 0;	   /* Any numerical argument required       */
1273 	sdata = NULL;	   /* Any list argument required            */
1274 	newcolonmod = NULL; /* Contains trailing colon modifiers    */
1275 
1276 	str[sl-1] = 0;
1277 	*s++ = 0;
1278 	if (qualsfound == 2)
1279 	    s += 2;
1280 	for (ptr = s; *ptr; ptr++)
1281 	    if (*ptr == Dash)
1282 		*ptr = '-';
1283 	while (*s && !newcolonmod) {
1284 	    func = (int (*) _((char *, Statptr, off_t, char *)))0;
1285 	    if (*s == ',') {
1286 		/* A comma separates alternative sets of qualifiers */
1287 		s++;
1288 		sense = 0;
1289 		if (qualct) {
1290 		    qn = (struct qual *)hcalloc(sizeof *qn);
1291 		    qo->or = qn;
1292 		    qo = qn;
1293 		    qualorct++;
1294 		    qualct = 0;
1295 		    ql = NULL;
1296 		}
1297 	    } else {
1298 		switch (*s++) {
1299 		case ':':
1300 		    /* Remaining arguments are history-type     *
1301 		     * colon substitutions, handled separately. */
1302 		    newcolonmod = s - 1;
1303 		    untokenize(newcolonmod);
1304 		    if (colonmod) {
1305 			/* remember we're searching backwards */
1306 			colonmod = dyncat(newcolonmod, colonmod);
1307 		    } else
1308 			colonmod = newcolonmod;
1309 		    break;
1310 		case Hat:
1311 		case '^':
1312 		    /* Toggle sense:  go from positive to *
1313 		     * negative match and vice versa.     */
1314 		    sense ^= 1;
1315 		    break;
1316 		case '-':
1317 		case Dash:
1318 		    /* Toggle matching of symbolic links */
1319 		    sense ^= 2;
1320 		    break;
1321 		case '@':
1322 		    /* Match symbolic links */
1323 		    func = qualislnk;
1324 		    break;
1325 		case Equals:
1326 		case '=':
1327 		    /* Match sockets */
1328 		    func = qualissock;
1329 		    break;
1330 		case 'p':
1331 		    /* Match named pipes */
1332 		    func = qualisfifo;
1333 		    break;
1334 		case '/':
1335 		    /* Match directories */
1336 		    func = qualisdir;
1337 		    break;
1338 		case '.':
1339 		    /* Match regular files */
1340 		    func = qualisreg;
1341 		    break;
1342 		case '%':
1343 		    /* Match special files: block, *
1344 		     * character or any device     */
1345 		    if (*s == 'b')
1346 			s++, func = qualisblk;
1347 		    else if (*s == 'c')
1348 			s++, func = qualischr;
1349 		    else
1350 			func = qualisdev;
1351 		    break;
1352 		case Star:
1353 		    /* Match executable plain files */
1354 		    func = qualiscom;
1355 		    break;
1356 		case 'R':
1357 		    /* Match world-readable files */
1358 		    func = qualflags;
1359 		    data = 0004;
1360 		    break;
1361 		case 'W':
1362 		    /* Match world-writeable files */
1363 		    func = qualflags;
1364 		    data = 0002;
1365 		    break;
1366 		case 'X':
1367 		    /* Match world-executable files */
1368 		    func = qualflags;
1369 		    data = 0001;
1370 		    break;
1371 		case 'A':
1372 		    func = qualflags;
1373 		    data = 0040;
1374 		    break;
1375 		case 'I':
1376 		    func = qualflags;
1377 		    data = 0020;
1378 		    break;
1379 		case 'E':
1380 		    func = qualflags;
1381 		    data = 0010;
1382 		    break;
1383 		case 'r':
1384 		    /* Match files readable by current process */
1385 		    func = qualflags;
1386 		    data = 0400;
1387 		    break;
1388 		case 'w':
1389 		    /* Match files writeable by current process */
1390 		    func = qualflags;
1391 		    data = 0200;
1392 		    break;
1393 		case 'x':
1394 		    /* Match files executable by current process */
1395 		    func = qualflags;
1396 		    data = 0100;
1397 		    break;
1398 		case 's':
1399 		    /* Match setuid files */
1400 		    func = qualflags;
1401 		    data = 04000;
1402 		    break;
1403 		case 'S':
1404 		    /* Match setgid files */
1405 		    func = qualflags;
1406 		    data = 02000;
1407 		    break;
1408 		case 't':
1409 		    func = qualflags;
1410 		    data = 01000;
1411 		    break;
1412 		case 'd':
1413 		    /* Match device files by device number  *
1414 		     * (as given by stat's st_dev element). */
1415 		    func = qualdev;
1416 		    data = qgetnum(&s);
1417 		    break;
1418 		case 'l':
1419 		    /* Match files with the given no. of hard links */
1420 		    func = qualnlink;
1421 		    g_amc = -1;
1422 		    goto getrange;
1423 		case 'U':
1424 		    /* Match files owned by effective user ID */
1425 		    func = qualuid;
1426 		    data = geteuid();
1427 		    break;
1428 		case 'G':
1429 		    /* Match files owned by effective group ID */
1430 		    func = qualgid;
1431 		    data = getegid();
1432 		    break;
1433 		case 'u':
1434 		    /* Match files owned by given user id */
1435 		    func = qualuid;
1436 		    /* either the actual uid... */
1437 		    if (idigit(*s))
1438 			data = qgetnum(&s);
1439 		    else {
1440 			/* ... or a user name */
1441 			char sav, *tt;
1442 			int arglen;
1443 
1444 			/* Find matching delimiters */
1445 			tt = get_strarg(s, &arglen);
1446 			if (!*tt) {
1447 			    zerr("missing delimiter for 'u' glob qualifier");
1448 			    data = 0;
1449 			} else {
1450 #ifdef USE_GETPWNAM
1451 			    struct passwd *pw;
1452 			    sav = *tt;
1453 			    *tt = '\0';
1454 
1455 			    if ((pw = getpwnam(s + arglen)))
1456 				data = pw->pw_uid;
1457 			    else {
1458 				zerr("unknown username '%s'", s + arglen);
1459 				data = 0;
1460 			    }
1461 			    *tt = sav;
1462 #else /* !USE_GETPWNAM */
1463 			    sav = *tt;
1464 			    *tt = '\0';
1465 			    zerr("unable to resolve non-numeric username '%s'", s + arglen);
1466 			    *tt = sav;
1467 			    data = 0;
1468 #endif /* !USE_GETPWNAM */
1469 			    if (sav)
1470 				s = tt + arglen;
1471 			    else
1472 				s = tt;
1473 			}
1474 		    }
1475 		    break;
1476 		case 'g':
1477 		    /* Given gid or group id... works like `u' */
1478 		    func = qualgid;
1479 		    /* either the actual gid... */
1480 		    if (idigit(*s))
1481 			data = qgetnum(&s);
1482 		    else {
1483 			/* ...or a delimited group name. */
1484 			char sav, *tt;
1485 			int arglen;
1486 
1487 			tt = get_strarg(s, &arglen);
1488 			if (!*tt) {
1489 			    zerr("missing delimiter for 'g' glob qualifier");
1490 			    data = 0;
1491 			} else {
1492 #ifdef USE_GETGRNAM
1493 			    struct group *gr;
1494 			    sav = *tt;
1495 			    *tt = '\0';
1496 
1497 			    if ((gr = getgrnam(s + arglen)))
1498 				data = gr->gr_gid;
1499 			    else {
1500 				zerr("unknown group");
1501 				data = 0;
1502 			    }
1503 			    *tt = sav;
1504 #else /* !USE_GETGRNAM */
1505 			    sav = *tt;
1506 			    zerr("unknown group");
1507 			    data = 0;
1508 #endif /* !USE_GETGRNAM */
1509 			    if (sav)
1510 				s = tt + arglen;
1511 			    else
1512 				s = tt;
1513 			}
1514 		    }
1515 		    break;
1516 		case 'f':
1517 		    /* Match modes with chmod-spec. */
1518 		    func = qualmodeflags;
1519 		    data = qgetmodespec(&s);
1520 		    break;
1521 		case 'F':
1522 		    func = qualnonemptydir;
1523 		    break;
1524 		case 'M':
1525 		    /* Mark directories with a / */
1526 		    if ((gf_markdirs = !(sense & 1)))
1527 			gf_follow = sense & 2;
1528 		    break;
1529 		case 'T':
1530 		    /* Mark types in a `ls -F' type fashion */
1531 		    if ((gf_listtypes = !(sense & 1)))
1532 			gf_follow = sense & 2;
1533 		    break;
1534 		case 'N':
1535 		    /* Nullglob:  remove unmatched patterns. */
1536 		    gf_nullglob = !(sense & 1);
1537 		    break;
1538 		case 'D':
1539 		    /* Glob dots: match leading dots implicitly */
1540 		    gf_noglobdots = sense & 1;
1541 		    break;
1542 		case 'n':
1543 		    /* Numeric glob sort */
1544 		    gf_numsort = !(sense & 1);
1545 		    break;
1546 		case 'Y':
1547 		{
1548 		    /* Short circuit: limit number of matches */
1549 		    const char *s_saved = s;
1550 		    shortcircuit = !(sense & 1);
1551 		    if (shortcircuit) {
1552 			/* Parse the argument. */
1553 			data = qgetnum(&s);
1554 			if ((shortcircuit = data) != data) {
1555 			    /* Integer overflow */
1556 			    zerr("value too big: Y%s", s_saved);
1557 			    restore_globstate(saved);
1558 			    return;
1559 			}
1560 		    }
1561 		    break;
1562 		}
1563 		case 'a':
1564 		    /* Access time in given range */
1565 		    g_amc = 0;
1566 		    func = qualtime;
1567 		    goto getrange;
1568 		case 'm':
1569 		    /* Modification time in given range */
1570 		    g_amc = 1;
1571 		    func = qualtime;
1572 		    goto getrange;
1573 		case 'c':
1574 		    /* Inode creation time in given range */
1575 		    g_amc = 2;
1576 		    func = qualtime;
1577 		    goto getrange;
1578 		case 'L':
1579 		    /* File size (Length) in given range */
1580 		    func = qualsize;
1581 		    g_amc = -1;
1582 		    /* Get size multiplier */
1583 		    g_units = TT_BYTES;
1584 		    if (*s == 'p' || *s == 'P')
1585 			g_units = TT_POSIX_BLOCKS, ++s;
1586 		    else if (*s == 'k' || *s == 'K')
1587 			g_units = TT_KILOBYTES, ++s;
1588 		    else if (*s == 'm' || *s == 'M')
1589 			g_units = TT_MEGABYTES, ++s;
1590 #if defined(ZSH_64_BIT_TYPE) || defined(LONG_IS_64_BIT)
1591                     else if (*s == 'g' || *s == 'G')
1592                         g_units = TT_GIGABYTES, ++s;
1593                     else if (*s == 't' || *s == 'T')
1594                         g_units = TT_TERABYTES, ++s;
1595 #endif
1596 		  getrange:
1597 		    /* Get time multiplier */
1598 		    if (g_amc >= 0) {
1599 			g_units = TT_DAYS;
1600 			if (*s == 'h')
1601 			    g_units = TT_HOURS, ++s;
1602 			else if (*s == 'm')
1603 			    g_units = TT_MINS, ++s;
1604 			else if (*s == 'w')
1605 			    g_units = TT_WEEKS, ++s;
1606 			else if (*s == 'M')
1607 			    g_units = TT_MONTHS, ++s;
1608 			else if (*s == 's')
1609 			    g_units = TT_SECONDS, ++s;
1610 			else if (*s == 'd')
1611 			    ++s;
1612 		    }
1613 		    /* See if it's greater than, equal to, or less than */
1614 		    if ((g_range = *s == '+' ? 1 : IS_DASH(*s) ? -1 : 0))
1615 			++s;
1616 		    data = qgetnum(&s);
1617 		    break;
1618 
1619 		case 'o':
1620 		case 'O':
1621 		{
1622 		    int t;
1623 		    char *send;
1624 
1625 		    if (gf_nsorts == MAX_SORTS) {
1626 			zerr("too many glob sort specifiers");
1627 			restore_globstate(saved);
1628 			return;
1629 		    }
1630 
1631 		    /* usually just one character */
1632 		    send = s+1;
1633 		    switch (*s) {
1634 		    case 'n': t = GS_NAME; break;
1635 		    case 'L': t = GS_SIZE; break;
1636 		    case 'l': t = GS_LINKS; break;
1637 		    case 'a': t = GS_ATIME; break;
1638 		    case 'm': t = GS_MTIME; break;
1639 		    case 'c': t = GS_CTIME; break;
1640 		    case 'd': t = GS_DEPTH; break;
1641 		    case 'N': t = GS_NONE; break;
1642 		    case 'e':
1643 		    case '+':
1644 		    {
1645 			t = GS_EXEC;
1646 			if ((gf_sortlist[gf_nsorts].exec =
1647 			     glob_exec_string(&send)) == NULL)
1648 			{
1649 			    restore_globstate(saved);
1650 			    return;
1651 			}
1652 			break;
1653 		    }
1654 		    default:
1655 			zerr("unknown sort specifier");
1656 			restore_globstate(saved);
1657 			return;
1658 		    }
1659 		    if ((sense & 2) &&
1660 			(t & (GS_SIZE|GS_ATIME|GS_MTIME|GS_CTIME|GS_LINKS)))
1661 			t <<= GS_SHIFT; /* HERE: GS_EXEC? */
1662 		    if (t != GS_EXEC) {
1663 			if (gf_sorts & t) {
1664 			    zerr("doubled sort specifier");
1665 			    restore_globstate(saved);
1666 			    return;
1667 			}
1668 		    }
1669 		    gf_sorts |= t;
1670 		    gf_sortlist[gf_nsorts++].tp = t |
1671 			(((sense & 1) ^ (s[-1] == 'O')) ? GS_DESC : 0);
1672 		    s = send;
1673 		    break;
1674 		}
1675 		case '+':
1676 		case 'e':
1677 		{
1678 		    char *tt;
1679 
1680 		    tt = glob_exec_string(&s);
1681 
1682 		    if (tt == NULL) {
1683 			data = 0;
1684 		    } else {
1685 			func = qualsheval;
1686 			sdata = tt;
1687 		    }
1688 		    break;
1689 		}
1690 		case '[':
1691 		case Inbrack:
1692 		{
1693 		    char *os = --s;
1694 		    struct value v;
1695 
1696 		    v.isarr = SCANPM_WANTVALS;
1697 		    v.pm = NULL;
1698 		    v.end = -1;
1699 		    v.flags = 0;
1700 		    if (getindex(&s, &v, 0) || s == os) {
1701 			zerr("invalid subscript");
1702 			restore_globstate(saved);
1703 			return;
1704 		    }
1705 		    first = v.start;
1706 		    end = v.end;
1707 		    break;
1708 		}
1709 		case 'P':
1710 		{
1711 		    char *tt;
1712 		    tt = glob_exec_string(&s);
1713 
1714 		    if (tt != NULL)
1715 		    {
1716 			LinkList *words = sense & 1 ? &gf_post_words : &gf_pre_words;
1717 			if (!*words)
1718 			    *words = newlinklist();
1719 			addlinknode(*words, tt);
1720 		    }
1721 		    break;
1722 		}
1723 		default:
1724 		    untokenize(--s);
1725 		    zerr("unknown file attribute: %c", *s);
1726 		    restore_globstate(saved);
1727 		    return;
1728 		}
1729 	    }
1730 	    if (func) {
1731 		/* Requested test is performed by function func */
1732 		if (!qn)
1733 		    qn = (struct qual *)hcalloc(sizeof *qn);
1734 		if (ql)
1735 		    ql->next = qn;
1736 		ql = qn;
1737 		if (!newquals)
1738 		    newquals = qo = qn;
1739 		qn->func = func;
1740 		qn->sense = sense;
1741 		qn->data = data;
1742 		qn->sdata = sdata;
1743 		qn->range = g_range;
1744 		qn->units = g_units;
1745 		qn->amc = g_amc;
1746 
1747 		qn = NULL;
1748 		qualct++;
1749 	    }
1750 	    if (errflag) {
1751 		restore_globstate(saved);
1752 		return;
1753 	    }
1754 	}
1755 
1756 	if (quals && newquals) {
1757 	    /* Merge previous group of qualifiers with new set. */
1758 	    if (quals->or || newquals->or) {
1759 		/* The hard case. */
1760 		struct qual *qorhead = NULL, *qortail = NULL;
1761 		/*
1762 		 * Distribute in the most trivial way, by creating
1763 		 * all possible combinations of the two sets and chaining
1764 		 * these into one long set of alternatives given
1765 		 * by qorhead and qortail.
1766 		 */
1767 		for (qn = newquals; qn; qn = qn->or) {
1768 		    for (qo = quals; qo; qo = qo->or) {
1769 			struct qual *qfirst, *qlast;
1770 			int islast = !qn->or && !qo->or;
1771 			/* Generate first set of qualifiers... */
1772 			if (islast) {
1773 			    /* Last time round:  don't bother copying. */
1774 			    qfirst = qn;
1775 			    for (qlast = qfirst; qlast->next;
1776 				 qlast = qlast->next)
1777 				;
1778 			} else
1779 			    qfirst = dup_qual_list(qn, &qlast);
1780 			/* ... link into new `or' chain ... */
1781 			if (!qorhead)
1782 			    qorhead = qfirst;
1783 			if (qortail)
1784 			    qortail->or = qfirst;
1785 			qortail = qfirst;
1786 			/* ... and concatenate second set. */
1787 			qlast->next = islast ? qo : dup_qual_list(qo, NULL);
1788 		    }
1789 		}
1790 		quals = qorhead;
1791 	    } else {
1792 		/*
1793 		 * Easy: we can just chain the qualifiers together.
1794 		 * This is an optimisation; the code above will work, too.
1795 		 * We retain the original left to right ordering --- remember
1796 		 * we are searching for sets of qualifiers from the right.
1797 		 */
1798 		qn = newquals;
1799 		for ( ; newquals->next; newquals = newquals->next)
1800 		    ;
1801 		newquals->next = quals;
1802 		quals = qn;
1803 	    }
1804 	} else if (newquals)
1805 	    quals = newquals;
1806     }
1807     q = parsepat(str);
1808     if (!q || errflag) {	/* if parsing failed */
1809 	restore_globstate(saved);
1810 	if (unset(BADPATTERN)) {
1811 	    if (!nountok)
1812 		untokenize(ostr);
1813 	    insertlinknode(list, node, ostr);
1814 	    return;
1815 	}
1816 	errflag &= ~ERRFLAG_ERROR;
1817 	zerr("bad pattern: %s", ostr);
1818 	return;
1819     }
1820     if (!gf_nsorts) {
1821 	gf_sortlist[0].tp = gf_sorts = (shortcircuit ? GS_NONE : GS_NAME);
1822 	gf_nsorts = 1;
1823     }
1824     /* Initialise receptacle for matched files, *
1825      * expanded by insert() where necessary.    */
1826     matchptr = matchbuf = (Gmatch)zalloc((matchsz = 16) *
1827 					 sizeof(struct gmatch));
1828     matchct = 0;
1829     pattrystart();
1830 
1831     /* The actual processing takes place here: matches go into  *
1832      * matchbuf.  This is the only top-level call to scanner(). */
1833     scanner(q, shortcircuit);
1834 
1835     /* Deal with failures to match depending on options */
1836     if (matchct)
1837 	badcshglob |= 2;	/* at least one cmd. line expansion O.K. */
1838     else if (!gf_nullglob) {
1839 	if (isset(CSHNULLGLOB)) {
1840 	    badcshglob |= 1;	/* at least one cmd. line expansion failed */
1841 	} else if (isset(NOMATCH)) {
1842 	    zerr("no matches found: %s", ostr);
1843 	    zfree(matchbuf, 0);
1844 	    restore_globstate(saved);
1845 	    return;
1846 	} else {
1847 	    /* treat as an ordinary string */
1848 	    untokenize(matchptr->name = dupstring(ostr));
1849 	    matchptr++;
1850 	    matchct = 1;
1851 	}
1852     }
1853 
1854     if (!(gf_sortlist[0].tp & GS_NONE)) {
1855 	/*
1856 	 * Get the strings to use for sorting by executing
1857 	 * the code chunk.  We allow more than one of these.
1858 	 */
1859 	int nexecs = 0;
1860 	struct globsort *sortp;
1861 	struct globsort *lastsortp = gf_sortlist + gf_nsorts;
1862 	Gmatch gmptr;
1863 
1864 	/* First find out if there are any GS_EXECs, counting them. */
1865 	for (sortp = gf_sortlist; sortp < lastsortp; sortp++)
1866 	{
1867 	    if (sortp->tp & GS_EXEC)
1868 		nexecs++;
1869 	}
1870 
1871 	if (nexecs) {
1872 	    Gmatch tmpptr;
1873 	    int iexec = 0;
1874 
1875 	    /* Yes; allocate enough space for strings for each */
1876 	    for (tmpptr = matchbuf; tmpptr < matchptr; tmpptr++)
1877 		tmpptr->sortstrs = (char **)zhalloc(nexecs*sizeof(char*));
1878 
1879 	    /* Loop over each one, incrementing iexec */
1880 	    for (sortp = gf_sortlist; sortp < lastsortp; sortp++)
1881 	    {
1882 		/* Ignore unless this is a GS_EXEC */
1883 		if (sortp->tp & GS_EXEC) {
1884 		    Eprog prog;
1885 
1886 		    if ((prog = parse_string(sortp->exec, 0))) {
1887 			int ef = errflag, lv = lastval;
1888 
1889 			/* Parsed OK, execute for each name */
1890 			for (tmpptr = matchbuf; tmpptr < matchptr; tmpptr++) {
1891 			    setsparam("REPLY", ztrdup(tmpptr->name));
1892 			    execode(prog, 1, 0, "globsort");
1893 			    if (!errflag)
1894 				tmpptr->sortstrs[iexec] =
1895 				    dupstring(getsparam("REPLY"));
1896 			    else
1897 				tmpptr->sortstrs[iexec] = tmpptr->name;
1898 			}
1899 
1900 			/* Retain any user interrupt error status */
1901 			errflag = ef | (errflag & ERRFLAG_INT);
1902 			lastval = lv;
1903 		    } else {
1904 			/* Failed, let's be safe */
1905 			for (tmpptr = matchbuf; tmpptr < matchptr; tmpptr++)
1906 			    tmpptr->sortstrs[iexec] = tmpptr->name;
1907 		    }
1908 
1909 		    iexec++;
1910 		}
1911 	    }
1912 	}
1913 
1914 	/*
1915 	 * Where necessary, create unmetafied version of names
1916 	 * for comparison.  If no Meta characters just point
1917 	 * to original string.  All on heap.
1918 	 */
1919 	for (gmptr = matchbuf; gmptr < matchptr; gmptr++)
1920 	{
1921 	    if (strchr(gmptr->name, Meta))
1922 	    {
1923 		int dummy;
1924 		gmptr->uname = dupstring(gmptr->name);
1925 		unmetafy(gmptr->uname, &dummy);
1926 	    } else {
1927 		gmptr->uname = gmptr->name;
1928 	    }
1929 	}
1930 
1931 	/* Sort arguments in to lexical (and possibly numeric) order. *
1932 	 * This is reversed to facilitate insertion into the list.    */
1933 	qsort((void *) & matchbuf[0], matchct, sizeof(struct gmatch),
1934 	      (int (*) _((const void *, const void *)))gmatchcmp);
1935     }
1936 
1937     if (first < 0) {
1938 	first += matchct;
1939 	if (first < 0)
1940 	    first = 0;
1941     }
1942     if (end < 0)
1943 	end += matchct + 1;
1944     else if (end > matchct)
1945 	end = matchct;
1946     if ((end -= first) > 0) {
1947 	if (gf_sortlist[0].tp & GS_NONE) {
1948 	    /* Match list was never reversed, so insert back to front. */
1949 	    matchptr = matchbuf + matchct - first - 1;
1950 	    while (end-- > 0) {
1951 		/* insert matches in the arg list */
1952 		insert_glob_match(list, node, matchptr->name);
1953 		matchptr--;
1954 	    }
1955 	} else {
1956 	    matchptr = matchbuf + matchct - first - end;
1957 	    while (end-- > 0) {
1958 		/* insert matches in the arg list */
1959 		insert_glob_match(list, node, matchptr->name);
1960 		matchptr++;
1961 	    }
1962 	}
1963     } else if (!badcshglob && !isset(NOMATCH) && matchct == 1) {
1964 	insert_glob_match(list, node, (--matchptr)->name);
1965     }
1966     zfree(matchbuf, 0);
1967 
1968     restore_globstate(saved);
1969 }
1970 
1971 /* Return the trailing character for marking file types */
1972 
1973 /**/
1974 mod_export char
file_type(mode_t filemode)1975 file_type(mode_t filemode)
1976 {
1977     if(S_ISBLK(filemode))
1978 	return '#';
1979     else if(S_ISCHR(filemode))
1980 	return '%';
1981     else if(S_ISDIR(filemode))
1982 	return '/';
1983     else if(S_ISFIFO(filemode))
1984 	return '|';
1985     else if(S_ISLNK(filemode))
1986 	return '@';
1987     else if(S_ISREG(filemode))
1988 	return (filemode & S_IXUGO) ? '*' : ' ';
1989     else if(S_ISSOCK(filemode))
1990 	return '=';
1991     else
1992 	return '?';
1993 }
1994 
1995 /* check to see if str is eligible for brace expansion */
1996 
1997 /**/
1998 mod_export int
hasbraces(char * str)1999 hasbraces(char *str)
2000 {
2001     char *lbr, *mbr, *comma;
2002 
2003     if (isset(BRACECCL)) {
2004 	/* In this case, any properly formed brace expression  *
2005 	 * will match and expand to the characters in between. */
2006 	int bc, c;
2007 
2008 	for (bc = 0; (c = *str); ++str)
2009 	    if (c == Inbrace) {
2010 		if (!bc && str[1] == Outbrace)
2011 		    *str++ = '{', *str = '}';
2012 		else
2013 		    bc++;
2014 	    } else if (c == Outbrace) {
2015 		if (!bc)
2016 		    *str = '}';
2017 		else if (!--bc)
2018 		    return 1;
2019 	    }
2020 	return 0;
2021     }
2022     /* Otherwise we need to look for... */
2023     lbr = mbr = comma = NULL;
2024     for (;;) {
2025 	switch (*str++) {
2026 	case Inbrace:
2027 	    if (!lbr) {
2028 		if (bracechardots(str-1, NULL, NULL))
2029 		    return 1;
2030 		lbr = str - 1;
2031 		if (IS_DASH(*str))
2032 		    str++;
2033 		while (idigit(*str))
2034 		    str++;
2035 		if (*str == '.' && str[1] == '.') {
2036 		    str++; str++;
2037 		    if (IS_DASH(*str))
2038 			str++;
2039 		    while (idigit(*str))
2040 			str++;
2041 		    if (*str == Outbrace &&
2042 			(idigit(lbr[1]) || idigit(str[-1])))
2043 			return 1;
2044 		    else if (*str == '.' && str[1] == '.') {
2045 			str++; str++;
2046 			if (IS_DASH(*str))
2047 			    str++;
2048 			while (idigit(*str))
2049 			    str++;
2050 			if (*str == Outbrace &&
2051 			    (idigit(lbr[1]) || idigit(str[-1])))
2052 			    return 1;
2053 		    }
2054 		}
2055 	    } else {
2056 		char *s = --str;
2057 
2058 		if (skipparens(Inbrace, Outbrace, &str)) {
2059 		    *lbr = *s = '{';
2060 		    if (comma)
2061 			str = comma;
2062 		    if (mbr && mbr < str)
2063 			str = mbr;
2064 		    lbr = mbr = comma = NULL;
2065 		} else if (!mbr)
2066 		    mbr = s;
2067 	    }
2068 	    break;
2069 	case Outbrace:
2070 	    if (!lbr)
2071 		str[-1] = '}';
2072 	    else if (comma)
2073 		return 1;
2074 	    else {
2075 		*lbr = '{';
2076 		str[-1] = '}';
2077 		if (mbr)
2078 		    str = mbr;
2079 		mbr = lbr = NULL;
2080 	    }
2081 	    break;
2082 	case Comma:
2083 	    if (!lbr)
2084 		str[-1] = ',';
2085 	    else if (!comma)
2086 		comma = str - 1;
2087 	    break;
2088 	case '\0':
2089 	    if (lbr)
2090 		*lbr = '{';
2091 	    if (!mbr && !comma)
2092 		return 0;
2093 	    if (comma)
2094 		str = comma;
2095 	    if (mbr && mbr < str)
2096 		str = mbr;
2097 	    lbr = mbr = comma = NULL;
2098 	    break;
2099 	}
2100     }
2101 }
2102 
2103 /* expand stuff like >>*.c */
2104 
2105 /**/
2106 int
xpandredir(struct redir * fn,LinkList redirtab)2107 xpandredir(struct redir *fn, LinkList redirtab)
2108 {
2109     char *nam;
2110     struct redir *ff;
2111     int ret = 0;
2112     local_list1(fake);
2113 
2114     /* Stick the name in a list... */
2115     init_list1(fake, fn->name);
2116     /* ...which undergoes all the usual shell expansions */
2117     prefork(&fake, isset(MULTIOS) ? 0 : PREFORK_SINGLE, NULL);
2118     /* Globbing is only done for multios. */
2119     if (!errflag && isset(MULTIOS))
2120 	globlist(&fake, 0);
2121     if (errflag)
2122 	return 0;
2123     if (nonempty(&fake) && !nextnode(firstnode(&fake))) {
2124 	/* Just one match, the usual case. */
2125 	char *s = peekfirst(&fake);
2126 	fn->name = s;
2127 	untokenize(s);
2128 	if (fn->type == REDIR_MERGEIN || fn->type == REDIR_MERGEOUT) {
2129 	    if (IS_DASH(s[0]) && !s[1])
2130 		fn->type = REDIR_CLOSE;
2131 	    else if (s[0] == 'p' && !s[1])
2132 		fn->fd2 = -2;
2133 	    else {
2134 		while (idigit(*s))
2135 		    s++;
2136 		if (!*s && s > fn->name)
2137 		    fn->fd2 = zstrtol(fn->name, NULL, 10);
2138 		else if (fn->type == REDIR_MERGEIN)
2139 		    zerr("file number expected");
2140 		else
2141 		    fn->type = REDIR_ERRWRITE;
2142 	    }
2143 	}
2144     } else if (fn->type == REDIR_MERGEIN)
2145 	zerr("file number expected");
2146     else {
2147 	if (fn->type == REDIR_MERGEOUT)
2148 	    fn->type = REDIR_ERRWRITE;
2149 	while ((nam = (char *)ugetnode(&fake))) {
2150 	    /* Loop over matches, duplicating the *
2151 	     * redirection for each file found.   */
2152 	    ff = (struct redir *) zhalloc(sizeof *ff);
2153 	    *ff = *fn;
2154 	    ff->name = nam;
2155 	    addlinknode(redirtab, ff);
2156 	    ret = 1;
2157 	}
2158     }
2159     return ret;
2160 }
2161 
2162 /*
2163  * Check for a brace expansion of the form {<char>..<char>}.
2164  * On input str must be positioned at an Inbrace, but the sequence
2165  * of characters beyond that has not necessarily been checked.
2166  * Return 1 if found else 0.
2167  *
2168  * The other parameters are optionaland if the function returns 1 are
2169  * used to return:
2170  * - *c1p: the first character in the expansion.
2171  * - *c2p: the final character in the expansion.
2172  */
2173 
2174 /**/
2175 static int
bracechardots(char * str,convchar_t * c1p,convchar_t * c2p)2176 bracechardots(char *str, convchar_t *c1p, convchar_t *c2p)
2177 {
2178     convchar_t cstart, cend;
2179     char *pnext = str + 1, *pconv, convstr[2];
2180     if (itok(*pnext)) {
2181 	if (*pnext == Inbrace)
2182 	    return 0;
2183 	convstr[0] = ztokens[*pnext - Pound];
2184 	convstr[1] = '\0';
2185 	pconv = convstr;
2186     } else
2187 	pconv = pnext;
2188     MB_METACHARINIT();
2189     pnext += MB_METACHARLENCONV(pconv, &cstart);
2190     if (
2191 #ifdef MULTIBYTE_SUPPORT
2192 	cstart == WEOF ||
2193 #else
2194 	!cstart ||
2195 #endif
2196 	pnext[0] != '.' || pnext[1] != '.')
2197 	return 0;
2198     pnext += 2;
2199     if (!*pnext)
2200 	return 0;
2201     if (itok(*pnext)) {
2202 	if (*pnext == Inbrace)
2203 	    return 0;
2204 	convstr[0] = ztokens[*pnext - Pound];
2205 	convstr[1] = '\0';
2206 	pconv = convstr;
2207     } else
2208 	pconv = pnext;
2209     MB_METACHARINIT();
2210     pnext += MB_METACHARLENCONV(pconv, &cend);
2211     if (
2212 #ifdef MULTIBYTE_SUPPORT
2213 	cend == WEOF ||
2214 #else
2215 	!cend ||
2216 #endif
2217 	*pnext != Outbrace)
2218 	return 0;
2219     if (c1p)
2220 	*c1p = cstart;
2221     if (c2p)
2222 	*c2p = cend;
2223     return 1;
2224 }
2225 
2226 /* brace expansion */
2227 
2228 /**/
2229 mod_export void
xpandbraces(LinkList list,LinkNode * np)2230 xpandbraces(LinkList list, LinkNode *np)
2231 {
2232     LinkNode node = (*np), last = prevnode(node);
2233     char *str = (char *)getdata(node), *str3 = str, *str2;
2234     int prev, bc, comma, dotdot;
2235 
2236     for (; *str != Inbrace; str++);
2237     /* First, match up braces and see what we have. */
2238     for (str2 = str, bc = comma = dotdot = 0; *str2; ++str2)
2239 	if (*str2 == Inbrace)
2240 	    ++bc;
2241 	else if (*str2 == Outbrace) {
2242 	    if (--bc == 0)
2243 		break;
2244 	} else if (bc == 1) {
2245 	    if (*str2 == Comma)
2246 		++comma;	/* we have {foo,bar} */
2247 	    else if (*str2 == '.' && str2[1] == '.') {
2248 		dotdot++;	/* we have {num1..num2} */
2249 		++str2;
2250 	    }
2251 	}
2252     DPUTS(bc, "BUG: unmatched brace in xpandbraces()");
2253     if (!comma && dotdot) {
2254 	/* Expand range like 0..10 numerically: comma or recursive
2255 	   brace expansion take precedence. */
2256 	char *dots, *p, *dots2 = NULL;
2257 	LinkNode olast = last;
2258 	/* Get the first number of the range */
2259 	zlong rstart, rend;
2260 	int err = 0, rev = 0, rincr = 1;
2261 	int wid1, wid2, wid3, strp;
2262 	convchar_t cstart, cend;
2263 
2264 	if (bracechardots(str, &cstart, &cend)) {
2265 	    int lenalloc;
2266 	    /*
2267 	     * This is a character range.
2268 	     */
2269 	    if (cend < cstart) {
2270 		convchar_t ctmp = cend;
2271 		cend = cstart;
2272 		cstart = ctmp;
2273 		rev = 1;
2274 	    }
2275 	    uremnode(list, node);
2276 	    strp = str - str3;
2277 	    lenalloc = strp + strlen(str2+1) + 1;
2278 	    do {
2279 #ifdef MULTIBYTE_SUPPORT
2280 		char *ncptr;
2281 		int nclen;
2282 		mb_charinit();
2283 		ncptr = wcs_nicechar(cend, NULL, NULL);
2284 		nclen = strlen(ncptr);
2285 		p = zhalloc(lenalloc + nclen);
2286 		memcpy(p, str3, strp);
2287 		memcpy(p + strp, ncptr, nclen);
2288 		strcpy(p + strp + nclen, str2 + 1);
2289 #else
2290 		p = zhalloc(lenalloc + 1);
2291 		memcpy(p, str3, strp);
2292 		sprintf(p + strp, "%c", cend);
2293 		strcat(p + strp, str2 + 1);
2294 #endif
2295 		insertlinknode(list, last, p);
2296 		if (rev)	/* decreasing:  add in reverse order. */
2297 		    last = nextnode(last);
2298 	    } while (cend-- > cstart);
2299 	    *np = nextnode(olast);
2300 	    return;
2301 	}
2302 
2303 	/* Get the first number of the range */
2304 	rstart = zstrtol(str+1,&dots,10);
2305 	rend = 0;
2306 	wid1 = (dots - str) - 1;
2307 	wid2 = (str2 - dots) - 2;
2308 	wid3 = 0;
2309 	strp = str - str3;
2310 
2311 	if (dots == str + 1 || *dots != '.' || dots[1] != '.')
2312 	    err++;
2313 	else {
2314 	    /* Get the last number of the range */
2315 	    rend = zstrtol(dots+2,&p,10);
2316 	    if (p == dots+2)
2317 		err++;
2318 	    /* check for {num1..num2..incr} */
2319 	    if (p != str2) {
2320 		wid2 = (p - dots) - 2;
2321 		dots2 = p;
2322 		if (dotdot == 2 && *p == '.' && p[1] == '.') {
2323 		    rincr = zstrtol(p+2, &p, 10);
2324 		    wid3 = p - dots2 - 2;
2325 		    if (p != str2 || !rincr)
2326 			err++;
2327 		} else
2328 		    err++;
2329 	    }
2330 	}
2331 	if (!err) {
2332 	    /* If either no. begins with a zero, pad the output with   *
2333 	     * zeroes. Otherwise, set min width to 0 to suppress them.
2334 	     * str+1 is the first number in the range, dots+2 the last,
2335 	     * and dots2+2 is the increment if that's given. */
2336 	    /* TODO: sorry about this */
2337 	    int minw = (str[1] == '0' ||
2338 			(IS_DASH(str[1]) && str[2] == '0'))
2339 		       ? wid1
2340 		       : (dots[2] == '0' ||
2341 			  (IS_DASH(dots[2]) && dots[3] == '0'))
2342 		       ? wid2
2343 		       : (dots2 && (dots2[2] == '0' ||
2344 				    (IS_DASH(dots2[2]) && dots2[3] == '0')))
2345 		       ? wid3
2346 		       : 0;
2347 	    if (rincr < 0) {
2348 		/* Handle negative increment */
2349 		rincr = -rincr;
2350 		rev = !rev;
2351 	    }
2352 	    if (rstart > rend) {
2353 		/* Handle decreasing ranges correctly. */
2354 		zlong rt = rend;
2355 		rend = rstart;
2356 		rstart = rt;
2357 		rev = !rev;
2358 	    } else if (rincr > 1) {
2359 		/* when incr > 1, range is aligned to the highest number of str1,
2360 		 * compensate for this so that it is aligned to the first number */
2361 		rend -= (rend - rstart) % rincr;
2362 	    }
2363 	    uremnode(list, node);
2364 	    for (; rend >= rstart; rend -= rincr) {
2365 		/* Node added in at end, so do highest first */
2366 		p = dupstring(str3);
2367 #if defined(ZLONG_IS_LONG_LONG) && defined(PRINTF_HAS_LLD)
2368 		sprintf(p + strp, "%0*lld", minw, rend);
2369 #else
2370 		sprintf(p + strp, "%0*ld", minw, (long)rend);
2371 #endif
2372 		strcat(p + strp, str2 + 1);
2373 		insertlinknode(list, last, p);
2374 		if (rev)	/* decreasing:  add in reverse order. */
2375 		    last = nextnode(last);
2376 	    }
2377 	    *np = nextnode(olast);
2378 	    return;
2379 	}
2380     }
2381     if (!comma && isset(BRACECCL)) {	/* {a-mnop} */
2382 	/* Here we expand each character to a separate node,      *
2383 	 * but also ranges of characters like a-m.  ccl is a      *
2384 	 * set of flags saying whether each character is present; *
2385 	 * the final list is in lexical order.                    */
2386 	char ccl[256], *p;
2387 	unsigned char c1, c2;
2388 	unsigned int len, pl;
2389 	int lastch = -1;
2390 
2391 	uremnode(list, node);
2392 	memset(ccl, 0, sizeof(ccl) / sizeof(ccl[0]));
2393 	for (p = str + 1; p < str2;) {
2394 	    if (itok(c1 = *p++))
2395 		c1 = ztokens[c1 - STOUC(Pound)];
2396 	    if ((char) c1 == Meta)
2397 		c1 = 32 ^ *p++;
2398 	    if (itok(c2 = *p))
2399 		c2 = ztokens[c2 - STOUC(Pound)];
2400 	    if ((char) c2 == Meta)
2401 		c2 = 32 ^ p[1];
2402 	    if (IS_DASH((char)c1) && lastch >= 0 &&
2403 		p < str2 && lastch <= (int)c2) {
2404 		while (lastch < (int)c2)
2405 		    ccl[lastch++] = 1;
2406 		lastch = -1;
2407 	    } else
2408 		ccl[lastch = c1] = 1;
2409 	}
2410 	pl = str - str3;
2411 	len = pl + strlen(++str2) + 2;
2412 	for (p = ccl + 256; p-- > ccl;)
2413 	    if (*p) {
2414 		c1 = p - ccl;
2415 		if (imeta(c1)) {
2416 		    str = hcalloc(len + 1);
2417 		    str[pl] = Meta;
2418 		    str[pl+1] = c1 ^ 32;
2419 		    strcpy(str + pl + 2, str2);
2420 		} else {
2421 		    str = hcalloc(len);
2422 		    str[pl] = c1;
2423 		    strcpy(str + pl + 1, str2);
2424 		}
2425 		memcpy(str, str3, pl);
2426 		insertlinknode(list, last, str);
2427 	    }
2428 	*np = nextnode(last);
2429 	return;
2430     }
2431     prev = str++ - str3;
2432     str2++;
2433     uremnode(list, node);
2434     node = last;
2435     /* Finally, normal comma expansion               *
2436      * str1{foo,bar}str2 -> str1foostr2 str1barstr2. *
2437      * Any number of intervening commas is allowed.  */
2438     for (;;) {
2439 	char *zz, *str4;
2440 	int cnt;
2441 
2442 	for (str4 = str, cnt = 0; cnt || (*str != Comma && *str !=
2443 					  Outbrace); str++) {
2444 	    if (*str == Inbrace)
2445 		cnt++;
2446 	    else if (*str == Outbrace)
2447 		cnt--;
2448 	    DPUTS(!*str, "BUG: illegal brace expansion");
2449 	}
2450 	/* Concatenate the string before the braces (str3), the section *
2451 	 * just found (str4) and the text after the braces (str2)       */
2452 	zz = (char *) hcalloc(prev + (str - str4) + strlen(str2) + 1);
2453 	ztrncpy(zz, str3, prev);
2454 	strncat(zz, str4, str - str4);
2455 	strcat(zz, str2);
2456 	/* and add this text to the argument list. */
2457 	insertlinknode(list, node, zz);
2458 	incnode(node);
2459 	if (*str != Outbrace)
2460 	    str++;
2461 	else
2462 	    break;
2463     }
2464     *np = nextnode(last);
2465 }
2466 
2467 /* check to see if a matches b (b is not a filename pattern) */
2468 
2469 /**/
2470 int
matchpat(char * a,char * b)2471 matchpat(char *a, char *b)
2472 {
2473     Patprog p;
2474     int ret;
2475 
2476     queue_signals();	/* Protect PAT_STATIC */
2477 
2478     if (!(p = patcompile(b, PAT_STATIC, NULL))) {
2479 	zerr("bad pattern: %s", b);
2480 	ret = 0;
2481     } else
2482       ret = pattry(p, a);
2483 
2484     unqueue_signals();
2485 
2486     return ret;
2487 }
2488 
2489 /* do the ${foo%%bar}, ${foo#bar} stuff */
2490 /* please do not laugh at this code. */
2491 
2492 /* Having found a match in getmatch, decide what part of string
2493  * to return.  The matched part starts b characters into string imd->ustr
2494  * and finishes e characters in: 0 <= b <= e <= imd->ulen on input
2495  * (yes, empty matches should work).
2496  *
2497  * imd->flags is a set of the SUB_* matches defined in zsh.h from
2498  * SUB_MATCH onwards; the lower parts are ignored.
2499  *
2500  * imd->replstr is the replacement string for a substitution
2501  *
2502  * imd->replstr is metafied and the values put in imd->repllist are metafied.
2503  */
2504 
2505 /**/
2506 static char *
get_match_ret(Imatchdata imd,int b,int e)2507 get_match_ret(Imatchdata imd, int b, int e)
2508 {
2509     char buf[80], *r, *p, *rr, *replstr = imd->replstr;
2510     int ll = 0, bl = 0, t = 0, add = 0, fl = imd->flags, i;
2511 
2512     /* Account for b and e referring to unmetafied string */
2513     for (p = imd->ustr; p < imd->ustr + b; p++)
2514 	if (imeta(*p))
2515 	    add++;
2516     b += add;
2517     for (; p < imd->ustr + e; p++)
2518 	if (imeta(*p))
2519 	    add++;
2520     e += add;
2521 
2522     /* Everything now refers to metafied lengths. */
2523     if (replstr || (fl & SUB_LIST)) {
2524 	if (fl & SUB_DOSUBST) {
2525 	    replstr = dupstring(replstr);
2526 	    singsub(&replstr);
2527 	    untokenize(replstr);
2528 	}
2529 	if ((fl & (SUB_GLOBAL|SUB_LIST)) && imd->repllist) {
2530 	    /* We are replacing the chunk, just add this to the list */
2531 	    Repldata rd = (Repldata)
2532 		((fl & SUB_LIST) ? zalloc(sizeof(*rd)) : zhalloc(sizeof(*rd)));
2533 	    rd->b = b;
2534 	    rd->e = e;
2535 	    rd->replstr = replstr;
2536 	    if (fl & SUB_LIST)
2537 		zaddlinknode(imd->repllist, rd);
2538 	    else
2539 		addlinknode(imd->repllist, rd);
2540 	    return imd->mstr;
2541 	}
2542 	ll += strlen(replstr);
2543     }
2544     if (fl & SUB_MATCH)			/* matched portion */
2545 	ll += 1 + (e - b);
2546     if (fl & SUB_REST)		/* unmatched portion */
2547 	ll += 1 + (imd->mlen - (e - b));
2548     if (fl & SUB_BIND) {
2549 	/* position of start of matched portion */
2550 	sprintf(buf, "%d ", MB_METASTRLEN2END(imd->mstr, 0, imd->mstr+b) + 1);
2551 	ll += (bl = strlen(buf));
2552     }
2553     if (fl & SUB_EIND) {
2554 	/* position of end of matched portion */
2555 	sprintf(buf + bl, "%d ",
2556 		MB_METASTRLEN2END(imd->mstr, 0, imd->mstr+e) + 1);
2557 	ll += (bl = strlen(buf));
2558     }
2559     if (fl & SUB_LEN) {
2560 	/* length of matched portion */
2561 	sprintf(buf + bl, "%d ", MB_METASTRLEN2END(imd->mstr+b, 0,
2562 						   imd->mstr+e));
2563 	ll += (bl = strlen(buf));
2564     }
2565     if (bl)
2566 	buf[bl - 1] = '\0';
2567 
2568     rr = r = (char *) hcalloc(ll);
2569 
2570     if (fl & SUB_MATCH) {
2571 	/* copy matched portion to new buffer */
2572 	for (i = b, p = imd->mstr + b; i < e; i++)
2573 	    *rr++ = *p++;
2574 	t = 1;
2575     }
2576     if (fl & SUB_REST) {
2577 	/* Copy unmatched portion to buffer.  If both portions *
2578 	 * requested, put a space in between (why?)            */
2579 	if (t)
2580 	    *rr++ = ' ';
2581 	/* there may be unmatched bits at both beginning and end of string */
2582 	for (i = 0, p = imd->mstr; i < b; i++)
2583 	    *rr++ = *p++;
2584 	if (replstr)
2585 	    for (p = replstr; *p; )
2586 		*rr++ = *p++;
2587 	for (i = e, p = imd->mstr + e; i < imd->mlen; i++)
2588 	    *rr++ = *p++;
2589 	t = 1;
2590     }
2591     *rr = '\0';
2592     if (bl) {
2593 	/* if there was a buffer (with a numeric result), add it; *
2594 	 * if there was other stuff too, stick in a space first.  */
2595 	if (t)
2596 	    *rr++ = ' ';
2597 	strcpy(rr, buf);
2598     }
2599     return r;
2600 }
2601 
2602 static Patprog
compgetmatch(char * pat,int * flp,char ** replstrp)2603 compgetmatch(char *pat, int *flp, char **replstrp)
2604 {
2605     Patprog p;
2606     /*
2607      * Flags to pattern compiler:  use static buffer since we only
2608      * have one pattern at a time; we will try the must-match test ourselves,
2609      * so tell the pattern compiler we are scanning.
2610      */
2611 
2612     /* int patflags = PAT_STATIC|PAT_SCAN|PAT_NOANCH;*/
2613 
2614     /* Unfortunately, PAT_STATIC doesn't work if we have a replstr with
2615      * something like ${x#...} in it which will be singsub()ed below because
2616      * that would overwrite the pattern buffer. */
2617 
2618     int patflags = PAT_SCAN|PAT_NOANCH | (*replstrp ? 0 : PAT_STATIC);
2619 
2620     /*
2621      * Search is anchored to the end of the string if we want to match
2622      * it all, or if we are matching at the end of the string and not
2623      * using substrings.
2624      */
2625     if ((*flp & SUB_ALL) || ((*flp & SUB_END) && !(*flp & SUB_SUBSTR)))
2626 	patflags &= ~PAT_NOANCH;
2627     p = patcompile(pat, patflags, NULL);
2628     if (!p) {
2629 	zerr("bad pattern: %s", pat);
2630 	return NULL;
2631     }
2632     if (*replstrp) {
2633 	if (p->patnpar || (p->globend & GF_MATCHREF)) {
2634 	    /*
2635 	     * Either backreferences or match references, so we
2636 	     * need to re-substitute replstr each time round.
2637 	     */
2638 	    *flp |= SUB_DOSUBST;
2639 	} else {
2640 	    singsub(replstrp);
2641 	    untokenize(*replstrp);
2642 	}
2643     }
2644 
2645     return p;
2646 }
2647 
2648 /*
2649  * This is called from paramsubst to get the match for ${foo#bar} etc.
2650  * fl is a set of the SUB_* flags defined in zsh.h
2651  * *sp points to the string we have to modify. The n'th match will be
2652  * returned in *sp. The heap is used to get memory for the result string.
2653  * replstr is the replacement string from a ${.../orig/repl}, in
2654  * which case pat is the original.
2655  *
2656  * n is now ignored unless we are looking for a substring, in
2657  * which case the n'th match from the start is counted such that
2658  * there is no more than one match from each position.
2659  */
2660 
2661 /**/
2662 int
getmatch(char ** sp,char * pat,int fl,int n,char * replstr)2663 getmatch(char **sp, char *pat, int fl, int n, char *replstr)
2664 {
2665     Patprog p;
2666 
2667     if (!(p = compgetmatch(pat, &fl, &replstr)))
2668 	return 1;
2669 
2670     return igetmatch(sp, p, fl, n, replstr, NULL);
2671 }
2672 
2673 /*
2674  * This is the corresponding function for array variables.
2675  * Matching is done with the same pattern on each element.
2676  */
2677 
2678 /**/
2679 void
getmatcharr(char *** ap,char * pat,int fl,int n,char * replstr)2680 getmatcharr(char ***ap, char *pat, int fl, int n, char *replstr)
2681 {
2682     char **arr = *ap, **pp;
2683     Patprog p;
2684 
2685     if (!(p = compgetmatch(pat, &fl, &replstr)))
2686 	return;
2687 
2688     *ap = pp = hcalloc(sizeof(char *) * (arrlen(arr) + 1));
2689     while ((*pp = *arr++))
2690 	if (igetmatch(pp, p, fl, n, replstr, NULL))
2691 	    pp++;
2692 }
2693 
2694 /*
2695  * Match against str using pattern pp; return a list of
2696  * Repldata matches in the linked list *repllistp; this is
2697  * in permanent storage and to be freed by freematchlist()
2698  */
2699 
2700 /**/
2701 mod_export int
getmatchlist(char * str,Patprog p,LinkList * repllistp)2702 getmatchlist(char *str, Patprog p, LinkList *repllistp)
2703 {
2704     char **sp = &str;
2705 
2706     /*
2707      * We don't care if we have longest or shortest match, but SUB_LONG
2708      * is cheaper since the pattern code does that by default.
2709      * We need SUB_GLOBAL to get all matches.
2710      * We need SUB_SUBSTR to scan through for substrings.
2711      * We need SUB_LIST to activate the special handling of the list
2712      * passed in.
2713      */
2714     return igetmatch(sp, p, SUB_LONG|SUB_GLOBAL|SUB_SUBSTR|SUB_LIST,
2715 		     0, NULL, repllistp);
2716 }
2717 
2718 static void
freerepldata(void * ptr)2719 freerepldata(void *ptr)
2720 {
2721     zfree(ptr, sizeof(struct repldata));
2722 }
2723 
2724 /**/
2725 mod_export void
freematchlist(LinkList repllist)2726 freematchlist(LinkList repllist)
2727 {
2728     freelinklist(repllist, freerepldata);
2729 }
2730 
2731 /**/
2732 static void
set_pat_start(Patprog p,int offs)2733 set_pat_start(Patprog p, int offs)
2734 {
2735     /*
2736      * If we are messing around with the test string by advancing up
2737      * it from the start, we need to tell the pattern matcher that
2738      * a start-of-string assertion, i.e. (#s), should fail.  Hence
2739      * we test whether the offset of the real start of string from
2740      * the actual start, passed as offs, is zero.
2741      */
2742     if (offs)
2743 	p->flags |= PAT_NOTSTART;
2744     else
2745 	p->flags &= ~PAT_NOTSTART;
2746 }
2747 
2748 /**/
2749 static void
set_pat_end(Patprog p,char null_me)2750 set_pat_end(Patprog p, char null_me)
2751 {
2752     /*
2753      * If we are messing around with the string by shortening it at the
2754      * tail, we need to tell the pattern matcher that an end-of-string
2755      * assertion, i.e. (#e), should fail.  Hence we test whether
2756      * the character null_me about to be zapped is or is not already a null.
2757      */
2758     if (null_me)
2759 	p->flags |= PAT_NOTEND;
2760     else
2761 	p->flags &= ~PAT_NOTEND;
2762 }
2763 
2764 /**/
2765 #ifdef MULTIBYTE_SUPPORT
2766 
2767 /*
2768  * Increment *tp over character which may be multibyte.
2769  * Return number of bytes.
2770  * All unmetafied here.
2771  */
2772 
2773 /**/
iincchar(char ** tp,int left)2774 static int iincchar(char **tp, int left)
2775 {
2776     char *t = *tp;
2777     int mbclen = mb_charlenconv(t, left, NULL);
2778     *tp = t + mbclen;
2779 
2780     return mbclen;
2781 }
2782 
2783 /**/
2784 static int
igetmatch(char ** sp,Patprog p,int fl,int n,char * replstr,LinkList * repllistp)2785 igetmatch(char **sp, Patprog p, int fl, int n, char *replstr,
2786 	  LinkList *repllistp)
2787 {
2788     char *s = *sp, *t, *tmatch, *send;
2789     /*
2790      * Note that ioff counts (possibly multibyte) characters in the
2791      * character set (Meta's are not included), while l counts characters in
2792      * the metafied string.
2793      *
2794      * umlen is a counter for (unmetafied) byte lengths---neither characters
2795      * nor raw byte indices; this is simply an optimisation for allocation.
2796      * umltot is the full length of the string in this scheme.
2797      *
2798      * l is the raw string length, used together with any pointers into
2799      * the string (typically t).
2800      */
2801     int ioff, l = strlen(*sp), matched = 1, umltot = ztrlen(*sp);
2802     int umlen, nmatches;
2803     struct patstralloc patstralloc;
2804     struct imatchdata imd;
2805 
2806     (void)patallocstr(p, s, l, umltot, 1, &patstralloc);
2807     s = patstralloc.alloced;
2808     DPUTS(!s, "forced patallocstr failed");
2809     send = s + umltot;
2810 
2811     imd.mstr = *sp;
2812     imd.mlen = l;
2813     imd.ustr = s;
2814     imd.ulen = umltot;
2815     imd.flags = fl;
2816     imd.replstr = replstr;
2817     imd.repllist = NULL;
2818 
2819     /* perform must-match test for complex closures */
2820     if (p->mustoff)
2821     {
2822 	char *muststr = (char *)p + p->mustoff;
2823 
2824 	matched = 0;
2825 	if (p->patmlen <= umltot)
2826 	{
2827 	    for (t = s; t <= send - p->patmlen; t++)
2828 	    {
2829 		if (!memcmp(muststr, t, p->patmlen)) {
2830 		    matched = 1;
2831 		    break;
2832 		}
2833 	    }
2834 	}
2835     }
2836 
2837     /* in case we used the prog before... */
2838     p->flags &= ~(PAT_NOTSTART|PAT_NOTEND);
2839 
2840     if (fl & SUB_ALL) {
2841 	int i = matched && pattrylen(p, s, umltot, 0, &patstralloc, 0);
2842 	if (!i) {
2843 	    /* Perform under no-match conditions */
2844 	    umltot = 0;
2845 	    imd.replstr = NULL;
2846 	}
2847 	*sp = get_match_ret(&imd, 0, umltot);
2848 	if (! **sp && (((fl & SUB_MATCH) && !i) || ((fl & SUB_REST) && i)))
2849 	    return 0;
2850 	return 1;
2851     }
2852     if (matched) {
2853 	/*
2854 	 * The default behaviour is to match at the start; this
2855 	 * is modified by SUB_END and SUB_SUBSTR.  SUB_END matches
2856 	 * at the end of the string instead of the start.  SUB_SUBSTR
2857 	 * without SUB_END matches substrings searching from the start;
2858 	 * with SUB_END it matches substrings searching from the end.
2859 	 *
2860 	 * The possibilities are further modified by whether we want the
2861 	 * longest (SUB_LONG) or shortest possible match.
2862 	 *
2863 	 * SUB_START is only used in the case where we are also
2864 	 * forcing a match at the end (SUB_END with no SUB_SUBSTR,
2865 	 * with or without SUB_LONG), to indicate we should match
2866 	 * the entire string.
2867 	 */
2868 	switch (fl & (SUB_END|SUB_LONG|SUB_SUBSTR)) {
2869 	case 0:
2870 	case SUB_LONG:
2871 	    /*
2872 	     * Largest/smallest possible match at head of string.
2873 	     * First get the longest match...
2874 	     */
2875 	    if (pattrylen(p, s, umltot, 0, &patstralloc, 0)) {
2876 		/* patmatchlen returns unmetafied length in this case */
2877 	        int mlen = patmatchlen();
2878 		if (!(fl & SUB_LONG) && !(p->flags & PAT_PURES)) {
2879 		    send = s + mlen;
2880 		    /*
2881 		     * ... now we know whether it's worth looking for the
2882 		     * shortest, which we do by brute force.
2883 		     */
2884 		    mb_charinit();
2885 		    for (t = s, umlen = 0; t < send; ) {
2886 			set_pat_end(p, *t);
2887 			if (pattrylen(p, s, umlen, 0, &patstralloc, 0)) {
2888 			    mlen = patmatchlen();
2889 			    break;
2890 			}
2891 			umlen += iincchar(&t, send - t);
2892 		    }
2893 		}
2894 		*sp = get_match_ret(&imd, 0, mlen);
2895 		return 1;
2896 	    }
2897 	    break;
2898 
2899 	case SUB_END:
2900 	    /*
2901 	     * Smallest possible match at tail of string.
2902 	     * As we can only be sure we've got wide characters right
2903 	     * when going forwards, we need to match at every point
2904 	     * until we fail and record the last successful match.
2905 	     *
2906 	     * It's important that we return the last successful match
2907 	     * so that match, mbegin, mend and MATCH, MBEGIN, MEND are
2908 	     * correct.
2909 	     */
2910 	    mb_charinit();
2911 	    tmatch = NULL;
2912 	    set_pat_start(p, l);
2913 	    if (pattrylen(p, send, 0, 0, &patstralloc, umltot) &&
2914 		!--n) {
2915 		*sp = get_match_ret(&imd, umltot, umltot);
2916 		return 1;
2917 	    }
2918 	    for (ioff = 0, t = s, umlen = umltot; t < send; ioff++) {
2919 		set_pat_start(p, t-s);
2920 		if (pattrylen(p, t, umlen, 0, &patstralloc, ioff))
2921 		    tmatch = t;
2922 		if (fl & SUB_START)
2923 		    break;
2924 		umlen -= iincchar(&t, send - t);
2925 	    }
2926 	    if (tmatch) {
2927 		*sp = get_match_ret(&imd, tmatch - s, umltot);
2928 		return 1;
2929 	    }
2930 	    if (!(fl & SUB_START) && pattrylen(p, s + umltot, 0, 0,
2931 					       &patstralloc, ioff)) {
2932 		*sp = get_match_ret(&imd, umltot, umltot);
2933 		return 1;
2934 	    }
2935 	    break;
2936 
2937 	case (SUB_END|SUB_LONG):
2938 	    /* Largest possible match at tail of string:       *
2939 	     * move forward along string until we get a match. *
2940 	     * Again there's no optimisation.                  */
2941 	    mb_charinit();
2942 	    for (ioff = 0, t = s, umlen = umltot; t <= send ; ioff++) {
2943 		set_pat_start(p, t-s);
2944 		if (pattrylen(p, t, umlen, 0, &patstralloc, ioff)) {
2945 		    *sp = get_match_ret(&imd, t-s, umltot);
2946 		    return 1;
2947 		}
2948 		if (fl & SUB_START)
2949 		    break;
2950 		if (t == send)
2951 		    break;
2952 		umlen -= iincchar(&t, send - t);
2953 	    }
2954 	    if (!(fl & SUB_START) && pattrylen(p, send, 0, 0,
2955 					       &patstralloc, ioff)) {
2956 		*sp = get_match_ret(&imd, umltot, umltot);
2957 		return 1;
2958 	    }
2959 	    break;
2960 
2961 	case SUB_SUBSTR:
2962 	    /* Smallest at start, but matching substrings. */
2963 	    set_pat_start(p, l);
2964 	    if (!(fl & SUB_GLOBAL) &&
2965 		pattrylen(p, send, 0, 0, &patstralloc, 0) &&
2966 		!--n) {
2967 		*sp = get_match_ret(&imd, 0, 0);
2968 		return 1;
2969 	    } /* fall through */
2970 	case (SUB_SUBSTR|SUB_LONG):
2971 	    /* longest or smallest at start with substrings */
2972 	    t = s;
2973 	    if (fl & SUB_GLOBAL) {
2974 		imd.repllist = (fl & SUB_LIST) ? znewlinklist() : newlinklist();
2975 		if (repllistp)
2976 		     *repllistp = imd.repllist;
2977 	    }
2978 	    ioff = 0;		/* offset into string */
2979 	    umlen = umltot;
2980 	    mb_charinit();
2981 	    do {
2982 		/* loop over all matches for global substitution */
2983 		matched = 0;
2984 		for (; t <= send; ioff++) {
2985 		    /* Find the longest match from this position. */
2986 		    set_pat_start(p, t-s);
2987 		    if (pattrylen(p, t, umlen, 0, &patstralloc, ioff)) {
2988 			char *mpos = t + patmatchlen();
2989 			if (!(fl & SUB_LONG) && !(p->flags & PAT_PURES)) {
2990 			    char *ptr;
2991 			    int umlen2;
2992 			    /*
2993 			     * If searching for the shortest match,
2994 			     * start with a zero length and increase
2995 			     * it until we reach the longest possible
2996 			     * match, accepting the first successful
2997 			     * match.
2998 			     */
2999 			    for (ptr = t, umlen2 = 0; ptr < mpos;) {
3000 				set_pat_end(p, *ptr);
3001 				if (pattrylen(p, t, umlen2, 0,
3002 					      &patstralloc, ioff)) {
3003 				    mpos = t + patmatchlen();
3004 				    break;
3005 				}
3006 				umlen2 += iincchar(&ptr, mpos - ptr);
3007 			    }
3008 			}
3009 			if (!--n || (n <= 0 && (fl & SUB_GLOBAL))) {
3010 			    *sp = get_match_ret(&imd, t-s, mpos-s);
3011 			    if (mpos == t)
3012 				mpos += mb_charlenconv(mpos, send - mpos, NULL);
3013 			}
3014 			if (!(fl & SUB_GLOBAL)) {
3015 			    if (n) {
3016 				/*
3017 				 * Looking for a later match: in this case,
3018 				 * we can continue looking for matches from
3019 				 * the next character, even if it overlaps
3020 				 * with what we just found.
3021 				 */
3022 				umlen -= iincchar(&t, send - t);
3023 				continue;
3024 			    } else {
3025 				return 1;
3026 			    }
3027 			}
3028 			/*
3029 			 * For a global match, we need to skip the stuff
3030 			 * which is already marked for replacement.
3031 			 */
3032 			matched = 1;
3033 			if (t == send)
3034 			    break;
3035 			while (t < mpos) {
3036 			    ioff++;
3037 			    umlen -= iincchar(&t, send - t);
3038 			}
3039 			break;
3040 		    }
3041 		    if (t == send)
3042 			break;
3043 		    umlen -= iincchar(&t, send - t);
3044 		}
3045 	    } while (matched && t < send);
3046 	    /*
3047 	     * check if we can match a blank string, if so do it
3048 	     * at the start.  Goodness knows if this is a good idea
3049 	     * with global substitution, so it doesn't happen.
3050 	     */
3051 	    set_pat_start(p, l);
3052 	    if ((fl & (SUB_LONG|SUB_GLOBAL)) == SUB_LONG &&
3053 		pattrylen(p, send, 0, 0, &patstralloc, 0) && !--n) {
3054 		*sp = get_match_ret(&imd, 0, 0);
3055 		return 1;
3056 	    }
3057 	    break;
3058 
3059 	case (SUB_END|SUB_SUBSTR):
3060 	case (SUB_END|SUB_LONG|SUB_SUBSTR):
3061 	    /* Longest/shortest at end, matching substrings.       */
3062 	    {
3063 		set_pat_start(p, l);
3064 		if (pattrylen(p, send, 0, 0, &patstralloc, umltot) &&
3065 		    !--n) {
3066 		    *sp = get_match_ret(&imd, umltot, umltot);
3067 		    return 1;
3068 		}
3069 	    }
3070 	    /*
3071 	     * If multibyte characters are present we need to start from the
3072 	     * beginning.  This is a bit unpleasant because we can't tell in
3073 	     * advance how many times it will match and from where, so if n is
3074 	     * greater then 1 we will need to count the number of times it
3075 	     * matched and then go through again until we reach the right
3076 	     * point.  (Either that or record every single match in a list,
3077 	     * which isn't stupid; it involves more memory management at this
3078 	     * level but less use of the pattern matcher.)
3079 	     */
3080 	    nmatches = 0;
3081 	    tmatch = NULL;
3082 	    mb_charinit();
3083 	    for (ioff = 0, t = s, umlen = umltot; t < send; ioff++) {
3084 		set_pat_start(p, t-s);
3085 		if (pattrylen(p, t, umlen, 0, &patstralloc, ioff)) {
3086 		    nmatches++;
3087 		    tmatch = t;
3088 		}
3089 		umlen -= iincchar(&t, send - t);
3090 	    }
3091 	    if (nmatches) {
3092 		char *mpos;
3093 		if (n > 1) {
3094 		    /*
3095 		     * We need to find the n'th last match.
3096 		     */
3097 		    n = nmatches - n;
3098 		    mb_charinit();
3099 		    for (ioff = 0, t = s, umlen = umltot; t < send; ioff++) {
3100 			set_pat_start(p, t-s);
3101 			if (pattrylen(p, t, umlen, 0, &patstralloc, ioff) &&
3102 			    !n--) {
3103 			    tmatch = t;
3104 			    break;
3105 			}
3106 			umlen -= iincchar(&t, send - t);
3107 		    }
3108 		}
3109 		mpos = tmatch + patmatchlen();
3110 		/* Look for the shortest match if necessary */
3111 		if (!(fl & SUB_LONG) && !(p->flags & PAT_PURES)) {
3112 		    for (t = tmatch, umlen = 0; t < mpos; ) {
3113 			set_pat_end(p, *t);
3114 			if (pattrylen(p, tmatch, umlen, 0,
3115 				      &patstralloc, ioff)) {
3116 			    mpos = tmatch + patmatchlen();
3117 			    break;
3118 			}
3119 			umlen += iincchar(&t, mpos - t);
3120 		    }
3121 		}
3122 		*sp = get_match_ret(&imd, tmatch-s, mpos-s);
3123 		return 1;
3124 	    }
3125 	    set_pat_start(p, l);
3126 	    if ((fl & SUB_LONG) && pattrylen(p, send, 0, 0,
3127 					     &patstralloc, umltot) &&
3128 		!--n) {
3129 		*sp = get_match_ret(&imd, umltot, umltot);
3130 		return 1;
3131 	    }
3132 	    break;
3133 	}
3134     }
3135 
3136     if (imd.repllist && nonempty(imd.repllist)) {
3137 	/* Put all the bits of a global search and replace together. */
3138 	LinkNode nd;
3139 	Repldata rd;
3140 	int lleft;
3141 	char *ptr, *start;
3142 	int i;
3143 
3144 	/*
3145 	 * Use metafied string again.
3146 	 * Results from get_match_ret in repllist are all metafied.
3147 	 */
3148 	s = *sp;
3149 	if (!(fl & SUB_LIST)) {
3150 	    lleft = 0;		/* size of returned string */
3151 	    i = 0;	       /* start of last chunk we got from *sp */
3152 	    for (nd = firstnode(imd.repllist); nd; incnode(nd)) {
3153 		rd = (Repldata) getdata(nd);
3154 		lleft += rd->b - i; /* previous chunk of *sp */
3155 		lleft += strlen(rd->replstr);	/* the replaced bit */
3156 		i = rd->e;		/* start of next chunk of *sp */
3157 	    }
3158 	    lleft += l - i;	/* final chunk from *sp */
3159 	    start = t = zhalloc(lleft+1);
3160 	    i = 0;
3161 	    for (nd = firstnode(imd.repllist); nd; incnode(nd)) {
3162 		rd = (Repldata) getdata(nd);
3163 		memcpy(t, s + i, rd->b - i);
3164 		t += rd->b - i;
3165 		ptr = rd->replstr;
3166 		while (*ptr)
3167 		    *t++ = *ptr++;
3168 		i = rd->e;
3169 	    }
3170 	    memcpy(t, s + i, l - i);
3171 	    start[lleft] = '\0';
3172 	    *sp = (char *)start;
3173 	}
3174 	return 1;
3175     }
3176     if (fl & SUB_LIST) {	/* safety: don't think this can happen */
3177 	return 0;
3178     }
3179 
3180     /* munge the whole string: no match, so no replstr */
3181     imd.replstr = NULL;
3182     imd.repllist = NULL;
3183     *sp = get_match_ret(&imd, 0, 0);
3184     return (fl & SUB_RETFAIL) ? 0 : 1;
3185 }
3186 
3187 /**/
3188 #else
3189 
3190 /*
3191  * Increment pointer which may be on a Meta (x is a pointer variable),
3192  * returning the incremented value (i.e. like pre-increment).
3193  */
3194 #define METAINC(x)	((x) += (*(x) == Meta) ? 2 : 1)
3195 
3196 /**/
3197 static int
igetmatch(char ** sp,Patprog p,int fl,int n,char * replstr,LinkList * repllistp)3198 igetmatch(char **sp, Patprog p, int fl, int n, char *replstr,
3199 	  LinkList *repllistp)
3200 {
3201     char *s = *sp, *t, *send;
3202     /*
3203      * Note that ioff and uml count characters in the character
3204      * set (Meta's are not included), while l counts characters in the
3205      * metafied string.  umlen is a counter for (unmetafied) character
3206      * lengths.
3207      */
3208     int ioff, l = strlen(*sp), uml = ztrlen(*sp), matched = 1, umlen;
3209     struct patstralloc patstralloc;
3210     struct imatchdata imd;
3211 
3212     (void)patallocstr(p, s, l, uml, 1, &patstralloc);
3213     s = patstralloc.alloced;
3214     DPUTS(!s, "forced patallocstr failed");
3215     send = s + uml;
3216 
3217     imd.mstr = *sp;
3218     imd.mlen = l;
3219     imd.ustr = s;
3220     imd.ulen = uml;
3221     imd.flags = fl;
3222     imd.replstr = replstr;
3223     imd.repllist = NULL;
3224 
3225     /* perform must-match test for complex closures */
3226     if (p->mustoff)
3227     {
3228 	char *muststr = (char *)p + p->mustoff;
3229 
3230 	matched = 0;
3231 	if (p->patmlen <= uml)
3232 	{
3233 	    for (t = s; t <= send - p->patmlen; t++)
3234 	    {
3235 		if (!memcmp(muststr, t, p->patmlen)) {
3236 		    matched = 1;
3237 		    break;
3238 		}
3239 	    }
3240 	}
3241     }
3242 
3243     /* in case we used the prog before... */
3244     p->flags &= ~(PAT_NOTSTART|PAT_NOTEND);
3245 
3246     if (fl & SUB_ALL) {
3247 	int i = matched && pattrylen(p, s, uml, 0, &patstralloc, 0);
3248 	if (!i)
3249 	    imd.replstr = NULL;
3250 	*sp = get_match_ret(&imd, 0, i ? l : 0);
3251 	if (! **sp && (((fl & SUB_MATCH) && !i) || ((fl & SUB_REST) && i)))
3252 	    return 0;
3253 	return 1;
3254     }
3255     if (matched) {
3256 	switch (fl & (SUB_END|SUB_LONG|SUB_SUBSTR)) {
3257 	case 0:
3258 	case SUB_LONG:
3259 	    /*
3260 	     * Largest/smallest possible match at head of string.
3261 	     * First get the longest match...
3262 	     */
3263 	    if (pattrylen(p, s, uml, 0, &patstralloc, 0)) {
3264 		/* patmatchlen returns metafied length, as we need */
3265 	        int mlen = patmatchlen();
3266 		if (!(fl & SUB_LONG) && !(p->flags & PAT_PURES)) {
3267 		    send = s + mlen;
3268 		    /*
3269 		     * ... now we know whether it's worth looking for the
3270 		     * shortest, which we do by brute force.
3271 		     */
3272 		    for (t = s, umlen = 0; t < s + mlen; METAINC(t), umlen++) {
3273 			set_pat_end(p, *t);
3274 			if (pattrylen(p, s, umlen, 0, &patstralloc, 0)) {
3275 			    mlen = patmatchlen();
3276 			    break;
3277 			}
3278 		    }
3279 		}
3280 		*sp = get_match_ret(&imd, 0, mlen);
3281 		return 1;
3282 	    }
3283 	    break;
3284 
3285 	case SUB_END:
3286 	    /* Smallest possible match at tail of string:  *
3287 	     * move back down string until we get a match. *
3288 	     * There's no optimization here.               */
3289 	    for (ioff = uml, t = send, umlen = 0; t >= s;
3290 		 t--, ioff--, umlen++) {
3291 		set_pat_start(p, t-s);
3292 		if (pattrylen(p, t, umlen, 0, &patstralloc, ioff)) {
3293 		    *sp = get_match_ret(&imd, t - s, uml);
3294 		    return 1;
3295 		}
3296 	    }
3297 	    break;
3298 
3299 	case (SUB_END|SUB_LONG):
3300 	    /* Largest possible match at tail of string:       *
3301 	     * move forward along string until we get a match. *
3302 	     * Again there's no optimisation.                  */
3303 	    for (ioff = 0, t = s, umlen = uml; t < send;
3304 		 ioff++, t++, umlen--) {
3305 		set_pat_start(p, t-s);
3306 		if (pattrylen(p, t, send - t, umlen, &patstralloc, ioff)) {
3307 		    *sp = get_match_ret(&imd, t-s, uml);
3308 		    return 1;
3309 		}
3310 	    }
3311 	    break;
3312 
3313 	case SUB_SUBSTR:
3314 	    /* Smallest at start, but matching substrings. */
3315 	    set_pat_start(p, l);
3316 	    if (!(fl & SUB_GLOBAL) &&
3317 		pattrylen(p, send, 0, 0, &patstralloc, 0) && !--n) {
3318 		*sp = get_match_ret(&imd, 0, 0);
3319 		return 1;
3320 	    } /* fall through */
3321 	case (SUB_SUBSTR|SUB_LONG):
3322 	    /* longest or smallest at start with substrings */
3323 	    t = s;
3324 	    if (fl & SUB_GLOBAL) {
3325 		imd.repllist = newlinklist();
3326 		if (repllistp)
3327 		    *repllistp = imd.repllist;
3328 	    }
3329 	    ioff = 0;		/* offset into string */
3330 	    umlen = uml;
3331 	    do {
3332 		/* loop over all matches for global substitution */
3333 		matched = 0;
3334 		for (; t < send; t++, ioff++, umlen--) {
3335 		    /* Find the longest match from this position. */
3336 		    set_pat_start(p, t-s);
3337 		    if (pattrylen(p, t, send - t, umlen, &patstralloc, ioff)) {
3338 			char *mpos = t + patmatchlen();
3339 			if (!(fl & SUB_LONG) && !(p->flags & PAT_PURES)) {
3340 			    char *ptr;
3341 			    int umlen2;
3342 			    for (ptr = t, umlen2 = 0; ptr < mpos;
3343 				 ptr++, umlen2++) {
3344 				set_pat_end(p, *ptr);
3345 				if (pattrylen(p, t, ptr - t, umlen2,
3346 					      &patstralloc, ioff)) {
3347 				    mpos = t + patmatchlen();
3348 				    break;
3349 				}
3350 			    }
3351 			}
3352 			if (!--n || (n <= 0 && (fl & SUB_GLOBAL))) {
3353 			    *sp = get_match_ret(&imd, t-s, mpos-s);
3354 			    if (mpos == t)
3355 				mpos++;
3356 			}
3357 			if (!(fl & SUB_GLOBAL)) {
3358 			    if (n) {
3359 				/*
3360 				 * Looking for a later match: in this case,
3361 				 * we can continue looking for matches from
3362 				 * the next character, even if it overlaps
3363 				 * with what we just found.
3364 				 */
3365 				continue;
3366 			    } else {
3367 				return 1;
3368 			    }
3369 			}
3370 			/*
3371 			 * For a global match, we need to skip the stuff
3372 			 * which is already marked for replacement.
3373 			 */
3374 			matched = 1;
3375 			while (t < mpos) {
3376 			    ioff++;
3377 			    umlen--;
3378 			    t++;
3379 			}
3380 			break;
3381 		    }
3382 		}
3383 	    } while (matched);
3384 	    /*
3385 	     * check if we can match a blank string, if so do it
3386 	     * at the start.  Goodness knows if this is a good idea
3387 	     * with global substitution, so it doesn't happen.
3388 	     */
3389 	    set_pat_start(p, l);
3390 	    if ((fl & (SUB_LONG|SUB_GLOBAL)) == SUB_LONG &&
3391 		pattrylen(p, send, 0, 0, &patstralloc, 0) && !--n) {
3392 		*sp = get_match_ret(&imd, 0, 0);
3393 		return 1;
3394 	    }
3395 	    break;
3396 
3397 	case (SUB_END|SUB_SUBSTR):
3398 	case (SUB_END|SUB_LONG|SUB_SUBSTR):
3399 	    /* Longest/shortest at end, matching substrings.       */
3400 	    {
3401 		set_pat_start(p, l);
3402 		if (pattrylen(p, send, 0, 0, &patstralloc, uml) && !--n) {
3403 		    *sp = get_match_ret(&imd, uml, uml);
3404 		    return 1;
3405 		}
3406 	    }
3407 	    for (ioff = uml - 1, t = send - 1, umlen = 1; t >= s;
3408 		 t--, ioff--, umlen++) {
3409 		set_pat_start(p, t-s);
3410 		if (pattrylen(p, t, send - t, umlen, &patstralloc, ioff) &&
3411 		    !--n) {
3412 		    /* Found the longest match */
3413 		    char *mpos = t + patmatchlen();
3414 		    if (!(fl & SUB_LONG) && !(p->flags & PAT_PURES)) {
3415 			char *ptr;
3416 			int umlen2;
3417 			for (ptr = t, umlen2 = 0; ptr < mpos;
3418 			     ptr++, umlen2++) {
3419 			    set_pat_end(p, *ptr);
3420 			    if (pattrylen(p, t, umlen2, 0, &patstralloc,
3421 					  ioff)) {
3422 				mpos = t + patmatchlen();
3423 				break;
3424 			    }
3425 			}
3426 		    }
3427 		    *sp = get_match_ret(&imd, t-s, mpos-s);
3428 		    return 1;
3429 		}
3430 	    }
3431 	    set_pat_start(p, l);
3432 	    if ((fl & SUB_LONG) && pattrylen(p, send, 0, 0,
3433 					     &patstralloc, uml) &&
3434 		!--n) {
3435 		*sp = get_match_ret(&imd, uml, uml);
3436 		return 1;
3437 	    }
3438 	    break;
3439 	}
3440     }
3441 
3442     if (imd.repllist && nonempty(imd.repllist)) {
3443 	/* Put all the bits of a global search and replace together. */
3444 	LinkNode nd;
3445 	Repldata rd;
3446 	int lleft = 0;		/* size of returned string */
3447 	char *ptr, *start;
3448 	int i;
3449 
3450 	/*
3451 	 * Use metafied string again.
3452 	 * Results from get_match_ret in repllist are all metafied.
3453 	 */
3454 	s = *sp;
3455 	i = 0;			/* start of last chunk we got from *sp */
3456 	for (nd = firstnode(imd.repllist); nd; incnode(nd)) {
3457 	    rd = (Repldata) getdata(nd);
3458 	    lleft += rd->b - i; /* previous chunk of *sp */
3459 	    lleft += strlen(rd->replstr);	/* the replaced bit */
3460 	    i = rd->e;		/* start of next chunk of *sp */
3461 	}
3462 	lleft += l - i;	/* final chunk from *sp */
3463 	start = t = zhalloc(lleft+1);
3464 	i = 0;
3465 	for (nd = firstnode(imd.repllist); nd; incnode(nd)) {
3466 	    rd = (Repldata) getdata(nd);
3467 	    memcpy(t, s + i, rd->b - i);
3468 	    t += rd->b - i;
3469 	    ptr = rd->replstr;
3470 	    while (*ptr)
3471 		*t++ = *ptr++;
3472 	    i = rd->e;
3473 	}
3474 	memcpy(t, s + i, l - i);
3475 	start[lleft] = '\0';
3476 	*sp = (char *)start;
3477 	return 1;
3478     }
3479 
3480     /* munge the whole string: no match, so no replstr */
3481     imd.replstr = NULL;
3482     imd.repllist = NULL;
3483     *sp = get_match_ret(&imd, 0, 0);
3484     return 1;
3485 }
3486 
3487 /**/
3488 #endif /* MULTIBYTE_SUPPORT */
3489 
3490 /* blindly turn a string into a tokenised expression without lexing */
3491 
3492 /**/
3493 mod_export void
tokenize(char * s)3494 tokenize(char *s)
3495 {
3496     zshtokenize(s, 0);
3497 }
3498 
3499 /*
3500  * shtokenize is used when we tokenize a string with GLOB_SUBST set.
3501  * In that case we need to retain backslashes when we turn the
3502  * pattern back into a string, so that the string is not
3503  * modified if it failed to match a pattern.
3504  *
3505  * It may be modified by the effect of SH_GLOB which turns off
3506  * various zsh-specific options.
3507  */
3508 
3509 /**/
3510 mod_export void
shtokenize(char * s)3511 shtokenize(char *s)
3512 {
3513     int flags = ZSHTOK_SUBST;
3514     if (isset(SHGLOB))
3515 	flags |= ZSHTOK_SHGLOB;
3516     zshtokenize(s, flags);
3517 }
3518 
3519 /**/
3520 static void
zshtokenize(char * s,int flags)3521 zshtokenize(char *s, int flags)
3522 {
3523     char *t;
3524     int bslash = 0;
3525 
3526     for (; *s; s++) {
3527       cont:
3528 	switch (*s) {
3529 	case Meta:
3530 	    /* skip both Meta and following character */
3531 	    s++;
3532 	    break;
3533 	case Bnull:
3534 	case Bnullkeep:
3535 	case '\\':
3536 	    if (bslash) {
3537 		s[-1] = (flags & ZSHTOK_SUBST) ? Bnullkeep : Bnull;
3538 		break;
3539 	    }
3540 	    bslash = 1;
3541 	    continue;
3542 	case '<':
3543 	    if (flags & ZSHTOK_SHGLOB)
3544 		break;
3545 	    if (bslash) {
3546 		s[-1] = (flags & ZSHTOK_SUBST) ? Bnullkeep : Bnull;
3547 		break;
3548 	    }
3549 	    t = s;
3550 	    while (idigit(*++s));
3551 	    if (!IS_DASH(*s))
3552 		goto cont;
3553 	    while (idigit(*++s));
3554 	    if (*s != '>')
3555 		goto cont;
3556 	    *t = Inang;
3557 	    *s = Outang;
3558 	    break;
3559 	case '(':
3560 	case '|':
3561 	case ')':
3562 	    if (flags & ZSHTOK_SHGLOB)
3563 		break;
3564 	    /*FALLTHROUGH*/
3565 	case '>':
3566 	case '^':
3567 	case '#':
3568 	case '~':
3569 	case '[':
3570 	case ']':
3571 	case '*':
3572 	case '?':
3573 	case '=':
3574 	case '-':
3575 	case '!':
3576 	    for (t = ztokens; *t; t++) {
3577 		if (*t == *s) {
3578 		    if (bslash)
3579 			s[-1] = (flags & ZSHTOK_SUBST) ? Bnullkeep : Bnull;
3580 		    else
3581 			*s = (t - ztokens) + Pound;
3582 		    break;
3583 		}
3584 	    }
3585 	    break;
3586 	}
3587 	bslash = 0;
3588     }
3589 }
3590 
3591 /* remove unnecessary Nulargs */
3592 
3593 /**/
3594 mod_export void
remnulargs(char * s)3595 remnulargs(char *s)
3596 {
3597     if (*s) {
3598 	char *o = s, c;
3599 
3600 	while ((c = *s++))
3601 	    if (c == Bnullkeep) {
3602 		/*
3603 		 * An active backslash that needs to be turned back into
3604 		 * a real backslash for output.  However, we don't
3605 		 * do that yet since we need to ignore it during
3606 		 * pattern matching.
3607 		 */
3608 		continue;
3609 	    } else if (inull(c)) {
3610 		char *t = s - 1;
3611 
3612 		while ((c = *s++)) {
3613 		    if (c == Bnullkeep)
3614 			*t++ = '\\';
3615 		    else if (!inull(c))
3616 			*t++ = c;
3617 		}
3618 		*t = '\0';
3619 		if (!*o) {
3620 		    o[0] = Nularg;
3621 		    o[1] = '\0';
3622 		}
3623 		break;
3624 	    }
3625     }
3626 }
3627 
3628 /* qualifier functions:  mostly self-explanatory, see glob(). */
3629 
3630 /* device number */
3631 
3632 /**/
3633 static int
qualdev(UNUSED (char * name),struct stat * buf,off_t dv,UNUSED (char * dummy))3634 qualdev(UNUSED(char *name), struct stat *buf, off_t dv, UNUSED(char *dummy))
3635 {
3636     return (off_t)buf->st_dev == dv;
3637 }
3638 
3639 /* number of hard links to file */
3640 
3641 /**/
3642 static int
qualnlink(UNUSED (char * name),struct stat * buf,off_t ct,UNUSED (char * dummy))3643 qualnlink(UNUSED(char *name), struct stat *buf, off_t ct, UNUSED(char *dummy))
3644 {
3645     return (g_range < 0 ? buf->st_nlink < ct :
3646 	    g_range > 0 ? buf->st_nlink > ct :
3647 	    buf->st_nlink == ct);
3648 }
3649 
3650 /* user ID */
3651 
3652 /**/
3653 static int
qualuid(UNUSED (char * name),struct stat * buf,off_t uid,UNUSED (char * dummy))3654 qualuid(UNUSED(char *name), struct stat *buf, off_t uid, UNUSED(char *dummy))
3655 {
3656     return buf->st_uid == uid;
3657 }
3658 
3659 /* group ID */
3660 
3661 /**/
3662 static int
qualgid(UNUSED (char * name),struct stat * buf,off_t gid,UNUSED (char * dummy))3663 qualgid(UNUSED(char *name), struct stat *buf, off_t gid, UNUSED(char *dummy))
3664 {
3665     return buf->st_gid == gid;
3666 }
3667 
3668 /* device special file? */
3669 
3670 /**/
3671 static int
qualisdev(UNUSED (char * name),struct stat * buf,UNUSED (off_t junk),UNUSED (char * dummy))3672 qualisdev(UNUSED(char *name), struct stat *buf, UNUSED(off_t junk), UNUSED(char *dummy))
3673 {
3674     return S_ISBLK(buf->st_mode) || S_ISCHR(buf->st_mode);
3675 }
3676 
3677 /* block special file? */
3678 
3679 /**/
3680 static int
qualisblk(UNUSED (char * name),struct stat * buf,UNUSED (off_t junk),UNUSED (char * dummy))3681 qualisblk(UNUSED(char *name), struct stat *buf, UNUSED(off_t junk), UNUSED(char *dummy))
3682 {
3683     return S_ISBLK(buf->st_mode);
3684 }
3685 
3686 /* character special file? */
3687 
3688 /**/
3689 static int
qualischr(UNUSED (char * name),struct stat * buf,UNUSED (off_t junk),UNUSED (char * dummy))3690 qualischr(UNUSED(char *name), struct stat *buf, UNUSED(off_t junk), UNUSED(char *dummy))
3691 {
3692     return S_ISCHR(buf->st_mode);
3693 }
3694 
3695 /* directory? */
3696 
3697 /**/
3698 static int
qualisdir(UNUSED (char * name),struct stat * buf,UNUSED (off_t junk),UNUSED (char * dummy))3699 qualisdir(UNUSED(char *name), struct stat *buf, UNUSED(off_t junk), UNUSED(char *dummy))
3700 {
3701     return S_ISDIR(buf->st_mode);
3702 }
3703 
3704 /* FIFO? */
3705 
3706 /**/
3707 static int
qualisfifo(UNUSED (char * name),struct stat * buf,UNUSED (off_t junk),UNUSED (char * dummy))3708 qualisfifo(UNUSED(char *name), struct stat *buf, UNUSED(off_t junk), UNUSED(char *dummy))
3709 {
3710     return S_ISFIFO(buf->st_mode);
3711 }
3712 
3713 /* symbolic link? */
3714 
3715 /**/
3716 static int
qualislnk(UNUSED (char * name),struct stat * buf,UNUSED (off_t junk),UNUSED (char * dummy))3717 qualislnk(UNUSED(char *name), struct stat *buf, UNUSED(off_t junk), UNUSED(char *dummy))
3718 {
3719     return S_ISLNK(buf->st_mode);
3720 }
3721 
3722 /* regular file? */
3723 
3724 /**/
3725 static int
qualisreg(UNUSED (char * name),struct stat * buf,UNUSED (off_t junk),UNUSED (char * dummy))3726 qualisreg(UNUSED(char *name), struct stat *buf, UNUSED(off_t junk), UNUSED(char *dummy))
3727 {
3728     return S_ISREG(buf->st_mode);
3729 }
3730 
3731 /* socket? */
3732 
3733 /**/
3734 static int
qualissock(UNUSED (char * name),struct stat * buf,UNUSED (off_t junk),UNUSED (char * dummy))3735 qualissock(UNUSED(char *name), struct stat *buf, UNUSED(off_t junk), UNUSED(char *dummy))
3736 {
3737     return S_ISSOCK(buf->st_mode);
3738 }
3739 
3740 /* given flag is set in mode */
3741 
3742 /**/
3743 static int
qualflags(UNUSED (char * name),struct stat * buf,off_t mod,UNUSED (char * dummy))3744 qualflags(UNUSED(char *name), struct stat *buf, off_t mod, UNUSED(char *dummy))
3745 {
3746     return mode_to_octal(buf->st_mode) & mod;
3747 }
3748 
3749 /* mode matches specification */
3750 
3751 /**/
3752 static int
qualmodeflags(UNUSED (char * name),struct stat * buf,off_t mod,UNUSED (char * dummy))3753 qualmodeflags(UNUSED(char *name), struct stat *buf, off_t mod, UNUSED(char *dummy))
3754 {
3755     long v = mode_to_octal(buf->st_mode), y = mod & 07777, n = mod >> 12;
3756 
3757     return ((v & y) == y && !(v & n));
3758 }
3759 
3760 /* regular executable file? */
3761 
3762 /**/
3763 static int
qualiscom(UNUSED (char * name),struct stat * buf,UNUSED (off_t mod),UNUSED (char * dummy))3764 qualiscom(UNUSED(char *name), struct stat *buf, UNUSED(off_t mod), UNUSED(char *dummy))
3765 {
3766     return S_ISREG(buf->st_mode) && (buf->st_mode & S_IXUGO);
3767 }
3768 
3769 /* size in required range? */
3770 
3771 /**/
3772 static int
qualsize(UNUSED (char * name),struct stat * buf,off_t size,UNUSED (char * dummy))3773 qualsize(UNUSED(char *name), struct stat *buf, off_t size, UNUSED(char *dummy))
3774 {
3775 #if defined(ZSH_64_BIT_TYPE) || defined(LONG_IS_64_BIT)
3776 # define QS_CAST_SIZE()
3777     zlong scaled = buf->st_size;
3778 #else
3779 # define QS_CAST_SIZE() (unsigned long)
3780     unsigned long scaled = (unsigned long)buf->st_size;
3781 #endif
3782 
3783     switch (g_units) {
3784     case TT_POSIX_BLOCKS:
3785 	scaled += 511l;
3786 	scaled /= 512l;
3787 	break;
3788     case TT_KILOBYTES:
3789 	scaled += 1023l;
3790 	scaled /= 1024l;
3791 	break;
3792     case TT_MEGABYTES:
3793 	scaled += 1048575l;
3794 	scaled /= 1048576l;
3795 	break;
3796 #if defined(ZSH_64_BIT_TYPE) || defined(LONG_IS_64_BIT)
3797     case TT_GIGABYTES:
3798         scaled += ZLONG_CONST(1073741823);
3799         scaled /= ZLONG_CONST(1073741824);
3800         break;
3801     case TT_TERABYTES:
3802         scaled += ZLONG_CONST(1099511627775);
3803         scaled /= ZLONG_CONST(1099511627776);
3804         break;
3805 #endif
3806     }
3807 
3808     return (g_range < 0 ? scaled < QS_CAST_SIZE() size :
3809 	    g_range > 0 ? scaled > QS_CAST_SIZE() size :
3810 	    scaled == QS_CAST_SIZE() size);
3811 #undef QS_CAST_SIZE
3812 }
3813 
3814 /* time in required range? */
3815 
3816 /**/
3817 static int
qualtime(UNUSED (char * name),struct stat * buf,off_t days,UNUSED (char * dummy))3818 qualtime(UNUSED(char *name), struct stat *buf, off_t days, UNUSED(char *dummy))
3819 {
3820     time_t now, diff;
3821 
3822     time(&now);
3823     diff = now - (g_amc == 0 ? buf->st_atime : g_amc == 1 ? buf->st_mtime :
3824 		  buf->st_ctime);
3825     /* handle multipliers indicating units */
3826     switch (g_units) {
3827     case TT_DAYS:
3828 	diff /= 86400l;
3829 	break;
3830     case TT_HOURS:
3831 	diff /= 3600l;
3832 	break;
3833     case TT_MINS:
3834 	diff /= 60l;
3835 	break;
3836     case TT_WEEKS:
3837 	diff /= 604800l;
3838 	break;
3839     case TT_MONTHS:
3840 	diff /= 2592000l;
3841 	break;
3842     }
3843 
3844     return (g_range < 0 ? diff < days :
3845 	    g_range > 0 ? diff > days :
3846 	    diff == days);
3847 }
3848 
3849 /* evaluate a string */
3850 
3851 /**/
3852 static int
qualsheval(char * name,UNUSED (struct stat * buf),UNUSED (off_t days),char * str)3853 qualsheval(char *name, UNUSED(struct stat *buf), UNUSED(off_t days), char *str)
3854 {
3855     Eprog prog;
3856 
3857     if ((prog = parse_string(str, 0))) {
3858 	int ef = errflag, lv = lastval, ret;
3859 	int cshglob = badcshglob;
3860 
3861 	unsetparam("reply");
3862 	setsparam("REPLY", ztrdup(name));
3863 	badcshglob = 0;
3864 
3865 	execode(prog, 1, 0, "globqual");
3866 
3867 	if ((ret = lastval))
3868 	    badcshglob |= cshglob;
3869 	/* Retain any user interrupt error status */
3870 	errflag = ef | (errflag & ERRFLAG_INT);
3871 	lastval = lv;
3872 
3873 	if (!(inserts = getaparam("reply")) &&
3874 	    !(inserts = gethparam("reply"))) {
3875 	    char *tmp;
3876 
3877 	    if ((tmp = getsparam("reply")) || (tmp = getsparam("REPLY"))) {
3878 		static char *tmparr[2];
3879 
3880 		tmparr[0] = tmp;
3881 		tmparr[1] = NULL;
3882 
3883 		inserts = tmparr;
3884 	    }
3885 	}
3886 
3887 	return !ret;
3888     }
3889     return 0;
3890 }
3891 
3892 /**/
3893 static int
qualnonemptydir(char * name,struct stat * buf,UNUSED (off_t days),UNUSED (char * str))3894 qualnonemptydir(char *name, struct stat *buf, UNUSED(off_t days), UNUSED(char *str))
3895 {
3896     DIR *dirh;
3897     struct dirent *de;
3898     int unamelen;
3899     char *uname = unmetafy(dupstring(name), &unamelen);
3900 
3901     if (!S_ISDIR(buf->st_mode))
3902 	return 0;
3903 
3904     if (buf->st_nlink > 2)
3905 	return 1;
3906 
3907     if (!(dirh = opendir(uname)))
3908 	return 0;
3909 
3910     while ((de = readdir(dirh))) {
3911 	if (strcmp(de->d_name, ".") && strcmp(de->d_name, "..")) {
3912 	    closedir(dirh);
3913 	    return 1;
3914 	}
3915     }
3916 
3917     closedir(dirh);
3918     return 0;
3919 }
3920