1 /* modified from dos_glob.c to work with wchar_t */
2 
3 /*
4  * Copyright (c) 1989, 1993
5  *	The Regents of the University of California.  All rights reserved.
6  *
7  * This code is derived from software contributed to Berkeley by
8  * Guido van Rossum.
9  *
10  * Redistribution and use in source and binary forms, with or without
11  * modification, are permitted provided that the following conditions
12  * are met:
13  * 1. Redistributions of source code must retain the above copyright
14  *    notice, this list of conditions and the following disclaimer.
15  * 2. Redistributions in binary form must reproduce the above copyright
16  *    notice, this list of conditions and the following disclaimer in the
17  *    documentation and/or other materials provided with the distribution.
18  * 3. Neither the name of the University nor the names of its contributors
19  *    may be used to endorse or promote products derived from this software
20  *    without specific prior written permission.
21  *
22  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
23  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
24  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
25  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
26  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
27  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
28  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
29  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
30  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
31  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
32  * SUCH DAMAGE.
33  */
34 
35 #if defined(LIBC_SCCS) && !defined(lint)
36 static char sccsid[] = "@(#)glob.c	8.3 (Berkeley) 10/13/93";
37 /* most changes between the version above and the one below have been ported:
38 static char sscsid[]=  "$OpenBSD: glob.c,v 1.8.10.1 2001/04/10 jason Exp $";
39  */
40 #endif /* LIBC_SCCS and not lint */
41 
42 /*
43  * glob(3) -- a superset of the one defined in POSIX 1003.2.
44  *
45  * The [!...] convention to negate a range is supported (SysV, Posix, ksh).
46  *
47  * Optional extra services, controlled by flags not defined by POSIX:
48  *
49  * GLOB_QUOTE:
50  *	Escaping convention: \ inhibits any special meaning the following
51  *	character might have (except \ at end of string is retained).
52  * GLOB_MAGCHAR:
53  *	Set in gl_flags if pattern contained a globbing character.
54  * GLOB_NOMAGIC:
55  *	Same as GLOB_NOCHECK, but it will only append pattern if it did
56  *	not contain any magic characters.  [Used in csh style globbing]
57  * GLOB_ALTDIRFUNC:
58  *	Use alternately specified directory access functions.
59  * GLOB_TILDE:
60  *	expand ~user/foo to the /home/dir/of/user/foo
61  * GLOB_BRACE:
62  *	expand {1,2}{a,b} to 1a 1b 2a 2b
63  * gl_matchc:
64  *	Number of matches in the current invocation of glob.
65  * GLOB_ALPHASORT:
66  *	sort alphabetically like csh (case doesn't matter) instead of in ASCII
67  *	order
68  */
69 
70 #include <wchar.h>
71 #include "dos_wglob.h"
72 
73 //#define GLOB_DEBUG
74 
75 #ifdef GLOB_DEBUG
76 void Rprintf(const char *, ...);
77 #endif
78 
79 #define	MAXPATHLEN	255
80 #define DOSISH
81 #define ARG_MAX		14500
82 
83 
84 #define	BG_DOLLAR	L'$'
85 #define	BG_DOT		L'.'
86 #define	BG_EOS		L'\0'
87 #define	BG_LBRACKET	L'['
88 #define	BG_NOT		L'!'
89 #define	BG_QUESTION	L'?'
90 #define	BG_QUOTE	L'\\'
91 #define	BG_RANGE	L'-'
92 #define	BG_RBRACKET	L']'
93 #define	BG_SEP		L'/'
94 #ifdef DOSISH /* true, cannot be set to false anymore */
95 #define BG_SEP2		L'\\'
96 #endif
97 #define	BG_STAR		L'*'
98 #define	BG_TILDE	L'~'
99 #define	BG_UNDERSCORE	L'_'
100 #define	BG_LBRACE	L'{'
101 #define	BG_RBRACE	L'}'
102 #define	BG_SLASH	L'/'
103 #define	BG_COMMA	L','
104 
105 
106 #include <stdlib.h>
107 #include <dirent.h>
108 #include <sys/types.h>
109 #include <sys/stat.h>
110 #include <string.h>
111 #include <wctype.h>
112 
113 
114 typedef size_t STRLEN;
115 typedef struct _stat Stat_t;
116 typedef struct _wdirent Direntry_t;
117 
118 
119 static int	 compare(const void *, const void *);
120 static int	 ci_compare(const void *, const void *);
121 static int	 g_Ctoc(const wchar_t *, wchar_t *, STRLEN);
122 static int	 g_lstat(wchar_t *, Stat_t *, wglob_t *);
123 static _WDIR	*g_opendir(wchar_t *, wglob_t *);
124 static const wchar_t *
125 		 g_strchr(const wchar_t *, int);
126 static int	 glob0(const wchar_t *, wglob_t *);
127 static int	 glob1(wchar_t *, wchar_t *, wglob_t *, size_t *);
128 static int	 glob2(wchar_t *, wchar_t *, wchar_t *, wchar_t *, wchar_t *, wchar_t *,
129 		       wglob_t *, size_t *);
130 static int	 glob3(wchar_t *, wchar_t *, wchar_t *, wchar_t *, wchar_t *, wchar_t *,
131 		       wchar_t *, wchar_t *, wglob_t *, size_t *);
132 static int	 globextend(const wchar_t *, wglob_t *, size_t *);
133 static const wchar_t *
134 		 globtilde(const wchar_t *, wchar_t *, size_t, wglob_t *);
135 static int	 globexp1(const wchar_t *, wglob_t *);
136 static int	 globexp2(const wchar_t *, const wchar_t *, wglob_t *, int *);
137 static int	 match(wchar_t *, wchar_t *, wchar_t *, int);
138 #ifdef GLOB_DEBUG
139 static void	 qprintf(const char *, wchar_t *);
140 #endif /* GLOB_DEBUG */
141 
142 /*
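   Protected and meta characters are taken from the Unicode noncharacter
   range U+FDD0..U+FDEF (code points reserved for process-internal use;
   note this is not the Private Use Area).
   The DOS version set the upper bit of bytes for meta characters.
   An earlier wchar_t version set the top bits of wchar_t.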
146 */
147 
148 #define	M_MASK		0xffff /* pointless in current setup */
149 
150 #define P_LBRACKET  (wchar_t)	0xfdd0
151 #define P_RBRACKET  (wchar_t)	0xfdd1
152 #define P_RANGE	    (wchar_t)	0xfdd2
153 #define P_LBRACE    (wchar_t)	0xfdd3
154 #define P_RBRACE    (wchar_t)	0xfdd4
155 #define P_TILDE	    (wchar_t)	0xfdd5
156 #define P_QUOTE	    (wchar_t)	0xfdd6
157 #define	M_ALL	    (wchar_t)	0xfdd7
158 #define	M_END	    (wchar_t)	0xfdd8
159 #define	M_NOT	    (wchar_t)	0xfdd9
160 #define	M_ONE	    (wchar_t)	0xfdda
161 #define	M_RNG	    (wchar_t)	0xfddb
162 #define	M_SET	    (wchar_t)	0xfddc
163 
WC_PROTECT(wchar_t c)164 static wchar_t WC_PROTECT(wchar_t c)
165 {
166     /* []-{}~\ */
167 
168     switch(c) {
169 	case BG_LBRACKET: return P_LBRACKET;
170 	case BG_RBRACKET: return P_RBRACKET;
171 	case BG_RANGE: return P_RANGE;
172 	case BG_LBRACE: return P_LBRACE;
173 	case BG_RBRACE: return P_RBRACE;
174 	case BG_TILDE: return P_TILDE;
175 	case BG_QUOTE: return P_QUOTE;
176 	default:
177 	    /* not reachable */
178 	    return c;
179     }
180 }
181 
ismeta(wchar_t c)182 static int ismeta(wchar_t c)
183 {
184     switch(c) {
185 	case M_ALL:
186 	case M_END:
187 	case M_NOT:
188 	case M_ONE:
189 	case M_RNG:
190 	case M_SET:
191 	    return 1;
192 	default:
193 	    return 0;
194     }
195 }
196 
CHAR(wchar_t c)197 static wchar_t CHAR(wchar_t c)
198 {
199     switch(c) {
200 	case P_LBRACKET: return BG_LBRACKET;
201 	case P_RBRACKET: return BG_RBRACKET;
202 	case P_RANGE: return BG_RANGE;
203 	case P_LBRACE: return BG_LBRACE;
204 	case P_RBRACE: return BG_RBRACE;
205 	case P_TILDE: return BG_TILDE;
206 	case P_QUOTE: return BG_QUOTE;
207 	case M_ALL: return BG_STAR;
208 	case M_END: return BG_RBRACKET;
209 	case M_NOT: return BG_NOT;
210 	case M_ONE: return BG_QUESTION;
211 	case M_RNG: return BG_RANGE;
212 	case M_SET: return BG_LBRACKET;
213 	default:
214 	    return c;
215     }
216 }
217 
218 int
dos_wglob(const wchar_t * pattern,int flags,int (* errfunc)(const wchar_t *,int),wglob_t * pglob)219 dos_wglob(const wchar_t *pattern, int flags,
220 	  int (*errfunc)(const wchar_t *, int), wglob_t *pglob)
221 {
222     const wchar_t *patnext;
223     int c;
224     wchar_t *bufnext, *bufend, patbuf[MAXPATHLEN];
225 
226     patnext = pattern;
227 #if 1
228     if (!(flags & GLOB_APPEND)) {
229 	pglob->gl_pathc = 0;
230 	pglob->gl_pathv = NULL;
231 	if (!(flags & GLOB_DOOFFS))
232 	    pglob->gl_offs = 0;
233     }
234 #else
235     pglob->gl_pathc = 0;
236     pglob->gl_pathv = NULL;
237     pglob->gl_offs = 0;
238 #endif
239     pglob->gl_flags = flags & ~GLOB_MAGCHAR;
240     pglob->gl_errfunc = errfunc;
241     pglob->gl_matchc = 0;
242 
243     bufnext = patbuf;
244     bufend = bufnext + MAXPATHLEN - 1;
245 #ifdef DOSISH /* true */
246     /* Nasty hack to treat patterns like "C:*" correctly. In this
247      * case, the * should match any file in the current directory
248      * on the C: drive. However, the glob code does not treat the
249      * colon specially, so it looks for files beginning "C:" in
250      * the current directory. To fix this, change the pattern to
251      * add an explicit "./" at the start (just after the drive
252      * letter and colon - ie change to "C:./").
253      */
254     if (iswalpha(pattern[0]) && pattern[1] == L':' &&
255 	pattern[2] != BG_SEP && pattern[2] != BG_SEP2 &&
256 	bufend - bufnext > 4) {
257 	*bufnext++ = pattern[0];
258 	*bufnext++ = L':';
259 	*bufnext++ = L'.';
260 	*bufnext++ = BG_SEP;
261 	patnext += 2;
262     }
263 
264     /* Hack from Tony Plate to allow UNC network drive specification:
265      * Without this code, '\\' (i.e., literally two backslashes inpattern)
266      * at the beginning of a path is not recognized as a network drive,
267      * because the GLOB_QUOTE loop below changes the two backslashes to one.
268      * So, in the case where there are two but not three backslashes at
269      * the beginning of the path, transfer these to the output.
270      */
271     if (patnext == pattern && bufend - bufnext > 2 &&
272 	pattern[0] == BG_SEP2 && pattern[1] == BG_SEP2 &&
273 	pattern[2] != BG_SEP2) {
274 	*bufnext++ = pattern[0];
275 	*bufnext++ = pattern[1];
276 	patnext += 2;
277     }
278 #endif
279 
280     if (flags & GLOB_QUOTE) {
281 	/* Protect the quoted characters. */
282 	while (bufnext < bufend && (c = *patnext++) != BG_EOS)
283 	    if (c == BG_QUOTE) {
284 #ifdef DOSISH /* true */
285 		/* To avoid backslashitis on Win32,
286 		 * we only treat \ as a quoting character
287 		 * if it precedes one of the
288 		 * metacharacters []-{}~\
289 		 */
290 		if ((c = *patnext++) != L'[' && c != L']' &&
291 		    c != L'-' && c != L'{' && c != L'}' &&
292 		    c != L'~' && c != L'\\') {
293 		    /* WC_PROTECT has to support all characters above */
294 #else
295 # error DOSISH must be true
296 		if ((c = *patnext++) == BG_EOS) {
297 #endif
298 		    c = BG_QUOTE;
299 		    --patnext;
300 		}
301 		*bufnext++ = WC_PROTECT(c);
302 	    } else
303 		*bufnext++ = c;
304 	} else
305 	    while (bufnext < bufend && (c = *patnext++) != BG_EOS)
306 		*bufnext++ = c;
307     *bufnext = BG_EOS;
308 
309     if (flags & GLOB_BRACE)
310 	return globexp1(patbuf, pglob);
311     else
312 	return glob0(patbuf, pglob);
313 }
314 
315 /*
316  * Expand recursively a glob {} pattern. When there is no more expansion
317  * invoke the standard globbing routine to glob the rest of the magic
318  * characters
319  */
320 static int
321 globexp1(const wchar_t *pattern, wglob_t *pglob)
322 {
323     const wchar_t* ptr = pattern;
324     int rv;
325 
326     /* Protect a single {}, for find(1), like csh */
327     if (pattern[0] == BG_LBRACE && pattern[1] == BG_RBRACE && pattern[2] == BG_EOS)
328 	return glob0(pattern, pglob);
329 
330     while ((ptr = (const wchar_t *) g_strchr(ptr, BG_LBRACE)) != NULL)
331 	if (!globexp2(ptr, pattern, pglob, &rv))
332 	    return rv;
333 
334     return glob0(pattern, pglob);
335 }
336 
337 
338 /*
339  * Recursive brace globbing helper. Tries to expand a single brace.
340  * If it succeeds then it invokes globexp1 with the new pattern.
341  * If it fails then it tries to glob the rest of the pattern and returns.
342  */
343 static int
344 globexp2(const wchar_t *ptr, const wchar_t *pattern,
345 	 wglob_t *pglob, int *rv)
346 {
347     int     i;
348     wchar_t   *lm, *ls;
349     const wchar_t *pe, *pm, *pl;
350     wchar_t    patbuf[MAXPATHLEN];
351 
352     /* copy part up to the brace */
353     for (lm = patbuf, pm = pattern; pm != ptr; *lm++ = *pm++)
354 	;
355     *lm = BG_EOS;
356     ls = lm;
357 
358     /* Find the balanced brace */
359     for (i = 0, pe = ++ptr; *pe; pe++)
360 	if (*pe == BG_LBRACKET) {
361 	    /* Ignore everything between [] */
362 	    for (pm = pe++; *pe != BG_RBRACKET && *pe != BG_EOS; pe++)
363 		;
364 	    if (*pe == BG_EOS) {
365 		/*
366 		 * We could not find a matching BG_RBRACKET.
367 		 * Ignore and just look for BG_RBRACE
368 		 */
369 		pe = pm;
370 	    }
371 	} else if (*pe == BG_LBRACE)
372 	    i++;
373 	else if (*pe == BG_RBRACE) {
374 	    if (i == 0)
375 		break;
376 	    i--;
377 	}
378 
379     /* Non matching braces; just glob the pattern */
380     if (i != 0 || *pe == BG_EOS) {
381 	*rv = glob0(patbuf, pglob);
382 	return 0;
383     }
384 
385     for (i = 0, pl = pm = ptr; pm <= pe; pm++) {
386 	switch (*pm) {
387 	case BG_LBRACKET:
388 	    /* Ignore everything between [] */
389 	    for (pl = pm++; *pm != BG_RBRACKET && *pm != BG_EOS; pm++)
390 		;
391 	    if (*pm == BG_EOS) {
392 		/*
393 		 * We could not find a matching BG_RBRACKET.
394 		 * Ignore and just look for BG_RBRACE
395 		 */
396 		pm = pl;
397 	    }
398 	    break;
399 
400 	case BG_LBRACE:
401 	    i++;
402 	    break;
403 
404 	case BG_RBRACE:
405 	    if (i) {
406 		i--;
407 		break;
408 	    }
409 	    /* FALLTHROUGH */
410 	case BG_COMMA:
411 	    if (i && *pm == BG_COMMA)
412 		break;
413 	    else {
414 		/* Append the current string */
415 		for (lm = ls; (pl < pm); *lm++ = *pl++)
416 		    ;
417 
418 		/*
419 		 * Append the rest of the pattern after the
420 		 * closing brace
421 		 */
422 		for (pl = pe + 1; (*lm++ = *pl++) != BG_EOS; )
423 		    ;
424 
425 		/* Expand the current pattern */
426 #ifdef GLOB_DEBUG
427 		qprintf("globexp2:", patbuf);
428 #endif /* GLOB_DEBUG */
429 		*rv = globexp1(patbuf, pglob);
430 
431 		/* move after the comma, to the next string */
432 		pl = pm + 1;
433 	    }
434 	    break;
435 
436 	default:
437 	    break;
438 	}
439     }
440     *rv = 0;
441     return 0;
442 }
443 
444 
445 
446 /*
447  * expand tilde from the passwd file: not supported.
448  */
449 static const wchar_t *
450 globtilde(const wchar_t *pattern, wchar_t *patbuf, size_t patbuf_len, wglob_t *pglob)
451 {
452     wchar_t *h;
453     const wchar_t *p;
454     wchar_t *b, *eb;
455 
456     if (*pattern != BG_TILDE || !(pglob->gl_flags & GLOB_TILDE))
457 	return pattern;
458 
459     /* Copy up to the end of the string or / */
460     eb = &patbuf[patbuf_len - 1];
461     for (p = pattern + 1, h = (wchar_t *) patbuf;
462 	 h < eb && *p && *p != BG_SLASH; *h++ = *p++)
463 	;
464 
465     *h = BG_EOS;
466 
467     if (((wchar_t *) patbuf)[0] == BG_EOS) {
468 	/*
469 	 * handle a plain ~ or ~/ by expanding $HOME
470 	 * first and then trying the password file
471 	 */
472 	if ((h = _wgetenv(L"R_USER")) == NULL) {
473 	    return pattern;
474 	}
475     } else {
476 	/*
477 	 * Expand a ~user
478 	 */
479 	return pattern;
480     }
481 
482     /* Copy the home directory */
483     for (b = patbuf; b < eb && *h; *b++ = *h++)
484 	;
485 
486     /* Append the rest of the pattern */
487     while (b < eb && (*b++ = *p++) != BG_EOS)
488 	;
489     *b = BG_EOS;
490 
491     return patbuf;
492 }
493 
494 
495 /*
496  * The main glob() routine: compiles the pattern (optionally processing
497  * quotes), calls glob1() to do the real pattern matching, and finally
498  * sorts the list (unless unsorted operation is requested).  Returns 0
499  * if things went well, nonzero if errors occurred.  It is not an error
500  * to find no matches.
501  */
502 static int
503 glob0(const wchar_t *pattern, wglob_t *pglob)
504 {
505     const wchar_t *qpat, *qpatnext;
506     int c, err, oldflags, oldpathc;
507     wchar_t *bufnext, patbuf[MAXPATHLEN];
508     size_t limit = 0;
509 
510     qpat = globtilde(pattern, patbuf, MAXPATHLEN, pglob);
511     qpatnext = qpat;
512     oldflags = pglob->gl_flags;
513     oldpathc = pglob->gl_pathc;
514     bufnext = patbuf;
515 
516     /* We don't need to check for buffer overflow any more. */
517     while ((c = *qpatnext++) != BG_EOS) {
518 	switch (c) {
519 	case BG_LBRACKET:
520 	    c = *qpatnext;
521 	    if (c == BG_NOT)
522 		++qpatnext;
523 	    if (*qpatnext == BG_EOS ||
524 		g_strchr((wchar_t *) qpatnext+1, BG_RBRACKET) == NULL) {
525 		*bufnext++ = BG_LBRACKET;
526 		if (c == BG_NOT)
527 		    --qpatnext;
528 		break;
529 	    }
530 	    *bufnext++ = M_SET;
531 	    if (c == BG_NOT)
532 		*bufnext++ = M_NOT;
533 	    c = *qpatnext++;
534 	    do {
535 		*bufnext++ = CHAR(c);
536 		if (*qpatnext == BG_RANGE &&
537 		    (c = qpatnext[1]) != BG_RBRACKET) {
538 		    *bufnext++ = M_RNG;
539 		    *bufnext++ = CHAR(c);
540 		    qpatnext += 2;
541 		}
542 	    } while ((c = *qpatnext++) != BG_RBRACKET);
543 	    pglob->gl_flags |= GLOB_MAGCHAR;
544 	    *bufnext++ = M_END;
545 	    break;
546 	case BG_QUESTION:
547 	    pglob->gl_flags |= GLOB_MAGCHAR;
548 	    *bufnext++ = M_ONE;
549 	    break;
550 	case BG_STAR:
551 	    pglob->gl_flags |= GLOB_MAGCHAR;
552 	    /* collapse adjacent stars to one,
553 	     * to avoid exponential behavior
554 	     */
555 	    if (bufnext == patbuf || bufnext[-1] != M_ALL)
556 		*bufnext++ = M_ALL;
557 	    break;
558 	default:
559 	    *bufnext++ = CHAR(c);
560 	    break;
561 	}
562     }
563     *bufnext = BG_EOS;
564 #ifdef GLOB_DEBUG
565     qprintf("glob0:", patbuf);
566 #endif /* GLOB_DEBUG */
567 
568     if ((err = glob1(patbuf, patbuf+MAXPATHLEN-1, pglob, &limit)) != 0) {
569 	pglob->gl_flags = oldflags;
570 	return(err);
571     }
572 
573     /*
574      * If there was no match we are going to append the pattern
575      * if GLOB_NOCHECK was specified or if GLOB_NOMAGIC was specified
576      * and the pattern did not contain any magic characters
577      * GLOB_NOMAGIC is there just for compatibility with csh.
578      */
579     if (pglob->gl_pathc == oldpathc &&
580 	((pglob->gl_flags & GLOB_NOCHECK) ||
581 	 ((pglob->gl_flags & GLOB_NOMAGIC) &&
582 	  !(pglob->gl_flags & GLOB_MAGCHAR))))
583     {
584 #ifdef GLOB_DEBUG
585 	Rprintf("calling globextend from glob0\n");
586 #endif /* GLOB_DEBUG */
587 	pglob->gl_flags = oldflags;
588 	return(globextend(qpat, pglob, &limit));
589     }
590     else if (!(pglob->gl_flags & GLOB_NOSORT))
591 	qsort(pglob->gl_pathv + pglob->gl_offs + oldpathc,
592 	      pglob->gl_pathc - oldpathc, sizeof(wchar_t *),
593 	      (pglob->gl_flags & (GLOB_ALPHASORT|GLOB_NOCASE))
594 	      ? ci_compare : compare);
595     pglob->gl_flags = oldflags;
596     return(0);
597 }
598 
/*
 * qsort() comparator for an array of wchar_t* strings: orders them
 * ignoring case (via towlower); strings that are equal apart from case
 * are ordered by compare(), i.e. by wcscmp().
 */
static int
ci_compare(const void *p, const void *q)
{
    const wchar_t *a = *(const wchar_t **)p;
    const wchar_t *b = *(const wchar_t **)q;
    int diff;

    /* skip the common prefix (case-insensitively) */
    for (; *a && *b && towlower(*a) == towlower(*b); ++a, ++b)
	;

    diff = towlower(*a) - towlower(*b);
    return diff != 0 ? diff : compare(p, q);
}
616 
/*
 * qsort() comparator for an array of wchar_t* strings: plain wcscmp()
 * ordering of the two strings pointed to.
 */
static int
compare(const void *p, const void *q)
{
    const wchar_t *lhs = *(wchar_t **)p;
    const wchar_t *rhs = *(wchar_t **)q;

    return wcscmp(lhs, rhs);
}
622 
623 static int
624 glob1(wchar_t *pattern, wchar_t *pattern_last, wglob_t *pglob, size_t *limitp)
625 {
626     wchar_t pathbuf[MAXPATHLEN];
627 
628     /* A null pathname is invalid -- POSIX 1003.1 sect. 2.4. */
629     if (*pattern == BG_EOS) return(0);
630     return(glob2(pathbuf, pathbuf+MAXPATHLEN-1,
631 		 pathbuf, pathbuf+MAXPATHLEN-1,
632 		 pattern, pattern_last, pglob, limitp));
633 }
634 
635 /*
636  * The functions glob2 and glob3 are mutually recursive; there is one level
637  * of recursion for each segment in the pattern that contains one or more
638  * meta characters.
639  */
640 static int
641 glob2(wchar_t *pathbuf, wchar_t *pathbuf_last, wchar_t *pathend, wchar_t *pathend_last,
642       wchar_t *pattern, wchar_t *pattern_last, wglob_t *pglob, size_t *limitp)
643 {
644     Stat_t sb;
645     wchar_t *p, *q;
646     int anymeta;
647 
648     /*
649      * Loop over pattern segments until end of pattern or until
650      * segment with meta character found.
651      */
652     for (anymeta = 0;;) {
653 	if (*pattern == BG_EOS) {		/* End of pattern? */
654 	    *pathend = BG_EOS;
655 	    if (g_lstat(pathbuf, &sb, pglob)) return(0);
656 
657 	    if (((pglob->gl_flags & GLOB_MARK) &&
658 		 pathend[-1] != BG_SEP
659 #ifdef DOSISH /* true */
660 		 && pathend[-1] != BG_SEP2
661 #endif
662 		    ) && S_ISDIR(sb.st_mode) ) {
663 		if (pathend+1 > pathend_last)
664 		    return (1);
665 		*pathend++ = BG_SEP;
666 		*pathend = BG_EOS;
667 	    }
668 	    ++pglob->gl_matchc;
669 #ifdef GLOB_DEBUG
670 	    Rprintf("calling globextend from glob2\n");
671 #endif /* GLOB_DEBUG */
672 	    return(globextend(pathbuf, pglob, limitp));
673 	}
674 
675 	/* Find end of next segment, copy tentatively to pathend. */
676 	q = pathend;
677 	p = pattern;
678 	while (*p != BG_EOS && *p != BG_SEP
679 #ifdef DOSISH /* true */
680 	       && *p != BG_SEP2
681 #endif
682 	    ) {
683 	    if (ismeta(*p)) anymeta = 1;
684 	    if (q+1 > pathend_last) return (1);
685 	    *q++ = *p++;
686 	}
687 
688 	if (!anymeta) {		/* No expansion, do next segment. */
689 	    pathend = q;
690 	    pattern = p;
691 	    while (*pattern == BG_SEP
692 #ifdef DOSISH /* true */
693 		   || *pattern == BG_SEP2
694 #endif
695 		) {
696 		if (pathend+1 > pathend_last) return (1);
697 		*pathend++ = *pattern++;
698 	    }
699 	} else
700 	    /* Need expansion, recurse. */
701 	    return(glob3(pathbuf, pathbuf_last, pathend,
702 			 pathend_last, pattern, pattern_last,
703 			 p, pattern_last, pglob, limitp));
704     }
705     /* NOTREACHED */
706 }
707 
708 static int
709 glob3(wchar_t *pathbuf, wchar_t *pathbuf_last, wchar_t *pathend, wchar_t *pathend_last,
710       wchar_t *pattern, wchar_t *pattern_last,
711       wchar_t *restpattern, wchar_t *restpattern_last, wglob_t *pglob, size_t *limitp)
712 {
713     Direntry_t *dp;
714     _WDIR *dirp;
715     int err;
716     int nocase;
717     wchar_t buf[MAXPATHLEN];
718 
719     if (pathend > pathend_last)
720 	return (1);
721     *pathend = BG_EOS;
722     errno = 0;
723 
724     if ((dirp = g_opendir(pathbuf, pglob)) == NULL) {
725 	/* TODO: don't call for ENOENT or ENOTDIR? */
726 	if (pglob->gl_errfunc) {
727 	    if (g_Ctoc(pathbuf, buf, sizeof(buf)))
728 		return (GLOB_ABEND);
729 	    if (pglob->gl_errfunc(buf, errno) ||
730 		(pglob->gl_flags & GLOB_ERR))
731 		return (GLOB_ABEND);
732 	}
733 	return(0);
734     }
735 
736     err = 0;
737     nocase = ((pglob->gl_flags & GLOB_NOCASE) != 0);
738 
739     /* Search directory for matching names. */
740     while ((dp = _wreaddir(dirp))) {
741 	wchar_t *sc, *dc;
742 
743 	/* Initial BG_DOT must be matched literally. */
744 	if (dp->d_name[0] == BG_DOT && *pattern != BG_DOT)
745 	    continue;
746 	dc = pathend;
747 	sc = dp->d_name;
748 	while (dc < pathend_last && (*dc++ = *sc++) != BG_EOS)
749 	    ;
750 	if (dc >= pathend_last) {
751 	    *dc = BG_EOS;
752 	    err = 1;
753 	    break;
754 	}
755 
756 	if (!match(pathend, pattern, restpattern, nocase)) {
757 	    *pathend = BG_EOS;
758 	    continue;
759 	}
760 	err = glob2(pathbuf, pathbuf_last, --dc, pathend_last,
761 		    restpattern, restpattern_last, pglob, limitp);
762 	if (err)
763 	    break;
764     }
765 
766     _wclosedir(dirp);
767     return(err);
768 }
769 
770 
771 #include <R_ext/RS.h> /* for Calloc, Realloc, Free */
772 
773 /*
774  * Extend the gl_pathv member of a glob_t structure to accomodate a new item,
775  * add the new item, and update gl_pathc.
776  *
777  * This assumes the BSD realloc, which only copies the block when its size
778  * crosses a power-of-two boundary; for v7 realloc, this would cause quadratic
779  * behavior.
780  *
781  * Return 0 if new item added, error code if memory couldn't be allocated.
782  *
783  * Invariant of the glob_t structure:
784  *	Either gl_pathc is zero and gl_pathv is NULL; or gl_pathc > 0 and
785  *	gl_pathv points to (gl_offs + gl_pathc + 1) items.
786  */
787 static int
788 globextend(const wchar_t *path, wglob_t *pglob, size_t *limitp)
789 {
790     wchar_t **pathv;
791     int i;
792     STRLEN newsize, len;
793     wchar_t *copy;
794     const wchar_t *p;
795 
796 #ifdef GLOB_DEBUG
797     Rprintf("Adding ");
798     for (p = path; *p; p++)
799 	(void)Rprintf("%c", CHAR(*p));
800     Rprintf("\n");
801 #endif /* GLOB_DEBUG */
802 
803     newsize = sizeof(*pathv) * (2 + pglob->gl_pathc + pglob->gl_offs);
804     if (pglob->gl_pathv)
805 	pathv = Realloc(pglob->gl_pathv, newsize, wchar_t *);
806     else
807 	pathv = Calloc(newsize, wchar_t *);
808     if (pathv == NULL) {
809 	if (pglob->gl_pathv) {
810 	    Free(pglob->gl_pathv);
811 	    pglob->gl_pathv = NULL;
812 	}
813 	return(GLOB_NOSPACE);
814     }
815 
816     if (pglob->gl_pathv == NULL && pglob->gl_offs > 0) {
817 	/* first time around -- clear initial gl_offs items */
818 	pathv += pglob->gl_offs;
819 	for (i = pglob->gl_offs; --i >= 0; )
820 	    *--pathv = NULL;
821     }
822     pglob->gl_pathv = pathv;
823 
824     for (p = path; *p++;)
825 	;
826     len = (STRLEN)(p - path);
827     *limitp += len;
828     copy = Calloc(p-path, wchar_t);
829     if (copy != NULL) {
830 	if (g_Ctoc(path, copy, len)) {
831 	    Free(copy);
832 	    return(GLOB_NOSPACE);
833 	}
834 	pathv[pglob->gl_offs + pglob->gl_pathc++] = copy;
835     }
836     pathv[pglob->gl_offs + pglob->gl_pathc] = NULL;
837 
838     if ((pglob->gl_flags & GLOB_LIMIT) &&
839 	newsize + *limitp >= ARG_MAX) {
840 	errno = 0;
841 	return(GLOB_NOSPACE);
842     }
843 
844     return(copy == NULL ? GLOB_NOSPACE : 0);
845 }
846 
847 
848 /*
849  * pattern matching function for filenames.  Each occurrence of the *
850  * pattern causes a recursion level.
851  */
852 static int
853 match(wchar_t *name, wchar_t *pat, wchar_t *patend, int nocase)
854 {
855     int ok, negate_range;
856     wchar_t c, k;
857 
858     while (pat < patend) {
859 	c = *pat++;
860 	switch (c & M_MASK) {
861 	case M_ALL:
862 	    if (pat == patend)
863 		return(1);
864 	    do
865 		if (match(name, pat, patend, nocase))
866 		    return(1);
867 	    while (*name++ != BG_EOS)
868 		;
869 	    return(0);
870 	case M_ONE:
871 	    if (*name++ == BG_EOS)
872 		return(0);
873 	    break;
874 	case M_SET:
875 	    ok = 0;
876 	    if ((k = *name++) == BG_EOS)
877 		return(0);
878 	    if ((negate_range = ((*pat & M_MASK) == M_NOT)) != BG_EOS)
879 		++pat;
880 	    while (((c = *pat++) & M_MASK) != M_END)
881 		if ((*pat & M_MASK) == M_RNG) {
882 		    if (nocase) {
883 			if (towlower(c) <= towlower(k) && towlower(k) <= towlower(pat[1]))
884 			    ok = 1;
885 		    } else {
886 			if (c <= k && k <= pat[1])
887 			    ok = 1;
888 		    }
889 		    pat += 2;
890 		} else if (nocase ? (towlower(c) == towlower(k)) : (c == k))
891 		    ok = 1;
892 	    if (ok == negate_range)
893 		return(0);
894 	    break;
895 	default:
896 	    k = *name++;
897 	    if (nocase ? (towlower(k) != towlower(c)) : (k != c))
898 		return(0);
899 	    break;
900 	}
901     }
902     return(*name == BG_EOS);
903 }
904 
905 /* Free allocated data belonging to a wglob_t structure. */
906 void
907 dos_wglobfree(wglob_t *pglob)
908 {
909     int i;
910     wchar_t **pp;
911 
912     if (pglob->gl_pathv != NULL) {
913 	pp = pglob->gl_pathv + pglob->gl_offs;
914 	for (i = pglob->gl_pathc; i--; ++pp)
915 	    if (*pp)
916 		Free(*pp);
917 	Free(pglob->gl_pathv);
918 	pglob->gl_pathv = NULL;
919     }
920 }
921 
922 static _WDIR *
923 g_opendir(wchar_t *str, wglob_t *pglob)
924 {
925     wchar_t buf[MAXPATHLEN];
926 
927     if (!*str) wcscpy(buf, L".");
928     else
929 	if (g_Ctoc(str, buf, sizeof(buf))) return(NULL);
930     return _wopendir(buf);
931 }
932 
933 static int
934 g_lstat(wchar_t *fn, Stat_t *sb, wglob_t *pglob)
935 {
936     wchar_t buf[MAXPATHLEN];
937 
938     if (g_Ctoc(fn, buf, sizeof(buf)))
939 	return(-1);
940     return(_wstat(buf, sb));
941 }
942 
/*
 * Wide-character strchr(): return a pointer to the first occurrence of
 * `ch` in `str`, or NULL if there is none.  The terminating null counts
 * as part of the string, so searching for 0 returns a pointer to it.
 */
static const wchar_t *
g_strchr(const wchar_t *str, int ch)
{
    for (;; ++str) {
	if (*str == ch)
	    return str;
	if (*str == L'\0')
	    return NULL;
    }
}
952 
953 static int
954 g_Ctoc(const wchar_t *str, wchar_t *buf, STRLEN len)
955 {
956     while (len--)
957 	if ((*buf++ = *str++) == BG_EOS) return 0;
958     return 1;
959 }
960 
961 #ifdef GLOB_DEBUG
/*
 * Debugging aid (GLOB_DEBUG only): print the label `str`, then the
 * pattern `s` with internal codes translated back by CHAR(), then a line
 * that has '_' under every meta character and ' ' elsewhere.
 */
static void
qprintf(const char *str, wchar_t *s)
{
    wchar_t *cur;

    (void)Rprintf("%s:\n", str);

    /* the pattern in readable form */
    cur = s;
    while (*cur) {
	(void)Rprintf("%lc", CHAR(*cur));
	cur++;
    }
    (void)Rprintf("\n");
#if 0
    for (cur = s; *cur; cur++)
	(void)Rprintf("%lc", *cur & M_PROTECT ? L'"' : L' ');
    (void)Rprintf("\n");
#endif
    /* markers below the meta characters */
    cur = s;
    while (*cur) {
	(void)Rprintf("%lc", ismeta(*cur) ? L'_' : L' ');
	cur++;
    }
    (void)Rprintf("\n");
}
980 #endif /* GLOB_DEBUG */
981