1 /**
2  * @file glob.c
3  * Copyright (C) 2011-2013, MinGW.org project.
4  *
5  * Permission is hereby granted, free of charge, to any person obtaining a
6  * copy of this software and associated documentation files (the "Software"),
7  * to deal in the Software without restriction, including without limitation
8  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
9  * and/or sell copies of the Software, and to permit persons to whom the
10  * Software is furnished to do so, subject to the following conditions:
11  *
12  * The above copyright notice, this permission notice, and the following
13  * disclaimer shall be included in all copies or substantial portions of
14  * the Software.
15  *
16  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
19  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
21  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OF OR OTHER
22  * DEALINGS IN THE SOFTWARE.
23  */
24 
25 /* ---------------------------------------------------------------------------
26  * MinGW implementation of (approximately) POSIX conforming glob() and
27  * globfree() API functions.
28  *
29  * Written by Keith Marshall <keithmarshall@users.sourceforge.net>
30  * Copyright (C) 2011-2013, MinGW.org Project.
31  * ---------------------------------------------------------------------------
32  */
33 #include "include/bareos.h"
34 #include <glob.h>
35 #include <ctype.h>
36 #include <stdlib.h>
37 #include <string.h>
38 #include <sys/types.h>
39 #include <libgen.h>
40 #include <dirent.h>
41 #include <errno.h>
42 
/* Prototype for the Readdir_r() helper, declared here only when the build
 * requests the re-entrant directory reader (USE_READDIR_R) while
 * HAVE_READDIR_R is not defined; its definition is not in this part of
 * the file.
 */
#ifdef USE_READDIR_R
#ifndef HAVE_READDIR_R
int Readdir_r(DIR *dirp, struct dirent *entry, struct dirent **result);
#endif
#endif

/* Fallback for stricoll(), used when HAVE_STRICOLL is not defined: map it
 * onto strcasecmp(), i.e. a case-insensitive string comparison.
 */
#ifndef HAVE_STRICOLL
#define stricoll(str1, str2) strcasecmp(str1, str2)
#endif
52 
/* Private extension of the glob() flag offset enumeration.  Numbering
 * continues from __GLOB_FLAG_OFFSET_HIGH_WATER_MARK, (not defined in this
 * file; presumably supplied by <glob.h> -- TODO confirm), so the private
 * offsets start where the user visible ones end.
 */
enum {
  /* Extend the flags offset enumeration, beyond the user visible
   * high water mark, to accommodate some additional flags which are
   * required for private use by the implementation.
   */
  __GLOB_DIRONLY_OFFSET = __GLOB_FLAG_OFFSET_HIGH_WATER_MARK,
  __GLOB_PERIOD_PRIVATE_OFFSET,
  /*
   * For consistency, set a new high water mark above the private
   * range; nothing in the visible part of this file refers to it.
   */
  __GLOB_PRIVATE_FLAGS_HIGH_WATER_MARK
};
66 
/* GLOB_DIRONLY: private flag; glob_match() adds it when it recurses on a
 * path prefix, so that only directory entries are accepted as matches.
 */
#define GLOB_DIRONLY	__GLOB_FLAG__(DIRONLY)
/* GLOB_PERIOD: keep any definition already provided, else fall back to a
 * private flag; when it is set, GlobStrcmp() allows wild cards to match a
 * leading '.' in the candidate name.
 */
#ifndef GLOB_PERIOD
# define GLOB_PERIOD	__GLOB_FLAG__(PERIOD_PRIVATE)
#endif

/* GLOB_INLINE: storage class for the small local helpers; unless it has
 * been defined already, they are static and forcibly inlined, (this
 * default uses GCC attribute syntax).
 */
#ifndef GLOB_INLINE
# define GLOB_INLINE	static __inline__ __attribute__((__always_inline__))
#endif

/* GLOB_HARD_ESC: the escape character used on the Microsoft platforms;
 * it is hard coded here as character code 127, (ASCII DEL).  The
 * commented-out line records an alternative definition, in terms of
 * __CRT_GLOB_ESCAPE_CHAR__, which is not in use.
 */
//#define GLOB_HARD_ESC	__CRT_GLOB_ESCAPE_CHAR__
#define GLOB_HARD_ESC	(char)(127)
78 
#if defined _WIN32 || defined __MS_DOS__
/*
 * For the Microsoft platforms, we treat '\' and '/' interchangeably
 * as directory separator characters; GLOB_DIRSEP is the separator which
 * is preferred when one has to be inserted, and glob_is_dirsep() accepts
 * either of them.
 */
#define GLOB_DIRSEP		('\\')
# define glob_is_dirsep( c )	(((c) == ('/')) || ((c) == GLOB_DIRSEP))
/*
 * ...and, since '\' is a directory separator here, we use GLOB_HARD_ESC,
 * (character code 127, which is ASCII DEL rather than ASCII ESC), as our
 * escape character.
 */
static int glob_escape_char = GLOB_HARD_ESC;
90 
glob_strdup(const char * pattern)91 GLOB_INLINE char *glob_strdup( const char *pattern )
92 {
93   /* An inline wrapper around the standard strdup() function;
94    * this strips instances of the GLOB_HARD_ESC character, which
95    * have not themselves been escaped, from the strdup()ed copy.
96    */
97   char buf[1 + strlen( pattern )];
98   char *copy = buf; const char *origin = pattern;
99   do { if( *origin == GLOB_HARD_ESC ) ++origin;
100        *copy++ = *origin;
101      } while( *origin++ );
102   return bstrdup( buf );
103 }
104 
#else
/* Otherwise, we assume only the POSIX standard '/' as the directory
 * separator, both when inserting one (GLOB_DIRSEP), and when testing
 * for one with glob_is_dirsep()...
 */
#define GLOB_DIRSEP		('/')
# define glob_is_dirsep( c )	((c) == GLOB_DIRSEP)
/*
 * ...and we interpret '\', as specified by POSIX, as
 * the escape character.
 */
static int glob_escape_char = '\\';

/* On this branch glob_strdup() is simply strdup(); no escape characters
 * are stripped from the copy.
 */
#define glob_strdup	strdup
#endif
118 
IsGlobPattern(const char * pattern,int flags)119 static int IsGlobPattern( const char *pattern, int flags )
120 {
121   /* Check if "pattern" represents a globbing pattern
122    * with included wild card characters.
123    */
124   register const char *p;
125   register int c;
126 
127   /* Proceed only if specified pattern is not NULL...
128    */
129   if( (p = pattern) != NULL )
130   {
131     /* ...initially, with no bracketted character set open;
132      * (none can be, because we haven't yet had any opportunity
133      * to see the opening bracket).
134      */
135     int bracket = 0;
136 
137     /* Check each character in "pattern" in turn...
138      */
139     while( (c = *p++) != '\0' )
140     {
141       /* ...proceeding since we have not yet reached the NUL terminator.
142        */
143       if(  ((flags & GLOB_NOESCAPE) == 0)
144       &&  (c == glob_escape_char) && (*p++ == '\0')  )
145 	/*
146 	 * We found an escape character, (and the escape mechanism has
147 	 * not been disabled), but there is no following character to
148 	 * escape; it may be malformed, but this certainly doesn't look
149 	 * like a candidate globbing pattern.
150 	 */
151 	return 0;
152 
153       else if( bracket == 0 )
154       {
155 	/* Still outside of any bracketted character set...
156 	 */
157 	if( (c == '*') || (c == '?') )
158 	  /*
159 	   * ...either of these makes "pattern" an explicit
160 	   * globbing pattern...
161 	   */
162 	  return 1;
163 
164 	if( c == '[' )
165 	  /*
166 	   * ...while this marks the start of a bracketted
167 	   * character set.
168 	   */
169 	  bracket++;
170       }
171 
172       else if( (bracket > 1) && (c == ']') )
173 	/*
174 	 * Within a bracketted character set, where it is not
175 	 * the first character, ']' marks the end of the set,
176 	 * making "pattern" a globbing pattern.
177 	 */
178 	return 1;
179 
180       else if( c != '!' )
181 	/*
182 	 * Also within a bracketted character set, '!' is special
183 	 * when the first character, and shouldn't be counted; note
184 	 * that it should be counted when not the first character,
185 	 * but the short count resulting from ignoring it doesn't
186 	 * affect our desired outcome.
187 	 */
188 	bracket++;
189     }
190   }
191 
192   /* If we get to here, then we ran off the end of "pattern" without
193    * identifying it as a globbing pattern.
194    */
195   return 0;
196 }
197 
glob_set_adjusted(const char * pattern,int flags)198 static const char *glob_set_adjusted( const char *pattern, int flags )
199 {
200   /* Adjust the globbing pattern pointer, to make it refer to the
201    * next character (if any) following a character set specification;
202    * this adjustment is required when pattern matching is to resume
203    * after matching a set specification, irrespective of whether the
204    * match was successful or not; (a failed match is the desired
205    * outcome for an excluded character set).
206    */
207   register const char *p = pattern;
208 
209   /* We need to move the pointer forward, until we find the ']'
210    * which marks the end of the set specification.
211    */
212   while( *p != ']' )
213   {
214     /* We haven't found it yet; advance by one character...
215      */
216     if( (*p == glob_escape_char) && ((flags & GLOB_NOESCAPE) == 0) )
217       /*
218        * ...or maybe even two, when we identify a need to
219        * step over any character which has been escaped...
220        */
221       p++;
222 
223     if( *p++ == '\0' )
224       /*
225        * ...but if we find a NUL on the way, then the pattern
226        * is malformed, so we return NULL to report a bad match.
227        */
228       return NULL;
229   }
230   /* We found the expected ']'; return a pointer to the NEXT
231    * character, (which may be ANYTHING; even NUL is okay).
232    */
233   return ++p;
234 }
235 
static const char *glob_in_set( const char *set, int test, int flags )
{
  /* Decide whether the single character "test" is a member of the
   * character set described by "set".
   *
   * "set" must refer to the first member of the set, i.e. to the
   * character just after the opening '[', or just after the '!' of
   * an excluded set.  The test is always an inclusive one:
   *
   *   - when "test" IS in the set, the return value is a pointer to
   *     the pattern character which follows the closing ']';
   *
   *   - when it is NOT, (or when the set is malformed, or names a
   *     directory separator), the return value is NULL.
   *
   * A caller matching [SET] treats non-NULL as success; a caller
   * matching [!SET] treats non-NULL as failure, and NULL as success,
   * in which case it must use glob_set_adjusted() to step over the
   * set, before it resumes matching.
   */
  const char *cursor = set;

  /* "prev" tracks the most recently examined set member, because it
   * becomes the start of a range, if a '-' follows it.
   */
  int prev = *cursor;

  if( (prev == ']') || (prev == '-') )
  {
    /* As the first member of a set, ']' and '-' are plain literals;
     * step over this one, and report a match straight away, if it
     * is the character we are looking for.
     */
    ++cursor;
    if( prev == test )
      return glob_set_adjusted( cursor, flags );
  }

  for( ;; )
  {
    int cur = *cursor++;

    if( cur == ']' )
      /*
       * This closes the set, and nothing in it has matched.
       */
      break;

    if( (cur == '-') && (*cursor != ']') )
    {
      /* A '-' which is not the last member of the set introduces
       * the upper bound of a range, which starts at "prev"...
       */
      cur = *cursor++;
      if( cur != '\0' )
      {
	/* ...and, provided the pattern didn't end right there, we
	 * walk that range, first in ascending order...
	 */
	while( prev < cur )
	{
	  if( prev++ == test )
	    return glob_set_adjusted( cursor, flags );
	}
	/* ...and then in descending order, to cater for a range
	 * which was specified back to front; in either case the
	 * bound itself is left for the literal test below.
	 */
	while( prev > cur )
	{
	  if( prev-- == test )
	    return glob_set_adjusted( cursor, flags );
	}
      }
    }

    /* The set is malformed if the pattern ends before it is closed,
     * and it may not name a directory separator explicitly; both
     * cases must be reported as a mismatch.
     */
    if( (cur == '\0') || glob_is_dirsep( cur ) )
      return NULL;

    /* A literal match for the current member ends the search.
     */
    if( cur == test )
      return glob_set_adjusted( cursor, flags );

    /* No match yet; remember this member, in case it turns out to
     * be the start of a range, and carry on.
     */
    prev = cur;
  }
  /* The set has been exhausted, and "test" was not found within it.
   */
  return NULL;
}
351 
GlobCaseMatch(int flags,int check,int match)352 GLOB_INLINE int GlobCaseMatch( int flags, int check, int match )
353 {
354   /* Local helper function, used to facilitate the case insensitive
355    * glob character matching appropriate for MS-Windows systems.
356    */
357   return (flags & GLOB_CASEMATCH) ? check - match
358     : tolower( check ) - tolower( match );
359 }
360 
GlobStrcmp(const char * pattern,const char * text,int flags)361 static int GlobStrcmp( const char *pattern, const char *text, int flags )
362 {
363   /* Compare "text" to a specified globbing "pattern" using semantics
364    * comparable to "strcmp()"; returns zero for a complete match, else
365    * non-zero for a mismatch.
366    *
367    * Within "pattern":
368    *   '?'     matches any one character in "text" (except '\0')
369    *   '*'     matches any sequence of zero or more characters in "text"
370    *   [SET]   matches any one character in "text" which is also in "SET"
371    *   [!SET]  matches any one character in "text" which is NOT in "SET"
372    */
373   register const char *p = pattern, *t = text;
374   register int c;
375 
376   if( (*t == '.') && (*p != '.') && ((flags & GLOB_PERIOD) == 0) )
377     /*
378      * The special GNU extension allowing wild cards to match a period
379      * as first character is NOT in effect; "text" DOES have an initial
380      * period character AND "pattern" DOES NOT match it EXPLICITLY, so
381      * this comparison must report a MISMATCH.
382      */
383     return *p - *t;
384 
385   /* Attempt to match "pattern", character by character...
386    */
387   while( (c = *p++) != '\0' )
388   {
389     /* ...so long as we haven't exhausted it...
390      */
391     switch( c )
392     {
393       case '?':
394 	/* Match any one character...
395 	 */
396 	if( *t++ == '\0' )
397 	  /* ...but when there isn't one left to be matched,
398 	   * then we must report a mismatch.
399 	   */
400 	  return '?';
401 	break;
402 
403       case '*':
404 	/* Match any sequence of zero or more characters...
405 	 */
406 	while( *p == '*' )
407 	  /*
408 	   * ...ignoring any repeated '*' in the pattern...
409 	   */
410 	  p++;
411 
412 	/* ...and if we've exhausted the pattern...
413 	 */
414 	if( *p == '\0' )
415 	  /*
416 	   * ...then we simply match all remaining characters,
417 	   * to the end of "text", so we may return immediately,
418 	   * reporting a successful match.
419 	   */
420 	  return 0;
421 
422 	/* When we haven't exhausted the pattern, then we may
423 	 * attempt to recursively match the remainder of the
424 	 * pattern to some terminal substring of "text"; we do
425 	 * this iteratively, stepping over as many characters
426 	 * of "text" as necessary, (and which thus match the '*'
427 	 * in "pattern"), until we either find the start of this
428 	 * matching substring, or we exhaust "text" without any
429 	 * possible match...
430 	 */
431 	do { c = GlobStrcmp( p, t, flags | GLOB_PERIOD );
432 	   } while( (c != 0) && (*t++ != '\0') );
433 	/*
434 	 * ...and ultimately, we return the result of this
435 	 * recursive attempt to find a match.
436 	 */
437 	return c;
438 
439       case '[':
440 	/* Here we need to match (or not match) exactly one
441 	 * character from the candidate text with any one of
442 	 * a set of characters in the pattern...
443 	 */
444 	if( (c = *t++) == '\0' )
445 	  /*
446 	   * ...but, we must return a mismatch if there is no
447 	   * candidate character left to match.
448 	   */
449 	  return '[';
450 
451 	if( *p == '!' )
452 	{
453 	  /* Match any one character which is NOT in the SET
454 	   * specified by [!SET].
455 	   */
456 	  if( glob_in_set( ++p, c, flags ) == NULL )
457 	  {
458 	    if( *p == ']' )
459 	      p++;
460 	    p = glob_set_adjusted( p, flags );
461 	  }
462 	}
463 	else
464 	{ /* Match any one character which IS in the SET
465 	   * specified by [SET].
466 	   */
467 	  p = glob_in_set( p, c, flags );
468 	}
469 	if( p == NULL )
470 	  /*
471 	   * The character under test didn't satisfy the SET
472 	   * matching criterion; return as unmatched.
473 	   */
474 	  return ']';
475 	break;
476 
477       default:
478 	/* The escape character cannot be handled as a regular
479 	 * switch case, because the escape character is specified
480 	 * as a variable, (to better support Microsoft nuisances).
481 	 * The escape mechanism may have been disabled within the
482 	 * glob() call...
483 	 */
484 	if(  ((flags & GLOB_NOESCAPE) == 0)
485 	  /*
486 	   * ...but when it is active, and we find an escape
487 	   * character without exhausting the pattern...
488 	   */
489 	&& (c == glob_escape_char) && ((c = *p) != 0)  )
490 	  /*
491 	   * ...then we handle the escaped character here, as
492 	   * a literal, and step over it, within the pattern.
493 	   */
494 	  ++p;
495 
496 	/* When we get to here, a successful match requires that
497 	 * the current pattern character "c" is an exact literal
498 	 * match for the next available character "t", if any,
499 	 * in the candidate text string...
500 	 */
501 	if( (*t == '\0') || (GlobCaseMatch( flags, c, *t ) != 0) )
502 	  /*
503 	   * ...otherwise we return a mismatch.
504 	   */
505 	  return c - *t;
506 
507 	/* No mismatch yet; proceed to test the following character
508 	 * within the candidate text string.
509 	 */
510 	t++;
511     }
512   }
513   /* When we've exhausted the pattern, then this final check will return
514    * a match if we've simultaneously exhausted the candidate text string,
515    * or a mismatch otherwise.
516    */
517   return c - *t;
518 }
519 
#ifdef DT_DIR
/*
 * When this is defined, we assume that we can safely interrogate
 * the d_type member of a globbed dirent structure, to determine if
 * the referenced directory entry is itself a subdirectory entry.
 *
 * The macro accepts the same ( path, ent ) argument pair as the two
 * argument function which is used when DT_DIR is not defined, so that
 * a caller may be written identically for either configuration; the
 * "path" argument is not needed here, and is not evaluated.
 */
# define GLOB_ISDIR( path, ent )	((ent)->d_type == DT_DIR)

#else
/* We can't simply check for (ent)->d_type == DT_DIR, so we must
 * identify subdirectory entries by other means.
 */
# include <sys/stat.h>

  GLOB_INLINE
  int GLOB_ISDIR( const struct dirent *ent )
  {
    /* Single argument form: returns non-zero when stat() succeeds
     * for the entry name and reports a directory, else zero.  Note
     * that only the bare entry name is passed to stat(), so it is
     * looked up relative to the current working directory, rather
     * than relative to the directory which is being globbed.
     */
    struct stat entinfo;
    if( stat( ent->d_name, &entinfo ) == 0 )
      return S_ISDIR( entinfo.st_mode );
    return 0;
  }

GLOB_INLINE
int GLOB_ISDIR( const char *path, const struct dirent *ent )
{
    /* Two argument form: the entry name is appended to "path", (the
     * directory in which the entry was found), and the combined name
     * is handed to PathIsDirectory().  PathAppend() and
     * PathIsDirectory() are project helpers, defined elsewhere;
     * presumably the result is non-zero for a directory -- TODO
     * confirm against their declarations.
     */
    PoolMem fullpath(path);

    PathAppend(fullpath, ent->d_name);
    return PathIsDirectory(fullpath);
}
#endif
553 
554 
555 
/* D_NAMLEN( entry ): length of the name held in the dirent structure
 * to which "entry" points.
 */
#if _DIRENT_HAVE_D_NAMLEN
/*
 * Our DIRENT implementation provides a direct indication
 * of the length of the file system entity name returned by
 * the last readdir operation...
 */
# define D_NAMLEN( entry )  ((entry)->d_namlen)
#else
/*
 * ...otherwise, we have to scan for it, with strlen(); note that
 * this happens on every use of the macro.
 */
# define D_NAMLEN( entry )  (strlen( (entry)->d_name ))
#endif
569 
GlobInitialise(glob_t * gl_data)570 static int GlobInitialise( glob_t *gl_data )
571 {
572   /* Helper routine to initialise a glob_t structure
573    * for first time use.
574    */
575   if( gl_data != NULL )
576   {
577     /* Caller gave us a valid pointer to what we assume has been
578      * defined as a glob_t structure; allocate space on the heap,
579      * for storage of the globbed paths vector...
580      */
581     int entries = gl_data->gl_offs + 1;
582     if( (gl_data->gl_pathv = (char **)malloc( entries * sizeof( char ** ) )) == NULL )
583       /*
584        * ...bailing out, if insufficient free heap memory.
585        */
586       return GLOB_NOSPACE;
587 
588     /* On successful allocation, clear the initial path count...
589      */
590     gl_data->gl_pathc = 0;
591     while( entries > 0 )
592       /*
593        * ...and place a NULL pointer in each allocated slot...
594        */
595       gl_data->gl_pathv[--entries] = NULL;
596   }
597   /* ...ultimately returning a successful initialisation status.
598    */
599   return GLOB_SUCCESS;
600 }
601 
GlobExpand(glob_t * gl_buf)602 GLOB_INLINE int GlobExpand( glob_t *gl_buf )
603 {
604   /* Inline helper to compute the new size allocation required
605    * for buf->gl_pathv, prior to adding a new glob result.
606    */
607   return ((2 + gl_buf->gl_pathc + gl_buf->gl_offs) * sizeof( char ** ));
608 }
609 
GlobStoreEntry(char * path,glob_t * gl_buf)610 static int GlobStoreEntry( char *path, glob_t *gl_buf )
611 {
612   /* Local helper routine to add a single path name entity
613    * to the globbed path vector, after first expanding the
614    * allocated memory space to accommodate it.
615    */
616   char **pathv;
617   if(  (path != NULL)  &&  (gl_buf != NULL)
618   &&  ((pathv = (char **)realloc( gl_buf->gl_pathv, GlobExpand( gl_buf ))) != NULL)  )
619   {
620     /* Memory expansion was successful; store the new path name
621      * in place of the former NULL pointer at the end of the old
622      * vector...
623      */
624     gl_buf->gl_pathv = pathv;
625     gl_buf->gl_pathv[gl_buf->gl_offs + gl_buf->gl_pathc++] = path;
626     /*
627      * ...then place a further NULL pointer into the newly allocated
628      * slot, to mark the new end of the vector...
629      */
630     gl_buf->gl_pathv[gl_buf->gl_offs + gl_buf->gl_pathc] = NULL;
631     /*
632      * ...before returning a successful completion status.
633      */
634     return GLOB_SUCCESS;
635   }
636   /* If we get to here, then we were unsuccessful.
637    */
638   return GLOB_ABORTED;
639 }
640 
struct glob_collator
{
  /* A private data structure, used to keep an ordered collection
   * of globbed path names in collated sequence within a (possibly
   * unbalanced) binary tree.
   *
   *   prev   sub-tree of the entries which were inserted with a
   *          collation result <= 0, relative to this node;
   *   next   sub-tree of the entries which collate after this node;
   *   entry  the path name held at this node; only the pointer is
   *          kept, and it is handed over to GlobStoreEntry() when
   *          the tree is flushed, while the node itself is freed.
   */
  struct glob_collator	*prev;
  struct glob_collator	*next;
  char  		*entry;
};
651 
652 GLOB_INLINE struct glob_collator
glob_collate_entry(struct glob_collator * collator,char * entry,int flags)653 *glob_collate_entry( struct glob_collator *collator, char *entry, int flags )
654 {
655   /* Inline helper function to construct a binary tree representation
656    * of a collated collection of globbed path name entities.
657    */
658   int seq = 0;
659   struct glob_collator *ref = collator;
660   struct glob_collator *lastref = collator;
661   while( ref != NULL )
662   {
663     /* Walk the tree, to find the leaf node representing the insertion
664      * point, in correctly collated sequence order, for the new entry,
665      * noting whether we must insert the new entry before or after the
666      * original entry at that leaf.
667      */
668     lastref = ref;
669     if( flags & GLOB_CASEMATCH )
670       seq = strcoll( entry, ref->entry );
671     else
672       //seq = stricoll( entry, ref->entry );
673       seq = strcoll( entry, ref->entry );
674     ref = (seq > 0) ? ref->next : ref->prev;
675   }
676   /* Allocate storage for a new leaf node, and if successful...
677    */
678   if( (ref = (glob_collator*)malloc( sizeof( struct glob_collator ))) != NULL )
679   {
680     /* ...place the new entry on this new leaf...
681      */
682     ref->entry = entry;
683     ref->prev = ref->next = NULL;
684 
685     /* ...and attach it to the tree...
686      */
687     if( lastref != NULL )
688     {
689       /* ...either...
690        */
691       if( seq > 0 )
692 	/*
693 	 * ...after...
694 	 */
695 	lastref->next = ref;
696 
697       else
698 	/* ...or before...
699 	 */
700 	lastref->prev = ref;
701 
702       /* ...the original leaf,as appropriate. */
703     }
704   }
705   /* When done, return a pointer to the root node of the resultant tree.
706    */
707   return (collator == NULL) ? ref : collator;
708 }
709 
710 static void
glob_store_collated_entries(struct glob_collator * collator,glob_t * gl_buf)711 glob_store_collated_entries( struct glob_collator *collator, glob_t *gl_buf )
712 {
713   /* A local helper routine to store a collated collection of globbed
714    * path name entities into the path vector within a glob_t structure;
715    * it performs a recursive inorder traversal of a glob_collator tree,
716    * deleting it leaf by leaf, branch by branch, as it stores the path
717    * data contained thereon.
718    */
719   if( collator->prev != NULL )
720     /*
721      * Recurse into the sub-tree of entries which collate before the
722      * root of the current (sub-)tree.
723      */
724     glob_store_collated_entries( collator->prev, gl_buf );
725 
726   /* Store the path name entry at the root of the current (sub-)tree.
727    */
728   GlobStoreEntry( collator->entry, gl_buf );
729 
730   if( collator->next != NULL )
731     /*
732      * Recurse into the sub-tree of entries which collate after the
733      * root of the current (sub-)tree.
734      */
735     glob_store_collated_entries( collator->next, gl_buf );
736 
737   /* Finally, delete the root node of the current (sub-)tree; since
738    * recursion visits every node of the tree, ultimately considering
739    * each leaf as a sub-tree of only one node, unwinding recursion
740    * will cause this to delete the entire tree.
741    */
742   free( collator );
743 }
744 
745 static int
glob_match(const char * pattern,int flags,int (* errfn)(const char *,int),glob_t * gl_buf)746 glob_match( const char *pattern, int flags, int (*errfn)(const char*, int), glob_t *gl_buf )
747 {
748   /* Local helper function; it provides the backbone of the glob()
749    * implementation, recursively decomposing the pattern into separate
750    * globbable path components, to collect the union of all possible
751    * matches to the pattern, in all possible matching directories.
752    */
753   glob_t local_gl_buf;
754   int status = GLOB_SUCCESS;
755 
756   /* Begin by separating out any path prefix from the glob pattern.
757    */
758   char dirbuf[1 + strlen( pattern )];
759   const char *dir = dirname( (char *)memcpy( dirbuf, pattern, sizeof( dirbuf )) );
760   char **dirp, preferred_dirsep = GLOB_DIRSEP;
761 
762   /* Initialise a temporary local glob_t structure, to capture the
763    * intermediate results at the current level of recursion...
764    */
765   local_gl_buf.gl_offs = 0;
766   if( (status = GlobInitialise( &local_gl_buf )) != GLOB_SUCCESS )
767     /*
768      * ...bailing out if unsuccessful.
769      */
770     return status;
771 
772   /* Check if there are any globbing tokens in the path prefix...
773    */
774   if( IsGlobPattern( dir, flags ) )
775     /*
776      * ...and recurse to identify all possible matching prefixes,
777      * as may be necessary...
778      */
779     status = glob_match( dir, flags | GLOB_DIRONLY, errfn, &local_gl_buf );
780 
781   else
782     /* ...or simply store the current prefix, if not.
783      */
784     status = GlobStoreEntry( glob_strdup( dir ), &local_gl_buf );
785 
786   /* Check nothing has gone wrong, so far...
787    */
788   if( status != GLOB_SUCCESS )
789     /*
790      * ...and bail out if necessary.
791      */
792     return status;
793 
794   /* The original "pattern" argument may have included a path name
795    * prefix, which we used "dirname()" to isolate.  If there was no
796    * such prefix, then "dirname()" would have reported an effective
797    * prefix which is identically equal to "."; however, this would
798    * also be the case if the prefix was "./" (or ".\\" in the case
799    * of a WIN32 host).  Thus, we may deduce that...
800    */
801   if( glob_is_dirsep( pattern[1] ) || (strcmp( dir, "." ) != 0) )
802   {
803     /* ...when the prefix is not reported as ".", or even if it is
804      * but the original pattern had "./" (or ".\\") as the prefix,
805      * then we must adjust to identify the effective pattern with
806      * its original prefix stripped away...
807      */
808     const char *tail = pattern + strlen( dir );
809     while( (tail > pattern) && ! glob_is_dirsep( *tail ) )
810       --tail;
811     while( glob_is_dirsep( *tail ) )
812       preferred_dirsep = *tail++;
813     pattern = tail;
814   }
815 
816   else
817     /* ...otherwise, we simply note that there was no prefix.
818      */
819     dir = NULL;
820 
821   /* We now have a globbed list of prefix directories, returned from
822    * recursive processing, in local_gl_buf.gl_pathv, and we also have
823    * a separate pattern which we may attempt to match in each of them;
824    * at the outset, we have yet to match this pattern to anything.
825    */
826   status = GLOB_NOMATCH;
827   for( dirp = local_gl_buf.gl_pathv; *dirp != NULL; free( *dirp++ ) )
828   {
829     /* Provided an earlier cycle hasn't scheduled an abort...
830      */
831     if( status != GLOB_ABORTED )
832     {
833       /* ...take each candidate directory in turn, and prepare
834        * to collate any matched entities within it...
835        */
836       struct glob_collator *collator = NULL;
837 
838       /* ...attempt to open the current candidate directory...
839        */
840       DIR *dp;
841       if( (dp = opendir( *dirp )) != NULL )
842       {
843 	/* ...and when successful, instantiate a dirent structure...
844 	 */
845 #ifdef USE_READDIR_R
846 	struct dirent data;
847 	struct dirent *entry = &data;
848 #endif
849 	struct dirent *result = NULL;
850 #ifdef USE_READDIR_R
851 	size_t dirlen = (dir == NULL) ? 0 : strlen( *dirp );
852 	while( (Readdir_r( dp, entry, &result )) == 0 )
853 #else
854 	while( (entry = readdir( dp )) != NULL )
855 #endif
856 	{
857 	  /* ...into which we read each entry from the candidate
858 	   * directory, in turn, then...
859 	   */
860 	  if( (((flags & GLOB_DIRONLY) == 0) || GLOB_ISDIR( *dirp, entry ))
861 	    /*
862 	     * ...provided we don't require it to be a subdirectory,
863 	     * or it actually is one...
864 	     */
865 	  && (GlobStrcmp( pattern, entry->d_name, flags ) == 0)   )
866 	  {
867 	    /* ...and it is a globbed match for the pattern, then
868 	     * we allocate a temporary local buffer of sufficient
869 	     * size to assemble the matching path name...
870 	     */
871 	    char *found;
872 	    size_t prefix;
873 	    size_t matchlen = D_NAMLEN( entry );
874 	    char matchpath[2 + dirlen + matchlen];
875 	    if( (prefix = dirlen) > 0 )
876 	    {
877 	      /* ...first copying the prefix, if any,
878 	       * followed by a directory name separator...
879 	       */
880 	      memcpy( matchpath, *dirp, dirlen );
881 	      if( ! glob_is_dirsep( matchpath[prefix - 1] ) )
882 		matchpath[prefix++] = preferred_dirsep;
883 	    }
884 	    /* ...and append the matching dirent entry.
885 	     */
886 	    memcpy( matchpath + prefix, entry->d_name, matchlen + 1 );
887 
888 	    /* Duplicate the content of the temporary buffer to
889 	     * the heap, for assignment into gl_buf->gl_pathv...
890 	     */
891 	    if( (found = glob_strdup( matchpath )) == NULL )
892 	      /*
893 	       * ...setting the appropriate error code, in the
894 	       * event that the heap memory has been exhausted.
895 	       */
896 	      status = GLOB_NOSPACE;
897 
898 	    else
899 	    { /* This glob match has been successfully recorded on
900 	       * the heap, ready for assignment to gl_buf->gl_pathv;
901 	       * if this is the first match assigned to this gl_buf,
902 	       * and we haven't trapped any prior error...
903 	       */
904 	      if( status == GLOB_NOMATCH )
905 		/*
906 		 * ...then record this successful match.
907 		 */
908 		status = GLOB_SUCCESS;
909 
910 	      if( (flags & GLOB_NOSORT) == 0 )
911 	      {
912 		/* The results of this glob are to be sorted in
913 		 * collating sequence order; divert the current
914 		 * match into the collator.
915 		 */
916 		collator = glob_collate_entry( collator, found, flags );
917 	      }
918 	      else
919 	      { /* Sorting has been suppressed for this glob;
920 		 * just add the current match directly into the
921 		 * result vector at gl_buf->gl_pathv.
922 		 */
923 		GlobStoreEntry( found, gl_buf );
924 	      }
925 	    }
926 	  }
927 	}
928 	/* When we've processed all of the entries in the current
929 	 * prefix directory, we may close it.
930 	 */
931 	closedir( dp );
932       }
933       /* In the event of failure to open the candidate prefix directory...
934        */
935       else if( (flags & GLOB_ERR) || ((errfn != NULL) && errfn( *dirp, errno )) )
936 	/*
937 	 * ...and when the caller has set the GLOB_ERR flag, or has provided
938 	 * an error handler which returns non-zero for the failure condition,
939 	 * then we schedule an abort.
940 	 */
941 	status = GLOB_ABORTED;
942 
943       /* When we diverted the glob results for collation...
944        */
945       if( collator != NULL )
946 	/*
947 	 * ...then we redirect them to gl_buf->gl_pathv now, before we
948 	 * begin a new cycle, to process any further prefix directories
949 	 * which may have been identified; note that we do this even if
950 	 * we scheduled an abort, so that we may return any results we
951 	 * may have already collected before the error occurred.
952 	 */
953 	glob_store_collated_entries( collator, gl_buf );
954     }
955   }
956   /* Finally, free the memory block allocated for the results vector
957    * in the internal glob buffer, to avoid leaking memory, before we
958    * return the resultant status code.
959    */
960   free( local_gl_buf.gl_pathv );
961   return status;
962 }
963 
/* Request codes accepted by glob_registry(); each occupies its own
 * single bit, so the two values can never be mistaken for each other.
 */
#define GLOB_INIT	(1 << 8)	/* register (initialise) a glob_t */
#define GLOB_FREE	(1 << 9)	/* de-register (free) a glob_t    */
966 
GlobSigned(const char * check,const char * magic)967 GLOB_INLINE int GlobSigned( const char *check, const char *magic )
968 {
969   /* Inline helper function, used exclusively by the glob_registry()
970    * function, to confirm that the gl_magic field within a glob_t data
971    * structure has been set, to indicate a properly initialised state.
972    *
973    * FIXME: we'd like to be able to verify the content at "check"
974    * against the signature at "magic", but "check" is likely to be
975    * an uninitialised pointer, and MS-Windows lamely crashes when the
976    * memory it might appear to address cannot be read.  There may be a
977    * way we could trap, and effectively handle, the resulting access
978    * violation, (likely restricted to WinXP and later); in the absence
979    * of a suitable handler, we must restrict our check to require that
980    * "check" is a strict alias for "magic".  This will lose, if we have
981    * multiple copies of "glob" loaded via distinct DLLs, and we pass a
982    * "glob_t" entity which has been initialised in one DLL across the
983    * boundary of another; for now, however, checking for strict pointer
984    * aliasing seems to be the only reliably safe option available.
985    */
986   return (check == magic) ? 0 : 1;
987 }
988 
glob_registry(int request,glob_t * gl_data)989 static glob_t *glob_registry( int request, glob_t *gl_data )
990 {
991   /* Helper function to verify proper registration (initialisation)
992    * of a glob_t data structure, prior to first use; it also provides
993    * the core implementation for the globfree() function.
994    */
995   static const char *glob_magic = "glob-1.0-mingw32";
996 
997   /* We must be prepared to handle either of...
998    */
999   switch( request )
1000   {
1001     /* ...a registration (initialisation) request...
1002      */
1003     case GLOB_INIT:
1004       if( GlobSigned( (const char *)gl_data->gl_magic, glob_magic ) != 0 )
1005       {
1006 	/* The gl_magic field doesn't (yet) indicate that the
1007 	 * data structure has been initialised; assume that this
1008 	 * is first use, and initialise it now.
1009 	 */
1010 	GlobInitialise( gl_data );
1011 	gl_data->gl_magic = (void *)(glob_magic);
1012       }
1013       break;
1014 
1015     /* ...or a de-registration (globfree()) request; here we
1016      * perform a sanity check, to ensure that the passed glob_t
1017      * structure is a valid, previously initialised structure,
1018      * before we attempt to free it.
1019      */
1020     case GLOB_FREE:
1021       if( GlobSigned( (const char *)gl_data->gl_magic, glob_magic ) == 0 )
1022       {
1023 	/* On passing the sanity check, we may proceed to free
1024 	 * all dynamically (strdup) allocated string buffers in
1025 	 * the gl_pathv list, and the reference pointer table
1026 	 * itself, thus completing the globfree() activity.
1027 	 */
1028 	int base = gl_data->gl_offs;
1029 	int argc = gl_data->gl_pathc;
1030 	while( argc-- > 0 )
1031 	  free( gl_data->gl_pathv[base++] );
1032 	free( gl_data->gl_pathv );
1033       }
1034   }
1035   /* In either case, we return the original glob_t data pointer.
1036    */
1037   return gl_data;
1038 }
1039 
1040 int
__mingw_glob(const char * pattern,int flags,int (* errfn)(const char *,int),glob_t * gl_data)1041 __mingw_glob( const char *pattern, int flags, int (*errfn)(const char *, int), glob_t *gl_data )
1042 {
1043   /* Module entry point for the glob() function.
1044    */
1045   int status;
1046   /* First, consult the glob "registry", to ensure that the
1047    * glob data structure passed by the caller, has been properly
1048    * initialised.
1049    */
1050   gl_data = glob_registry( GLOB_INIT, gl_data );
1051 
1052   /* The actual globbing function is performed by glob_match()...
1053    */
1054   status = glob_match( pattern, flags, errfn, gl_data );
1055   if( (status == GLOB_NOMATCH) && ((flags & GLOB_NOCHECK) != 0) )
1056     /*
1057      * ...ultimately delegating to glob_strdup() and GlobStoreEntry()
1058      * to handle any unmatched globbing pattern which the user specified
1059      * options may require to be stored anyway.
1060      */
1061     GlobStoreEntry( glob_strdup( pattern ), gl_data );
1062 
1063   /* We always return the status reported by glob_match().
1064    */
1065   return status;
1066 }
1067 
1068 void
__mingw_globfree(glob_t * gl_data)1069 __mingw_globfree( glob_t *gl_data )
1070 {
1071   /* Module entry point for globfree() function; the activity is
1072    * entirely delegated to the glob "registry" helper function.
1073    */
1074   glob_registry( GLOB_FREE, gl_data );
1075 }
1076