1 /*  $Id$
2 
3     Part of SWI-Prolog
4 
5     Author:        Jan Wielemaker
6     E-mail:        jan@swi.psy.uva.nl
7     WWW:           http://www.swi-prolog.org
8     Copyright (C): 1985-2002, University of Amsterdam
9 
10     This library is free software; you can redistribute it and/or
11     modify it under the terms of the GNU Lesser General Public
12     License as published by the Free Software Foundation; either
13     version 2.1 of the License, or (at your option) any later version.
14 
15     This library is distributed in the hope that it will be useful,
16     but WITHOUT ANY WARRANTY; without even the implied warranty of
17     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
18     Lesser General Public License for more details.
19 
20     You should have received a copy of the GNU Lesser General Public
21     License along with this library; if not, write to the Free Software
22     Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
23 */
24 
25 #include "pl-incl.h"
26 #include "pl-ctype.h"
27 
28 #ifdef HAVE_UNISTD_H
29 #include <unistd.h>
30 #endif
31 
32 #ifdef __WATCOMC__
33 #include <direct.h>
34 #else /*__WATCOMC__*/
35 #if HAVE_DIRENT_H
36 # include <dirent.h>
37 #else
38 # define dirent direct
39 # if HAVE_SYS_NDIR_H
40 #  include <sys/ndir.h>
41 # endif
42 # if HAVE_SYS_DIR_H
43 #  include <sys/dir.h>
44 # endif
45 # if HAVE_NDIR_H
46 #  include <ndir.h>
47 # endif
48 #endif
49 #endif /*__WATCOMC__*/
50 
51 #ifdef HAVE_SYS_STAT_H
52 #include <sys/stat.h>
53 #endif
54 #ifdef HAVE_SYS_PARAM_H
55 #include <sys/param.h>
56 #endif
57 
58 #define O_EXPANDS_TESTS_EXISTS 1
59 
60 #ifndef IS_DIR_SEPARATOR
61 #define IS_DIR_SEPARATOR(c)	((c) == '/')
62 #endif
63 
64 #define char_to_int(c)	(0xff & (int)(c))
65 
66 /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
67 Unix Wildcard Matching.  Recognised:
68 
69   ?		matches one arbitrary character
70   *		matches any number of any character
71   [xa-z]	matches x and a-z
72   {p1,p2}	matches pattern p1 or p2
73 
74   backslash (\) escapes a character.
75 
76 First the pattern is compiled into an intermediate representation.  Next
77 this intermediate representation is matched  against  the  target.   The
78 non-ascii  characters  are  used  to  store  control  sequences  in  the
79 intermediate representation:
80 
81   ANY		Match any character
82   STAR		Match (possibly empty) sequence
83   ALT <offset>	Match, if fails, continue at <pc> + offset
84   JMP <offset>	Jump <offset> instructions
85   ANYOF		Next 16 bytes are bitmap
86 - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
87 
88 #define MAXCODE 1024
89 
90 #define ANY	128
91 #define STAR	129
92 #define ALT	130
93 #define JMP	131
94 #define ANYOF	132
95 #define EXIT	133
96 
97 #define NOCURL	0
98 #define CURL	1
99 
100 typedef unsigned char matchcode;
101 
102 typedef struct
103 { int		size;
104   matchcode	code[MAXCODE];
105 } compiled_pattern;
106 
107 static char	*compile_pattern(compiled_pattern *, char *, int);
108 static bool	match_pattern(matchcode *, char *);
109 
110 #define Output(c)	{ if ( Out->size > MAXCODE-1 ) \
111 			  { warning("pattern too large"); \
112 			    return (char *) NULL; \
113 			  } \
114 			  Out->code[Out->size++] = c; \
115 			}
116 
117 static inline void
setMap(matchcode * map,int c)118 setMap(matchcode *map, int c)
119 { GET_LD
120 
121   if ( !truePrologFlag(PLFLAG_FILE_CASE) )
122     c = makeLower(c);
123 
124   map[(c)/8] |= 1 << ((c) % 8);
125 }
126 
127 
128 static bool
compilePattern(char * p,compiled_pattern * cbuf)129 compilePattern(char *p, compiled_pattern *cbuf)
130 { cbuf->size = 0;
131   if ( compile_pattern(cbuf, p, NOCURL) == (char *) NULL )
132     fail;
133 
134   succeed;
135 }
136 
137 
138 static char *
compile_pattern(compiled_pattern * Out,char * p,int curl)139 compile_pattern(compiled_pattern *Out, char *p, int curl)
140 { int c;
141 
142   for(;;)
143   { switch(c = char_to_int(*p++))
144     { case EOS:
145 	break;
146       case '\\':
147 	Output(*p == EOS ? '\\' : (*p & 0x7f));
148 	if (*p == EOS )
149 	  break;
150 	p++;
151 	continue;
152       case '?':
153 	Output(ANY);
154 	continue;
155       case '*':
156 	Output(STAR);
157 	continue;
158       case '[':
159 	{ matchcode *map;
160 	  int n;
161 
162 	  Output(ANYOF);
163 	  map = &Out->code[Out->size];
164 	  Out->size += 16;
165 	  if ( Out->size >= MAXCODE )
166 	  { warning("Pattern too intptr_t");
167 	    return (char *) NULL;
168 	  }
169 
170 	  for( n=0; n < 16; n++)
171 	    map[n] = 0;
172 
173 	  for(;;)
174 	  { switch( c = *p++ )
175 	    { case '\\':
176 		if ( *p == EOS )
177 		{ warning("Unmatched '['");
178 		  return (char *)NULL;
179 		}
180 		setMap(map, *p);
181 		p++;
182 		continue;
183 	      case ']':
184 		break;
185 	      default:
186 		if ( p[-1] != ']' && p[0] == '-' && p[1] != ']' )
187 		{ int chr;
188 
189 		  for ( chr=p[-1]; chr <= p[1]; chr++ )
190 		    setMap(map, chr);
191 		  p += 2;
192 		} else
193 		  setMap(map, c);
194 		continue;
195 	    }
196 	    break;
197 	  }
198 
199 	  continue;
200 	}
201       case '{':
202 	{ int ai, aj = -1;
203 
204 	  for(;;)
205 	  { Output(ALT); ai = Out->size; Output(0);
206 	    if ( (p = compile_pattern(Out, p, CURL)) == (char *) NULL )
207 	      return (char *) NULL;
208 	    if ( aj > 0 )
209 	      Out->code[aj] = Out->size - aj;
210 	    if ( *p == ',' )
211 	    { Output(JMP); aj = Out->size; Output(0);
212 	      Out->code[ai] = Out->size - ai;
213 	      Output(ALT); ai = Out->size; Output(0);
214 	      p++;
215 	    } else if ( *p == '}' )
216 	    { p++;
217 	      break;
218 	    } else
219 	    { warning("Unmatched '{'");
220 	      return (char *) NULL;
221 	    }
222 	  }
223 
224 	  continue;
225 	}
226       case ANY:
227       case STAR:
228       case ALT:
229       case JMP:
230       case ANYOF:
231       case EXIT:
232 	PL_error(NULL, 0, "Reserved character",
233 		 ERR_REPRESENTATION, ATOM_pattern);
234 	return NULL;
235       case '}':
236       case ',':
237 	if ( curl == CURL )
238 	{ p--;
239 	  return p;
240 	}
241 	/*FALLTHROUGH*/
242       default:
243       { GET_LD
244 
245         if ( !truePrologFlag(PLFLAG_FILE_CASE) )
246 	  c = makeLower(c);
247 	Output(c);
248 	continue;
249       }
250     }
251 
252     Output(EXIT);
253     return p;
254   }
255 }
256 
257 
258 static inline bool
matchPattern(char * s,compiled_pattern * cbuf)259 matchPattern(char *s, compiled_pattern *cbuf)
260 { return match_pattern(cbuf->code, s);
261 }
262 
263 
264 static bool
match_pattern(matchcode * p,char * str)265 match_pattern(matchcode *p, char *str)
266 { matchcode c;
267   matchcode *s = (matchcode *) str;
268 
269   for(;;)
270   { switch( c = *p++ )
271     { case EXIT:
272 	  return (*s == EOS ? TRUE : FALSE);
273       case ANY:						/* ? */
274 	  if ( *s == EOS )
275 	    fail;
276 	  s++;
277 	  continue;
278       case ANYOF:					/* [...] */
279         { GET_LD
280 	  matchcode c2 = *s;
281 
282 	  if ( !truePrologFlag(PLFLAG_FILE_CASE) )
283 	    c2 = makeLower(c2);
284 
285 	  if ( p[c2 / 8] & (1 << (c2 % 8)) )
286 	  { p += 16;
287 	    s++;
288 	    continue;
289 	  }
290 	  fail;
291 	}
292       case STAR:					/* * */
293 	  do
294 	  { if ( match_pattern(p, (char *)s) )
295 	      succeed;
296 	  } while( *s++ );
297 	  fail;
298       case JMP:						/* { ... } */
299 	  p += *p;
300 	  continue;
301       case ALT:
302 	  if ( match_pattern(p+1, (char *)s) )
303 	    succeed;
304 	  p += *p;
305 	  continue;
306       default:						/* character */
307       { GET_LD
308 
309 	  if ( c == *s ||
310 	       (!truePrologFlag(PLFLAG_FILE_CASE) && c == makeLower(*s)) )
311 	  { s++;
312 	    continue;
313 	  }
314           fail;
315       }
316     }
317   }
318 }
319 
320 
321 /** wildcard_match(+Pattern, +Name) is semidet.
322 */
323 
324 static
325 PRED_IMPL("wildcard_match", 2, wildcard_match, 0)
326 { char *p, *s;
327   compiled_pattern buf;
328 
329   if ( !PL_get_chars_ex(A1, &p, CVT_ALL) ||
330        !PL_get_chars_ex(A2,  &s, CVT_ALL) )
331     fail;
332 
333   if ( compilePattern(p, &buf) )
334   { return matchPattern(s, &buf);
335   }
336 
337   return PL_error(NULL, 0, NULL, ERR_DOMAIN, ATOM_pattern, A1);
338 }
339 
340 
341 		 /*******************************
342 		 *	EXPAND_FILE_NAME/2	*
343 		 *******************************/
344 
345 /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
346 Wildcart expansion of a pattern to a list   of files. This code uses two
347 `buffers'  for  storing  the   intermediate    results   while  limiting
348 fragmentation. The `strings' buffer contains  all strings generated. The
349 files contains indices in the `strings' buffer  pointing to the start of
350 strings.  The indices in the range [start,end) are valid.
351 
352 First  this  set  is   filled   with    the   empty   string.  Next  the
353 directory-segment with the first wildcart is   located.  If found, we go
354 through the current set, adding the segments without wildcarts, applying
355 the wildcart on the directory and adding   everything  found to the set.
356 The old set is deleted by incrementing info.start.
357 
358 If we are at the end, we add the remaining non-wildcart segments to each
359 element of the set, deleting it if the result does not exits.
360 
361 Finally we sort the result.
362 - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
363 
364 typedef struct
365 { tmp_buffer	files;			/* our files */
366   tmp_buffer	strings;		/* our strings */
367   int		start;			/* 1-st valid entry of files */
368   int		end;			/* last valid entry of files */
369 } glob_info, *GlobInfo;
370 
371 #undef isspecial			/* play safe */
372 #define isspecial(c) \
373 	((c) == '[' || (c) == '{' || (c) == '?' || (c) == '*')
374 
375 static void
free_expand_info(GlobInfo info)376 free_expand_info(GlobInfo info)
377 { discardBuffer(&info->files);
378   discardBuffer(&info->strings);
379 }
380 
381 
382 static void
add_path(const char * path,GlobInfo info)383 add_path(const char *path, GlobInfo info)
384 { int idx = (int)entriesBuffer(&info->strings, char);
385   size_t n = strlen(path)+1;
386 
387   addMultipleBuffer(&info->strings, path, n, char);
388   addBuffer(&info->files, idx, int);
389   info->end++;
390 }
391 
392 
393 const char *
expand_str(GlobInfo info,int at)394 expand_str(GlobInfo info, int at)
395 { char *s = &fetchBuffer(&info->strings, at, char);
396 
397   return (const char *)s;
398 }
399 
400 
401 const char *
expand_entry(GlobInfo info,int idx)402 expand_entry(GlobInfo info, int idx)
403 { int at = fetchBuffer(&info->files, idx, int);
404 
405   return expand_str(info, at);
406 }
407 
408 
409 static void
un_escape(char * to,const char * from,const char * end)410 un_escape(char *to, const char *from, const char *end)
411 { while( from < end )
412   { if ( *from == '\\' && (isspecial(from[1]) || from[1] == '\\') )
413       from++;
414     *to++ = *from++;
415   }
416   *to = EOS;
417 }
418 
419 
420 static int
expand(const char * pattern,GlobInfo info)421 expand(const char *pattern, GlobInfo info)
422 { const char *pat = pattern;
423   compiled_pattern cbuf;
424   char prefix[MAXPATHLEN];		/* before first pattern */
425   char patbuf[MAXPATHLEN];		/* pattern buffer */
426   int end, dot;
427 
428   initBuffer(&info->files);
429   initBuffer(&info->strings);
430   info->start = 0;
431   info->end = 0;
432 
433   add_path("", info);
434 
435   for(;;)
436   { const char *s = pat, *head = pat, *tail;
437 
438     for(;;)
439     { int c;
440 
441       switch( (c=*s++) )
442       { case EOS:
443 	  if ( s > pat )		/* something left and expanded */
444 	  { un_escape(prefix, pat, s);
445 
446 	    end = info->end;
447 	    for( ; info->start < end; info->start++ )
448 	    { char path[MAXPATHLEN];
449 	      size_t plen;
450 
451 	      strcpy(path, expand_entry(info, info->start));
452 	      plen = strlen(path);
453 	      if ( prefix[0] && plen > 0 && path[plen-1] != '/' )
454 		path[plen++] = '/';
455 	      strcpy(&path[plen], prefix);
456 	      if ( end == 1 || AccessFile(path, ACCESS_EXIST) )
457 		add_path(path, info);
458 	    }
459 	  }
460 	  succeed;
461 	case '[':				/* meta characters: expand */
462 	case '{':
463 	case '?':
464 	case '*':
465 	  break;
466 	case '\\':
467 	  if ( isspecial(*s) )
468 	  { s++;
469 	    continue;
470 	  }
471 	/*FALLTHROUGH*/
472 	default:
473 	  if ( IS_DIR_SEPARATOR(c) )
474 	    head = s;
475 	  continue;
476       }
477       break;
478     }
479 
480     for( tail=s; *tail && !IS_DIR_SEPARATOR(*tail); tail++ )
481       ;
482 
483 /*  By now, head points to the start of the path holding meta characters,
484     while tail points to the tail:
485 
486     ..../meta*path/....
487 	 ^        ^
488        head     tail
489 */
490     un_escape(prefix, pat, head);
491     un_escape(patbuf, head, tail);
492 
493     if ( !compilePattern(patbuf, &cbuf) )		/* syntax error */
494       fail;
495     dot = (patbuf[0] == '.');			/* do dots as well */
496 
497     end = info->end;
498 
499     for(; info->start < end; info->start++)
500     { DIR *d;
501       struct dirent *e;
502       char path[MAXPATHLEN];
503       char tmp[MAXPATHLEN];
504       const char *current = expand_entry(info, info->start);
505 
506       strcpy(path, current);
507       strcat(path, prefix);
508 
509       if ( (d=opendir(path[0] ? OsPath(path, tmp) : ".")) )
510       { size_t plen = strlen(path);
511 
512 	if ( plen > 0 && path[plen-1] != '/' )
513 	  path[plen++] = '/';
514 
515 	for(e=readdir(d); e; e = readdir(d))
516 	{
517 #ifdef __MSDOS__
518 	  strlwr(e->d_name);
519 #endif
520 	  if ( (dot || e->d_name[0] != '.') &&
521 	       matchPattern(e->d_name, &cbuf) )
522 	  { char newp[MAXPATHLEN];
523 
524 	    strcpy(newp, path);
525 	    strcpy(&newp[plen], e->d_name);
526 /*	    if ( !tail[0] || ExistsDirectory(newp) )
527 	    Saves memory, but involves one more file-access
528 */
529 	      add_path(newp, info);
530 	  }
531 	}
532 	closedir(d);
533       }
534     }
535 
536     pat = tail;
537     if ( IS_DIR_SEPARATOR(*pat) )
538       pat++;
539   }
540 }
541 
542 
543 static int
compareBagEntries(const void * a1,const void * a2)544 compareBagEntries(const void *a1, const void *a2)
545 { GET_LD
546   GlobInfo info = LD->glob_info;
547   int i1 = *(int *)a1;
548   int i2 = *(int *)a2;
549   const char *s1, *s2;
550 
551   s1 = expand_str(info, i1);
552   s2 = expand_str(info, i2);
553 
554   if ( truePrologFlag(PLFLAG_FILE_CASE) )
555     return mbscoll(s1, s2);
556   else
557     return mbscasecoll(s1, s2);
558 }
559 
560 
561 static void
sort_expand(GlobInfo info)562 sort_expand(GlobInfo info)
563 { GET_LD
564   int *ip = &fetchBuffer(&info->files, info->start, int);
565   int is = info->end - info->start;
566 
567   LD->glob_info = info;
568   qsort(ip, is, sizeof(int), compareBagEntries);
569 }
570 
571 
572 static
573 PRED_IMPL("expand_file_name", 2, expand_file_name, 0)
574 { PRED_LD
575   char spec[MAXPATHLEN];
576   char *s;
577   glob_info info;
578   term_t l    = PL_copy_term_ref(A2);
579   term_t head = PL_new_term_ref();
580   int i;
581 
582   if ( !PL_get_chars_ex(A1, &s, CVT_ALL|REP_FN) )
583     fail;
584   if ( strlen(s) > sizeof(spec)-1 )
585     return PL_error(NULL, 0, "File name too intptr_t",
586 		    ERR_DOMAIN, ATOM_pattern, A1);
587 
588   if ( !expandVars(s, spec, sizeof(spec)) )
589     fail;
590   if ( !expand(spec, &info) )
591     goto failout;
592   sort_expand(&info);
593 
594   for( i = info.start; i< info.end; i++ )
595   { const char *e = expand_entry(&info, i);
596 
597     if ( !PL_unify_list(l, head, l) ||
598 	 !PL_unify_chars(head, PL_ATOM|REP_FN, -1, e) )
599       goto failout;
600   }
601 
602   if ( !PL_unify_nil(l) )
603   { failout:
604     free_expand_info(&info);
605     fail;
606   }
607 
608   free_expand_info(&info);
609   succeed;
610 }
611 
612 
613 /** directory_files(+Dir, -Files) is det.
614 
615 Files is a list of atoms that describe the entries in Dir.
616 */
617 
618 static
619 PRED_IMPL("directory_files", 2, directory_files, 0)
620 { PRED_LD
621   char *dname;
622   DIR *dir;
623 
624   if ( !PL_get_file_name(A1, &dname, PL_FILE_READ|PL_FILE_OSPATH) )
625     return FALSE;
626 
627   if ( (dir=opendir(dname)) )
628   { struct dirent *e;
629     term_t tail = PL_copy_term_ref(A2);
630     term_t head = PL_new_term_ref();
631 
632     for(e=readdir(dir); e; e = readdir(dir))
633     { PL_put_variable(head);
634       if ( PL_handle_signals() < 0 ||
635 	   !PL_unify_list(tail, head, tail) ||
636 	   !PL_unify_chars(head, PL_ATOM|REP_FN, (size_t)-1, e->d_name) )
637       { closedir(dir);
638 	return FALSE;
639       }
640     }
641     closedir(dir);
642 
643     return PL_unify_nil(tail);
644   }
645 
646   return PL_error(NULL, 0, OsError(), ERR_FILE_OPERATION,
647 		  ATOM_open, ATOM_directory, A1);
648 }
649 
650 
651 		 /*******************************
652 		 *      PUBLISH PREDICATES	*
653 		 *******************************/
654 
655 BeginPredDefs(glob)
656   PRED_DEF("swi_expand_file_name", 2, expand_file_name, 0)
657   PRED_DEF("swi_wildcard_match",   2, wildcard_match,   0)
658   PRED_DEF("swi_directory_files",  2, directory_files,  0)
659 EndPredDefs
660 
661 void
initGlob(void)662 initGlob(void)
663 {
664   PL_register_extensions(PL_predicates_from_glob);
665 }
666 
667