1 /* $Id$
2
3 Part of SWI-Prolog
4
5 Author: Jan Wielemaker
6 E-mail: jan@swi.psy.uva.nl
7 WWW: http://www.swi-prolog.org
8 Copyright (C): 1985-2002, University of Amsterdam
9
10 This library is free software; you can redistribute it and/or
11 modify it under the terms of the GNU Lesser General Public
12 License as published by the Free Software Foundation; either
13 version 2.1 of the License, or (at your option) any later version.
14
15 This library is distributed in the hope that it will be useful,
16 but WITHOUT ANY WARRANTY; without even the implied warranty of
17 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
18 Lesser General Public License for more details.
19
20 You should have received a copy of the GNU Lesser General Public
21 License along with this library; if not, write to the Free Software
22 Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
23 */
24
25 #include "pl-incl.h"
26 #include "pl-ctype.h"
27
28 #ifdef HAVE_UNISTD_H
29 #include <unistd.h>
30 #endif
31
32 #ifdef __WATCOMC__
33 #include <direct.h>
34 #else /*__WATCOMC__*/
35 #if HAVE_DIRENT_H
36 # include <dirent.h>
37 #else
38 # define dirent direct
39 # if HAVE_SYS_NDIR_H
40 # include <sys/ndir.h>
41 # endif
42 # if HAVE_SYS_DIR_H
43 # include <sys/dir.h>
44 # endif
45 # if HAVE_NDIR_H
46 # include <ndir.h>
47 # endif
48 #endif
49 #endif /*__WATCOMC__*/
50
51 #ifdef HAVE_SYS_STAT_H
52 #include <sys/stat.h>
53 #endif
54 #ifdef HAVE_SYS_PARAM_H
55 #include <sys/param.h>
56 #endif
57
58 #define O_EXPANDS_TESTS_EXISTS 1
59
60 #ifndef IS_DIR_SEPARATOR
61 #define IS_DIR_SEPARATOR(c) ((c) == '/')
62 #endif
63
64 #define char_to_int(c) (0xff & (int)(c))
65
66 /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
67 Unix Wildcard Matching. Recognised:
68
69 ? matches one arbitrary character
70 * matches any number of any character
71 [xa-z] matches x and a-z
72 {p1,p2} matches pattern p1 or p2
73
74 backslash (\) escapes a character.
75
76 First the pattern is compiled into an intermediate representation. Next
77 this intermediate representation is matched against the target. The
78 non-ascii characters are used to store control sequences in the
79 intermediate representation:
80
81 ANY Match any character
82 STAR Match (possibly empty) sequence
83 ALT <offset> Match, if fails, continue at <pc> + offset
84 JMP <offset> Jump <offset> instructions
85 ANYOF Next 16 bytes are bitmap
86 - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
87
88 #define MAXCODE 1024
89
90 #define ANY 128
91 #define STAR 129
92 #define ALT 130
93 #define JMP 131
94 #define ANYOF 132
95 #define EXIT 133
96
97 #define NOCURL 0
98 #define CURL 1
99
100 typedef unsigned char matchcode;
101
102 typedef struct
103 { int size;
104 matchcode code[MAXCODE];
105 } compiled_pattern;
106
107 static char *compile_pattern(compiled_pattern *, char *, int);
108 static bool match_pattern(matchcode *, char *);
109
110 #define Output(c) { if ( Out->size > MAXCODE-1 ) \
111 { warning("pattern too large"); \
112 return (char *) NULL; \
113 } \
114 Out->code[Out->size++] = c; \
115 }
116
117 static inline void
setMap(matchcode * map,int c)118 setMap(matchcode *map, int c)
119 { GET_LD
120
121 if ( !truePrologFlag(PLFLAG_FILE_CASE) )
122 c = makeLower(c);
123
124 map[(c)/8] |= 1 << ((c) % 8);
125 }
126
127
128 static bool
compilePattern(char * p,compiled_pattern * cbuf)129 compilePattern(char *p, compiled_pattern *cbuf)
130 { cbuf->size = 0;
131 if ( compile_pattern(cbuf, p, NOCURL) == (char *) NULL )
132 fail;
133
134 succeed;
135 }
136
137
138 static char *
compile_pattern(compiled_pattern * Out,char * p,int curl)139 compile_pattern(compiled_pattern *Out, char *p, int curl)
140 { int c;
141
142 for(;;)
143 { switch(c = char_to_int(*p++))
144 { case EOS:
145 break;
146 case '\\':
147 Output(*p == EOS ? '\\' : (*p & 0x7f));
148 if (*p == EOS )
149 break;
150 p++;
151 continue;
152 case '?':
153 Output(ANY);
154 continue;
155 case '*':
156 Output(STAR);
157 continue;
158 case '[':
159 { matchcode *map;
160 int n;
161
162 Output(ANYOF);
163 map = &Out->code[Out->size];
164 Out->size += 16;
165 if ( Out->size >= MAXCODE )
166 { warning("Pattern too intptr_t");
167 return (char *) NULL;
168 }
169
170 for( n=0; n < 16; n++)
171 map[n] = 0;
172
173 for(;;)
174 { switch( c = *p++ )
175 { case '\\':
176 if ( *p == EOS )
177 { warning("Unmatched '['");
178 return (char *)NULL;
179 }
180 setMap(map, *p);
181 p++;
182 continue;
183 case ']':
184 break;
185 default:
186 if ( p[-1] != ']' && p[0] == '-' && p[1] != ']' )
187 { int chr;
188
189 for ( chr=p[-1]; chr <= p[1]; chr++ )
190 setMap(map, chr);
191 p += 2;
192 } else
193 setMap(map, c);
194 continue;
195 }
196 break;
197 }
198
199 continue;
200 }
201 case '{':
202 { int ai, aj = -1;
203
204 for(;;)
205 { Output(ALT); ai = Out->size; Output(0);
206 if ( (p = compile_pattern(Out, p, CURL)) == (char *) NULL )
207 return (char *) NULL;
208 if ( aj > 0 )
209 Out->code[aj] = Out->size - aj;
210 if ( *p == ',' )
211 { Output(JMP); aj = Out->size; Output(0);
212 Out->code[ai] = Out->size - ai;
213 Output(ALT); ai = Out->size; Output(0);
214 p++;
215 } else if ( *p == '}' )
216 { p++;
217 break;
218 } else
219 { warning("Unmatched '{'");
220 return (char *) NULL;
221 }
222 }
223
224 continue;
225 }
226 case ANY:
227 case STAR:
228 case ALT:
229 case JMP:
230 case ANYOF:
231 case EXIT:
232 PL_error(NULL, 0, "Reserved character",
233 ERR_REPRESENTATION, ATOM_pattern);
234 return NULL;
235 case '}':
236 case ',':
237 if ( curl == CURL )
238 { p--;
239 return p;
240 }
241 /*FALLTHROUGH*/
242 default:
243 { GET_LD
244
245 if ( !truePrologFlag(PLFLAG_FILE_CASE) )
246 c = makeLower(c);
247 Output(c);
248 continue;
249 }
250 }
251
252 Output(EXIT);
253 return p;
254 }
255 }
256
257
258 static inline bool
matchPattern(char * s,compiled_pattern * cbuf)259 matchPattern(char *s, compiled_pattern *cbuf)
260 { return match_pattern(cbuf->code, s);
261 }
262
263
264 static bool
match_pattern(matchcode * p,char * str)265 match_pattern(matchcode *p, char *str)
266 { matchcode c;
267 matchcode *s = (matchcode *) str;
268
269 for(;;)
270 { switch( c = *p++ )
271 { case EXIT:
272 return (*s == EOS ? TRUE : FALSE);
273 case ANY: /* ? */
274 if ( *s == EOS )
275 fail;
276 s++;
277 continue;
278 case ANYOF: /* [...] */
279 { GET_LD
280 matchcode c2 = *s;
281
282 if ( !truePrologFlag(PLFLAG_FILE_CASE) )
283 c2 = makeLower(c2);
284
285 if ( p[c2 / 8] & (1 << (c2 % 8)) )
286 { p += 16;
287 s++;
288 continue;
289 }
290 fail;
291 }
292 case STAR: /* * */
293 do
294 { if ( match_pattern(p, (char *)s) )
295 succeed;
296 } while( *s++ );
297 fail;
298 case JMP: /* { ... } */
299 p += *p;
300 continue;
301 case ALT:
302 if ( match_pattern(p+1, (char *)s) )
303 succeed;
304 p += *p;
305 continue;
306 default: /* character */
307 { GET_LD
308
309 if ( c == *s ||
310 (!truePrologFlag(PLFLAG_FILE_CASE) && c == makeLower(*s)) )
311 { s++;
312 continue;
313 }
314 fail;
315 }
316 }
317 }
318 }
319
320
321 /** wildcard_match(+Pattern, +Name) is semidet.
322 */
323
324 static
325 PRED_IMPL("wildcard_match", 2, wildcard_match, 0)
326 { char *p, *s;
327 compiled_pattern buf;
328
329 if ( !PL_get_chars_ex(A1, &p, CVT_ALL) ||
330 !PL_get_chars_ex(A2, &s, CVT_ALL) )
331 fail;
332
333 if ( compilePattern(p, &buf) )
334 { return matchPattern(s, &buf);
335 }
336
337 return PL_error(NULL, 0, NULL, ERR_DOMAIN, ATOM_pattern, A1);
338 }
339
340
341 /*******************************
342 * EXPAND_FILE_NAME/2 *
343 *******************************/
344
345 /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
346 Wildcart expansion of a pattern to a list of files. This code uses two
347 `buffers' for storing the intermediate results while limiting
348 fragmentation. The `strings' buffer contains all strings generated. The
349 files contains indices in the `strings' buffer pointing to the start of
350 strings. The indices in the range [start,end) are valid.
351
352 First this set is filled with the empty string. Next the
353 directory-segment with the first wildcart is located. If found, we go
354 through the current set, adding the segments without wildcarts, applying
355 the wildcart on the directory and adding everything found to the set.
356 The old set is deleted by incrementing info.start.
357
358 If we are at the end, we add the remaining non-wildcart segments to each
359 element of the set, deleting it if the result does not exits.
360
361 Finally we sort the result.
362 - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
363
364 typedef struct
365 { tmp_buffer files; /* our files */
366 tmp_buffer strings; /* our strings */
367 int start; /* 1-st valid entry of files */
368 int end; /* last valid entry of files */
369 } glob_info, *GlobInfo;
370
371 #undef isspecial /* play safe */
372 #define isspecial(c) \
373 ((c) == '[' || (c) == '{' || (c) == '?' || (c) == '*')
374
375 static void
free_expand_info(GlobInfo info)376 free_expand_info(GlobInfo info)
377 { discardBuffer(&info->files);
378 discardBuffer(&info->strings);
379 }
380
381
382 static void
add_path(const char * path,GlobInfo info)383 add_path(const char *path, GlobInfo info)
384 { int idx = (int)entriesBuffer(&info->strings, char);
385 size_t n = strlen(path)+1;
386
387 addMultipleBuffer(&info->strings, path, n, char);
388 addBuffer(&info->files, idx, int);
389 info->end++;
390 }
391
392
393 const char *
expand_str(GlobInfo info,int at)394 expand_str(GlobInfo info, int at)
395 { char *s = &fetchBuffer(&info->strings, at, char);
396
397 return (const char *)s;
398 }
399
400
401 const char *
expand_entry(GlobInfo info,int idx)402 expand_entry(GlobInfo info, int idx)
403 { int at = fetchBuffer(&info->files, idx, int);
404
405 return expand_str(info, at);
406 }
407
408
409 static void
un_escape(char * to,const char * from,const char * end)410 un_escape(char *to, const char *from, const char *end)
411 { while( from < end )
412 { if ( *from == '\\' && (isspecial(from[1]) || from[1] == '\\') )
413 from++;
414 *to++ = *from++;
415 }
416 *to = EOS;
417 }
418
419
420 static int
expand(const char * pattern,GlobInfo info)421 expand(const char *pattern, GlobInfo info)
422 { const char *pat = pattern;
423 compiled_pattern cbuf;
424 char prefix[MAXPATHLEN]; /* before first pattern */
425 char patbuf[MAXPATHLEN]; /* pattern buffer */
426 int end, dot;
427
428 initBuffer(&info->files);
429 initBuffer(&info->strings);
430 info->start = 0;
431 info->end = 0;
432
433 add_path("", info);
434
435 for(;;)
436 { const char *s = pat, *head = pat, *tail;
437
438 for(;;)
439 { int c;
440
441 switch( (c=*s++) )
442 { case EOS:
443 if ( s > pat ) /* something left and expanded */
444 { un_escape(prefix, pat, s);
445
446 end = info->end;
447 for( ; info->start < end; info->start++ )
448 { char path[MAXPATHLEN];
449 size_t plen;
450
451 strcpy(path, expand_entry(info, info->start));
452 plen = strlen(path);
453 if ( prefix[0] && plen > 0 && path[plen-1] != '/' )
454 path[plen++] = '/';
455 strcpy(&path[plen], prefix);
456 if ( end == 1 || AccessFile(path, ACCESS_EXIST) )
457 add_path(path, info);
458 }
459 }
460 succeed;
461 case '[': /* meta characters: expand */
462 case '{':
463 case '?':
464 case '*':
465 break;
466 case '\\':
467 if ( isspecial(*s) )
468 { s++;
469 continue;
470 }
471 /*FALLTHROUGH*/
472 default:
473 if ( IS_DIR_SEPARATOR(c) )
474 head = s;
475 continue;
476 }
477 break;
478 }
479
480 for( tail=s; *tail && !IS_DIR_SEPARATOR(*tail); tail++ )
481 ;
482
483 /* By now, head points to the start of the path holding meta characters,
484 while tail points to the tail:
485
486 ..../meta*path/....
487 ^ ^
488 head tail
489 */
490 un_escape(prefix, pat, head);
491 un_escape(patbuf, head, tail);
492
493 if ( !compilePattern(patbuf, &cbuf) ) /* syntax error */
494 fail;
495 dot = (patbuf[0] == '.'); /* do dots as well */
496
497 end = info->end;
498
499 for(; info->start < end; info->start++)
500 { DIR *d;
501 struct dirent *e;
502 char path[MAXPATHLEN];
503 char tmp[MAXPATHLEN];
504 const char *current = expand_entry(info, info->start);
505
506 strcpy(path, current);
507 strcat(path, prefix);
508
509 if ( (d=opendir(path[0] ? OsPath(path, tmp) : ".")) )
510 { size_t plen = strlen(path);
511
512 if ( plen > 0 && path[plen-1] != '/' )
513 path[plen++] = '/';
514
515 for(e=readdir(d); e; e = readdir(d))
516 {
517 #ifdef __MSDOS__
518 strlwr(e->d_name);
519 #endif
520 if ( (dot || e->d_name[0] != '.') &&
521 matchPattern(e->d_name, &cbuf) )
522 { char newp[MAXPATHLEN];
523
524 strcpy(newp, path);
525 strcpy(&newp[plen], e->d_name);
526 /* if ( !tail[0] || ExistsDirectory(newp) )
527 Saves memory, but involves one more file-access
528 */
529 add_path(newp, info);
530 }
531 }
532 closedir(d);
533 }
534 }
535
536 pat = tail;
537 if ( IS_DIR_SEPARATOR(*pat) )
538 pat++;
539 }
540 }
541
542
543 static int
compareBagEntries(const void * a1,const void * a2)544 compareBagEntries(const void *a1, const void *a2)
545 { GET_LD
546 GlobInfo info = LD->glob_info;
547 int i1 = *(int *)a1;
548 int i2 = *(int *)a2;
549 const char *s1, *s2;
550
551 s1 = expand_str(info, i1);
552 s2 = expand_str(info, i2);
553
554 if ( truePrologFlag(PLFLAG_FILE_CASE) )
555 return mbscoll(s1, s2);
556 else
557 return mbscasecoll(s1, s2);
558 }
559
560
561 static void
sort_expand(GlobInfo info)562 sort_expand(GlobInfo info)
563 { GET_LD
564 int *ip = &fetchBuffer(&info->files, info->start, int);
565 int is = info->end - info->start;
566
567 LD->glob_info = info;
568 qsort(ip, is, sizeof(int), compareBagEntries);
569 }
570
571
572 static
573 PRED_IMPL("expand_file_name", 2, expand_file_name, 0)
574 { PRED_LD
575 char spec[MAXPATHLEN];
576 char *s;
577 glob_info info;
578 term_t l = PL_copy_term_ref(A2);
579 term_t head = PL_new_term_ref();
580 int i;
581
582 if ( !PL_get_chars_ex(A1, &s, CVT_ALL|REP_FN) )
583 fail;
584 if ( strlen(s) > sizeof(spec)-1 )
585 return PL_error(NULL, 0, "File name too intptr_t",
586 ERR_DOMAIN, ATOM_pattern, A1);
587
588 if ( !expandVars(s, spec, sizeof(spec)) )
589 fail;
590 if ( !expand(spec, &info) )
591 goto failout;
592 sort_expand(&info);
593
594 for( i = info.start; i< info.end; i++ )
595 { const char *e = expand_entry(&info, i);
596
597 if ( !PL_unify_list(l, head, l) ||
598 !PL_unify_chars(head, PL_ATOM|REP_FN, -1, e) )
599 goto failout;
600 }
601
602 if ( !PL_unify_nil(l) )
603 { failout:
604 free_expand_info(&info);
605 fail;
606 }
607
608 free_expand_info(&info);
609 succeed;
610 }
611
612
613 /** directory_files(+Dir, -Files) is det.
614
615 Files is a list of atoms that describe the entries in Dir.
616 */
617
618 static
619 PRED_IMPL("directory_files", 2, directory_files, 0)
620 { PRED_LD
621 char *dname;
622 DIR *dir;
623
624 if ( !PL_get_file_name(A1, &dname, PL_FILE_READ|PL_FILE_OSPATH) )
625 return FALSE;
626
627 if ( (dir=opendir(dname)) )
628 { struct dirent *e;
629 term_t tail = PL_copy_term_ref(A2);
630 term_t head = PL_new_term_ref();
631
632 for(e=readdir(dir); e; e = readdir(dir))
633 { PL_put_variable(head);
634 if ( PL_handle_signals() < 0 ||
635 !PL_unify_list(tail, head, tail) ||
636 !PL_unify_chars(head, PL_ATOM|REP_FN, (size_t)-1, e->d_name) )
637 { closedir(dir);
638 return FALSE;
639 }
640 }
641 closedir(dir);
642
643 return PL_unify_nil(tail);
644 }
645
646 return PL_error(NULL, 0, OsError(), ERR_FILE_OPERATION,
647 ATOM_open, ATOM_directory, A1);
648 }
649
650
651 /*******************************
652 * PUBLISH PREDICATES *
653 *******************************/
654
655 BeginPredDefs(glob)
656 PRED_DEF("swi_expand_file_name", 2, expand_file_name, 0)
657 PRED_DEF("swi_wildcard_match", 2, wildcard_match, 0)
658 PRED_DEF("swi_directory_files", 2, directory_files, 0)
659 EndPredDefs
660
661 void
initGlob(void)662 initGlob(void)
663 {
664 PL_register_extensions(PL_predicates_from_glob);
665 }
666
667