1 /* file.c -*- mode:c; coding:utf-8; -*-
2 *
3 * Copyright (c) 2010-2021 Takashi Kato <ktakashi@ymail.com>
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 *
9 * 1. Redistributions of source code must retain the above copyright
10 * notice, this list of conditions and the following disclaimer.
11 *
12 * 2. Redistributions in binary form must reproduce the above copyright
13 * notice, this list of conditions and the following disclaimer in the
14 * documentation and/or other materials provided with the distribution.
15 *
16 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
17 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
18 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
19 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
20 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
21 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
22 * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
23 * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
24 * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
25 * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
26 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27 *
28 * $Id: $
29 */
30 #include <string.h>
31 #include <ctype.h>
32 #define LIBSAGITTARIUS_BODY
33 #include "sagittarius/private/file.h"
34 #include "sagittarius/private/error.h"
35 #include "sagittarius/private/pair.h"
36 #include "sagittarius/private/port.h"
37 #include "sagittarius/private/string.h"
38 #include "sagittarius/private/library.h"
39 #include "sagittarius/private/regex.h"
40 #include "sagittarius/private/system.h"
41 #include "sagittarius/private/symbol.h"
42 #include "sagittarius/private/string.h"
43 #include "sagittarius/private/unicode.h"
44 #include "sagittarius/private/writer.h"
45
file_print(SgObject obj,SgPort * port,SgWriteContext * ctx)46 static void file_print(SgObject obj, SgPort *port, SgWriteContext *ctx)
47 {
48 Sg_Printf(port, UC("#<file %s>"), SG_FILE(obj)->name);
49 }
50
51 SG_DEFINE_BUILTIN_CLASS_SIMPLE(Sg_FileClass, file_print);
52
Sg_OpenFile(SgString * file,int flags)53 SgObject Sg_OpenFile(SgString *file, int flags)
54 {
55 SgObject z = Sg_MakeFile();
56 if (!SG_FILE_VTABLE(z)->open(z, file, flags)){
57 SgObject err = Sg_FileErrorMessage(z);
58 return err;
59 }
60 return z;
61 }
62
Sg_CloseFile(SgObject file)63 int Sg_CloseFile(SgObject file)
64 {
65 return SG_FILE_VTABLE(file)->close(file);
66 }
67
Sg_FileSeek(SgObject file,int64_t off,SgWhence whence)68 int64_t Sg_FileSeek(SgObject file, int64_t off, SgWhence whence)
69 {
70 return SG_FILE_VTABLE(file)->seek(file, off, whence);
71 }
72
Sg_MakeCustomFile(void * data,SgFileTable * vtbl)73 SgObject Sg_MakeCustomFile(void *data, SgFileTable *vtbl)
74 {
75 SgFile *z = SG_NEW(SgFile);
76 SG_SET_CLASS(z, SG_CLASS_FILE);
77 z->osdependance = data;
78 SG_FILE_VTABLE(z) = vtbl;
79 return SG_OBJ(z);
80 }
81
Sg_FindFile(SgString * path,SgObject loadPaths,SgString * suffix,int quiet)82 SgObject Sg_FindFile(SgString *path, SgObject loadPaths,
83 SgString *suffix, int quiet)
84 {
85 SgObject dir;
86 SgObject realPath;
87 const SgObject sep = Sg_String(Sg_NativeFileSeparator());
88 SG_FOR_EACH(dir, loadPaths) {
89 if (suffix) {
90 realPath = Sg_StringAppend(SG_LIST4(SG_CAR(dir),
91 sep,
92 path,
93 suffix));
94 } else {
95 realPath = Sg_StringAppend(SG_LIST3(SG_CAR(dir),
96 sep,
97 path));
98 }
99 if (Sg_FileExistP(SG_STRING(realPath))) {
100 return realPath;
101 }
102 }
103 if (!quiet) {
104 Sg_IOError(SG_IO_FILE_NOT_EXIST_ERROR,
105 SG_INTERN("find-file"),
106 SG_MAKE_STRING("given file was not found"),
107 path, SG_FALSE);
108 }
109 return SG_FALSE;
110 }
111
112 /* TODO I would prefer not to have this here but for now. */
113 #if defined(_WIN32)
114 /*
115 Windows drive letter related stuff
116 */
117 #define dirsep_p(x) ((x) == '/' || (x) == '\\')
118 #define S SG_STRING_VALUE_AT
119
next_dirsep(SgObject path,long skipped)120 static long next_dirsep(SgObject path, long skipped)
121 {
122 while (skipped < SG_STRING_SIZE(path) && !dirsep_p(S(path, skipped))) {
123 skipped++;
124 }
125 return skipped;
126 }
127 /* detect 'c:' or so */
has_drive_letter(SgObject buf)128 static inline int has_drive_letter(SgObject buf)
129 {
130 int c0 = S(buf,0), c1 = S(buf,1);
131 if (c0 > 0x80) return FALSE; /* out of ascii range */
132 return isalpha(c0) && c1 == ':';
133 }
134
135 /*
136 TODO Should we skip?
137 */
detect_prefix(SgObject path)138 static int detect_prefix(SgObject path)
139 {
140 /* network address or so e.g. \\foo\bar */
141 if (dirsep_p(S(path,0)) && dirsep_p(S(path,1))) {
142 long skipped = 2;
143 while (dirsep_p(S(path, skipped))) {
144 skipped++;
145 }
146 if ((skipped = next_dirsep(path, skipped)) < SG_STRING_SIZE(path) &&
147 skipped+1 < SG_STRING_SIZE(path) && !dirsep_p(S(path, skipped+1))) {
148 skipped = next_dirsep(path, skipped+1);
149 }
150 return skipped;
151 }
152 if (has_drive_letter(path)) {
153 return 2;
154 }
155 return 0;
156 }
157 #undef S
158 #endif
159
160
brace_expand(SgString * str,int flags)161 static SgObject brace_expand(SgString *str, int flags)
162 {
163 const int escape = !(flags & SG_NOESCAPE);
164 int lbrace = 0, rbrace = 0, nest = 0, i;
165 int haslb = FALSE, hasrb = FALSE;
166
167 /* find { and }*/
168 for (i = 0; i < SG_STRING_SIZE(str); i++) {
169 if (SG_STRING_VALUE_AT(str, i) == '{' && nest++ == 0) {
170 lbrace = i;
171 haslb = TRUE;
172 }
173 if (SG_STRING_VALUE_AT(str, i) == '}' && --nest == 0) {
174 rbrace = i;
175 hasrb = TRUE;
176 break;
177 }
178 if (SG_STRING_VALUE_AT(str, i) == '\\' && escape) {
179 if (++i == SG_STRING_SIZE(str)) break;
180 }
181 }
182 /* make "foo/{a,b}" to ("foo/a" "foo/b") */
183 if (haslb && hasrb) {
184 SgObject h = SG_NIL, t = SG_NIL;
185 SgPort *out;
186 SgStringPort tp;
187 int i;
188 /* copy value until the first '{' */
189 out = Sg_InitStringOutputPort(&tp, 255);
190 for (i = 0; i < lbrace; i++) {
191 Sg_PutcUnsafe(out, SG_STRING_VALUE_AT(str, i));
192 }
193 /* skip '{' */
194 i++;
195 while (i < rbrace) {
196 /* now we need to copy one by one */
197 int nest = 0, j;
198 SgObject tmp;
199 for (;SG_STRING_VALUE_AT(str, i) != ',' || nest != 0; i++) {
200 if (i >= rbrace) break;
201
202 if (SG_STRING_VALUE_AT(str, i) == '{') nest++;
203 if (SG_STRING_VALUE_AT(str, i) == '}') nest--;
204 if (SG_STRING_VALUE_AT(str, i) == '\\' && escape) {
205 if (++i == rbrace) break;
206 }
207 Sg_PutcUnsafe(out, SG_STRING_VALUE_AT(str, i));
208 }
209 /* skip ',' */
210 i++;
211 /* copy after the '}' */
212 for (j = rbrace+1; j < SG_STRING_SIZE(str); j++) {
213 Sg_PutcUnsafe(out, SG_STRING_VALUE_AT(str, j));
214 }
215 tmp = Sg_GetStringFromStringPort(&tp);
216 SG_APPEND(h, t, brace_expand(tmp, flags));
217 /* back to the starting position */
218 Sg_SetPortPosition(out, lbrace, SG_BEGIN);
219 }
220 SG_CLEAN_STRING_PORT(&tp);
221 return h;
222 } else {
223 return SG_LIST1(str);
224 }
225 }
226
227 static SgObject DOT_PATH = SG_FALSE;
228 static SgObject DOTDOT_PATH = SG_FALSE;
229 static SgObject FULL_CHARSET = SG_FALSE;
230
231 #define STAR SG_MAKE_INT(10) /* SG_INTERN("*") */
232 #define STAR_SLASH SG_MAKE_INT(11)
233
234 /*
235 converts given path template to pattern
236 e.g.)
237 - "foo/bar/\*" -> (("foo") ("bar") (ANY))
238 - "foo/bar/buz*" -> (("foo") ("bar") ("buz" ANY))
239 - "foo/bar/[b][u]z*" -> (("foo") ("bar") ([b] [u] "z" ANY))
240 each element of the list represents a matching rule of path element.
241 */
find_close_bracket(SgString * path,int start,int flags)242 static int find_close_bracket(SgString *path, int start, int flags)
243 {
244 const int escape = !(flags & SG_NOESCAPE);
245 int i;
246 for (i = start; i < SG_STRING_SIZE(path); i++) {
247 switch (SG_STRING_VALUE_AT(path, i)) {
248 case ']': return i;
249 case '\\':
250 if (escape) i++;
251 break;
252 }
253 }
254 return start;
255 }
256
remove_backslashes(SgObject path)257 static SgObject remove_backslashes(SgObject path)
258 {
259 int i, j, count = 0;
260 SgObject r;
261 for (i = 0; i < SG_STRING_SIZE(path); i++) {
262 if (SG_STRING_VALUE_AT(path, i) != '\\') count++;
263 }
264 /* no backslash */
265 if (SG_STRING_SIZE(path) == count) return path;
266
267 r = Sg_ReserveString(count, '\0');
268 for (i = 0, j = 0; i < SG_STRING_SIZE(path); i++) {
269 if (SG_STRING_VALUE_AT(path, i) != '\\') {
270 SG_STRING_VALUE_AT(r, j++) = SG_STRING_VALUE_AT(path, i);
271 }
272 }
273 return r;
274 }
275
convert_star(SgObject p)276 static SgObject convert_star(SgObject p)
277 {
278 /* for may laziness, we use regular expression for '*' */
279 SgObject h = SG_NIL, t = SG_NIL;
280 int has_star = FALSE;
281 SG_FOR_EACH(p, p) {
282 if (SG_EQ(SG_CAR(p), STAR)) {
283 has_star = TRUE;
284 break;
285 }
286 SG_APPEND1(h, t, SG_CAR(p));
287 }
288 if (has_star) {
289 /* TODO should we use AST directly to save some memory? */
290 SgPort *out;
291 SgStringPort tp;
292
293 /* copy value until the first '{' */
294 out = Sg_InitStringOutputPort(&tp, 255);
295 Sg_PutzUnsafe(out, ".*");
296 SG_FOR_EACH(p, SG_CDR(p)) {
297 if (SG_STRINGP(SG_CAR(p))) {
298 Sg_PutsUnsafe(out, SG_STRING(SG_CAR(p)));
299 } else if (SG_CHAR_SET_P(SG_CAR(p))) {
300 Sg_PutsUnsafe(out, Sg_CharSetToRegexString(SG_CAR(p), FALSE));
301 } else if (SG_EQ(SG_CAR(p), STAR)) {
302 Sg_PutzUnsafe(out, ".*");
303 } else {
304 Sg_Error(UC("[Internal] Unknown pattern '%S'"), SG_CAR(p));
305 }
306 }
307 Sg_PutcUnsafe(out, '$');
308 SG_APPEND1(h, t, Sg_CompileRegex(Sg_GetStringFromStringPort(&tp), 0,
309 FALSE));
310 SG_CLEAN_STRING_PORT(&tp);
311 return h;
312 }
313 /* FIXME: we don't want to allocate memory in this case */
314 return h;
315 }
316
317 #define ANY SG_MAKE_INT(1)
318 #define DIR SG_MAKE_INT(2)
319
glob_make_pattern(SgString * path,int flags)320 static SgObject glob_make_pattern(SgString *path, int flags)
321 {
322 const int escape = !(flags & SG_NOESCAPE);
323 SgObject h = SG_NIL, t = SG_NIL, h1 = SG_NIL, t1 = SG_NIL;
324 int i, start;
325 #define emit() \
326 do { \
327 if (start != i) { \
328 SgObject tmp = Sg_Substring(path, start, i); \
329 if (escape) tmp = remove_backslashes(tmp); \
330 SG_APPEND1(h1, t1, tmp); \
331 } \
332 start = i+1; \
333 } while (0)
334
335 for (i = 0, start = 0; i < SG_STRING_SIZE(path);) {
336 SgChar c = SG_STRING_VALUE_AT(path, i);
337
338 switch (c) {
339 case '[': {
340 int s = i, e;
341 e = find_close_bracket(path, start, flags);
342 if (s != e) {
343 emit();
344 SG_APPEND1(h1, t1, Sg_ParseCharSetString(path, FALSE, s, i=++e));
345 start = i;
346 }
347 i++;
348 } break;
349 case '/':
350 /* next */
351 emit();
352 /* if the path starts with '/', then this can be null */
353 if (!SG_NULLP(h1)) {
354 SG_APPEND1(h, t, convert_star(h1));
355 }
356 h1 = t1 = SG_NIL; /* reset it */
357 /* this need to be updated */
358 start = ++i;
359 break;
360 case '*': {
361 int has = (start != i);
362 emit();
363 /* merge it if it's there */
364 if (!has && SG_STRING_SIZE(path) - i >= 3 &&
365 SG_STRING_VALUE_AT(path, i+1) == '*' &&
366 SG_STRING_VALUE_AT(path, i+2) == '/') {
367 do {
368 i += 3;
369 /* skip '/' */
370 while (SG_STRING_VALUE_AT(path, i) == '/') i++;
371 } while (SG_STRING_VALUE_AT(path, i) == '*' &&
372 SG_STRING_VALUE_AT(path, i+1) == '*' &&
373 SG_STRING_VALUE_AT(path, i+2) == '/');
374 SG_APPEND1(h1, t1, STAR_SLASH);
375 SG_APPEND1(h, t, h1);
376 h1 = t1 = SG_NIL; /* reset it */
377 start = i;
378 } else {
379 SG_APPEND1(h1, t1, STAR);
380 while (SG_STRING_VALUE_AT(path, i) == '*') i++;
381 }
382 break;
383 }
384 case '?':
385 emit();
386 SG_APPEND1(h1, t1, FULL_CHARSET);
387 i++;
388 break;
389 default:
390 i++;
391 break;
392 }
393 }
394
395 emit();
396 if (!SG_NULLP(h1)) {
397 SG_APPEND1(h, t, convert_star(h1));
398 SG_APPEND1(h, t, SG_LIST1(ANY));
399 } else {
400 SG_APPEND1(h, t, SG_LIST1(DIR));
401 }
402 #undef emit
403 return h;
404 }
405
406 enum answer
407 {
408 YES,
409 NO,
410 UNKNOWN
411 };
412
join_path(SgObject base,int sep,SgObject name)413 static SgObject join_path(SgObject base, int sep, SgObject name)
414 {
415 if (sep) return Sg_BuildPath(base, name);
416 else return Sg_StringAppend2(base, name);
417 }
418
glob_match1(SgObject pat,SgObject path_element,int flags)419 static int glob_match1(SgObject pat, SgObject path_element, int flags)
420 {
421 const int period = !(flags & SG_DOTMATCH);
422 /* Flags are taken from Ruby but I don't know what FNM_PATHNAME does on glob.
423 so ignore.*/
424 /* const int pathname = flags & SG_PATHNAME; */
425 int pos = 0;
426 SgObject cp;
427
428 if (period) {
429 if (SG_STRING_VALUE_AT(path_element, 0) == '.' &&
430 /* leading period */
431 !(SG_STRINGP(SG_CAR(pat)) &&
432 SG_STRING_VALUE_AT(SG_STRING(SG_CAR(pat)), 0) == '.')) {
433 return FALSE;
434 }
435 }
436 SG_FOR_EACH(cp, pat) {
437 /* the matching is pretty much simple, a rule may contain the followings:
438 - string
439 - charset
440 - pattern (regular expression)
441 these are resolved by prefix match, one char match or regex match,
442 respectively. */
443 SgObject p = SG_CAR(cp);
444
445 if (pos >= SG_STRING_SIZE(path_element)) return FALSE;
446 if (SG_STRINGP(p)) {
447 int i;
448 for (i = 0; i < SG_STRING_SIZE(p); i++) {
449 if (!SG_EQ(SG_STRING_VALUE_AT(p, i),
450 SG_STRING_VALUE_AT(path_element, pos++))) {
451 return FALSE;
452 }
453 }
454 } else if (SG_CHAR_SET_P(p)) {
455 if (!Sg_CharSetContains(p, SG_STRING_VALUE_AT(path_element, pos++))) {
456 return FALSE;
457 }
458 } else if (SG_PATTERNP(p)) {
459 SgMatcher *m = Sg_RegexTextMatcher(SG_PATTERN(p), path_element, pos,
460 SG_STRING_SIZE(path_element));
461 return Sg_RegexTextMatches(SG_TEXT_MATCHER(m));
462 } else {
463 Sg_Error(UC("[Internal] Unknown glob rule '%S' in '%S'"), p, pat);
464 return FALSE; /* dummy */
465 }
466
467 }
468 if (pos != SG_STRING_SIZE(path_element)) return FALSE;
469 return TRUE;
470 }
471
glob_match(SgString * path,int dirsep,enum answer exist,enum answer isdir,SgObject pattern,int flags)472 static SgObject glob_match(SgString *path,
473 int dirsep,
474 enum answer exist,
475 enum answer isdir,
476 SgObject pattern,
477 int flags)
478 {
479 SgObject pat, h = SG_NIL, t = SG_NIL;
480 int match_dir = FALSE, match_any = FALSE, recursive = FALSE, plain = FALSE;
481 /* no pattern */
482 if (SG_NULLP(pattern)) return h;
483
484 /* the current rule */
485 SG_FOR_EACH(pat, pattern) {
486 if (SG_EQ(SG_CAAR(pat), STAR_SLASH)) {
487 recursive = TRUE;
488 continue;
489 }
490 if (SG_EQ(SG_CAAR(pat), DIR)) {
491 match_dir = TRUE;
492 plain = TRUE;
493 } else if (SG_EQ(SG_CAAR(pat), ANY)) {
494 match_any = TRUE;
495 plain = TRUE;
496 } else if (SG_STRINGP(SG_CAAR(pat)) && SG_NULLP(SG_CDAR(pat))) {
497 plain = TRUE;
498 }
499 break;
500 }
501
502 /* check existance on the last rule*/
503 if (match_any && exist == UNKNOWN) {
504 if (Sg_FileExistP(path)) {
505 exist = YES;
506 isdir = Sg_DirectoryP(path)
507 ? YES : Sg_FileSymbolicLinkP(path)
508 ? UNKNOWN : NO;
509 } else {
510 exist = NO;
511 isdir = NO;
512 }
513 }
514 if (match_dir && isdir == UNKNOWN) {
515 if (Sg_FileExistP(path)) {
516 exist = YES;
517 isdir = Sg_DirectoryP(path) ? YES : NO;
518 } else {
519 exist = NO;
520 isdir = NO;
521 }
522 }
523
524 if ((match_any && exist == YES) ||
525 (match_dir && isdir == YES)) {
526 SG_APPEND1(h, t, path);
527 }
528
529 if (match_any || match_dir) return h;
530
531 if (!plain || recursive) {
532 SgObject paths = Sg_ReadDirectory(path);
533 SG_FOR_EACH(paths, paths) {
534 SgObject p = SG_CAR(paths), next = SG_CDR(pat);
535 SgObject buf;
536 enum answer new_isdir = UNKNOWN;
537
538 if (!SG_EQ(path, DOT_PATH)) buf = join_path(path, dirsep, p);
539 else buf = p;
540
541 if (recursive &&
542 !(Sg_StringEqual(p, DOT_PATH) || Sg_StringEqual(p, DOTDOT_PATH))) {
543 new_isdir = Sg_DirectoryP(buf)
544 ? YES : Sg_FileSymbolicLinkP(buf)
545 ? UNKNOWN : NO;
546 }
547 if (glob_match1(SG_CAR(pat), p, flags)) {
548 SG_APPEND(h, t, glob_match(buf, TRUE, YES, new_isdir, next, flags));
549 }
550 if (recursive && new_isdir == YES) {
551 /* ok, we need to put recursive mark here as well */
552 next = Sg_Cons(SG_LIST1(STAR_SLASH), pat);
553 SG_APPEND(h, t, glob_match(buf, TRUE, YES, new_isdir, next, flags));
554 }
555 }
556 } else if (plain) {
557 SgObject name = SG_CAAR(pat);
558
559 /* if this is the first one, then ignore */
560 if (!SG_EQ(path, DOT_PATH)) name = join_path(path, dirsep, name);
561 /* we do match here to avoid dot files */
562 SG_APPEND(h, t, glob_match(name, TRUE, UNKNOWN, UNKNOWN,
563 SG_CDR(pattern), flags));
564 }
565 return h;
566 }
567
Sg_Glob(SgString * path,int flags)568 SgObject Sg_Glob(SgString *path, int flags)
569 {
570 SgObject paths, h = SG_NIL, t = SG_NIL;
571 SgString *buf;
572
573 paths = brace_expand(path, flags);
574 SG_FOR_EACH(paths, paths) {
575 SgObject r, list;
576 int drive_off = 0;
577 path = SG_STRING(SG_CAR(paths));
578 #if defined(_WIN32)
579 /* should we? */
580 drive_off = detect_prefix(path);
581 #endif
582 /* if the path is start with '/' then we need to keep it.
583 otherwise we can assume it's current directory.
584 NB: all other informations are in the `list` (compiled rule)
585 */
586 if (SG_STRING_VALUE_AT(path, drive_off) == '/') {
587 buf = Sg_Substring(path, 0, drive_off+1);
588 #if defined(_WIN32)
589 /* a bit awkward to do it but need it */
590 if (drive_off) {
591 int i;
592 for (i = 0; i < drive_off+1; i++) {
593 if (SG_STRING_VALUE_AT(buf, i) == '/') {
594 SG_STRING_VALUE_AT(buf, i) = '\\';
595 }
596 }
597 }
598 #endif
599 } else {
600 buf = DOT_PATH;
601 }
602 /* strip drive or prefix */
603 if (drive_off) {
604 path = Sg_Substring(path, drive_off, SG_STRING_SIZE(path));
605 }
606 list = glob_make_pattern(path, flags);
607
608 r = glob_match(buf, FALSE, UNKNOWN, UNKNOWN, list, flags);
609 SG_APPEND(h, t, r);
610 }
611 return h;
612 }
613
Sg__InitFile()614 void Sg__InitFile()
615 {
616 DOT_PATH = SG_MAKE_STRING(".");
617 DOTDOT_PATH = SG_MAKE_STRING("..");
618 FULL_CHARSET = Sg_CharSetComplement(Sg_MakeEmptyCharSet());
619 }
620