1 #if !defined(lint) && !defined(DOS)
2 static char rcsid[] = "$Id: filesys.c 770 2007-10-24 00:23:09Z hubert@u.washington.edu $";
3 #endif
4
5 /*
6 * ========================================================================
7 * Copyright 2013-2021 Eduardo Chappa
8 * Copyright 2006-2007 University of Washington
9 *
10 * Licensed under the Apache License, Version 2.0 (the "License");
11 * you may not use this file except in compliance with the License.
12 * You may obtain a copy of the License at
13 *
14 * http://www.apache.org/licenses/LICENSE-2.0
15 *
16 * ========================================================================
17 */
18
19 /* includable WITHOUT dependency on c-client */
20 #include "../../c-client/mail.h"
21 #include "../../c-client/utf8.h"
22
23 #ifdef _WINDOWS
24 /* wingdi.h uses ERROR (!) and we aren't using the c-client ERROR so... */
25 #undef ERROR
26 #endif
27
28 #include <system.h>
29 #include <general.h>
30
31 #include "../../c-client/fs.h"
32
33 /* includable WITHOUT dependency on pico */
34 #include "../../pico/keydefs.h"
35 #ifdef _WINDOWS
36 #include "../../pico/osdep/mswin.h"
37 #endif
38
39 #include "filesys.h"
40 #include "utf8.h"
41
42
43 #define bad_char ((UCS) '?')
44
45
/*
 * Make it easier to use the convert_to_locale function for filenames
 * and directory names.
 *
 * The result is kept in a single static buffer owned by this function,
 * so only one converted name can be in use at a time: the returned
 * pointer is overwritten (and possibly reallocated) by the next call.
 * If convert_to_locale() returns NULL the name is copied through
 * unchanged.
 *
 * Args  fname -- the name to convert, or NULL as a special request to
 *                release the static buffer
 *
 * Returns the static buffer holding the converted name, or NULL when
 * called with a NULL fname.
 */
char *
fname_to_locale(char *fname)
{
    static char *fname_locale_buf = NULL;
    static size_t fname_locale_len = 0;
    char *converted, *src;
    size_t needed;

    if(fname == NULL){			/* special call to free memory */
        if(fname_locale_buf)
            fs_give((void **) &fname_locale_buf);

        fname_locale_len = 0;
        return NULL;
    }

    converted = convert_to_locale(fname);
    src = converted ? converted : fname;	/* never NULL from here on */
    needed = strlen(src) + 1;

    /* the buffer only ever grows; one that is big enough is reused */
    if(needed > fname_locale_len){
        if(fname_locale_buf)
            fs_give((void **) &fname_locale_buf);

        fname_locale_len = needed;
        fname_locale_buf = (char *) fs_get(fname_locale_len * sizeof(char));
    }

    /*
     * memmove rather than strncpy so that a caller handing our own
     * buffer back in (source and destination overlap) is still ok.
     * needed includes the terminating null.
     */
    memmove(fname_locale_buf, src, needed);

    if(converted)
        fs_give((void **) &converted);

    return(fname_locale_buf);
}
97
98
/*
 * Make it easier to use the convert_to_utf8 function for filenames
 * and directory names.
 *
 * The result is kept in a single static buffer owned by this function,
 * so only one converted name can be in use at a time: the returned
 * pointer is overwritten (and possibly reallocated) by the next call.
 * If convert_to_utf8() returns NULL the name is copied through
 * unchanged.
 *
 * Args  fname -- the name to convert, or NULL as a special request to
 *                release the static buffer
 *
 * Returns the static buffer holding the converted name, or NULL when
 * called with a NULL fname.
 */
char *
fname_to_utf8(char *fname)
{
    static char *fname_utf8_buf = NULL;
    static size_t fname_utf8_len = 0;
    char *converted, *src;
    size_t needed;

    if(fname == NULL){			/* special call to free memory */
        if(fname_utf8_buf)
            fs_give((void **) &fname_utf8_buf);

        fname_utf8_len = 0;
        return NULL;
    }

    converted = convert_to_utf8(fname, NULL, 0);
    src = converted ? converted : fname;	/* never NULL from here on */
    needed = strlen(src) + 1;

    /* the buffer only ever grows; one that is big enough is reused */
    if(needed > fname_utf8_len){
        if(fname_utf8_buf)
            fs_give((void **) &fname_utf8_buf);

        fname_utf8_len = needed;
        fname_utf8_buf = (char *) fs_get(fname_utf8_len * sizeof(char));
    }

    /*
     * memmove rather than strncpy so that a caller handing our own
     * buffer back in (source and destination overlap) is still ok.
     * needed includes the terminating null.
     */
    memmove(fname_utf8_buf, src, needed);

    if(converted)
        fs_give((void **) &converted);

    return(fname_utf8_buf);
}
150
151
152 /*
153 * The fp file pointer is open for read on a file which has contents
154 * that are encoded in the user's locale charset. That multibyte stream
155 * of characters is converted to wide characters and returned one at
156 * a time.
157 *
158 * Not sure what to do if an uninterpretable character happens. Returning
159 * the bad character now.
160 */
161 UCS
read_a_wide_char(FILE * fp,void * input_cs)162 read_a_wide_char(FILE *fp,
163 void *input_cs) /* input_cs ignored in Windows */
164 {
165 #ifdef _WINDOWS
166 _TINT val;
167
168 val = _fgettc(fp);
169 if(val == _TEOF)
170 return(CCONV_EOF);
171
172 return((UCS) val);
173 #else /* UNIX */
174 unsigned long octets_so_far, remaining_octets;
175 unsigned char *inputp;
176 unsigned char inputbuf[20];
177 int c;
178 UCS ucs;
179
180 c = fgetc(fp);
181 if(c == EOF)
182 return(CCONV_EOF);
183
184 /*
185 * Read enough bytes to make up a character and convert it to UCS-4.
186 */
187 memset(inputbuf, 0, sizeof(inputbuf));
188 inputbuf[0] = (unsigned char) c;
189 octets_so_far = 1;
190 for(;;){
191 remaining_octets = octets_so_far;
192 inputp = inputbuf;
193 ucs = mbtow(input_cs, &inputp, &remaining_octets);
194 switch(ucs){
195 case CCONV_BADCHAR:
196 return(bad_char);
197
198 case CCONV_NEEDMORE:
199 if(octets_so_far >= sizeof(inputbuf))
200 return(bad_char);
201
202 c = fgetc(fp);
203 if(c == EOF)
204 return(CCONV_EOF);
205
206 inputbuf[octets_so_far++] = (unsigned char) c;
207 break;
208
209 default:
210 /* got a good UCS-4 character */
211 return(ucs);
212 }
213 }
214
215 return(bad_char);
216 #endif /* UNIX */
217 }
218
219
220 int
write_a_wide_char(UCS ucs,FILE * fp)221 write_a_wide_char(UCS ucs, FILE *fp)
222 {
223 #ifdef _WINDOWS
224 int rv = 1;
225 TCHAR w;
226
227 w = (TCHAR) ucs;
228 if(_fputtc(w, fp) == _TEOF)
229 rv = EOF;
230
231 return(rv);
232 #else /* UNIX */
233 int rv = 1;
234 int i, outchars;
235 unsigned char obuf[MAX(MB_LEN_MAX,32)];
236
237 if(ucs < 0x80){
238 obuf[0] = (unsigned char) ucs;
239 outchars = 1;
240 }
241 else{
242 outchars = wtomb((char *) obuf, ucs);
243 if(outchars < 0){
244 outchars = 1;
245 obuf[0] = bad_char; /* ??? */
246 }
247 }
248
249 for(i = 0; i < outchars; i++)
250 if(fputc(obuf[i], fp) == EOF){
251 rv = EOF;
252 break;
253 }
254
255 return(rv);
256 #endif /* UNIX */
257 }
258
259
/*
 * stat() wrapper that converts filename before asking the system.
 *
 * On Windows the name goes through utf8_to_lptstr(), _tstat() fills a
 * local struct _stat, and the result is copied field by field into
 * the caller's struct stat. Elsewhere the name goes through
 * fname_to_locale() and stat() is called directly.
 *
 * Returns the result of the underlying stat call, or -1 if the
 * Windows name conversion fails. On Windows sbuf is only written when
 * the call succeeds.
 */
int
our_stat(char *filename, struct stat *sbuf)
{
#ifdef _WINDOWS
    LPTSTR f = NULL;
    int ret = -1;
    struct _stat s;

    f = utf8_to_lptstr((LPSTR) filename);
    if(f){
        ret = _tstat(f, &s);

        /*
         * s has only been filled in if _tstat() succeeded, don't
         * copy uninitialized fields out to the caller otherwise
         */
        if(ret == 0){
            sbuf->st_dev = s.st_dev;
            sbuf->st_ino = s.st_ino;
            sbuf->st_mode = s.st_mode;
            sbuf->st_nlink = s.st_nlink;
            sbuf->st_uid = s.st_uid;
            sbuf->st_gid = s.st_gid;
            sbuf->st_rdev = s.st_rdev;
            sbuf->st_size = s.st_size;
            sbuf->st_atime = (time_t) s.st_atime;
            sbuf->st_mtime = (time_t) s.st_mtime;
            sbuf->st_ctime = (time_t) s.st_ctime;
        }

        fs_give((void **) &f);
    }

    return ret;
#else /* UNIX */
    return(stat(fname_to_locale(filename), sbuf));
#endif /* UNIX */
}
292
293
/*
 * lstat() wrapper. The name is run through fname_to_locale() before
 * lstat() is called. Not meant to be reached on Windows, where it
 * asserts and returns -1.
 */
int
our_lstat(char *filename, struct stat *sbuf)
{
#ifndef _WINDOWS
    char *locale_name = fname_to_locale(filename);

    return(lstat(locale_name, sbuf));
#else /* _WINDOWS */
    assert(0);			/* lstat not used in Windows */
    return(-1);
#endif /* _WINDOWS */
}
304
305
/*
 * fopen() wrapper that converts path before opening it.
 *
 * On Windows a text-mode open (no 'b' in mode) gets a ccs= suffix
 * appended to the mode string: ", ccs=UNICODE" for modes starting
 * with 'r' or 'a' and ", ccs=UTF-8" for modes starting with 'w'.
 * Both path and the resulting mode go through utf8_to_lptstr() and
 * the file is opened with _tfopen(). Elsewhere path goes through
 * fname_to_locale() and mode is handed to fopen() unchanged.
 *
 * Returns the open stream, or NULL on failure. On Windows NULL also
 * comes back when mode is NULL or does not start with 'r', 'a' or
 * 'w', or when either string conversion fails.
 */
FILE *
our_fopen(char *path, char *mode)
{
#ifdef _WINDOWS
    LPTSTR p = NULL, m = NULL;
    FILE *ret = NULL;
    char *mode_with_ccs = NULL;
    char *ccs = NULL;
    char buf[500];
    size_t len;

    if(mode){
        /*
         * The docs seem to say that we don't need the ccs parameter and
         * if the file has a BOM at the beginning it will notice that and
         * use it. However, we're not seeing that. Instead, what we see is
         * that giving a parameter of UNICODE causes the desired behavior.
         * This causes it to check for a BOM and if it finds one it uses it.
         * If it doesn't find one, it treats the file as ANSI, which is what
         * we want.
         */
        if(*mode == 'r' || *mode == 'a')
            ccs = ", ccs=UNICODE";
        else if(*mode == 'w')
            ccs = ", ccs=UTF-8";
    }

    if(ccs){
        if(strchr(mode, 'b'))
            mode_with_ccs = mode;	/* binary, leave the mode alone */
        else{
            /* use the stack buffer when it fits, allocate otherwise */
            len = strlen(mode) + strlen(ccs);
            if(len < sizeof(buf))
                mode_with_ccs = buf;
            else
                mode_with_ccs = (char *) MemAlloc((len+1) * sizeof(char));

            if(mode_with_ccs)
                snprintf(mode_with_ccs, len+1, "%s%s", mode, ccs);
            else
                mode_with_ccs = mode;	/* can't happen */
        }
    }

    /*
     * No usable mode means there is nothing to open, so don't bother
     * converting anything (and don't hand NULL to utf8_to_lptstr).
     */
    if(mode_with_ccs){
        p = utf8_to_lptstr((LPSTR) path);

        if(p){
            m = utf8_to_lptstr((LPSTR) mode_with_ccs);
            if(m){
                ret = _tfopen(p, m);
                /* released the same way as every other utf8_to_lptstr result */
                fs_give((void **) &m);
            }

            fs_give((void **) &p);
        }
    }

    /* only free the mode string if we allocated it above */
    if(mode_with_ccs && mode_with_ccs != buf && mode_with_ccs != mode)
        MemFree((void *) mode_with_ccs);

    return ret;
#else /* UNIX */
    return(fopen(fname_to_locale(path), mode));
#endif /* UNIX */
}
384
385
/*
 * open() wrapper that converts path before calling the system routine
 * (utf8_to_lptstr() and _topen() on Windows, fname_to_locale() and
 * open() elsewhere). flags and mode are passed along untouched.
 *
 * Returns what the underlying open call returns, or -1 if the Windows
 * path conversion fails.
 */
int
our_open(char *path, int flags, mode_t mode)
{
#ifdef _WINDOWS
    LPTSTR wide_path;
    int fd;

    /*
     * Things to keep in mind about the flags a caller can pass:
     *
     * _O_WTEXT on a file opened for reading makes the first few
     * bytes get checked for a BOM, and the contents get translated
     * from that encoding if one is found. That only works with
     * stream I/O, not low-level read/write.
     *
     * _O_U8TEXT on a file opened for writing puts a UTF-8 BOM at
     * the start of the file.
     *
     * O_TEXT causes LF -> CRLF on output and the opposite on input,
     * O_BINARY suppresses that. _O_U8TEXT implies O_TEXT.
     */

    wide_path = utf8_to_lptstr((LPSTR) path);
    if(wide_path == NULL)
        return -1;

    fd = _topen(wide_path, flags, mode);
    fs_give((void **) &wide_path);

    return fd;
#else /* UNIX */
    char *locale_path = fname_to_locale(path);

    return(open(locale_path, flags, mode));
#endif /* UNIX */
}
419
420
/*
 * creat() wrapper. path is converted with utf8_to_lptstr() for
 * _tcreat() on Windows and with fname_to_locale() for creat()
 * elsewhere.
 *
 * Returns what the underlying call returns, or -1 if the Windows
 * path conversion fails.
 */
int
our_creat(char *path, mode_t mode)
{
#ifdef _WINDOWS
    LPTSTR wide_path;
    int fd;

    wide_path = utf8_to_lptstr((LPSTR) path);
    if(wide_path == NULL)
        return -1;

    fd = _tcreat(wide_path, mode);
    fs_give((void **) &wide_path);

    return fd;
#else /* UNIX */
    char *locale_path = fname_to_locale(path);

    return(creat(locale_path, mode));
#endif /* UNIX */
}
440
441
/*
 * mkdir() wrapper. path is converted with utf8_to_lptstr() for
 * _tmkdir() on Windows, where mode is not used, and with
 * fname_to_locale() for mkdir() elsewhere.
 *
 * Returns what the underlying call returns, or -1 if the Windows
 * path conversion fails.
 */
int
our_mkdir(char *path, mode_t mode)
{
#ifdef _WINDOWS
    /* mode is a noop for _WINDOWS */
    LPTSTR wide_path;
    int status;

    wide_path = utf8_to_lptstr((LPSTR) path);
    if(wide_path == NULL)
        return -1;

    status = _tmkdir(wide_path);
    fs_give((void **) &wide_path);

    return status;
#else /* UNIX */
    char *locale_path = fname_to_locale(path);

    return(mkdir(locale_path, mode));
#endif /* UNIX */
}
462
463
/*
 * rename() wrapper that converts both names before calling the
 * system routine (utf8_to_lptstr() and _trename() on Windows,
 * fname_to_locale() and rename() elsewhere).
 *
 * Returns what the underlying call returns, or -1 if a name could
 * not be converted (either name on Windows, oldpath elsewhere).
 */
int
our_rename(char *oldpath, char *newpath)
{
#ifdef _WINDOWS
    int status = -1;
    LPTSTR wide_old = utf8_to_lptstr((LPSTR) oldpath);
    LPTSTR wide_new = utf8_to_lptstr((LPSTR) newpath);

    /* both conversions have to work before we try anything */
    if(wide_old && wide_new)
        status = _trename(wide_old, wide_new);

    if(wide_old)
        fs_give((void **) &wide_old);

    if(wide_new)
        fs_give((void **) &wide_new);

    return status;
#else /* UNIX */
    char *locale_old, *saved_old;
    size_t n;
    int status;

    locale_old = fname_to_locale(oldpath);
    if(locale_old == NULL)
        return -1;

    /*
     * fname_to_locale() has just the one buffer, so the old name
     * needs a copy of its own before the new name is converted.
     */
    n = strlen(locale_old);
    saved_old = (char *) fs_get((n+1) * sizeof(char));
    strncpy(saved_old, locale_old, n+1);
    saved_old[n] = '\0';

    status = rename(saved_old, fname_to_locale(newpath));
    fs_give((void **) &saved_old);

    return status;
#endif /* UNIX */
}
502
/*
 * rmdir() wrapper. path is converted with utf8_to_lptstr() for
 * _trmdir() on Windows and with fname_to_locale() for rmdir()
 * elsewhere.
 *
 * Returns what the underlying call returns, or -1 if the Windows
 * path conversion fails.
 */
int
our_rmdir(char *path)
{
#ifdef _WINDOWS
    LPTSTR wide_path;
    int status;

    wide_path = utf8_to_lptstr((LPSTR) path);
    if(wide_path == NULL)
        return -1;

    status = _trmdir(wide_path);
    fs_give((void **) &wide_path);

    return status;
#else /* UNIX */
    char *locale_path = fname_to_locale(path);

    return(rmdir(locale_path));
#endif /* UNIX */
}
522
/*
 * unlink() wrapper. path is converted with utf8_to_lptstr() for
 * _tunlink() on Windows and with fname_to_locale() for unlink()
 * elsewhere.
 *
 * Returns what the underlying call returns, or -1 if the Windows
 * path conversion fails.
 */
int
our_unlink(char *path)
{
#ifdef _WINDOWS
    LPTSTR wide_path;
    int status;

    wide_path = utf8_to_lptstr((LPSTR) path);
    if(wide_path == NULL)
        return -1;

    status = _tunlink(wide_path);
    fs_give((void **) &wide_path);

    return status;
#else /* UNIX */
    char *locale_path = fname_to_locale(path);

    return(unlink(locale_path));
#endif /* UNIX */
}
542
543
/*
 * link() wrapper. Both names are run through fname_to_locale()
 * before link() is called. Not meant to be reached on Windows, where
 * it asserts and returns -1.
 *
 * Returns what link() returns, or -1 if oldpath could not be
 * converted.
 */
int
our_link(char *oldpath, char *newpath)
{
#ifndef _WINDOWS
    char *locale_old, *saved_old;
    size_t n;
    int status;

    locale_old = fname_to_locale(oldpath);
    if(locale_old == NULL)
        return -1;

    /*
     * fname_to_locale() has just the one buffer, so the old name
     * needs a copy of its own before the new name is converted.
     */
    n = strlen(locale_old);
    saved_old = (char *) fs_get((n+1) * sizeof(char));
    strncpy(saved_old, locale_old, n+1);
    saved_old[n] = '\0';

    status = link(saved_old, fname_to_locale(newpath));
    fs_give((void **) &saved_old);

    return status;
#else /* _WINDOWS */
    assert(0);			/* link not used in Windows */
    return(-1);
#endif /* _WINDOWS */
}
569
570
/*
 * Set the length of the file named by path to size.
 *
 * Where truncate() is available (HAVE_TRUNCATE, not Windows) the
 * name goes through fname_to_locale() and truncate() does the work.
 * Otherwise the file is opened with our_open(), resized with chsize()
 * and closed again. The Windows open includes O_CREAT.
 *
 * Returns 0 on success and -1 on failure. Without truncate() on UNIX
 * a failing close() also counts as failure.
 */
int
our_truncate(char *path, off_t size)
{
    int ret = -1;
#if defined(_WINDOWS) || !defined(HAVE_TRUNCATE)
    int fdes;
#endif

#ifdef _WINDOWS
    /*
     * The permission bits S_IREAD and S_IWRITE used to be or'd into
     * the open flags here. They are not open flags, permissions go
     * in the mode argument, which already asks for 0600.
     */
    if((fdes = our_open(path, O_RDWR | O_CREAT | _O_U8TEXT, 0600)) != -1){
        if(chsize(fdes, size) == 0)
            ret = 0;

        close(fdes);
    }

#else /* UNIX */

#ifdef HAVE_TRUNCATE
    ret = truncate(fname_to_locale(path), size);
#else /* !HAVE_TRUNCATE */

    /* NOTE(review): chsize() on a UNIX without truncate() -- confirm it exists there */
    if((fdes = our_open(path, O_RDWR, 0600)) != -1){
        ret = chsize(fdes, size);

        if(close(fdes))
            ret = -1;
    }
#endif /* !HAVE_TRUNCATE */
#endif /* UNIX */

    return ret;
}
604
605
/*
 * chmod() wrapper. path is converted with utf8_to_lptstr() for
 * _tchmod() on Windows and with fname_to_locale() for chmod()
 * elsewhere.
 *
 * Returns what the underlying call returns, or -1 if the Windows
 * path conversion fails.
 */
int
our_chmod(char *path, mode_t mode)
{
#ifdef _WINDOWS
    LPTSTR wide_path;
    int status;

    wide_path = utf8_to_lptstr((LPSTR) path);
    if(wide_path == NULL)
        return -1;

    status = _tchmod(wide_path, mode);
    fs_give((void **) &wide_path);

    return status;
#else /* UNIX */
    char *locale_path = fname_to_locale(path);

    return(chmod(locale_path, mode));
#endif /* UNIX */
}
624
625
/*
 * chown() wrapper. path is run through fname_to_locale() before
 * chown() is called. On Windows nothing is done and 0 is returned.
 */
int
our_chown(char *path, uid_t owner, gid_t group)
{
#ifndef _WINDOWS
    char *locale_path = fname_to_locale(path);

    return(chown(locale_path, owner, group));
#else /* _WINDOWS */
    return 0;
#endif /* _WINDOWS */
}
635
636
/*
 * utime() wrapper. path is converted with utf8_to_lptstr() for
 * _tutime() on Windows and with fname_to_locale() for utime()
 * elsewhere. buf is passed along as is.
 *
 * Returns what the underlying call returns, or -1 if the Windows
 * path conversion fails.
 */
int
our_utime(char *path, struct utimbuf *buf)
{
#ifdef _WINDOWS
    LPTSTR wide_path;
    int status;

    wide_path = utf8_to_lptstr((LPSTR) path);
    if(wide_path == NULL)
        return -1;

    status = _tutime(wide_path, buf);
    fs_give((void **) &wide_path);

    return status;
#else /* UNIX */
    char *locale_path = fname_to_locale(path);

    return(utime(locale_path, buf));
#endif /* UNIX */
}
656
/*
 * Look up an environment variable and return its value as a freshly
 * allocated utf8-encoded string, or NULL if there is no such
 * variable. The env_variable argument is assumed not to be UTF-8.
 *
 * On UNIX the value from getenv() goes through fname_to_utf8() and
 * is then copied into memory from fs_get(). On Windows the name is
 * widened a character at a time (at most MAXPATH of them), looked up
 * with _tgetenv(), and the value is returned via lptstr_to_utf8().
 *
 * This is meant to stand in for getenv wherever convenient, since
 * Windows pretty much doesn't want to do getenv once we do unicode.
 */
char *
our_getenv(char *env_variable)
{
#ifdef _WINDOWS
    TCHAR wide_name[MAXPATH+1], *value;
    int k = 0;

    while(env_variable[k] && k < MAXPATH){
        wide_name[k] = env_variable[k];
        k++;
    }

    wide_name[k] = '\0';

    value = _tgetenv(wide_name);
    if(value == NULL)
        return(NULL);

    return(lptstr_to_utf8(value));
#else /* !_WINDOWS */
    char *raw, *as_utf8, *copy;
    size_t n;

    raw = getenv(env_variable);
    if(raw == NULL)
        return(NULL);

    /*
     * all this when what we want is a cpystr. The converted value
     * sits in fname_to_utf8's buffer so the caller gets a copy.
     */
    as_utf8 = fname_to_utf8(raw);
    n = strlen(as_utf8);
    copy = (char *) fs_get((n+1) * sizeof(char));
    strncpy(copy, as_utf8, n+1);
    copy[n] = '\0';

    return(copy);
#endif /* !_WINDOWS */
}
696
/*
 * access() wrapper. path is converted with utf8_to_lptstr() for
 * _taccess() on Windows and with fname_to_locale() for access()
 * elsewhere.
 *
 * Returns what the underlying call returns, or -1 if the Windows
 * path conversion fails.
 */
int
our_access(char *path, int mode)
{
#ifdef _WINDOWS
    LPTSTR wide_path;
    int status;

    wide_path = utf8_to_lptstr((LPSTR) path);
    if(wide_path == NULL)
        return -1;

    status = _taccess(wide_path, mode);
    fs_give((void **) &wide_path);

    return status;
#else /* UNIX */
    char *locale_path = fname_to_locale(path);

    return(access(locale_path, mode));
#endif /* UNIX */
}
715
716
/*
 * Fgets that doesn't do any character encoding translation or any
 * of that Windows stuff.
 *
 * On UNIX this is simply fgets(). On Windows the line is collected a
 * byte at a time with fread(): at most size-1 bytes are stored, a
 * newline ends the line and is kept, and the result is always
 * null terminated.
 *
 * Returns s, or NULL when nothing could be read before end of file
 * or an error. The Windows code also returns NULL if s is NULL or
 * size is not positive.
 */
char *
fgets_binary(char *s, int size, FILE *fp)
{
#ifdef _WINDOWS
    char *p;
    char c = '\0';
    size_t r = 0;

    /*
     * Use fread low-level input instead of fgets.
     * Maybe if we understood better we wouldn't need this.
     */
    if(!s || size <= 0)		/* no room even for the terminator */
        return NULL;

    p = s;
    while(p-s < size-1 && (r = fread(&c, sizeof(c), (size_t) 1, fp)) == 1 && c != '\n')
        *p++ = c;

    if(p-s < size-1 && r == 1){
        /* must have gotten to end of line */
        *p++ = '\n';
    }

    *p = '\0';

    /*
     * Match fgets (which is what the UNIX side uses): if we wanted
     * data and got none at all, say so with NULL. s is still left
     * as an empty string for anybody who looks at it anyway.
     */
    if(p == s && size > 1)
        return NULL;

    return(s);

#else /* UNIX */
    return(fgets(s, size, fp));
#endif /* UNIX */
}
752