1 /*
2 * $LynxId: HTFile.c,v 1.152 2019/08/16 22:53:10 tom Exp $
3 *
4 * File Access HTFile.c
5 * ===========
6 *
7 * This is unix-specific code in general, with some VMS bits.
8 * These are routines for file access used by browsers.
 * Development of this module for Unix DIRED_SUPPORT in Lynx
 * regrettably has been conducted in a manner which now
 * creates a major impediment for hopes of adapting Lynx to
 * a newer version of the library.
13 *
14 * History:
15 * Feb 91 Written Tim Berners-Lee CERN/CN
16 * Apr 91 vms-vms access included using DECnet syntax
17 * 26 Jun 92 (JFG) When running over DECnet, suppressed FTP.
18 * Fixed access bug for relative names on VMS.
19 * Sep 93 (MD) Access to VMS files allows sharing.
20 * 15 Nov 93 (MD) Moved HTVMSname to HTVMSUTILS.C
21 * 27 Dec 93 (FM) FTP now works with VMS hosts.
22 * FTP path must be Unix-style and cannot include
23 * the device or top directory.
24 */
25
26 #include <HTUtils.h>
27
28 #ifndef VMS
29 #if defined(DOSPATH)
30 #undef LONG_LIST
31 #define LONG_LIST /* Define this for long style unix listings (ls -l),
32 the actual style is configurable from lynx.cfg */
33 #endif
34 /* #define NO_PARENT_DIR_REFERENCE */
35 /* Define this for no parent links */
36 #endif /* !VMS */
37
38 #if defined(DOSPATH)
39 #define HAVE_READDIR 1
40 #define USE_DIRENT
41 #endif
42
43 #if defined(USE_DOS_DRIVES)
44 #include <HTDOS.h>
45 #endif
46
47 #include <HTFile.h> /* Implemented here */
48
49 #ifdef VMS
50 #include <stat.h>
51 #endif /* VMS */
52
53 #if defined (USE_ZLIB) || defined (USE_BZLIB)
54 #include <GridText.h>
55 #endif
56
57 #define MULTI_SUFFIX ".multi" /* Extension for scanning formats */
58
59 #include <HTParse.h>
60 #include <HTTCP.h>
61 #ifndef DECNET
62 #include <HTFTP.h>
63 #endif /* !DECNET */
64 #include <HTAnchor.h>
65 #include <HTAtom.h>
66 #include <HTAAProt.h>
67 #include <HTFWriter.h>
68 #include <HTInit.h>
69 #include <HTBTree.h>
70 #include <HTAlert.h>
71 #include <HTCJK.h>
72 #include <UCDefs.h>
73 #include <UCMap.h>
74 #include <UCAux.h>
75
76 #include <LYexit.h>
77 #include <LYCharSets.h>
78 #include <LYGlobalDefs.h>
79 #include <LYStrings.h>
80 #include <LYUtils.h>
81
82 #ifdef USE_PRETTYSRC
83 # include <LYPrettySrc.h>
84 #endif
85
86 #include <LYLeaks.h>
87
/*
 * One file-name suffix registration: what a name ending in "suffix"
 * is taken to be.
 */
typedef struct _HTSuffix {
    char *suffix;               /* suffix text compared with the end of a file name */
    HTAtom *rep;                /* representation atom; NULL if only an encoding is known */
    HTAtom *encoding;           /* encoding atom registered for the suffix */
    char *desc;                 /* description text, may be NULL */
    float quality;              /* quality value supplied at registration */
} HTSuffix;
95
/*
 * A directory entry as handed to the long-listing formatter.
 */
typedef struct {
    struct stat file_info;      /* stat data of the entry (mode, size, times, ids, links) */
    char sort_tags;             /* NOTE(review): not referenced in the visible code */
    char file_name[1];          /* on the end of the struct, since its length varies */
} DIRED;
101
102 #ifndef NGROUPS
103 #ifdef NGROUPS_MAX
104 #define NGROUPS NGROUPS_MAX
105 #else
106 #define NGROUPS 32
107 #endif /* NGROUPS_MAX */
108 #endif /* NGROUPS */
109
110 #ifndef GETGROUPS_T
111 #define GETGROUPS_T int
112 #endif
113
114 #include <HTML.h> /* For directory object building */
115
/*
 * Shorthands that invoke the methods of the HTStructured stream named
 * "target" in the scope where they are used.
 */
#define PUTC(c) (*target->isa->put_character)(target, c)
#define PUTS(s) (*target->isa->put_string)(target, s)
#define START(e) (*target->isa->start_element)(target, e, 0, 0, -1, 0)
#define END(e) (*target->isa->end_element)(target, e, 0)
/* End the element only when the DTD does not declare it as SGML_EMPTY. */
#define MAYBE_END(e) if (HTML_dtd.tags[e].contents != SGML_EMPTY) \
        (*target->isa->end_element)(target, e, 0)
#define FREE_TARGET (*target->isa->_free)(target)
/* Unlike the others, this goes through "targetClass" and already ends in ';'. */
#define ABORT_TARGET (*targetClass._abort)(target, NULL);
124
/*
 * Local declaration of the structured-stream object; only the class
 * pointer through which the stream methods are called is spelled out.
 */
struct _HTStructured {
    const HTStructuredClass *isa;       /* method table of the stream */
    /* ... */
};
129
/*
 * Controlling globals.
 */
int HTDirAccess = HT_DIR_OK;    /* initial directory access setting */

/*
 * Initial README setting for directory listings; the value depends on
 * whether DIRED_SUPPORT is compiled in.
 */
#ifdef DIRED_SUPPORT
int HTDirReadme = HT_DIR_README_NONE;

#else
int HTDirReadme = HT_DIR_README_TOP;
#endif /* DIRED_SUPPORT */

/*
 * Path prefix under which other hosts' files are assumed to be mounted;
 * WWW_nameOfFile() strips it again when building a file URL.
 */
static const char *HTMountRoot = "/Net/";       /* Where to find mounts */

/*
 * Leading part of the names built by HTCacheFileName(); on VMS it is
 * also the fallback used by HTnameOfFile_WWW() when HOME is not set.
 */
#ifdef VMS
static const char *HTCacheRoot = "/WWW$SCRATCH";        /* Where to cache things */

#else
static const char *HTCacheRoot = "/tmp/W3_Cache_";      /* Where to cache things */
#endif /* VMS */
150
/*
 * Special suffix strings: registering "*" or "*.*" with HTSetSuffix5()
 * updates no_suffix or unknown_suffix below rather than the list.
 */
static char s_no_suffix[] = "*";
static char s_unknown_suffix[] = "*.*";

/*
 * Suffix registration.
 */
static HTList *HTSuffixes = 0;  /* list of HTSuffix, created on first registration */

/* Default used by HTFileFormat() when no registered suffix matches. */
static HTSuffix no_suffix =
{
    s_no_suffix, NULL, NULL, NULL, 1.0
};

/*
 * Default used by HTFileFormat() for unmatched names that contain a '.',
 * provided a representation has been registered for it.
 */
static HTSuffix unknown_suffix =
{
    s_unknown_suffix, NULL, NULL, NULL, 1.0
};
168
169 /* To free up the suffixes at program exit.
170 * ----------------------------------------
171 */
172 #ifdef LY_FIND_LEAKS
173 static void free_suffixes(void);
174 #endif
175
/*
 * Find where the query ("search") part of a file name begins.
 *
 * A literal '?' anywhere in the name takes precedence; only when there
 * is none is the escaped form "%3F" looked for.
 *
 * Returns a pointer into filename, or NULL if neither is present.
 */
static char *FindSearch(const char *filename)
{
    char *query = strchr(filename, '?');

    if (query != 0)
        return query;

    return strstr(filename, "%3F");
}
185
186 #ifdef LONG_LIST
/*
 * Put a string field of a listing line into *bufp.
 *
 * start holds the flags/width/precision text found between '%' and the
 * directive letter.  If it is not empty, the field is produced with a
 * "%<start>s" conversion.  If it is empty, the entry is copied as it is;
 * a NULL or empty entry just empties an existing buffer.
 *
 * Returns *bufp.
 */
static char *FormatStr(char **bufp,
                       char *start,
                       const char *entry)
{
    if (*start != '\0') {
        char spec[512];

        /* the precision keeps '%' + modifiers + 's' + NUL inside spec[] */
        sprintf(spec, "%%%.*ss", (int) sizeof(spec) - 3, start);
        HTSprintf0(bufp, spec, entry);
        return *bufp;
    }

    if (*bufp != NULL && (entry == NULL || *entry == '\0')) {
        **bufp = '\0';
    } else if (entry != NULL) {
        StrAllocCopy(*bufp, entry);
    }
    return *bufp;
}
203
FormatSize(char ** bufp,char * start,off_t entry)204 static char *FormatSize(char **bufp,
205 char *start,
206 off_t entry)
207 {
208 char fmt[512];
209
210 if (*start) {
211 sprintf(fmt, "%%%.*s" PRI_off_t,
212 (int) sizeof(fmt) - DigitsOf(start) - 3, start);
213
214 HTSprintf0(bufp, fmt, entry);
215 } else {
216 sprintf(fmt, "%" PRI_off_t, CAST_off_t (entry));
217
218 StrAllocCopy(*bufp, fmt);
219 }
220 return *bufp;
221 }
222
/*
 * Put an integer field of a listing line into *bufp.
 *
 * start holds the flags/width/precision text found between '%' and the
 * directive letter.  If it is empty the number is printed plainly,
 * otherwise with a "%<start>d" conversion.
 *
 * Returns *bufp.
 */
static char *FormatNum(char **bufp,
                       char *start,
                       int entry)
{
    char text[512];

    if (*start == '\0') {
        sprintf(text, "%d", entry);
        StrAllocCopy(*bufp, text);
    } else {
        /* text[] first holds the conversion specification itself */
        sprintf(text, "%%%.*sd", (int) sizeof(text) - 3, start);
        HTSprintf0(bufp, text, entry);
    }
    return *bufp;
}
238
/*
 * Write one line of a long-format directory listing to target.
 *
 * On entry,
 *	fmtstr	is the list format (it is copied, the original is not changed)
 *	data	is the entry: its stat data and its name
 *	file	is the path of the entry, used for readlink() and to look up
 *		the file format
 *	target	is the structured stream which receives the output
 *	tail	is passed on to HTDirEntry() when the anchor is started
 *
 * Text outside of directives is written as it is.  A directive is a '%',
 * optional flag/width/precision characters which are handed on to a
 * printf-style conversion, and one of these letters:
 *
 *	a, A	the entry name as an anchor; 'a' also appends " -> target"
 *		for symbolic links
 *	t, T	the description of the MIME type, or the MIME type itself
 *	d	modification date
 *	s	size in bytes
 *	k, K	size in kilobytes ('K' shows nothing for directories)
 *	p	unix-style permission bits
 *	o, g	owner and group (nothing if NOUSERS is defined)
 *	l	link count
 *	%	a percent sign, formatted per the flags/width
 *
 * If st_mode of the entry is 0 the format " %a" is used instead.
 * A newline is written after the formatted text.
 */
static void LYListFmtParse(const char *fmtstr,
                           DIRED * data,
                           char *file,
                           HTStructured * target,
                           char *tail)
{
    char c;
    char *s;
    char *end;
    char *start;
    char *str = NULL;
    char *buf = NULL;
    char tmp[LY_MAXPATH];
    char type;

#ifndef NOUSERS
    const char *name;
#endif
    time_t now;
    char *datestr;

#ifdef S_IFLNK
    int len;
#endif
#define SEC_PER_YEAR (60 * 60 * 24 * 365)

#ifdef _WINDOWS                 /* 1998/01/06 (Tue) 21:20:53 */
    static const char *pbits[] =
    {
        "---", "--x", "-w-", "-wx",
        "r--", "r-x", "rw-", "rwx",
        0};

#define PBIT(a, n, s) pbits[((a) >> (n)) & 0x7]

#else
    static const char *pbits[] =
    {"---", "--x", "-w-", "-wx",
     "r--", "r-x", "rw-", "rwx", 0};
    static const char *psbits[] =
    {"--S", "--s", "-wS", "-ws",
     "r-S", "r-s", "rwS", "rws", 0};

#define PBIT(a, n, s) (s) ? psbits[((a) >> (n)) & 0x7] : \
        pbits[((a) >> (n)) & 0x7]
#endif
#if defined(S_ISVTX) && !defined(_WINDOWS)
    static const char *ptbits[] =
    {"--T", "--t", "-wT", "-wt",
     "r-T", "r-t", "rwT", "rwt", 0};

#define PTBIT(a, s) (s) ? ptbits[(a) & 0x7] : pbits[(a) & 0x7]
#else
#define PTBIT(a, s) PBIT(a, 0, 0)
#endif

    if (data->file_info.st_mode == 0)
        fmtstr = " %a";         /* can't stat so just do anchor */

    StrAllocCopy(str, fmtstr);
    s = str;
    end = str + strlen(str);
    while (*s) {
        start = s;
        while (*s) {
            if (*s == '%') {
                if (*(s + 1) == '%')    /* literal % */
                    s++;
                else
                    break;
            }
            s++;
        }
        /* s is positioned either at a % or at \0 */
        *s = '\0';
        if (s > start) {        /* some literal chars. */
            PUTS(start);
        }
        if (s == end)
            break;
        start = ++s;
        while (isdigit(UCH(*s)) || *s == '.' || *s == '-' || *s == ' ' ||
               *s == '#' || *s == '+' || *s == '\'')
            s++;
        c = *s;                 /* the format char. or \0 */
        *s = '\0';

        /*
         * Forget the text of the previous directive.  Otherwise a directive
         * which produces nothing (an unknown letter, or owner/group when
         * NOUSERS is defined) would write the previous field a second time
         * at the end of this loop.
         */
        if (buf != NULL)
            *buf = '\0';

        switch (c) {
        case '\0':
            PUTS(start);
            continue;

        case 'A':
        case 'a':               /* anchor */
            HTDirEntry(target, tail, data->file_name);
            FormatStr(&buf, start, data->file_name);
            PUTS(buf);
            END(HTML_A);
            *buf = '\0';        /* already written, inside the anchor */
#ifdef S_IFLNK
            if (c != 'A' && S_ISLNK(data->file_info.st_mode) &&
                (len = (int) readlink(file, tmp, sizeof(tmp) - 1)) >= 0) {
                PUTS(" -> ");
                tmp[len] = '\0';        /* readlink() does not terminate */
                PUTS(tmp);
            }
#endif
            break;

        case 'T':               /* MIME type */
        case 't':               /* MIME type description */
            if (S_ISDIR(data->file_info.st_mode)) {
                if (c != 'T') {
                    FormatStr(&buf, start, ENTRY_IS_DIRECTORY);
                } else {
                    FormatStr(&buf, start, "");
                }
            } else {
                const char *cp2;
                HTFormat format;

                format = HTFileFormat(file, NULL, &cp2);

                if (c != 'T') {
                    if (cp2 == NULL) {
                        /*
                         * No description: show the subtype of application
                         * types without "x-", other types in full.  The
                         * '/' is part of the comparison, so that the 12
                         * characters skipped are known to be there.
                         */
                        if (!StrNCmp(HTAtom_name(format),
                                     "application/", 12)) {
                            cp2 = HTAtom_name(format) + 12;
                            if (!StrNCmp(cp2, "x-", 2))
                                cp2 += 2;
                        } else {
                            cp2 = HTAtom_name(format);
                        }
                    }
                    FormatStr(&buf, start, cp2);
                } else {
                    FormatStr(&buf, start, HTAtom_name(format));
                }
            }
            break;

        case 'd':               /* date */
            now = time(0);
            datestr = ctime(&data->file_info.st_mtime);
            if (datestr == NULL) {
                /*
                 * ctime() may fail for a time which it cannot represent;
                 * show an empty date rather than dereference NULL.
                 */
                tmp[0] = '\0';
            } else if ((now - data->file_info.st_mtime) < SEC_PER_YEAR / 2) {
                /*
                 * MMM DD HH:MM
                 */
                sprintf(tmp, "%.12s", datestr + 4);
            } else {
                /*
                 * MMM DD YYYY
                 */
                sprintf(tmp, "%.7s %.4s ", datestr + 4,
                        datestr + 20);
            }
            FormatStr(&buf, start, tmp);
            break;

        case 's':               /* size in bytes */
            FormatSize(&buf, start, data->file_info.st_size);
            break;

        case 'K':               /* size in Kilobytes but not for directories */
            if (S_ISDIR(data->file_info.st_mode)) {
                FormatStr(&buf, start, "");
                StrAllocCat(buf, " ");
                break;
            }
            /* FALL THROUGH */
        case 'k':               /* size in Kilobytes */
            FormatSize(&buf, start, ((data->file_info.st_size + 1023) / 1024));
            StrAllocCat(buf, "K");
            break;

        case 'p':               /* unix-style permission bits */
            switch (data->file_info.st_mode & S_IFMT) {
#if defined(_MSC_VER) && defined(_S_IFIFO)
            case _S_IFIFO:
                type = 'p';
                break;
#else
            case S_IFIFO:
                type = 'p';
                break;
#endif
            case S_IFCHR:
                type = 'c';
                break;
            case S_IFDIR:
                type = 'd';
                break;
            case S_IFREG:
                type = '-';
                break;
#ifdef S_IFBLK
            case S_IFBLK:
                type = 'b';
                break;
#endif
#ifdef S_IFLNK
            case S_IFLNK:
                type = 'l';
                break;
#endif
#ifdef S_IFSOCK
# ifdef S_IFIFO                 /* some older machines (e.g., apollo) have a conflict */
#  if S_IFIFO != S_IFSOCK
            case S_IFSOCK:
                type = 's';
                break;
#  endif
# else
            case S_IFSOCK:
                type = 's';
                break;
# endif
#endif /* S_IFSOCK */
            default:
                type = '?';
                break;
            }
#ifdef _WINDOWS
            sprintf(tmp, "%c%s", type,
                    PBIT(data->file_info.st_mode, 6, data->file_info.st_mode & S_IRWXU));
#else
            sprintf(tmp, "%c%s%s%s", type,
                    PBIT(data->file_info.st_mode, 6, data->file_info.st_mode & S_ISUID),
                    PBIT(data->file_info.st_mode, 3, data->file_info.st_mode & S_ISGID),
                    PTBIT(data->file_info.st_mode, data->file_info.st_mode & S_ISVTX));
#endif
            FormatStr(&buf, start, tmp);
            break;

        case 'o':               /* owner */
#ifndef NOUSERS
            name = HTAA_UidToName((int) data->file_info.st_uid);
            if (*name) {
                FormatStr(&buf, start, name);
            } else {
                /* no name known, show the number */
                FormatNum(&buf, start, (int) data->file_info.st_uid);
            }
#endif
            break;

        case 'g':               /* group */
#ifndef NOUSERS
            name = HTAA_GidToName((int) data->file_info.st_gid);
            if (*name) {
                FormatStr(&buf, start, name);
            } else {
                /* no name known, show the number */
                FormatNum(&buf, start, (int) data->file_info.st_gid);
            }
#endif
            break;

        case 'l':               /* link count */
            FormatNum(&buf, start, (int) data->file_info.st_nlink);
            break;

        case '%':               /* literal % with flags/width */
            FormatStr(&buf, start, "%");
            break;

        default:
            fprintf(stderr,
                    "Unknown format character `%c' in list format\n", c);
            break;
        }
        if (buf)
            PUTS(buf);

        s++;
    }
    FREE(buf);
    PUTC('\n');
    FREE(str);
}
516 #endif /* LONG_LIST */
517
518 /* Define the representation associated with a file suffix.
519 * --------------------------------------------------------
520 *
521 * Calling this with suffix set to "*" will set the default
522 * representation.
523 * Calling this with suffix set to "*.*" will set the default
524 * representation for unknown suffix files which contain a ".".
525 *
526 * The encoding parameter can give a trivial (8bit, 7bit, binary)
527 * or real (gzip, compress) encoding.
528 *
529 * If filename suffix is already defined with the same encoding
530 * its previous definition is overridden.
531 */
HTSetSuffix5(const char * suffix,const char * representation,const char * encoding,const char * desc,double value)532 void HTSetSuffix5(const char *suffix,
533 const char *representation,
534 const char *encoding,
535 const char *desc,
536 double value)
537 {
538 HTSuffix *suff;
539 BOOL trivial_enc = (BOOL) IsUnityEncStr(encoding);
540
541 if (strcmp(suffix, s_no_suffix) == 0)
542 suff = &no_suffix;
543 else if (strcmp(suffix, s_unknown_suffix) == 0)
544 suff = &unknown_suffix;
545 else {
546 HTList *cur = HTSuffixes;
547
548 while (NULL != (suff = (HTSuffix *) HTList_nextObject(cur))) {
549 if (suff->suffix && 0 == strcmp(suff->suffix, suffix) &&
550 ((trivial_enc && IsUnityEnc(suff->encoding)) ||
551 (!trivial_enc && !IsUnityEnc(suff->encoding) &&
552 strcmp(encoding, HTAtom_name(suff->encoding)) == 0)))
553 break;
554 }
555 if (!suff) { /* Not found -- create a new node */
556 suff = typecalloc(HTSuffix);
557 if (suff == NULL)
558 outofmem(__FILE__, "HTSetSuffix");
559
560 if (!HTSuffixes) {
561 HTSuffixes = HTList_new();
562 #ifdef LY_FIND_LEAKS
563 atexit(free_suffixes);
564 #endif
565 }
566
567 HTList_addObject(HTSuffixes, suff);
568
569 StrAllocCopy(suff->suffix, suffix);
570 }
571 }
572
573 if (representation)
574 suff->rep = HTAtom_for(representation);
575
576 /*
577 * Memory leak fixed.
578 * 05-28-94 Lynx 2-3-1 Garrett Arch Blythe
579 * Invariant code removed.
580 */
581 suff->encoding = HTAtom_for(encoding);
582
583 StrAllocCopy(suff->desc, desc);
584
585 suff->quality = (float) value;
586 }
587
588 #ifdef LY_FIND_LEAKS
589 /*
590 * Purpose: Free all added suffixes.
591 * Arguments: void
592 * Return Value: void
593 * Remarks/Portability/Dependencies/Restrictions:
594 * To be used at program exit.
595 * Revision History:
596 * 05-28-94 created Lynx 2-3-1 Garrett Arch Blythe
597 */
free_suffixes(void)598 static void free_suffixes(void)
599 {
600 HTSuffix *suff = NULL;
601
602 /*
603 * Loop through all suffixes.
604 */
605 while (!HTList_isEmpty(HTSuffixes)) {
606 /*
607 * Free off each item and its members if need be.
608 */
609 suff = (HTSuffix *) HTList_removeLastObject(HTSuffixes);
610 FREE(suff->suffix);
611 FREE(suff->desc);
612 FREE(suff);
613 }
614 /*
615 * Free off the list itself.
616 */
617 HTList_delete(HTSuffixes);
618 HTSuffixes = NULL;
619 }
620 #endif /* LY_FIND_LEAKS */
621
622 /* Make the cache file name for a W3 document.
623 * -------------------------------------------
624 * Make up a suitable name for saving the node in
625 *
626 * E.g. /tmp/WWW_Cache_news/1234@cernvax.cern.ch
627 * /tmp/WWW_Cache_http/crnvmc/FIND/xx.xxx.xx
628 *
629 * On exit:
630 * Returns a malloc'ed string which must be freed by the caller.
631 */
HTCacheFileName(const char * name)632 char *HTCacheFileName(const char *name)
633 {
634 char *acc_method = HTParse(name, "", PARSE_ACCESS);
635 char *host = HTParse(name, "", PARSE_HOST);
636 char *path = HTParse(name, "", PARSE_PATH + PARSE_PUNCTUATION);
637 char *result = NULL;
638
639 HTSprintf0(&result, "%s/WWW/%s/%s%s", HTCacheRoot, acc_method, host, path);
640
641 FREE(path);
642 FREE(acc_method);
643 FREE(host);
644 return result;
645 }
646
647 /* Open a file for write, creating the path.
648 * -----------------------------------------
649 */
650 #ifdef NOT_IMPLEMENTED
/*
 * Placeholder: nothing is created here, failure (-1) is reported for
 * every path.
 */
static int HTCreatePath(const char *path)
{
    (void) path;                /* not looked at */

    return -1;
}
655 #endif /* NOT_IMPLEMENTED */
656
657 /* Convert filename from URL-path syntax to local path format
658 * ----------------------------------------------------------
659 * Input name is assumed to be the URL-path of a local file
660 * URL, i.e. what comes after the "file://localhost".
661 * '#'-fragments to be treated as such must already be stripped.
662 * If expand_all is FALSE, unescape only escaped '/'. - kw
663 *
664 * On exit:
665 * Returns a malloc'ed string which must be freed by the caller.
666 */
HTURLPath_toFile(const char * name,int expand_all,int is_remote GCC_UNUSED)667 char *HTURLPath_toFile(const char *name,
668 int expand_all,
669 int is_remote GCC_UNUSED)
670 {
671 char *path = NULL;
672 char *result = NULL;
673
674 StrAllocCopy(path, name);
675 if (expand_all)
676 HTUnEscape(path); /* Interpret all % signs */
677 else
678 HTUnEscapeSome(path, "/"); /* Interpret % signs for path delims */
679
680 CTRACE((tfp, "URLPath `%s' means path `%s'\n", name, path));
681 #if defined(USE_DOS_DRIVES)
682 StrAllocCopy(result, is_remote ? path : HTDOS_name(path));
683 #else
684 StrAllocCopy(result, path);
685 #endif
686
687 FREE(path);
688
689 return result;
690 }
691 /* Convert filenames between local and WWW formats.
692 * ------------------------------------------------
693 * Make up a suitable name for saving the node in
694 *
695 * E.g. $(HOME)/WWW/news/1234@cernvax.cern.ch
696 * $(HOME)/WWW/http/crnvmc/FIND/xx.xxx.xx
697 *
698 * On exit:
699 * Returns a malloc'ed string which must be freed by the caller.
700 */
701 /* NOTE: Don't use this function if you know that the input is a URL path
702 rather than a full URL, use HTURLPath_toFile instead. Otherwise
703 this function will return the wrong thing for some unusual
704 paths (like ones containing "//", possibly escaped). - kw
705 */
HTnameOfFile_WWW(const char * name,int WWW_prefix,int expand_all)706 char *HTnameOfFile_WWW(const char *name,
707 int WWW_prefix,
708 int expand_all)
709 {
710 char *acc_method = HTParse(name, "", PARSE_ACCESS);
711 char *host = HTParse(name, "", PARSE_HOST);
712 char *path = HTParse(name, "", PARSE_PATH + PARSE_PUNCTUATION);
713 const char *home;
714 char *result = NULL;
715
716 if (expand_all) {
717 HTUnEscape(path); /* Interpret all % signs */
718 } else
719 HTUnEscapeSome(path, "/"); /* Interpret % signs for path delims */
720
721 if (0 == strcmp(acc_method, "file") /* local file */
722 ||!*acc_method) { /* implicitly local? */
723 if ((0 == strcasecomp(host, HTHostName())) ||
724 (0 == strcasecomp(host, "localhost")) || !*host) {
725 CTRACE((tfp, "Node `%s' means path `%s'\n", name, path));
726 StrAllocCopy(result, HTSYS_name(path));
727 } else if (WWW_prefix) {
728 HTSprintf0(&result, "%s%s%s", "/Net/", host, path);
729 CTRACE((tfp, "Node `%s' means file `%s'\n", name, result));
730 } else {
731 StrAllocCopy(result, path);
732 }
733 } else if (WWW_prefix) { /* other access */
734 #ifdef VMS
735 if ((home = LYGetEnv("HOME")) == NULL)
736 home = HTCacheRoot;
737 else
738 home = HTVMS_wwwName(home);
739 #else
740 #if defined(_WINDOWS) /* 1997/10/16 (Thu) 20:42:51 */
741 home = Home_Dir();
742 #else
743 home = LYGetEnv("HOME");
744 #endif
745 if (home == NULL)
746 home = "/tmp";
747 #endif /* VMS */
748 HTSprintf0(&result, "%s/WWW/%s/%s%s", home, acc_method, host, path);
749 } else {
750 StrAllocCopy(result, path);
751 }
752
753 FREE(host);
754 FREE(path);
755 FREE(acc_method);
756
757 CTRACE((tfp, "HTnameOfFile_WWW(%s,%d,%d) = %s\n",
758 name, WWW_prefix, expand_all, result));
759
760 return result;
761 }
762
763 /* Make a WWW name from a full local path name.
764 * --------------------------------------------
765 *
766 * Bugs:
767 * At present, only the names of two network root nodes are hand-coded
768 * in and valid for the NeXT only. This should be configurable in
769 * the general case.
770 */
WWW_nameOfFile(const char * name)771 char *WWW_nameOfFile(const char *name)
772 {
773 char *result = NULL;
774
775 #ifdef NeXT
776 if (0 == StrNCmp("/private/Net/", name, 13)) {
777 HTSprintf0(&result, "%s//%s", STR_FILE_URL, name + 13);
778 } else
779 #endif /* NeXT */
780 if (0 == StrNCmp(HTMountRoot, name, 5)) {
781 HTSprintf0(&result, "%s//%s", STR_FILE_URL, name + 5);
782 } else {
783 HTSprintf0(&result, "%s//%s%s", STR_FILE_URL, HTHostName(), name);
784 }
785 CTRACE((tfp, "File `%s'\n\tmeans node `%s'\n", name, result));
786 return result;
787 }
788
789 /* Determine a suitable suffix, given the representation.
790 * ------------------------------------------------------
791 *
792 * On entry,
793 * rep is the atomized MIME style representation
794 * enc is an encoding, trivial (8bit, binary, etc.) or gzip etc.
795 *
796 * On exit:
797 * Returns a pointer to a suitable suffix string if one has been
798 * found, else "".
799 */
HTFileSuffix(HTAtom * rep,const char * enc)800 const char *HTFileSuffix(HTAtom *rep,
801 const char *enc)
802 {
803 HTSuffix *suff;
804
805 #ifdef FNAMES_8_3
806 HTSuffix *first_found = NULL;
807 #endif
808 BOOL trivial_enc;
809 int n;
810 int i;
811
812 #define NO_INIT /* don't init anymore since I do it in Lynx at startup */
813 #ifndef NO_INIT
814 if (!HTSuffixes)
815 HTFileInit();
816 #endif /* !NO_INIT */
817
818 trivial_enc = (BOOL) IsUnityEncStr(enc);
819 n = HTList_count(HTSuffixes);
820 for (i = 0; i < n; i++) {
821 suff = (HTSuffix *) HTList_objectAt(HTSuffixes, i);
822 if (suff->rep == rep &&
823 #if defined(VMS) || defined(FNAMES_8_3)
824 /* Don't return a suffix whose first char is a dot, and which
825 has more dots or asterisks after that, for
826 these systems - kw */
827 (!suff->suffix || !suff->suffix[0] || suff->suffix[0] != '.' ||
828 (StrChr(suff->suffix + 1, '.') == NULL &&
829 StrChr(suff->suffix + 1, '*') == NULL)) &&
830 #endif
831 ((trivial_enc && IsUnityEnc(suff->encoding)) ||
832 (!trivial_enc && !IsUnityEnc(suff->encoding) &&
833 strcmp(enc, HTAtom_name(suff->encoding)) == 0))) {
834 #ifdef FNAMES_8_3
835 if (suff->suffix && (strlen(suff->suffix) <= 4)) {
836 /*
837 * If length of suffix (including dot) is 4 or smaller, return
838 * this one even if we found a longer one earlier - kw
839 */
840 return suff->suffix;
841 } else if (!first_found) {
842 first_found = suff; /* remember this one */
843 }
844 #else
845 return suff->suffix; /* OK -- found */
846 #endif
847 }
848 }
849 #ifdef FNAMES_8_3
850 if (first_found)
851 return first_found->suffix;
852 #endif
853 return ""; /* Dunno */
854 }
855
/*
 * Trim version from VMS filenames to avoid confusing comparisons.
 *
 * A name without ';' is returned as it is.  Otherwise the result is a
 * copy cut off at the first ';'.  The copy is kept in static storage
 * which is used again by the next call, so the caller must not free it.
 *
 * VMS_DEL_VERSION(name) replaces name by its trimmed form on VMS, and
 * does nothing elsewhere.
 */
#ifdef VMS
static const char *VMS_trim_version(const char *filename)
{
    static char *stripped;
    const char *semicolon = StrChr(filename, ';');

    if (semicolon == 0)
        return filename;

    StrAllocCopy(stripped, filename);
    stripped[semicolon - filename] = '\0';
    return (const char *) stripped;
}
#define VMS_DEL_VERSION(name) name = VMS_trim_version(name)
#else
#define VMS_DEL_VERSION(name)   /* nothing */
#endif
878
879 /* Determine file format from file name.
880 * -------------------------------------
881 *
882 * This version will return the representation and also set
883 * a variable for the encoding.
884 *
885 * Encoding may be a unity encoding (binary, 8bit, etc.) or
886 * a content-coding like gzip, compress.
887 *
888 * It will handle for example x.txt, x.txt,Z, x.Z
889 */
HTFileFormat(const char * filename,HTAtom ** pencoding,const char ** pdesc)890 HTFormat HTFileFormat(const char *filename,
891 HTAtom **pencoding,
892 const char **pdesc)
893 {
894 HTSuffix *suff;
895 int n;
896 int i;
897 int lf;
898 char *search;
899
900 VMS_DEL_VERSION(filename);
901
902 if ((search = FindSearch(filename)) != 0) {
903 char *newname = NULL;
904 HTFormat result;
905
906 StrAllocCopy(newname, filename);
907 newname[((const char *) search) - filename] = '\0';
908 result = HTFileFormat(newname, pencoding, pdesc);
909 free(newname);
910 return result;
911 }
912
913 if (pencoding)
914 *pencoding = NULL;
915 if (pdesc)
916 *pdesc = NULL;
917 if (LYforce_HTML_mode) {
918 if (pencoding)
919 *pencoding = WWW_ENC_8BIT;
920 return WWW_HTML;
921 }
922 #ifndef NO_INIT
923 if (!HTSuffixes)
924 HTFileInit();
925 #endif /* !NO_INIT */
926 lf = (int) strlen(filename);
927 n = HTList_count(HTSuffixes);
928 for (i = 0; i < n; i++) {
929 int ls;
930
931 suff = (HTSuffix *) HTList_objectAt(HTSuffixes, i);
932 ls = (int) strlen(suff->suffix);
933 if ((ls <= lf) && 0 == strcasecomp(suff->suffix, filename + lf - ls)) {
934 int j;
935
936 if (pencoding)
937 *pencoding = suff->encoding;
938 if (pdesc)
939 *pdesc = suff->desc;
940 if (suff->rep) {
941 return suff->rep; /* OK -- found */
942 }
943 for (j = 0; j < n; j++) { /* Got encoding, need representation */
944 int ls2;
945
946 suff = (HTSuffix *) HTList_objectAt(HTSuffixes, j);
947 ls2 = (int) strlen(suff->suffix);
948 if ((ls + ls2 <= lf) &&
949 !strncasecomp(suff->suffix,
950 filename + lf - ls - ls2, ls2)) {
951 if (suff->rep) {
952 if (pdesc && !(*pdesc))
953 *pdesc = suff->desc;
954 if (pencoding && IsUnityEnc(*pencoding) &&
955 *pencoding != WWW_ENC_7BIT &&
956 !IsUnityEnc(suff->encoding))
957 *pencoding = suff->encoding;
958 return suff->rep;
959 }
960 }
961 }
962
963 }
964 }
965
966 /* defaults tree */
967
968 suff = (StrChr(filename, '.')
969 ? (unknown_suffix.rep
970 ? &unknown_suffix
971 : &no_suffix)
972 : &no_suffix);
973
974 /*
975 * Set default encoding unless found with suffix already.
976 */
977 if (pencoding && !*pencoding) {
978 *pencoding = (suff->encoding
979 ? suff->encoding
980 : HTAtom_for("binary"));
981 }
982 return suff->rep ? suff->rep : WWW_BINARY;
983 }
984
/*	Revise the file format in relation to the Lynx charset. - FM
 *	-------------------------------------------------------
 *
 *	This checks the format associated with an anchor for
 *	an extended MIME Content-Type, and if a charset is
 *	indicated, sets Lynx up for proper handling in relation
 *	to the currently selected character set. - FM
 *
 * On entry,
 *	format		is the atom whose name is the Content-Type, with
 *			or without parameters after a ';'
 *	anchor		is the anchor of the document; its charset is
 *			freed here, and set again if one is accepted
 *	default_LYhndl	is the charset handle to record for the MIME stage
 *			if no charset was established here; ignored if < 0
 *
 * On exit:
 *	Returns format, or the atom for the type without its parameters if
 *	these were stripped.  The globals HTPassEightBitRaw and HTCJK may
 *	have been changed, and HTAlert() is called for a charset which
 *	cannot be translated.
 */
HTFormat HTCharsetFormat(HTFormat format,
                         HTParentAnchor *anchor,
                         int default_LYhndl)
{
    char *cp = NULL, *cp1, *cp2, *cp3 = NULL, *cp4;
    BOOL chartrans_ok = FALSE;
    int chndl = -1;
    const char *format_name = format->name;

    FREE(anchor->charset);
    if (format_name == 0)
        format_name = "";
    /* work on a lowercased copy; cp1 will point at the ';' within it */
    StrAllocCopy(cp, format_name);
    LYLowerCase(cp);
    if (((cp1 = StrChr(cp, ';')) != NULL) &&
        (cp2 = strstr(cp1, "charset")) != NULL) {
        CTRACE((tfp, "HTCharsetFormat: Extended MIME Content-Type is %s\n",
                format_name));
        cp2 += 7;               /* skip over "charset" */
        while (*cp2 == ' ' || *cp2 == '=')
            cp2++;
        StrAllocCopy(cp3, cp2); /* copy to mutilate more */
        /* cut the copy at the first character which ends the value */
        for (cp4 = cp3; (*cp4 != '\0' && *cp4 != '"' &&
                         *cp4 != ';' && *cp4 != ':' &&
                         !WHITE(*cp4)); cp4++) {
            ;                   /* do nothing */
        }
        *cp4 = '\0';
        cp4 = cp3;              /* from here on cp4 is the charset name */
        chndl = UCGetLYhndl_byMIME(cp3);
        if (UCCanTranslateFromTo(chndl, current_char_set)) {
            chartrans_ok = YES;
            *cp1 = '\0';        /* drop the parameters from the type */
            format = HTAtom_for(cp);
            StrAllocCopy(anchor->charset, cp4);
            HTAnchor_setUCInfoStage(anchor, chndl,
                                    UCT_STAGE_MIME,
                                    UCT_SETBY_MIME);
        } else if (chndl < 0) {
            /*
             * Got something but we don't recognize it.
             */
            chndl = UCLYhndl_for_unrec;
            if (chndl < 0)
                /*
                 * UCLYhndl_for_unrec not defined :-( fallback to
                 * UCLYhndl_for_unspec which always valid.
                 */
                chndl = UCLYhndl_for_unspec;    /* always >= 0 */
            if (UCCanTranslateFromTo(chndl, current_char_set)) {
                chartrans_ok = YES;
                HTAnchor_setUCInfoStage(anchor, chndl,
                                        UCT_STAGE_MIME,
                                        UCT_SETBY_DEFAULT);
            }
        }
        if (chartrans_ok) {
            /* p_in: charset info of the input, p_out: of the display */
            LYUCcharset *p_in = HTAnchor_getUCInfoStage(anchor,
                                                        UCT_STAGE_MIME);
            LYUCcharset *p_out = HTAnchor_setUCInfoStage(anchor,
                                                         current_char_set,
                                                         UCT_STAGE_HTEXT,
                                                         UCT_SETBY_DEFAULT);

            if (!p_out) {
                /*
                 * Try again.
                 */
                p_out = HTAnchor_getUCInfoStage(anchor, UCT_STAGE_HTEXT);
            }
            /* if either side is "x-transparent", give it the other's charset */
            if (!strcmp(p_in->MIMEname, "x-transparent")) {
                HTPassEightBitRaw = TRUE;
                HTAnchor_setUCInfoStage(anchor,
                                        HTAnchor_getUCLYhndl(anchor,
                                                             UCT_STAGE_HTEXT),
                                        UCT_STAGE_MIME,
                                        UCT_SETBY_DEFAULT);
            }
            if (!strcmp(p_out->MIMEname, "x-transparent")) {
                HTPassEightBitRaw = TRUE;
                HTAnchor_setUCInfoStage(anchor,
                                        HTAnchor_getUCLYhndl(anchor,
                                                             UCT_STAGE_MIME),
                                        UCT_STAGE_HTEXT,
                                        UCT_SETBY_DEFAULT);
            }
            if (p_in->enc != UCT_ENC_CJK) {
                HTCJK = NOCJK;
                if (!(p_in->codepoints &
                      UCT_CP_SUBSETOF_LAT1) &&
                    chndl == current_char_set) {
                    HTPassEightBitRaw = TRUE;
                }
            } else if (p_out->enc == UCT_ENC_CJK) {
                Set_HTCJK(p_in->MIMEname, p_out->MIMEname);
            }
        } else {
            /*
             * Cannot translate.  If according to some heuristic the given
             * charset and the current display character both are likely to be
             * like ISO-8859 in structure, pretend we have some kind of match.
             */
            BOOL given_is_8859 = (BOOL) (!StrNCmp(cp4, "iso-8859-", 9) &&
                                         isdigit(UCH(cp4[9])));
            BOOL given_is_8859like = (BOOL) (given_is_8859 ||
                                             !StrNCmp(cp4, "windows-", 8) ||
                                             !StrNCmp(cp4, "cp12", 4) ||
                                             !StrNCmp(cp4, "cp-12", 5));
            BOOL given_and_display_8859like = (BOOL) (given_is_8859like &&
                                                      (strstr(LYchar_set_names[current_char_set],
                                                              "ISO-8859") ||
                                                       strstr(LYchar_set_names[current_char_set],
                                                              "windows-")));

            if (given_and_display_8859like) {
                *cp1 = '\0';    /* drop the parameters from the type */
                format = HTAtom_for(cp);
            }
            if (given_is_8859) {
                /* cut the name after the digits which follow "iso-8859-" */
                cp1 = &cp4[10];
                while (*cp1 &&
                       isdigit(UCH(*cp1)))
                    cp1++;
                *cp1 = '\0';
            }
            if (given_and_display_8859like) {
                StrAllocCopy(anchor->charset, cp4);
                HTPassEightBitRaw = TRUE;
            }
            /*
             * NOTE(review): if the charset name is empty and anchor->charset
             * was not set just above, this passes NULL -- confirm that
             * HTAlert() copes with that.
             */
            HTAlert(*cp4 ? cp4 : anchor->charset);
        }
        FREE(cp3);
    } else if (cp1 != NULL) {
        /*
         * No charset parameter is present.  Ignore all other parameters, as we
         * do when charset is present.  - FM
         */
        *cp1 = '\0';
        format = HTAtom_for(cp);
    }
    FREE(cp);

    /*
     * Set up defaults, if needed.  - FM
     */
    if (!chartrans_ok && !anchor->charset && default_LYhndl >= 0) {
        HTAnchor_setUCInfoStage(anchor, default_LYhndl,
                                UCT_STAGE_MIME,
                                UCT_SETBY_DEFAULT);
    }
    /* pass what was established for the MIME stage on to the parser stage */
    HTAnchor_copyUCInfoStage(anchor,
                             UCT_STAGE_PARSER,
                             UCT_STAGE_MIME,
                             -1);

    return format;
}
1150
1151 /* Get various pieces of meta info from file name.
1152 * -----------------------------------------------
1153 *
1154 * LYGetFileInfo fills in information that can be determined without
1155 * an actual (new) access to the filesystem, based on current suffix
1156 * and character set configuration. If the file has been loaded and
1157 * parsed before (with the same URL generated here!) and the anchor
1158 * is still around, some results may be influenced by that (in
1159 * particular, charset info from a META tag - this is not actually
1160 * tested!).
1161 * The caller should not keep pointers to the returned objects around
1162 * for too long, the valid lifetimes vary. In particular, the returned
1163 * charset string should be copied if necessary. If return of the
1164 * file_anchor is requested, that one can be used to retrieve
1165 * additional bits of info that are stored in the anchor object and
1166 * are not covered here; as usual, don't keep pointers to the
1167 * file_anchor longer than necessary since the object may disappear
1168 * through HTuncache_current_document or at the next document load.
1169 * - kw
1170 */
LYGetFileInfo(const char * filename,HTParentAnchor ** pfile_anchor,HTFormat * pformat,HTAtom ** pencoding,const char ** pdesc,const char ** pcharset,int * pfile_cs)1171 void LYGetFileInfo(const char *filename,
1172 HTParentAnchor **pfile_anchor,
1173 HTFormat *pformat,
1174 HTAtom **pencoding,
1175 const char **pdesc,
1176 const char **pcharset,
1177 int *pfile_cs)
1178 {
1179 char *Afn;
1180 char *Aname = NULL;
1181 HTFormat format;
1182 HTAtom *myEnc = NULL;
1183 HTParentAnchor *file_anchor;
1184 const char *file_csname;
1185 int file_cs;
1186
1187 /*
1188 * Convert filename to URL. Note that it is always supposed to be a
1189 * filename, not maybe-filename-maybe-URL, so we don't use
1190 * LYFillLocalFileURL and LYEnsureAbsoluteURL. - kw
1191 */
1192 Afn = HTEscape(filename, URL_PATH);
1193 LYLocalFileToURL(&Aname, Afn);
1194 file_anchor = HTAnchor_findSimpleAddress(Aname);
1195
1196 format = HTFileFormat(filename, &myEnc, pdesc);
1197 format = HTCharsetFormat(format, file_anchor, UCLYhndl_HTFile_for_unspec);
1198 file_cs = HTAnchor_getUCLYhndl(file_anchor, UCT_STAGE_MIME);
1199 file_csname = file_anchor->charset;
1200 if (!file_csname) {
1201 if (file_cs >= 0)
1202 file_csname = LYCharSet_UC[file_cs].MIMEname;
1203 else
1204 file_csname = "display character set";
1205 }
1206 CTRACE((tfp, "GetFileInfo: '%s' is a%s %s %s file, charset=%s (%d).\n",
1207 filename,
1208 ((myEnc && *HTAtom_name(myEnc) == '8') ? "n" : myEnc ? "" :
1209 *HTAtom_name(format) == 'a' ? "n" : ""),
1210 myEnc ? HTAtom_name(myEnc) : "",
1211 HTAtom_name(format),
1212 file_csname,
1213 file_cs));
1214 FREE(Afn);
1215 FREE(Aname);
1216 if (pfile_anchor)
1217 *pfile_anchor = file_anchor;
1218 if (pformat)
1219 *pformat = format;
1220 if (pencoding)
1221 *pencoding = myEnc;
1222 if (pcharset)
1223 *pcharset = file_csname;
1224 if (pfile_cs)
1225 *pfile_cs = file_cs;
1226 }
1227
1228 /* Determine value from file name.
1229 * -------------------------------
1230 *
1231 */
HTFileValue(const char * filename)1232 float HTFileValue(const char *filename)
1233 {
1234 HTSuffix *suff;
1235 int n;
1236 int i;
1237 int lf = (int) strlen(filename);
1238
1239 #ifndef NO_INIT
1240 if (!HTSuffixes)
1241 HTFileInit();
1242 #endif /* !NO_INIT */
1243 n = HTList_count(HTSuffixes);
1244 for (i = 0; i < n; i++) {
1245 int ls;
1246
1247 suff = (HTSuffix *) HTList_objectAt(HTSuffixes, i);
1248 ls = (int) strlen(suff->suffix);
1249 if ((ls <= lf) && 0 == strcmp(suff->suffix, filename + lf - ls)) {
1250 CTRACE((tfp, "File: Value of %s is %.3f\n",
1251 filename, suff->quality));
1252 return suff->quality; /* OK -- found */
1253 }
1254 }
1255 return (float) 0.3; /* Dunno! */
1256 }
1257
1258 /*
1259 * Determine compression type from file name, by looking at its suffix.
1260 * Sets as side-effect a pointer to the "dot" that begins the suffix.
1261 */
HTCompressFileType(const char * filename,const char * dots,int * rootlen)1262 CompressFileType HTCompressFileType(const char *filename,
1263 const char *dots,
1264 int *rootlen)
1265 {
1266 CompressFileType result = cftNone;
1267 char *search;
1268
1269 if ((search = FindSearch(filename)) != 0) {
1270 char *newname = NULL;
1271
1272 StrAllocCopy(newname, filename);
1273 newname[((const char *) search) - filename] = '\0';
1274 result = HTCompressFileType(newname, dots, rootlen);
1275 free(newname);
1276 } else {
1277 size_t len;
1278 const char *ftype;
1279
1280 VMS_DEL_VERSION(filename);
1281 len = strlen(filename);
1282 ftype = filename + len;
1283
1284 if ((len > 4)
1285 && !strcasecomp((ftype - 3), "bz2")
1286 && StrChr(dots, ftype[-4]) != 0) {
1287 result = cftBzip2;
1288 ftype -= 4;
1289 } else if ((len > 3)
1290 && !strcasecomp((ftype - 2), "gz")
1291 && StrChr(dots, ftype[-3]) != 0) {
1292 result = cftGzip;
1293 ftype -= 3;
1294 } else if ((len > 3)
1295 && !strcasecomp((ftype - 2), "zz")
1296 && StrChr(dots, ftype[-3]) != 0) {
1297 result = cftDeflate;
1298 ftype -= 3;
1299 } else if ((len > 2)
1300 && !strcmp((ftype - 1), "Z")
1301 && StrChr(dots, ftype[-2]) != 0) {
1302 result = cftCompress;
1303 ftype -= 2;
1304 }
1305
1306 *rootlen = (int) (ftype - filename);
1307
1308 CTRACE((tfp, "HTCompressFileType(%s) returns %d:%s\n",
1309 filename, (int) result, filename + *rootlen));
1310 }
1311 return result;
1312 }
1313
1314 /*
1315 * Determine expected file-suffix from the compression method.
1316 */
HTCompressTypeToSuffix(CompressFileType method)1317 const char *HTCompressTypeToSuffix(CompressFileType method)
1318 {
1319 const char *result = "";
1320
1321 switch (method) {
1322 default:
1323 case cftNone:
1324 result = "";
1325 break;
1326 case cftGzip:
1327 result = ".gz";
1328 break;
1329 case cftCompress:
1330 result = ".Z";
1331 break;
1332 case cftBzip2:
1333 result = ".bz2";
1334 break;
1335 case cftDeflate:
1336 result = ".zz";
1337 break;
1338 }
1339 return result;
1340 }
1341
1342 /*
1343 * Determine compression encoding from the compression method.
1344 */
HTCompressTypeToEncoding(CompressFileType method)1345 const char *HTCompressTypeToEncoding(CompressFileType method)
1346 {
1347 const char *result = NULL;
1348
1349 switch (method) {
1350 default:
1351 case cftNone:
1352 result = NULL;
1353 break;
1354 case cftGzip:
1355 result = "gzip";
1356 break;
1357 case cftCompress:
1358 result = "compress";
1359 break;
1360 case cftBzip2:
1361 result = "bzip2";
1362 break;
1363 case cftDeflate:
1364 result = "deflate";
1365 break;
1366 }
1367 return result;
1368 }
1369
1370 /*
1371 * Check if the token from "Content-Encoding" corresponds to a compression
1372 * type. RFC 2068 (and cut/paste into RFC 2616) lists these:
1373 * gzip
1374 * compress
1375 * deflate
1376 * as well as "identity" (but that does nothing).
1377 */
HTEncodingToCompressType(const char * coding)1378 CompressFileType HTEncodingToCompressType(const char *coding)
1379 {
1380 CompressFileType result = cftNone;
1381
1382 if (coding == NULL) {
1383 result = cftNone;
1384 } else if (!strcasecomp(coding, "gzip") ||
1385 !strcasecomp(coding, "x-gzip")) {
1386 result = cftGzip;
1387 } else if (!strcasecomp(coding, "compress") ||
1388 !strcasecomp(coding, "x-compress")) {
1389 result = cftCompress;
1390 } else if (!strcasecomp(coding, "bzip2") ||
1391 !strcasecomp(coding, "x-bzip2")) {
1392 result = cftBzip2;
1393 } else if (!strcasecomp(coding, "deflate") ||
1394 !strcasecomp(coding, "x-deflate")) {
1395 result = cftDeflate;
1396 }
1397 return result;
1398 }
1399
HTContentTypeToCompressType(const char * ct)1400 CompressFileType HTContentTypeToCompressType(const char *ct)
1401 {
1402 CompressFileType method = cftNone;
1403
1404 if (ct == NULL) {
1405 method = cftNone;
1406 } else if (!strncasecomp(ct, "application/gzip", 16) ||
1407 !strncasecomp(ct, "application/x-gzip", 18)) {
1408 method = cftGzip;
1409 } else if (!strncasecomp(ct, "application/compress", 20) ||
1410 !strncasecomp(ct, "application/x-compress", 22)) {
1411 method = cftCompress;
1412 } else if (!strncasecomp(ct, "application/bzip2", 17) ||
1413 !strncasecomp(ct, "application/x-bzip2", 19)) {
1414 method = cftBzip2;
1415 }
1416 return method;
1417 }
1418
1419 /*
1420 * Check the anchor's content_type and content_encoding elements for a gzip or
1421 * Unix compressed file -FM, TD
1422 */
HTContentToCompressType(HTParentAnchor * anchor)1423 CompressFileType HTContentToCompressType(HTParentAnchor *anchor)
1424 {
1425 CompressFileType method = cftNone;
1426 const char *ct = HTAnchor_content_type(anchor);
1427 const char *ce = HTAnchor_content_encoding(anchor);
1428
1429 if (ct != 0) {
1430 method = HTContentTypeToCompressType(ct);
1431 } else if (ce != 0) {
1432 method = HTEncodingToCompressType(ce);
1433 }
1434 return method;
1435 }
1436
1437 /* Determine write access to a file.
1438 * ---------------------------------
1439 *
1440 * On exit:
1441 * Returns YES if file can be accessed and can be written to.
1442 *
1443 * Bugs:
1444 * 1. No code for non-unix systems.
1445 * 2. Isn't there a quicker way?
1446 */
HTEditable(const char * filename GCC_UNUSED)1447 BOOL HTEditable(const char *filename GCC_UNUSED)
1448 {
1449 #ifndef NO_GROUPS
1450 GETGROUPS_T groups[NGROUPS];
1451 uid_t myUid;
1452 int ngroups; /* The number of groups */
1453 struct stat fileStatus;
1454 int i;
1455
1456 if (stat(filename, &fileStatus)) /* Get details of filename */
1457 return NO; /* Can't even access file! */
1458
1459 ngroups = getgroups(NGROUPS, groups); /* Groups to which I belong */
1460 myUid = geteuid(); /* Get my user identifier */
1461
1462 if (TRACE) {
1463 int i2;
1464
1465 fprintf(tfp,
1466 "File mode is 0%o, uid=%d, gid=%d. My uid=%d, %d groups (",
1467 (unsigned int) fileStatus.st_mode,
1468 (int) fileStatus.st_uid,
1469 (int) fileStatus.st_gid,
1470 (int) myUid,
1471 (int) ngroups);
1472 for (i2 = 0; i2 < ngroups; i2++)
1473 fprintf(tfp, " %d", (int) groups[i2]);
1474 fprintf(tfp, ")\n");
1475 }
1476
1477 if (fileStatus.st_mode & 0002) /* I can write anyway? */
1478 return YES;
1479
1480 if ((fileStatus.st_mode & 0200) /* I can write my own file? */
1481 &&(fileStatus.st_uid == myUid))
1482 return YES;
1483
1484 if (fileStatus.st_mode & 0020) /* Group I am in can write? */
1485 {
1486 for (i = 0; i < ngroups; i++) {
1487 if (groups[i] == fileStatus.st_gid)
1488 return YES;
1489 }
1490 }
1491 CTRACE((tfp, "\tFile is not editable.\n"));
1492 #endif /* NO_GROUPS */
1493 return NO; /* If no excuse, can't do */
1494 }
1495
1496 /* Make a save stream.
1497 * -------------------
1498 *
1499 * The stream must be used for writing back the file.
1500 * @@@ no backup done
1501 */
HTFileSaveStream(HTParentAnchor * anchor)1502 HTStream *HTFileSaveStream(HTParentAnchor *anchor)
1503 {
1504 const char *addr = anchor->address;
1505 char *localname = HTLocalName(addr);
1506 FILE *fp = fopen(localname, BIN_W);
1507
1508 FREE(localname);
1509 if (!fp)
1510 return NULL;
1511
1512 return HTFWriter_new(fp);
1513 }
1514
1515 /* Output one directory entry.
1516 * ---------------------------
1517 */
HTDirEntry(HTStructured * target,const char * tail,const char * entry)1518 void HTDirEntry(HTStructured * target, const char *tail, const char *entry)
1519 {
1520 char *relative = NULL;
1521 char *stripped = NULL;
1522 char *escaped = NULL;
1523 int len;
1524
1525 if (entry == NULL)
1526 entry = "";
1527 StrAllocCopy(escaped, entry);
1528 LYTrimPathSep(escaped);
1529 if (strcmp(escaped, "..") != 0) {
1530 stripped = escaped;
1531 escaped = HTEscape(stripped, URL_XPALPHAS);
1532 if (((len = (int) strlen(escaped)) > 2) &&
1533 escaped[(len - 3)] == '%' &&
1534 escaped[(len - 2)] == '2' &&
1535 TOUPPER(escaped[(len - 1)]) == 'F') {
1536 escaped[(len - 3)] = '\0';
1537 }
1538 }
1539
1540 if (isEmpty(tail)) {
1541 /*
1542 * Handle extra slash at end of path.
1543 */
1544 HTStartAnchor(target, NULL, (escaped[0] != '\0' ? escaped : "/"));
1545 } else {
1546 /*
1547 * If empty tail, gives absolute ref below.
1548 */
1549 relative = 0;
1550 HTSprintf0(&relative, "%s%s%s",
1551 tail,
1552 (*escaped != '\0' ? "/" : ""),
1553 escaped);
1554 HTStartAnchor(target, NULL, relative);
1555 FREE(relative);
1556 }
1557 FREE(stripped);
1558 FREE(escaped);
1559 }
1560
view_structured(HTFormat format_out)1561 static BOOL view_structured(HTFormat format_out)
1562 {
1563 BOOL result = FALSE;
1564
1565 #ifdef USE_PRETTYSRC
1566 if (psrc_view
1567 || (format_out == HTAtom_for("www/dump")))
1568 result = TRUE;
1569 #else
1570 if (format_out == WWW_SOURCE)
1571 result = TRUE;
1572 #endif
1573 return result;
1574 }
1575
1576 /*
1577 * Write a DOCTYPE to the given stream if we happen to want to see the
1578 * source view, or are dumping source. This is not needed when the source
1579 * is not visible, since the document is rendered from a HTStructured object.
1580 */
HTStructured_doctype(HTStructured * target,HTFormat format_out)1581 void HTStructured_doctype(HTStructured * target, HTFormat format_out)
1582 {
1583 if (view_structured(format_out))
1584 PUTS("<!DOCTYPE HTML PUBLIC \"-//W3C//DTD HTML 4.01 Transitional//EN\">\n");
1585 }
1586
/*
 * Write a META element giving the content type (STR_HTML, with charset
 * iso-8859-1) to the given stream, but only when view_structured() says
 * that the source will be seen.
 */
void HTStructured_meta(HTStructured * target, HTFormat format_out)
{
    if (!view_structured(format_out))
        return;

    PUTS("<meta http-equiv=\"Content-Type\" content=\"" STR_HTML
         "; charset=iso-8859-1\">\n");
}
/*	Output parent directory entry.
 *	------------------------------
 *
 *	This gives the TITLE and H1 header (H2 with DIRED_SUPPORT), and also
 *	a link to the parent directory if appropriate.
 *
 *  On entry:
 *	target		structured stream which receives the markup.
 *	anchor		its address gives the path that is shown.
 *	format_out	passed on to HTStructured_doctype() and
 *			HTStructured_meta().
 *	tildeIsTop	if nonzero, a leading "/~" is removed from the path
 *			(a path of just "/~" becomes "/").
 *
 *  On exit:
 *	Returns TRUE if an "Up to <parent>" link was not created
 *	for a readable local directory because LONG_LIST is defined
 *	and NO_PARENT_DIR_REFERENCE is not defined, so that the
 *	calling function should use LYListFmtParse() to create a link
 *	to the parent directory.  Otherwise, it returns FALSE. - FM
 */
BOOL HTDirTitles(HTStructured * target, HTParentAnchor *anchor,
                 HTFormat format_out,
                 int tildeIsTop)
{
    const char *logical = anchor->address;
    char *path = HTParse(logical, "", PARSE_PATH + PARSE_PUNCTUATION);
    char *current;
    char *cp = NULL;
    BOOL need_parent_link = FALSE;
    int i;

#if defined(USE_DOS_DRIVES)
    /*
     * A "file://localhost/" address which LYIsDosDrive() accepts after
     * that prefix counts as local; everything else is treated as remote.
     */
    BOOL local_link = (strlen(logical) > 18
                       && !strncasecomp(logical, "file://localhost/", 17)
                       && LYIsDosDrive(logical + 17));
    BOOL is_remote = !local_link;

#else
#define is_remote TRUE
#endif

    /*
     * Check tildeIsTop for treating home directory as Welcome (assume the
     * tilde is not followed by a username).  - FM
     */
    if (tildeIsTop && !StrNCmp(path, "/~", 2)) {
        if (path[2] == '\0') {
            path[1] = '\0';
        } else {
            /* shift the remainder over the "/~" */
            for (i = 0; path[(i + 2)]; i++) {
                path[i] = path[(i + 2)];
            }
            path[i] = '\0';
        }
    }

    /*
     * Trim out the ;type= parameter, if present.  - FM
     * Only the last ';' is looked at, and only the types D, A and I (in
     * either case) are trimmed.
     */
    if ((cp = strrchr(path, ';')) != NULL) {
        if (!strncasecomp((cp + 1), "type=", 5)) {
            if (TOUPPER(*(cp + 6)) == 'D' ||
                TOUPPER(*(cp + 6)) == 'A' ||
                TOUPPER(*(cp + 6)) == 'I')
                *cp = '\0';
        }
        cp = NULL;
    }
    current = LYPathLeaf(path);	/* last part or "" */

    /*
     * Write the head and the heading.  The text shown is "printable"; if
     * that is empty, WELCOME_MSG is shown instead.
     */
    {
        char *printable = NULL;

#ifdef DIRED_SUPPORT
        printable = HTURLPath_toFile(((!strncasecomp(path, "/%2F", 4))	/* "//" ? */
                                      ? (path + 1)
                                      : path),
                                     TRUE,
                                     is_remote);
        /* for these two prefixes, drop the leading slash */
        if (0 == strncasecomp(printable, "/vmsysu:", 8) ||
            0 == strncasecomp(printable, "/anonymou.", 10)) {
            StrAllocCopy(cp, (printable + 1));
            StrAllocCopy(printable, cp);
            FREE(cp);
        }
#else
        StrAllocCopy(printable, current);
        HTUnEscape(printable);
#endif /* DIRED_SUPPORT */

        HTStructured_doctype(target, format_out);

        START(HTML_HEAD);
        PUTC('\n');
        START(HTML_TITLE);
        PUTS(*printable ? printable : WELCOME_MSG);
        PUTS(SEGMENT_DIRECTORY);
        END(HTML_TITLE);
        PUTC('\n');
        HTStructured_meta(target, format_out);
        END(HTML_HEAD);
        PUTC('\n');

        START(HTML_BODY);
        PUTC('\n');

#ifdef DIRED_SUPPORT
        START(HTML_H2);
        PUTS(*printable ? SEGMENT_CURRENT_DIR : "");
        PUTS(*printable ? printable : WELCOME_MSG);
        END(HTML_H2);
        PUTC('\n');
#else
        START(HTML_H1);
        PUTS(*printable ? printable : WELCOME_MSG);
        END(HTML_H1);
        PUTC('\n');
#endif /* DIRED_SUPPORT */
        /*
         * No parent link is written for a "vmsysu:" name which has no '/'
         * after its first '.', nor for an "anonymou." name without any
         * '/': return right after the heading.
         */
        if (((0 == strncasecomp(printable, "vmsysu:", 7)) &&
             (cp = StrChr(printable, '.')) != NULL &&
             StrChr(cp, '/') == NULL) ||
            (0 == strncasecomp(printable, "anonymou.", 9) &&
             StrChr(printable, '/') == NULL)) {
            FREE(printable);
            FREE(path);
            return (need_parent_link);
        }
        FREE(printable);
    }

#ifndef NO_PARENT_DIR_REFERENCE
    /*
     * Make link back to parent directory.
     */
    if (current - path > 0
        && LYIsPathSep(current[-1])
        && current[0] != '\0') {	/* was a slash AND something else too */
        char *parent = NULL;
        char *relative = NULL;

        /*
         * Cut the path in front of the leaf, so that the search below finds
         * the slash which starts the parent's own name.  From here on,
         * "path" is the parent's path and "current" is the leaf.
         */
        current[-1] = '\0';
        parent = strrchr(path, '/');	/* penultimate slash */

        /*
         * No link if the parent is "/.." or begins with an escaped slash,
         * or if the leaf itself begins with an escaped slash.
         */
        if ((parent &&
             (!strcmp(parent, "/..") ||
              !strncasecomp(parent, "/%2F", 4))) ||
            !strncasecomp(current, "%2F", 3)) {
            FREE(path);
            return (need_parent_link);
        }

        /* the link is written relative to the leaf: "<leaf>/.." */
        relative = 0;
        HTSprintf0(&relative, "%s/..", current);

#if defined(DOSPATH) || defined(__EMX__)
        if (local_link) {
            /*
             * NOTE(review): a parent of length 3 is presumably "/" plus a
             * drive such as "C:" - confirm.
             */
            if (parent != 0 && strlen(parent) == 3) {
                StrAllocCat(relative, "/.");
            }
        } else
#endif

#if !defined (VMS)
        {
            /*
             * On Unix, if it's not ftp and the directory cannot be read, don't
             * put out a link.
             *
             * On VMS, this problem is dealt with internally by
             * HTVMSBrowseDir().
             */
            DIR *dp = NULL;

            if (LYisLocalFile(logical)) {
                /*
                 * We need an absolute file path for the opendir.  We also need
                 * to unescape for this test.  Don't worry about %2F now, they
                 * presumably have been dealt with above, and shouldn't appear
                 * for local files anyway...  Assume OS / filesystem will just
                 * ignore superfluous slashes.  - KW
                 */
                char *fullparentpath = NULL;

                /*
                 * Path has been shortened above.
                 */
                StrAllocCopy(fullparentpath, *path ? path : "/");

                /*
                 * Guard against weirdness.
                 */
                if (0 == strcmp(current, "..")) {
                    StrAllocCat(fullparentpath, "/../..");
                } else if (0 == strcmp(current, ".")) {
                    StrAllocCat(fullparentpath, "/..");
                }

                HTUnEscape(fullparentpath);
                /* the opendir is only a test that the parent is readable */
                if ((dp = opendir(fullparentpath)) == NULL) {
                    FREE(fullparentpath);
                    FREE(relative);
                    FREE(path);
                    return (need_parent_link);
                }
                closedir(dp);
                FREE(fullparentpath);
#ifdef LONG_LIST
                /*
                 * The parent is readable, but with LONG_LIST the link is
                 * left for the caller to make: report that and return.
                 */
                need_parent_link = TRUE;
                FREE(path);
                FREE(relative);
                return (need_parent_link);
#endif /* LONG_LIST */
            }
        }
#endif /* !VMS */
        HTStartAnchor(target, "", relative);
        FREE(relative);

        PUTS(SEGMENT_UP_TO);
        if (parent) {
            if ((0 == strcmp(current, ".")) ||
                (0 == strcmp(current, ".."))) {
                /*
                 * Should not happen, but if it does, at least avoid giving
                 * misleading info.  - KW
                 */
                PUTS("..");
            } else {
                /* show the parent's name unescaped, without its slash */
                char *printable = NULL;

                StrAllocCopy(printable, parent + 1);
                HTUnEscape(printable);
                PUTS(printable);
                FREE(printable);
            }
        } else {
            PUTC('/');
        }
        END(HTML_A);
        PUTC('\n');
    }
#endif /* !NO_PARENT_DIR_REFERENCE */

    FREE(path);
    return (need_parent_link);
}
1832
1833 #if defined HAVE_READDIR
1834 /* Send README file.
1835 * -----------------
1836 *
1837 * If a README file exists, then it is inserted into the document here.
1838 */
do_readme(HTStructured * target,const char * localname)1839 static void do_readme(HTStructured * target, const char *localname)
1840 {
1841 FILE *fp;
1842 char *readme_file_name = NULL;
1843 int ch;
1844
1845 HTSprintf0(&readme_file_name, "%s/%s", localname, HT_DIR_README_FILE);
1846
1847 fp = fopen(readme_file_name, "r");
1848
1849 if (fp) {
1850 START(HTML_PRE);
1851 while ((ch = fgetc(fp)) != EOF) {
1852 PUTC((char) ch);
1853 }
1854 END(HTML_PRE);
1855 HTDisplayPartial();
1856 fclose(fp);
1857 }
1858 FREE(readme_file_name);
1859 }
1860
1861 #define DIRED_BLOK(obj) (((DIRED *)(obj))->sort_tags)
1862 #define DIRED_NAME(obj) (((DIRED *)(obj))->file_name)
1863
1864 #define NM_cmp(a,b) ((a) < (b) ? -1 : ((a) > (b) ? 1 : 0))
1865
1866 #if defined(LONG_LIST) && defined(DIRED_SUPPORT)
/*
 * Give the "type" of a file name for sorting: the part of the name which
 * begins at the first '.' found after any leading dots, or an empty string
 * if there is none.
 */
static const char *file_type(const char *path)
{
    const char *dot;

    /* leading dots do not start a type */
    for (; *path == '.'; ++path) {
        ;
    }
    dot = StrChr(path, '.');
    return (dot != NULL) ? dot : "";
}
1878 #endif /* LONG_LIST && DIRED_SUPPORT */
1879
dired_cmp(void * a,void * b)1880 static int dired_cmp(void *a, void *b)
1881 {
1882 DIRED *p = (DIRED *) a;
1883 DIRED *q = (DIRED *) b;
1884 int code = p->sort_tags - q->sort_tags;
1885
1886 #if defined(LONG_LIST) && defined(DIRED_SUPPORT)
1887 if (code == 0) {
1888 switch (dir_list_order) {
1889 case ORDER_BY_SIZE:
1890 code = -NM_cmp(p->file_info.st_size, q->file_info.st_size);
1891 break;
1892 case ORDER_BY_DATE:
1893 code = -NM_cmp(p->file_info.st_mtime, q->file_info.st_mtime);
1894 break;
1895 case ORDER_BY_MODE:
1896 code = NM_cmp(p->file_info.st_mode, q->file_info.st_mode);
1897 break;
1898 case ORDER_BY_USER:
1899 code = NM_cmp(p->file_info.st_uid, q->file_info.st_uid);
1900 break;
1901 case ORDER_BY_GROUP:
1902 code = NM_cmp(p->file_info.st_gid, q->file_info.st_gid);
1903 break;
1904 case ORDER_BY_TYPE:
1905 code = AS_cmp(file_type(p->file_name), file_type(q->file_name));
1906 break;
1907 default:
1908 code = 0;
1909 break;
1910 }
1911 }
1912 #endif /* LONG_LIST && DIRED_SUPPORT */
1913 if (code == 0)
1914 code = AS_cmp(p->file_name, q->file_name);
1915 #if 0
1916 CTRACE((tfp, "dired_cmp(%d) ->%d\n\t%c:%s (%s)\n\t%c:%s (%s)\n",
1917 dir_list_order,
1918 code,
1919 p->sort_tags, p->file_name, file_type(p->file_name),
1920 q->sort_tags, q->file_name, file_type(q->file_name)));
1921 #endif
1922 return code;
1923 }
1924
/*
 * Write the listing of a local directory as HTML.
 *
 * On entry:
 *	dp		open directory, read here with readdir().  It is not
 *			closed by this function.
 *	localname	path of the directory; each entry's name is appended
 *			to it, and do_readme() looks for the README in it.
 *	anchor		gives the address from which the "tail" (the text
 *			after the last '/' of the path) is taken.
 *	format_out,
 *	sink		passed with anchor to HTML_new() to make the target.
 *
 * The entries are first collected in a HTBTree ordered by dired_cmp(), then
 * written in that order.
 *
 * On exit:
 *	Returns HT_LOADED, or HT_PARTIAL_CONTENT if the user interrupted or
 *	if memory for an entry could not be allocated; in the latter case the
 *	target is aborted instead of freed.
 */
static int print_local_dir(DIR *dp, char *localname,
                           HTParentAnchor *anchor,
                           HTFormat format_out,
                           HTStream *sink)
{
    HTStructured *target;	/* HTML object */
    HTBTree *bt;
    HTStructuredClass targetClass;
    STRUCT_DIRENT *dirbuf;
    char *pathname = NULL;
    char *tail = NULL;
    const char *p;
    char *tmpfilename = NULL;
    BOOL need_parent_link = FALSE;
    BOOL preformatted = FALSE;
    int status;
    struct stat *actual_info;

#ifdef DISP_PARTIAL
    int num_of_entries = 0;	/* lines counter */
#endif

#ifdef S_IFLNK
    struct stat link_info;
#endif

    CTRACE((tfp, "print_local_dir() started\n"));

    pathname = HTParse(anchor->address, "",
                       PARSE_PATH + PARSE_PUNCTUATION);

    /*
     * The tail is what follows the last '/' of the path.  Without any '/',
     * p points to "/" so that (p + 1) is an empty string.
     */
    if ((p = strrchr(pathname, '/')) == NULL)
        p = "/";
    StrAllocCopy(tail, (p + 1));
    FREE(pathname);

    if (UCLYhndl_HTFile_for_unspec >= 0) {
        HTAnchor_setUCInfoStage(anchor,
                                UCLYhndl_HTFile_for_unspec,
                                UCT_STAGE_PARSER,
                                UCT_SETBY_DEFAULT);
    }

    target = HTML_new(anchor, format_out, sink);
    targetClass = *target->isa;	/* Copy routine entry points */

    /*
     * The need_parent_link flag will be set if an "Up to <parent>" link was
     * not created for a readable parent in HTDirTitles() because LONG_LIST is
     * defined and NO_PARENT_DIR_REFERENCE is not defined so that we need to
     * create the link via an LYListFmtParse() call.  - FM
     */
    need_parent_link = HTDirTitles(target, anchor, format_out, FALSE);

#ifdef DIRED_SUPPORT
    if (!isLYNXCGI(anchor->address)) {
        HTAnchor_setFormat(anchor, WWW_DIRED);
        lynx_edit_mode = TRUE;
    }
#endif /* DIRED_SUPPORT */
    if (HTDirReadme == HT_DIR_README_TOP)
        do_readme(target, localname);

    bt = HTBTree_new(dired_cmp);

    _HTProgress(READING_DIRECTORY);
    status = HT_LOADED;		/* assume we don't get interrupted */
    while ((dirbuf = readdir(dp)) != NULL) {
        /*
         * While there are directory entries to be read...
         */
        DIRED *data = NULL;

#ifdef STRUCT_DIRENT__D_INO
        if (dirbuf->d_ino == 0)
            /*
             * If the entry is not being used, skip it.
             */
            continue;
#endif
        /*
         * Skip self, parent if handled in HTDirTitles() or if
         * NO_PARENT_DIR_REFERENCE is not defined, and any dot files if
         * no_dotfiles is set or show_dotfiles is not set.  - FM
         */
        if (!strcmp(dirbuf->d_name, ".") /* self */ ||
            (!strcmp(dirbuf->d_name, "..") /* parent */ &&
             need_parent_link == FALSE) ||
            ((strcmp(dirbuf->d_name, "..")) &&
             (dirbuf->d_name[0] == '.' &&
              (no_dotfiles || !show_dotfiles))))
            continue;

        StrAllocCopy(tmpfilename, localname);
        /*
         * If filename is not root directory, add trailing separator.
         */
        LYAddPathSep(&tmpfilename);

        StrAllocCat(tmpfilename, dirbuf->d_name);
        /*
         * The entry is allocated with room for the name behind it.
         * NOTE(review): the 4 extra bytes presumably cover the terminator
         * and the separator which LYAddPathSep0() may append below -
         * confirm against the declaration of DIRED.
         */
        data = (DIRED *) malloc(sizeof(DIRED) + strlen(dirbuf->d_name) + 4);
        if (data == NULL) {
            status = HT_PARTIAL_CONTENT;
            break;
        }
        LYTrimPathSep(tmpfilename);

        /*
         * Get the file's status.  Where symbolic links exist, the entry
         * keeps the lstat() result, but for a link the kind of entry is
         * decided from a stat() into the scratch link_info.  If the call
         * fails, the mode is set to 0.
         */
        actual_info = &(data->file_info);
#ifdef S_IFLNK
        if (lstat(tmpfilename, actual_info) < 0) {
            actual_info->st_mode = 0;
        } else {
            if (S_ISLNK(actual_info->st_mode)) {
                actual_info = &link_info;
                if (stat(tmpfilename, actual_info) < 0)
                    actual_info->st_mode = 0;
            }
        }
#else
        if (stat(tmpfilename, actual_info) < 0)
            actual_info->st_mode = 0;
#endif

        strcpy(data->file_name, dirbuf->d_name);
        /*
         * Set the tag which dired_cmp() compares first.
         */
#ifndef DIRED_SUPPORT
        if (S_ISDIR(actual_info->st_mode)) {
            data->sort_tags = 'D';
        } else {
            data->sort_tags = 'F';
            /* D & F to have first directories, then files */
        }
#else
        if (S_ISDIR(actual_info->st_mode)) {
            if (dir_list_style == MIXED_STYLE) {
                data->sort_tags = ' ';
                LYAddPathSep0(data->file_name);
            } else if (!strcmp(dirbuf->d_name, "..")) {
                data->sort_tags = 'A';
            } else {
                data->sort_tags = 'D';
            }
        } else if (dir_list_style == MIXED_STYLE) {
            data->sort_tags = ' ';
        } else if (dir_list_style == FILES_FIRST) {
            data->sort_tags = 'C';
            /* C & D to have first files, then directories */
        } else {
            data->sort_tags = 'F';
        }
#endif /* !DIRED_SUPPORT */
        /*
         * Sort dirname in the tree bt.
         */
        HTBTree_add(bt, data);

#ifdef DISP_PARTIAL
        /* optimize for expensive operation: */
        /* i.e., look for an interrupt only once in so many entries */
        if (num_of_entries % (partial_threshold > 0 ?
                              partial_threshold : display_lines) == 0) {
            if (HTCheckForInterrupt()) {
                status = HT_PARTIAL_CONTENT;
                break;
            }
        }
        num_of_entries++;
#endif /* DISP_PARTIAL */

    }				/* end while directory entries left to read */

    if (status != HT_PARTIAL_CONTENT)
        _HTProgress(OPERATION_OK);
    else
        CTRACE((tfp, "Reading the directory interrupted by user\n"));

    /*
     * Run through tree printing out in order.
     */
    {
        HTBTElement *next_element = HTBTree_next(bt, NULL);

        /* pick up the first element of the list */
        int num_of_entries_output = 0;	/* lines counter */

        char state;

        /* I for initial (.. file),
           D for directory file,
           F for file */

#ifdef DIRED_SUPPORT
        char test;
#endif /* DIRED_SUPPORT */
        state = 'I';

        while (next_element != NULL) {
            DIRED *entry;

#ifndef DISP_PARTIAL
            if (num_of_entries_output % HTMAX(display_lines, 10) == 0) {
                if (HTCheckForInterrupt()) {
                    _HTProgress(TRANSFER_INTERRUPTED);
                    status = HT_PARTIAL_CONTENT;
                    break;
                }
            }
#endif
            StrAllocCopy(tmpfilename, localname);
            /*
             * If filename is not root directory.
             */
            LYAddPathSep(&tmpfilename);

            entry = (DIRED *) (HTBTree_object(next_element));
            /*
             * Append the current entry's filename to the path.
             */
            StrAllocCat(tmpfilename, entry->file_name);
            HTSimplify(tmpfilename, LYIsPathSep(*tmpfilename));
            /*
             * Output the directory entry.
             *
             * For every entry but "..", a change of the section (state)
             * first closes what is open and writes the H2 header of the
             * new section.
             */
            if (strcmp(DIRED_NAME(HTBTree_object(next_element)), "..")) {
#ifdef DIRED_SUPPORT
                /* every tag other than 'D' counts as 'F' here */
                test =
                    (char) (DIRED_BLOK(HTBTree_object(next_element))
                            == 'D' ? 'D' : 'F');
                if (state != test) {
#ifndef LONG_LIST
                    if (dir_list_style == FILES_FIRST) {
                        if (state == 'F') {
                            END(HTML_DIR);
                            PUTC('\n');
                        }
                    } else if (dir_list_style != MIXED_STYLE)
                        if (state == 'D') {
                            END(HTML_DIR);
                            PUTC('\n');
                        }
#endif /* !LONG_LIST */
                    state =
                        (char) (DIRED_BLOK(HTBTree_object(next_element))
                                == 'D' ? 'D' : 'F');
                    if (preformatted) {
                        END(HTML_PRE);
                        PUTC('\n');
                        preformatted = FALSE;
                    }
                    START(HTML_H2);
                    if (dir_list_style != MIXED_STYLE) {
                        START(HTML_EM);
                        PUTS(state == 'D'
                             ? LABEL_SUBDIRECTORIES
                             : LABEL_FILES);
                        END(HTML_EM);
                    }
                    END(HTML_H2);
                    PUTC('\n');
#ifndef LONG_LIST
                    START(HTML_DIR);
                    PUTC('\n');
#endif /* !LONG_LIST */
                }
#else
                if (state != DIRED_BLOK(HTBTree_object(next_element))) {
#ifndef LONG_LIST
                    if (state == 'D') {
                        END(HTML_DIR);
                        PUTC('\n');
                    }
#endif /* !LONG_LIST */
                    state =
                        (char) (DIRED_BLOK(HTBTree_object(next_element))
                                == 'D' ? 'D' : 'F');
                    if (preformatted) {
                        END(HTML_PRE);
                        PUTC('\n');
                        preformatted = FALSE;
                    }
                    START(HTML_H2);
                    START(HTML_EM);
                    PUTS(state == 'D'
                         ? LABEL_SUBDIRECTORIES
                         : LABEL_FILES);
                    END(HTML_EM);
                    END(HTML_H2);
                    PUTC('\n');
#ifndef LONG_LIST
                    START(HTML_DIR);
                    PUTC('\n');
#endif /* !LONG_LIST */
                }
#endif /* DIRED_SUPPORT */
#ifndef LONG_LIST
                START(HTML_LI);
#endif /* !LONG_LIST */
            }
            /* the entries themselves are written within PRE */
            if (!preformatted) {
                START(HTML_PRE);
                PUTC('\n');
                preformatted = TRUE;
            }
#ifdef LONG_LIST
            LYListFmtParse(list_format, entry, tmpfilename, target, tail);
#else
            HTDirEntry(target, tail, entry->file_name);
            PUTS(entry->file_name);
            END(HTML_A);
            MAYBE_END(HTML_LI);
            PUTC('\n');
#endif /* LONG_LIST */

            next_element = HTBTree_next(bt, next_element);
            /* pick up the next element of the list;
               if none, return NULL */

            /* optimize for expensive operation: */
#ifdef DISP_PARTIAL
            if (num_of_entries_output %
                ((partial_threshold > 0)
                 ? partial_threshold
                 : display_lines) == 0) {
                /* num_of_entries, num_of_entries_output... */
                HTDisplayPartial();

                if (HTCheckForInterrupt()) {
                    _HTProgress(TRANSFER_INTERRUPTED);
                    status = HT_PARTIAL_CONTENT;
                    break;
                }
            }
            num_of_entries_output++;
#endif /* DISP_PARTIAL */

        }			/* end while next_element */

        if (status == HT_LOADED) {
            /* still in the initial state: no section was started */
            if (state == 'I') {
                START(HTML_P);
                PUTS("Empty Directory");
            }
#ifndef LONG_LIST
            else
                END(HTML_DIR);
#endif /* !LONG_LIST */
        }
    }				/* end printing out the tree in order */
    if (preformatted) {
        END(HTML_PRE);
        PUTC('\n');
    }
    END(HTML_BODY);
    PUTC('\n');

    FREE(tmpfilename);
    FREE(tail);
    HTBTreeAndObject_free(bt);

    /*
     * A complete listing may get the README at the bottom and the target
     * is freed; an incomplete one aborts the target.
     */
    if (status == HT_LOADED) {
        if (HTDirReadme == HT_DIR_README_BOTTOM)
            do_readme(target, localname);
        FREE_TARGET;
    } else {
        ABORT_TARGET;
    }
    HTFinishDisplayPartial();
    return status;		/* document loaded, maybe partial */
}
2292 #endif /* HAVE_READDIR */
2293
2294 #ifndef VMS
/*
 * Call stat() for the given name.  If the name ends with a path separator,
 * a "." is appended first and that name is used instead.  Returns what
 * stat() returns.
 */
int HTStat(const char *filename,
           struct stat *data)
{
    size_t n = strlen(filename);
    int rc;

    if (n == 0 || !LYIsPathSep(filename[n - 1])) {
        rc = stat(filename, data);
#ifdef _WINDOWS
        /*
         * Someone claims that stat() doesn't give the proper result for a
         * directory on Windows.
         */
        if (rc == -1
            && access(filename, 0) == 0) {
            data->st_mode = S_IFDIR;
            rc = 0;
        }
#endif
    } else {
        char *dotted = NULL;

        HTSprintf0(&dotted, "%s.", filename);
        rc = HTStat(dotted, data);
        FREE(dotted);
    }
    return rc;
}
2323 #endif
2324
2325 #if defined(USE_ZLIB) || defined(USE_BZLIB)
sniffStream(FILE * fp,char * buffer,size_t needed)2326 static BOOL sniffStream(FILE *fp, char *buffer, size_t needed)
2327 {
2328 long offset = ftell(fp);
2329 BOOL result = FALSE;
2330
2331 if (offset >= 0) {
2332 if (fread(buffer, sizeof(char), needed, fp) == needed) {
2333 result = TRUE;
2334 }
2335 if (fseek(fp, offset, SEEK_SET) < 0) {
2336 CTRACE((tfp, "error seeking in stream\n"));
2337 result = FALSE;
2338 }
2339 }
2340 return result;
2341 }
2342 #endif
2343
2344 #ifdef USE_ZLIB
isGzipStream(FILE * fp)2345 static BOOL isGzipStream(FILE *fp)
2346 {
2347 char buffer[3];
2348 BOOL result;
2349
2350 if (sniffStream(fp, buffer, sizeof(buffer))
2351 && !MemCmp(buffer, "\037\213", sizeof(buffer) - 1)) {
2352 result = TRUE;
2353 } else {
2354 CTRACE((tfp, "not a gzip-stream\n"));
2355 result = FALSE;
2356 }
2357 return result;
2358 }
2359
2360 /*
2361 * Strictly speaking, DEFLATE has no header bytes. But decode what we can,
2362 * (to eliminate the one "reserved" pattern) and provide a trace. See RFC-1951
2363 * discussion of BFINAL and BTYPE.
2364 */
isDeflateStream(FILE * fp)2365 static BOOL isDeflateStream(FILE *fp)
2366 {
2367 char buffer[3];
2368 BOOL result = FALSE;
2369
2370 if (sniffStream(fp, buffer, sizeof(buffer))) {
2371 int bit1 = ((buffer[0] >> 0) & 1);
2372 int bit2 = ((buffer[0] >> 1) & 1);
2373 int bit3 = ((buffer[0] >> 2) & 1);
2374 int btype = ((bit3 << 1) + bit2);
2375
2376 if (!MemCmp(buffer, "\170\234", sizeof(buffer) - 1)) {
2377 result = TRUE;
2378 CTRACE((tfp, "isDeflate: assume zlib-wrapped deflate\n"));
2379 } else if (btype == 3) {
2380 CTRACE((tfp, "isDeflate: not a deflate-stream\n"));
2381 } else {
2382 CTRACE((tfp, "isDeflate: %send block, %s compression\n",
2383 (bit1 ? "" : "non-"),
2384 (btype == 0
2385 ? "no"
2386 : (btype == 1
2387 ? "static Huffman"
2388 : "dynamic Huffman"))));
2389 result = TRUE;
2390 }
2391 }
2392 return result;
2393 }
2394 #endif
2395
2396 #ifdef USE_BZLIB
isBzip2Stream(FILE * fp)2397 static BOOL isBzip2Stream(FILE *fp)
2398 {
2399 char buffer[6];
2400 BOOL result;
2401
2402 if (sniffStream(fp, buffer, sizeof(buffer))
2403 && !MemCmp(buffer, "BZh", 3)
2404 && isdigit(UCH(buffer[3]))
2405 && isdigit(UCH(buffer[4]))) {
2406 result = TRUE;
2407 } else {
2408 CTRACE((tfp, "not a bzip2-stream\n"));
2409 result = FALSE;
2410 }
2411 return result;
2412 }
2413 #endif
2414
/*
 * FOPEN_MODE(bin) expands to the mode argument(s) passed to fopen():
 *  - on VMS it is always "r" plus two sharing options ('bin' is ignored),
 *  - elsewhere it is BIN_R when 'bin' is nonzero, otherwise "r".
 *
 * DOT_STRING lists the characters which may introduce a filename suffix when
 * looking for a compression suffix.
 */
#ifdef VMS
#define FOPEN_MODE(bin) "r", "shr=put", "shr=upd"
#define DOT_STRING "._-"	/* FIXME: should we check if suffix is after ']' or ':' ? */
#else
/* the argument is parenthesized so that any expression can be passed */
#define FOPEN_MODE(bin) ((bin) ? BIN_R : "r")
#define DOT_STRING "."
#endif
2422
/*
 * Open a local file, decide whether it can be decompressed internally, and
 * pass it to the matching parser.
 *
 * anchor	anchor for the document; its content_type, content_encoding,
 *		SugFname and method-list may be updated here
 * format_out	the requested output format
 * sink		stream which receives the parsed output
 * nodename	host part of the URL (only used on VMS)
 * filename	name of the file to open
 * myEncoding	encoding which the caller derived for the file
 * format	content-type which the caller derived for the file
 * statusp	receives the result of the parser (or of HTLoadError if the
 *		compressed file could not be opened for decompression)
 *
 * Returns TRUE if the file could be opened with fopen() (in that case
 * *statusp has been set), FALSE otherwise (*statusp is left untouched).
 */
static int decompressAndParse(HTParentAnchor *anchor,
			      HTFormat format_out,
			      HTStream *sink,
			      char *nodename GCC_UNUSED,
			      char *filename,
			      HTAtom *myEncoding,
			      HTFormat format,
			      int *statusp)
{
    HTAtom *encoding = 0;

#ifdef USE_ZLIB
    FILE *zzfp = 0;		/* plain FILE used for deflate-streams */
    gzFile gzfp = 0;		/* zlib handle used for gzip-streams */
#endif /* USE_ZLIB */
#ifdef USE_BZLIB
    BZFILE *bzfp = 0;		/* bzlib handle used for bzip2-streams */
#endif /* USE_BZLIB */
#if defined(USE_ZLIB) || defined(USE_BZLIB)
    CompressFileType internal_decompress = cftNone;
    BOOL failed_decompress = NO;
#endif
    int rootlen = 0;
    char *localname = filename;
    int bin;
    FILE *fp;
    int result = FALSE;

#ifdef VMS
    /*
     * Assume that the file is in Unix-style syntax if it contains a '/' after
     * the leading one.  @@
     */
    localname = (StrChr(localname + 1, '/')
		 ? HTVMS_name(nodename, localname)
		 : localname + 1);
#endif /* VMS */

    /* files with a known compression suffix are opened in binary mode */
    bin = HTCompressFileType(filename, ".", &rootlen) != cftNone;
    fp = fopen(localname, FOPEN_MODE(bin));

#ifdef VMS
    /*
     * If the file wasn't VMS syntax, then perhaps it is Ultrix.
     */
    if (!fp) {
	char *ultrixname = 0;

	CTRACE((tfp, "HTLoadFile: Can't open as %s\n", localname));
	HTSprintf0(&ultrixname, "%s::\"%s\"", nodename, filename);
	fp = fopen(ultrixname, FOPEN_MODE(bin));
	if (!fp) {
	    CTRACE((tfp, "HTLoadFile: Can't open as %s\n", ultrixname));
	}
	FREE(ultrixname);
    }
#endif /* VMS */
    CTRACE((tfp, "HTLoadFile: Opening `%s' gives %p\n", localname, (void *) fp));
    if (fp) {			/* Good! */
	/* an editable file also accepts the PUT method */
	if (HTEditable(localname)) {
	    HTAtom *put = HTAtom_for("PUT");
	    HTList *methods = HTAnchor_methods(anchor);

	    if (HTList_indexOf(methods, put) == (-1)) {
		HTList_addObject(methods, put);
	    }
	}
	/*
	 * Fake a Content-Encoding for compressed files.  - FM
	 */
	if (!IsUnityEnc(myEncoding)) {
	    /*
	     * We already know from the call to HTFileFormat that
	     * this is a compressed file, no need to look at the filename
	     * again.  - kw
	     */
#if defined(USE_ZLIB) || defined(USE_BZLIB)
	    CompressFileType method = HTEncodingToCompressType(HTAtom_name(myEncoding));
#endif

	    /*
	     * NOTE: despite its name, this is true when the output format is
	     * NOT "www/download" (strcmp() is nonzero) and the encoding
	     * corresponds to compression-type 'm', i.e., when the file should
	     * be decompressed internally.
	     */
#define isDOWNLOAD(m) (strcmp(format_out->name, "www/download") && (method == m))
#ifdef USE_ZLIB
	    if (isDOWNLOAD(cftGzip)) {
		if (isGzipStream(fp)) {
		    /* reopen the file via zlib */
		    fclose(fp);
		    fp = 0;
		    gzfp = gzopen(localname, BIN_R);

		    CTRACE((tfp, "HTLoadFile: gzopen of `%s' gives %p\n",
			    localname, (void *) gzfp));
		}
		internal_decompress = cftGzip;
	    } else if (isDOWNLOAD(cftDeflate)) {
		if (isDeflateStream(fp)) {
		    /* the already-open stream is handed over as-is */
		    zzfp = fp;
		    fp = 0;

		    CTRACE((tfp, "HTLoadFile: zzopen of `%s' gives %p\n",
			    localname, (void *) zzfp));
		}
		internal_decompress = cftDeflate;
	    } else
#endif /* USE_ZLIB */
#ifdef USE_BZLIB
	    if (isDOWNLOAD(cftBzip2)) {
		if (isBzip2Stream(fp)) {
		    /* reopen the file via bzlib */
		    fclose(fp);
		    fp = 0;
		    bzfp = BZ2_bzopen(localname, BIN_R);

		    CTRACE((tfp, "HTLoadFile: bzopen of `%s' gives %p\n",
			    localname, bzfp));
		}
		internal_decompress = cftBzip2;
	    } else
#endif /* USE_BZLIB */
	    {
		/* no internal decompression: present it as compressed data */
		StrAllocCopy(anchor->content_type, format->name);
		StrAllocCopy(anchor->content_encoding, HTAtom_name(myEncoding));
		format = HTAtom_for("www/compressed");
	    }
	} else {
	    /*
	     * The caller found no encoding; check the filename's suffix here.
	     */
	    CompressFileType cft = HTCompressFileType(localname, DOT_STRING, &rootlen);

	    if (cft != cftNone) {
		char *cp = NULL;

		/* derive the content-type from the name without the suffix */
		StrAllocCopy(cp, localname);
		cp[rootlen] = '\0';
		format = HTFileFormat(cp, &encoding, NULL);
		FREE(cp);
		format = HTCharsetFormat(format, anchor,
					 UCLYhndl_HTFile_for_unspec);
		StrAllocCopy(anchor->content_type, format->name);
	    }

	    switch (cft) {
	    case cftCompress:
		StrAllocCopy(anchor->content_encoding, "x-compress");
		format = HTAtom_for("www/compressed");
		break;
	    case cftDeflate:
		StrAllocCopy(anchor->content_encoding, "x-deflate");
#ifdef USE_ZLIB
		if (strcmp(format_out->name, "www/download") != 0) {
		    if (isDeflateStream(fp)) {
			zzfp = fp;
			fp = 0;

			CTRACE((tfp, "HTLoadFile: zzopen of `%s' gives %p\n",
				localname, (void *) zzfp));
		    }
		    internal_decompress = cftDeflate;
		}
#else /* USE_ZLIB */
		format = HTAtom_for("www/compressed");
#endif /* USE_ZLIB */
		break;
	    case cftGzip:
		StrAllocCopy(anchor->content_encoding, "x-gzip");
#ifdef USE_ZLIB
		if (strcmp(format_out->name, "www/download") != 0) {
		    if (isGzipStream(fp)) {
			fclose(fp);
			fp = 0;
			gzfp = gzopen(localname, BIN_R);

			CTRACE((tfp, "HTLoadFile: gzopen of `%s' gives %p\n",
				localname, (void *) gzfp));
		    }
		    internal_decompress = cftGzip;
		}
#else /* USE_ZLIB */
		format = HTAtom_for("www/compressed");
#endif /* USE_ZLIB */
		break;
	    case cftBzip2:
		StrAllocCopy(anchor->content_encoding, "x-bzip2");
#ifdef USE_BZLIB
		if (strcmp(format_out->name, "www/download") != 0) {
		    if (isBzip2Stream(fp)) {
			fclose(fp);
			fp = 0;
			bzfp = BZ2_bzopen(localname, BIN_R);

			CTRACE((tfp, "HTLoadFile: bzopen of `%s' gives %p\n",
				localname, bzfp));
		    }
		    internal_decompress = cftBzip2;
		}
#else /* USE_BZLIB */
		format = HTAtom_for("www/compressed");
#endif /* USE_BZLIB */
		break;
	    case cftNone:
		break;
	    }
	}
#if defined(USE_ZLIB) || defined(USE_BZLIB)
	if (internal_decompress != cftNone) {
	    /*
	     * Internal decompression was selected; it fails if the matching
	     * handle was not set above (the sniff-test or the open failed).
	     */
	    switch (internal_decompress) {
#ifdef USE_ZLIB
	    case cftDeflate:
		failed_decompress = (BOOLEAN) (zzfp == NULL);
		break;
	    case cftCompress:
	    case cftGzip:
		failed_decompress = (BOOLEAN) (gzfp == NULL);
		break;
#endif
#ifdef USE_BZLIB
	    case cftBzip2:
		failed_decompress = (BOOLEAN) (bzfp == NULL);
		break;
#endif
	    default:
		failed_decompress = YES;
		break;
	    }
	    if (failed_decompress) {
		*statusp = HTLoadError(NULL,
				       -(HT_ERROR),
				       FAILED_OPEN_COMPRESSED_FILE);
	    } else {
		char *sugfname = NULL;

		/*
		 * Start from the anchor's suggested filename, or else the
		 * last component of the URL's path.
		 */
		if (anchor->SugFname) {
		    StrAllocCopy(sugfname, anchor->SugFname);
		} else {
		    char *anchor_path = HTParse(anchor->address, "",
						PARSE_PATH + PARSE_PUNCTUATION);
		    char *lastslash;

		    HTUnEscape(anchor_path);
		    lastslash = strrchr(anchor_path, '/');
		    if (lastslash)
			StrAllocCopy(sugfname, lastslash + 1);
		    FREE(anchor_path);
		}
		/* the data is decompressed here, so drop the encoding */
		FREE(anchor->content_encoding);
		if (sugfname && *sugfname)
		    HTCheckFnameForCompression(&sugfname, anchor,
					       TRUE);
		if (sugfname && *sugfname)
		    StrAllocCopy(anchor->SugFname, sugfname);
		FREE(sugfname);
#ifdef USE_BZLIB
		if (bzfp)
		    *statusp = HTParseBzFile(format, format_out,
					     anchor,
					     bzfp, sink);
#endif
#ifdef USE_ZLIB
		if (gzfp)
		    *statusp = HTParseGzFile(format, format_out,
					     anchor,
					     gzfp, sink);
		else if (zzfp)
		    *statusp = HTParseZzFile(format, format_out,
					     anchor,
					     zzfp, sink);
#endif
	    }
	} else
#endif /* USE_ZLIB || USE_BZLIB */
	{
	    *statusp = HTParseFile(format, format_out, anchor, fp, sink);
	}
	/* fp is still set unless it was closed or handed over above */
	if (fp != 0) {
	    fclose(fp);
	    fp = 0;
	}
	result = TRUE;
    }				/* If successful open */
    return result;
}
2699
2700 /* Load a document.
2701 * ----------------
2702 *
2703 * On entry:
2704 * addr must point to the fully qualified hypertext reference.
2705 * This is the physical address of the file
2706 *
2707 * On exit:
2708 * returns <0 Error has occurred.
2709 * HTLOADED OK
2710 *
2711 */
HTLoadFile(const char * addr,HTParentAnchor * anchor,HTFormat format_out,HTStream * sink)2712 int HTLoadFile(const char *addr,
2713 HTParentAnchor *anchor,
2714 HTFormat format_out,
2715 HTStream *sink)
2716 {
2717 char *filename = NULL;
2718 char *acc_method = NULL;
2719 HTFormat format;
2720 char *nodename = NULL;
2721 char *newname = NULL; /* Simplified name of file */
2722 HTAtom *myEncoding = NULL; /* enc of this file, may be gzip etc. */
2723 int status = -1;
2724
2725 #ifndef DISABLE_FTP
2726 char *ftp_newhost;
2727 #endif
2728
2729 #ifdef VMS
2730 struct stat stat_info;
2731 #endif /* VMS */
2732
2733 /*
2734 * Reduce the filename to a basic form (hopefully unique!).
2735 */
2736 StrAllocCopy(newname, addr);
2737 filename = HTParse(newname, "", PARSE_PATH | PARSE_PUNCTUATION);
2738 nodename = HTParse(newname, "", PARSE_HOST);
2739
2740 /*
2741 * If access is ftp, or file is on another host, invoke ftp now.
2742 */
2743 acc_method = HTParse(newname, "", PARSE_ACCESS);
2744 if (strcmp("ftp", acc_method) == 0 ||
2745 (!LYSameHostname("localhost", nodename) &&
2746 !LYSameHostname(nodename, HTHostName()))) {
2747 status = -1;
2748 FREE(newname);
2749 FREE(filename);
2750 FREE(nodename);
2751 FREE(acc_method);
2752 #ifndef DISABLE_FTP
2753 ftp_newhost = HTParse(addr, "", PARSE_HOST);
2754 if (strcmp(ftp_lasthost, ftp_newhost))
2755 ftp_local_passive = ftp_passive;
2756
2757 status = HTFTPLoad(addr, anchor, format_out, sink);
2758
2759 if (ftp_passive == ftp_local_passive) {
2760 if ((status >= 400) || (status < 0)) {
2761 ftp_local_passive = (BOOLEAN) !ftp_passive;
2762 status = HTFTPLoad(addr, anchor, format_out, sink);
2763 }
2764 }
2765
2766 free(ftp_lasthost);
2767 ftp_lasthost = ftp_newhost;
2768 #endif /* DISABLE_FTP */
2769 return status;
2770 } else {
2771 FREE(newname);
2772 FREE(acc_method);
2773 }
2774 #if defined(VMS) || defined(USE_DOS_DRIVES)
2775 HTUnEscape(filename);
2776 #endif /* VMS */
2777
2778 /*
2779 * Determine the format and encoding mapped to any suffix.
2780 */
2781 if (anchor->content_type && anchor->content_encoding) {
2782 /*
2783 * If content_type and content_encoding are BOTH already set in the
2784 * anchor object, we believe it and don't try to derive format and
2785 * encoding from the filename. - kw
2786 */
2787 format = HTAtom_for(anchor->content_type);
2788 myEncoding = HTAtom_for(anchor->content_encoding);
2789 } else {
2790 int default_UCLYhndl = UCLYhndl_HTFile_for_unspec;
2791
2792 if (force_old_UCLYhndl_on_reload) {
2793 force_old_UCLYhndl_on_reload = FALSE;
2794 default_UCLYhndl = forced_UCLYhdnl;
2795 }
2796
2797 format = HTFileFormat(filename, &myEncoding, NULL);
2798
2799 /*
2800 * Check the format for an extended MIME charset value, and act on it
2801 * if present. Otherwise, assume what is indicated by the last
2802 * parameter (fallback will effectively be UCLYhndl_for_unspec, by
2803 * default ISO-8859-1). - kw
2804 */
2805 format = HTCharsetFormat(format, anchor, default_UCLYhndl);
2806 }
2807
2808 #ifdef VMS
2809 /*
2810 * Check to see if the 'filename' is in fact a directory. If it is create
2811 * a new hypertext object containing a list of files and subdirectories
2812 * contained in the directory. All of these are links to the directories
2813 * or files listed.
2814 */
2815 if (HTStat(filename, &stat_info) == -1) {
2816 CTRACE((tfp, "HTLoadFile: Can't stat %s\n", filename));
2817 } else {
2818 if (S_ISDIR(stat_info.st_mode)) {
2819 if (HTDirAccess == HT_DIR_FORBID) {
2820 FREE(filename);
2821 FREE(nodename);
2822 return HTLoadError(sink, 403, DISALLOWED_DIR_SCAN);
2823 }
2824
2825 if (HTDirAccess == HT_DIR_SELECTIVE) {
2826 char *enable_file_name = NULL;
2827
2828 HTSprintf0(&enable_file_name, "%s/%s", filename, HT_DIR_ENABLE_FILE);
2829 if (HTStat(enable_file_name, &stat_info) == -1) {
2830 FREE(filename);
2831 FREE(nodename);
2832 FREE(enable_file_name);
2833 return HTLoadError(sink, 403, DISALLOWED_SELECTIVE_ACCESS);
2834 }
2835 }
2836
2837 FREE(filename);
2838 FREE(nodename);
2839 return HTVMSBrowseDir(addr, anchor, format_out, sink);
2840 }
2841 }
2842
2843 if (decompressAndParse(anchor,
2844 format_out,
2845 sink,
2846 nodename,
2847 filename,
2848 myEncoding,
2849 format,
2850 &status)) {
2851 FREE(nodename);
2852 FREE(filename);
2853 return status;
2854 }
2855 FREE(filename);
2856
2857 #else /* not VMS: */
2858
2859 FREE(filename);
2860
2861 /*
2862 * For unix, we try to translate the name into the name of a transparently
2863 * mounted file.
2864 *
2865 * Not allowed in secure (HTClientHost) situations. TBL 921019
2866 */
2867 #ifndef NO_UNIX_IO
2868 /* Need protection here for telnet server but not httpd server. */
2869
2870 if (!HTSecure) { /* try local file system */
2871 char *localname = HTLocalName(addr);
2872 struct stat dir_info;
2873
2874 #ifdef HAVE_READDIR
2875 /*
2876 * Multiformat handling.
2877 *
2878 * If needed, scan directory to find a good file. Bug: We don't stat
2879 * the file to find the length.
2880 */
2881 if ((strlen(localname) > strlen(MULTI_SUFFIX)) &&
2882 (0 == strcmp(localname + strlen(localname) - strlen(MULTI_SUFFIX),
2883 MULTI_SUFFIX))) {
2884 DIR *dp = 0;
2885 BOOL forget_multi = NO;
2886
2887 STRUCT_DIRENT *dirbuf;
2888 float best = (float) NO_VALUE_FOUND; /* So far best is bad */
2889 HTFormat best_rep = NULL; /* Set when rep found */
2890 HTAtom *best_enc = NULL;
2891 char *best_name = NULL; /* Best dir entry so far */
2892
2893 char *base = strrchr(localname, '/');
2894 size_t baselen = 0;
2895
2896 if (!base || base == localname) {
2897 forget_multi = YES;
2898 } else {
2899 *base++ = '\0'; /* Just got directory name */
2900 baselen = strlen(base) - strlen(MULTI_SUFFIX);
2901 base[baselen] = '\0'; /* Chop off suffix */
2902
2903 dp = opendir(localname);
2904 }
2905 if (forget_multi || !dp) {
2906 FREE(localname);
2907 FREE(nodename);
2908 return HTLoadError(sink, 500, FAILED_DIR_SCAN);
2909 }
2910
2911 while ((dirbuf = readdir(dp)) != NULL) {
2912 /*
2913 * While there are directory entries to be read...
2914 */
2915 #ifdef STRUCT_DIRENT__D_INO
2916 if (dirbuf->d_ino == 0)
2917 continue; /* if the entry is not being used, skip it */
2918 #endif
2919 if (strlen(dirbuf->d_name) > baselen && /* Match? */
2920 !StrNCmp(dirbuf->d_name, base, baselen)) {
2921 HTAtom *enc;
2922 HTFormat rep = HTFileFormat(dirbuf->d_name, &enc, NULL);
2923 float filevalue = HTFileValue(dirbuf->d_name);
2924 float value = HTStackValue(rep, format_out,
2925 filevalue,
2926 0L /* @@@@@@ */ );
2927
2928 if (value <= 0.0) {
2929 int rootlen = 0;
2930 const char *atomname = NULL;
2931 CompressFileType cft =
2932 HTCompressFileType(dirbuf->d_name, ".", &rootlen);
2933 char *cp = NULL;
2934
2935 enc = NULL;
2936 if (cft != cftNone) {
2937 StrAllocCopy(cp, dirbuf->d_name);
2938 cp[rootlen] = '\0';
2939 format = HTFileFormat(cp, NULL, NULL);
2940 FREE(cp);
2941 value = HTStackValue(format, format_out,
2942 filevalue, 0L);
2943 }
2944 switch (cft) {
2945 case cftCompress:
2946 atomname = "application/x-compressed";
2947 break;
2948 case cftGzip:
2949 atomname = "application/x-gzip";
2950 break;
2951 case cftDeflate:
2952 atomname = "application/x-deflate";
2953 break;
2954 case cftBzip2:
2955 atomname = "application/x-bzip2";
2956 break;
2957 case cftNone:
2958 break;
2959 }
2960
2961 if (atomname != NULL) {
2962 value = HTStackValue(format, format_out,
2963 filevalue, 0L);
2964 if (value <= 0.0) {
2965 format = HTAtom_for(atomname);
2966 value = HTStackValue(format, format_out,
2967 filevalue, 0L);
2968 }
2969 if (value <= 0.0) {
2970 format = HTAtom_for("www/compressed");
2971 value = HTStackValue(format, format_out,
2972 filevalue, 0L);
2973 }
2974 }
2975 }
2976 if (value < NO_VALUE_FOUND) {
2977 CTRACE((tfp,
2978 "HTLoadFile: value of presenting %s is %f\n",
2979 HTAtom_name(rep), value));
2980 if (value > best) {
2981 best_rep = rep;
2982 best_enc = enc;
2983 best = value;
2984 StrAllocCopy(best_name, dirbuf->d_name);
2985 }
2986 } /* if best so far */
2987 }
2988 /* if match */
2989 } /* end while directory entries left to read */
2990 closedir(dp);
2991
2992 if (best_rep) {
2993 format = best_rep;
2994 myEncoding = best_enc;
2995 base[-1] = '/'; /* Restore directory name */
2996 base[0] = '\0';
2997 StrAllocCat(localname, best_name);
2998 FREE(best_name);
2999 } else { /* If not found suitable file */
3000 FREE(localname);
3001 FREE(nodename);
3002 return HTLoadError(sink, 403, FAILED_NO_REPRESENTATION);
3003 }
3004 /*NOTREACHED */
3005 }
3006 /* if multi suffix */
3007 /*
3008 * Check to see if the 'localname' is in fact a directory. If it is
3009 * create a new hypertext object containing a list of files and
3010 * subdirectories contained in the directory. All of these are links
3011 * to the directories or files listed. NB This assumes the existence
3012 * of a type 'STRUCT_DIRENT', which will hold the directory entry, and
3013 * a type 'DIR' which is used to point to the current directory being
3014 * read.
3015 */
3016 #if defined(USE_DOS_DRIVES)
3017 if (strlen(localname) == 2 && LYIsDosDrive(localname))
3018 LYAddPathSep(&localname);
3019 #endif
3020 if (HTStat(localname, &dir_info) == -1) /* get file information */
3021 {
3022 /* if can't read file information */
3023 CTRACE((tfp, "HTLoadFile: can't stat %s\n", localname));
3024
3025 } else { /* Stat was OK */
3026
3027 if (S_ISDIR(dir_info.st_mode)) {
3028 /*
3029 * If localname is a directory.
3030 */
3031 DIR *dp;
3032 struct stat file_info;
3033
3034 CTRACE((tfp, "%s is a directory\n", localname));
3035
3036 /*
3037 * Check directory access. Selective access means only those
3038 * directories containing a marker file can be browsed.
3039 */
3040 if (HTDirAccess == HT_DIR_FORBID) {
3041 FREE(localname);
3042 FREE(nodename);
3043 return HTLoadError(sink, 403, DISALLOWED_DIR_SCAN);
3044 }
3045
3046 if (HTDirAccess == HT_DIR_SELECTIVE) {
3047 char *enable_file_name = NULL;
3048
3049 HTSprintf0(&enable_file_name, "%s/%s", localname, HT_DIR_ENABLE_FILE);
3050 if (stat(enable_file_name, &file_info) != 0) {
3051 FREE(localname);
3052 FREE(nodename);
3053 FREE(enable_file_name);
3054 return HTLoadError(sink, 403, DISALLOWED_SELECTIVE_ACCESS);
3055 }
3056 }
3057
3058 CTRACE((tfp, "Opening directory %s\n", localname));
3059 dp = opendir(localname);
3060 if (!dp) {
3061 FREE(localname);
3062 FREE(nodename);
3063 return HTLoadError(sink, 403, FAILED_DIR_UNREADABLE);
3064 }
3065
3066 /*
3067 * Directory access is allowed and possible.
3068 */
3069
3070 status = print_local_dir(dp, localname,
3071 anchor, format_out, sink);
3072 closedir(dp);
3073 FREE(localname);
3074 FREE(nodename);
3075 return status; /* document loaded, maybe partial */
3076
3077 }
3078 /* end if localname is a directory */
3079 if (S_ISREG(dir_info.st_mode)) {
3080 #ifdef LONG_MAX
3081 if (dir_info.st_size <= LONG_MAX)
3082 #endif
3083 anchor->content_length = (long) dir_info.st_size;
3084 }
3085
3086 } /* end if file stat worked */
3087
3088 /* End of directory reading section
3089 */
3090 #endif /* HAVE_READDIR */
3091 if (decompressAndParse(anchor,
3092 format_out,
3093 sink,
3094 nodename,
3095 localname,
3096 myEncoding,
3097 format,
3098 &status)) {
3099 FREE(nodename);
3100 FREE(localname);
3101 return status;
3102 }
3103 FREE(localname);
3104 } /* local unix file system */
3105 #endif /* !NO_UNIX_IO */
3106 #endif /* VMS */
3107
3108 #ifndef DECNET
3109 /*
3110 * Now, as transparently mounted access has failed, we try FTP.
3111 */
3112 {
3113 /*
3114 * Deal with case-sensitivity differences on VMS versus Unix.
3115 */
3116 #ifdef VMS
3117 if (strcasecomp(nodename, HTHostName()) != 0)
3118 #else
3119 if (strcmp(nodename, HTHostName()) != 0)
3120 #endif /* VMS */
3121 {
3122 status = -1;
3123 FREE(nodename);
3124 if (StrNCmp(addr, "file://localhost", 16)) {
3125 /* never go to ftp site when URL
3126 * is file://localhost
3127 */
3128 #ifndef DISABLE_FTP
3129 status = HTFTPLoad(addr, anchor, format_out, sink);
3130 #endif /* DISABLE_FTP */
3131 }
3132 return status;
3133 }
3134 FREE(nodename);
3135 }
3136 #endif /* !DECNET */
3137
3138 /*
3139 * All attempts have failed.
3140 */
3141 {
3142 CTRACE((tfp, "Can't open `%s', errno=%d\n", addr, SOCKET_ERRNO));
3143
3144 return HTLoadError(sink, 403, FAILED_FILE_UNREADABLE);
3145 }
3146 }
3147
/*
 * Table of external program paths, indexed by ProgramPaths code.  It is read
 * via HTGetProgramPath(), written via HTSetProgramPath() and reset by
 * HTInitProgramPaths().
 */
static const char *program_paths[pp_Last];
3149
3150 /*
3151 * Given a program number, return its path
3152 */
HTGetProgramPath(ProgramPaths code)3153 const char *HTGetProgramPath(ProgramPaths code)
3154 {
3155 const char *result = NULL;
3156
3157 if (code > ppUnknown && code < pp_Last)
3158 result = program_paths[code];
3159 return result;
3160 }
3161
3162 /*
3163 * Store a program's path. The caller must allocate the string used for 'path',
3164 * since HTInitProgramPaths() may free it.
3165 */
HTSetProgramPath(ProgramPaths code,const char * path)3166 void HTSetProgramPath(ProgramPaths code, const char *path)
3167 {
3168 if (code > ppUnknown && code < pp_Last) {
3169 program_paths[code] = isEmpty(path) ? 0 : path;
3170 }
3171 }
3172
3173 /*
3174 * Reset the list of known program paths to the ones that are compiled-in
3175 */
HTInitProgramPaths(BOOL init)3176 void HTInitProgramPaths(BOOL init)
3177 {
3178 ProgramPaths code;
3179 int n;
3180 const char *path;
3181 const char *test;
3182
3183 for (n = (int) ppUnknown + 1; n < (int) pp_Last; ++n) {
3184 switch (code = (ProgramPaths) n) {
3185 #ifdef BZIP2_PATH
3186 case ppBZIP2:
3187 path = BZIP2_PATH;
3188 break;
3189 #endif
3190 #ifdef CHMOD_PATH
3191 case ppCHMOD:
3192 path = CHMOD_PATH;
3193 break;
3194 #endif
3195 #ifdef COMPRESS_PATH
3196 case ppCOMPRESS:
3197 path = COMPRESS_PATH;
3198 break;
3199 #endif
3200 #ifdef COPY_PATH
3201 case ppCOPY:
3202 path = COPY_PATH;
3203 break;
3204 #endif
3205 #ifdef CSWING_PATH
3206 case ppCSWING:
3207 path = CSWING_PATH;
3208 break;
3209 #endif
3210 #ifdef GZIP_PATH
3211 case ppGZIP:
3212 path = GZIP_PATH;
3213 break;
3214 #endif
3215 #ifdef INFLATE_PATH
3216 case ppINFLATE:
3217 path = INFLATE_PATH;
3218 break;
3219 #endif
3220 #ifdef INSTALL_PATH
3221 case ppINSTALL:
3222 path = INSTALL_PATH;
3223 break;
3224 #endif
3225 #ifdef MKDIR_PATH
3226 case ppMKDIR:
3227 path = MKDIR_PATH;
3228 break;
3229 #endif
3230 #ifdef MV_PATH
3231 case ppMV:
3232 path = MV_PATH;
3233 break;
3234 #endif
3235 #ifdef RLOGIN_PATH
3236 case ppRLOGIN:
3237 path = RLOGIN_PATH;
3238 break;
3239 #endif
3240 #ifdef RM_PATH
3241 case ppRM:
3242 path = RM_PATH;
3243 break;
3244 #endif
3245 #ifdef RMDIR_PATH
3246 case ppRMDIR:
3247 path = RMDIR_PATH;
3248 break;
3249 #endif
3250 #ifdef SETFONT_PATH
3251 case ppSETFONT:
3252 path = SETFONT_PATH;
3253 break;
3254 #endif
3255 #ifdef TAR_PATH
3256 case ppTAR:
3257 path = TAR_PATH;
3258 break;
3259 #endif
3260 #ifdef TELNET_PATH
3261 case ppTELNET:
3262 path = TELNET_PATH;
3263 break;
3264 #endif
3265 #ifdef TN3270_PATH
3266 case ppTN3270:
3267 path = TN3270_PATH;
3268 break;
3269 #endif
3270 #ifdef TOUCH_PATH
3271 case ppTOUCH:
3272 path = TOUCH_PATH;
3273 break;
3274 #endif
3275 #ifdef UNCOMPRESS_PATH
3276 case ppUNCOMPRESS:
3277 path = UNCOMPRESS_PATH;
3278 break;
3279 #endif
3280 #ifdef UNZIP_PATH
3281 case ppUNZIP:
3282 path = UNZIP_PATH;
3283 break;
3284 #endif
3285 #ifdef UUDECODE_PATH
3286 case ppUUDECODE:
3287 path = UUDECODE_PATH;
3288 break;
3289 #endif
3290 #ifdef ZCAT_PATH
3291 case ppZCAT:
3292 path = ZCAT_PATH;
3293 break;
3294 #endif
3295 #ifdef ZIP_PATH
3296 case ppZIP:
3297 path = ZIP_PATH;
3298 break;
3299 #endif
3300 default:
3301 path = NULL;
3302 break;
3303 }
3304 test = HTGetProgramPath(code);
3305 if (test != NULL && test != path) {
3306 free(DeConst(test));
3307 }
3308 if (init) {
3309 HTSetProgramPath(code, path);
3310 }
3311 }
3312 }
3313
/*
 *	Protocol descriptors
 *
 * Both the "ftp" and the "file" access schemes are loaded via HTLoadFile();
 * only "file" provides a third member (HTFileSaveStream), "ftp" has none.
 * The two forms below differ only in how GLOBALDEF is spelled.
 */
#ifdef GLOBALDEF_IS_MACRO
#define _HTFILE_C_1_INIT { "ftp", HTLoadFile, 0 }
GLOBALDEF(HTProtocol, HTFTP, _HTFILE_C_1_INIT);
#define _HTFILE_C_2_INIT { "file", HTLoadFile, HTFileSaveStream }
GLOBALDEF(HTProtocol, HTFile, _HTFILE_C_2_INIT);
#else
GLOBALDEF HTProtocol HTFTP =
{"ftp", HTLoadFile, 0};
GLOBALDEF HTProtocol HTFile =
{"file", HTLoadFile, HTFileSaveStream};
#endif /* GLOBALDEF_IS_MACRO */
3328