1 /*
2  * html.c: implementation of html.h.
3  */
4 
5 #include "agedu.h"
6 #include "html.h"
7 #include "alloc.h"
8 #include "trie.h"
9 #include "index.h"
10 
/*
 * Highest colour index handled by add_to_colour_bar(): 0-255 fade
 * from red to yellow, 256-511 fade from yellow to green.
 */
#define MAXCOLOUR 511

/*
 * Working state shared by the routines that build one HTML report.
 */
struct html {
    /* Output text accumulated by vhtprintf(). */
    char *buf;
    /* Number of bytes of 'buf' currently in use. */
    size_t buflen;
    /* Number of bytes allocated for 'buf'. */
    size_t bufsize;

    /* The trie handed to the trie_*() and index_*() lookups. */
    const void *t;

    /* Divisor for bar widths and percentages in write_report_line(). */
    unsigned long long totalsize;
    /* Age limits of the colour coding, as chosen in html_query(). */
    unsigned long long oldest;
    unsigned long long newest;

    /* Scratch pathname buffer allocated in html_query(). */
    char *path2;
    /* URI of the page being generated, or NULL without a URI format. */
    char *oururi;
    /* Not referenced in the part of the file visible here. */
    size_t hreflen;
    /* URI format string copied from the html_config. */
    const char *uriformat;

    /* Access-time cutoffs passed to fetch_size(), one per colour. */
    unsigned long long thresholds[MAXCOLOUR];
    /* Text for the title="..." attribute of each colour's bar cell. */
    char *titletexts[MAXCOLOUR+1];

    /* Time at which the report is generated (time(NULL)). */
    time_t now;
};
26 
vhtprintf(struct html * ctx,const char * fmt,va_list ap)27 static void vhtprintf(struct html *ctx, const char *fmt, va_list ap)
28 {
29     va_list ap2;
30     int size, size2;
31     char testbuf[2];
32 
33     va_copy(ap2, ap);
34     /*
35      * Some C libraries (Solaris, I'm looking at you) don't like
36      * an output buffer size of zero in vsnprintf, but will return
37      * sensible values given any non-zero buffer size. Hence, we
38      * use testbuf to gauge the length of the string.
39      */
40     size = vsnprintf(testbuf, 1, fmt, ap2);
41     va_end(ap2);
42 
43     if (ctx->buflen + size >= ctx->bufsize) {
44 	ctx->bufsize = (ctx->buflen + size) * 3 / 2 + 1024;
45 	ctx->buf = sresize(ctx->buf, ctx->bufsize, char);
46     }
47     size2 = vsnprintf(ctx->buf + ctx->buflen, ctx->bufsize - ctx->buflen,
48 		      fmt, ap);
49     assert(size == size2);
50     ctx->buflen += size;
51 }
52 
/*
 * Variadic front end to vhtprintf(): collects the arguments after
 * 'fmt' and appends the formatted text to ctx's output buffer.
 */
static void htprintf(struct html *ctx, const char *fmt, ...)
{
    va_list args;

    va_start(args, fmt);
    vhtprintf(ctx, fmt, args);
    va_end(args);
}
60 
round_and_format_age(struct html * ctx,unsigned long long age,char * buf,int direction)61 static unsigned long long round_and_format_age(struct html *ctx,
62 					       unsigned long long age,
63 					       char *buf, int direction)
64 {
65     struct tm tm, tm2;
66     char newbuf[80];
67     unsigned long long ret, newret;
68     int i;
69     int ym;
70     static const int minutes[] = { 5, 10, 15, 30, 45 };
71 
72     tm = *localtime(&ctx->now);
73     ym = tm.tm_year * 12 + tm.tm_mon;
74 
75     ret = ctx->now;
76     strcpy(buf, "Now");
77 
78     for (i = 0; i < lenof(minutes); i++) {
79 	newret = ctx->now - minutes[i] * 60;
80 	sprintf(newbuf, "%d minutes", minutes[i]);
81 	if (newret < age)
82 	    goto finish;
83 	strcpy(buf, newbuf);
84 	ret = newret;
85     }
86 
87     for (i = 1; i < 24; i++) {
88 	newret = ctx->now - i * (60*60);
89 	sprintf(newbuf, "%d hour%s", i, i==1 ? "" : "s");
90 	if (newret < age)
91 	    goto finish;
92 	strcpy(buf, newbuf);
93 	ret = newret;
94     }
95 
96     for (i = 1; i < 7; i++) {
97 	newret = ctx->now - i * (24*60*60);
98 	sprintf(newbuf, "%d day%s", i, i==1 ? "" : "s");
99 	if (newret < age)
100 	    goto finish;
101 	strcpy(buf, newbuf);
102 	ret = newret;
103     }
104 
105     for (i = 1; i < 4; i++) {
106 	newret = ctx->now - i * (7*24*60*60);
107 	sprintf(newbuf, "%d week%s", i, i==1 ? "" : "s");
108 	if (newret < age)
109 	    goto finish;
110 	strcpy(buf, newbuf);
111 	ret = newret;
112     }
113 
114     for (i = 1; i < 11; i++) {
115 	tm2 = tm;		       /* structure copy */
116 	tm2.tm_year = (ym - i) / 12;
117 	tm2.tm_mon = (ym - i) % 12;
118 	newret = mktime(&tm2);
119 	sprintf(newbuf, "%d month%s", i, i==1 ? "" : "s");
120 	if (newret < age)
121 	    goto finish;
122 	strcpy(buf, newbuf);
123 	ret = newret;
124     }
125 
126     for (i = 1;; i++) {
127 	tm2 = tm;		       /* structure copy */
128 	tm2.tm_year = (ym - i*12) / 12;
129 	tm2.tm_mon = (ym - i*12) % 12;
130 	newret = mktime(&tm2);
131 	sprintf(newbuf, "%d year%s", i, i==1 ? "" : "s");
132 	if (newret < age)
133 	    goto finish;
134 	strcpy(buf, newbuf);
135 	ret = newret;
136     }
137 
138     finish:
139     if (direction > 0) {
140 	/*
141 	 * Round toward newest, i.e. use the existing (buf,ret).
142 	 */
143     } else if (direction < 0) {
144 	/*
145 	 * Round toward oldest, i.e. use (newbuf,newret);
146 	 */
147 	strcpy(buf, newbuf);
148 	ret = newret;
149     } else {
150 	/*
151 	 * Round to nearest.
152 	 */
153 	if (ret - age > age - newret) {
154 	    strcpy(buf, newbuf);
155 	    ret = newret;
156 	}
157     }
158     return ret;
159 }
160 
/*
 * Look up 'path' in the trie twice: once as given, and once after
 * make_successor() has been applied to it. The two trie_before()
 * results are stored in *xi1 and *xi2 respectively.
 *
 * The string terminator and the character before it are saved before
 * make_successor() is called and written back afterwards, so 'path'
 * holds its original text again on return.
 */
static void get_indices(const void *t, char *path,
                        unsigned long *xi1, unsigned long *xi2)
{
    size_t len = strlen(path);
    int saved_end = path[len];
    int saved_last = 0;

    if (len > 0)
        saved_last = path[len-1];

    *xi1 = trie_before(t, path);
    make_successor(path);
    *xi2 = trie_before(t, path);

    /* Undo whatever make_successor did to the tail of the string. */
    path[len] = saved_end;
    if (len > 0)
        path[len-1] = saved_last;
}
174 
fetch_size(const void * t,unsigned long xi1,unsigned long xi2,unsigned long long atime)175 static unsigned long long fetch_size(const void *t,
176 				     unsigned long xi1, unsigned long xi2,
177 				     unsigned long long atime)
178 {
179     if (xi2 - xi1 == 1) {
180 	/*
181 	 * We are querying an individual file, so we should not
182 	 * depend on the index entries either side of the node,
183 	 * since they almost certainly don't both exist. Instead,
184 	 * just look up the file's size and atime in the main trie.
185 	 */
186 	const struct trie_file *f = trie_getfile(t, xi1);
187 	if (f->atime < atime)
188 	    return f->size;
189 	else
190 	    return 0;
191     } else {
192 	return index_query(t, xi2, atime) - index_query(t, xi1, atime);
193     }
194 }
195 
/*
 * Append at most 'n' characters of 's' (stopping early at a NUL) to
 * the output, escaped for use as HTML text. '&', '<' and '>' become
 * entities; other printable ASCII is copied as is; anything else is
 * shown as "[xx]" in hex, wrapped in <i>...</i> when 'italics' is
 * nonzero.
 */
static void htescape(struct html *ctx, const char *s, int n, int italics)
{
    for (; n > 0 && *s; n--, s++) {
        unsigned char ch = (unsigned char)*s;

        switch (ch) {
          case '&':
            htprintf(ctx, "&amp;");
            break;
          case '<':
            htprintf(ctx, "&lt;");
            break;
          case '>':
            htprintf(ctx, "&gt;");
            break;
          default:
            if (ch >= ' ' && ch < '\177') {
                htprintf(ctx, "%c", ch);
            } else {
                if (italics)
                    htprintf(ctx, "<i>");
                htprintf(ctx, "[%02x]", ch);
                if (italics)
                    htprintf(ctx, "</i>");
            }
            break;
        }
    }
}
218 
/*
 * Open the borderless one-row table that holds a colour bar.
 */
static void begin_colour_bar(struct html *ctx)
{
    htprintf(ctx,
             "<table cellspacing=0 cellpadding=0 style=\"border:0\">\n"
             "<tr>\n");
}
224 
add_to_colour_bar(struct html * ctx,int colour,int pixels)225 static void add_to_colour_bar(struct html *ctx, int colour, int pixels)
226 {
227     int r, g, b;
228 
229     if (colour >= 0 && colour < 256)   /* red -> yellow fade */
230 	r = 255, g = colour, b = 0;
231     else if (colour >= 256 && colour <= 511)   /* yellow -> green fade */
232 	r = 511 - colour, g = 255, b = 0;
233     else			       /* background grey */
234 	r = g = b = 240;
235 
236     if (pixels > 0) {
237 	htprintf(ctx, "<td style=\"width:%dpx; height:1em; "
238 		 "background-color:#%02x%02x%02x\"",
239 		 pixels, r, g, b);
240 	if (colour >= 0)
241 	    htprintf(ctx, " title=\"%s\"", ctx->titletexts[colour]);
242 	htprintf(ctx, "></td>\n");
243     }
244 }
245 
/*
 * Close the table row and table opened by begin_colour_bar().
 */
static void end_colour_bar(struct html *ctx)
{
    htprintf(ctx,
             "</tr>\n"
             "</table>\n");
}
250 
251 struct vector {
252     bool want_href, essential;
253     char *name;
254     bool literal;   /* should the name be formatted in fixed-pitch? */
255     unsigned long index;
256     unsigned long long sizes[MAXCOLOUR+1];
257 };
258 
vec_compare(const void * av,const void * bv)259 int vec_compare(const void *av, const void *bv)
260 {
261     const struct vector *a = *(const struct vector **)av;
262     const struct vector *b = *(const struct vector **)bv;
263 
264     if (a->sizes[MAXCOLOUR] > b->sizes[MAXCOLOUR])
265 	return -1;
266     else if (a->sizes[MAXCOLOUR] < b->sizes[MAXCOLOUR])
267 	return +1;
268     else if (a->want_href < b->want_href)
269 	return +1;
270     else if (a->want_href > b->want_href)
271 	return -1;
272     else if (a->want_href)
273 	return strcmp(a->name, b->name);
274     else if (a->index < b->index)
275 	return -1;
276     else if (a->index > b->index)
277 	return +1;
278     else if (a->essential < b->essential)
279 	return +1;
280     else if (a->essential > b->essential)
281 	return -1;
282     return 0;
283 }
284 
make_vector(struct html * ctx,char * path,bool want_href,bool essential,char * name,bool literal)285 static struct vector *make_vector(struct html *ctx, char *path,
286 				  bool want_href, bool essential,
287 				  char *name, bool literal)
288 {
289     unsigned long xi1, xi2;
290     struct vector *vec = snew(struct vector);
291     int i;
292 
293     vec->want_href = want_href;
294     vec->essential = essential;
295     vec->name = name ? dupstr(name) : NULL;
296     vec->literal = literal;
297 
298     get_indices(ctx->t, path, &xi1, &xi2);
299 
300     vec->index = xi1;
301 
302     for (i = 0; i <= MAXCOLOUR; i++) {
303 	unsigned long long atime;
304 	if (i == MAXCOLOUR)
305 	    atime = ULLONG_MAX;
306 	else
307 	    atime = ctx->thresholds[i];
308 	vec->sizes[i] = fetch_size(ctx->t, xi1, xi2, atime);
309     }
310 
311     return vec;
312 }
313 
/*
 * Write a grey table row spanning all four columns, containing
 * 'title'. The title is inserted as is, without HTML escaping.
 */
static void print_heading(struct html *ctx, const char *title)
{
    htprintf(ctx,
             "<tr style=\"padding: 0.2em; background-color:#e0e0e0\">\n"
             "<td colspan=4 align=center>%s</td>\n"
             "</tr>\n",
             title);
}
319 
/*
 * Choose a unit for displaying 'size' (a byte count).
 *
 * The size is divided by 1024 until it drops below 1024 or the
 * largest unit is reached. *display_size receives the scaled value
 * and *fmt a printf format, taking one double, that prints it with
 * the unit suffix.
 */
static void compute_display_size(unsigned long long size,
                                 const char **fmt, double *display_size)
{
    static const char *const fmts[] = {
        "%g B", "%g kB", "%#.1f MB", "%#.1f GB", "%#.1f TB",
        "%#.1f PB", "%#.1f EB", "%#.1f ZB", "%#.1f YB"
    };
    unsigned long long remaining = size;
    double scale = 1.0;
    int unit;

    for (unit = 0; remaining >= 1024 && unit < lenof(fmts)-1; unit++) {
        remaining >>= 10;
        scale *= 1024.0;
    }

    *display_size = size / scale;
    *fmt = fmts[unit];
}
341 
/*
 * One alternative from a URI format string. Alternatives are
 * separated by "%|"; each is a literal prefix, optionally followed by
 * a single formatting directive and a literal suffix.
 */
struct format_option {
    const char *prefix, *suffix;       /* may include '%%' */
    int prefixlen, suffixlen;          /* each '%%' counts as one char */
    char fmttype;                      /* 0 for none, or 'n' or 'p' */
    bool translate_pathsep;            /* pathsep rendered as '/'? */
    bool shorten_path;                 /* omit common prefix? */
};

/*
 * Gets the next format option from a format string. Advances '*fmt'
 * past it, or sets it to NULL if nothing is left.
 *
 * A directive is '%', then any number of the flags '/' (don't
 * translate the path separator) and '-' (don't shorten the path),
 * then 'n' or 'p'. A malformed directive, or a '%' in the suffix that
 * is followed by neither '%' nor '|', trips an assertion.
 *
 * Every field of the result is always set. An option with no
 * directive has fmttype 0, an empty suffix, and the same flag values
 * as a directive written without flags.
 */
struct format_option get_format_option(const char **fmt)
{
    struct format_option ret;

    /*
     * Fill in every field before scanning, so that the early returns
     * below never hand back a structure with indeterminate members.
     */
    ret.prefix = *fmt;
    ret.prefixlen = 0;
    ret.suffix = *fmt;
    ret.suffixlen = 0;
    ret.fmttype = '\0';
    ret.translate_pathsep = true;
    ret.shorten_path = true;

    /*
     * Scan for prefix of format.
     */
    while (1) {
        if (**fmt == '\0') {
            /*
             * No formatting directive, and this is the last option.
             */
            ret.suffix = *fmt;
            ret.suffixlen = 0;
            ret.fmttype = '\0';
            *fmt = NULL;
            return ret;
        } else if (**fmt == '%') {
            if ((*fmt)[1] == '%') {
                (*fmt) += 2;           /* just advance one extra */
                ret.prefixlen++;
            } else if ((*fmt)[1] == '|') {
                /*
                 * No formatting directive.
                 */
                ret.suffix = *fmt;
                ret.suffixlen = 0;
                ret.fmttype = '\0';
                (*fmt) += 2;           /* advance to start of next option */
                return ret;
            } else {
                break;
            }
        } else {
            (*fmt)++;                  /* normal character */
            ret.prefixlen++;
        }
    }

    /*
     * Interpret formatting directive with flags.
     */
    (*fmt)++;                          /* step over the '%' */
    while (1) {
        char c = *(*fmt)++;
        assert(c);
        if (c == '/') {
            ret.translate_pathsep = false;
        } else if (c == '-') {
            ret.shorten_path = false;
        } else {
            assert(c == 'n' || c == 'p');
            ret.fmttype = c;
            break;
        }
    }

    /*
     * Scan for suffix.
     */
    ret.suffix = *fmt;
    ret.suffixlen = 0;
    while (1) {
        if (**fmt == '\0') {
            /*
             * This is the last option.
             */
            *fmt = NULL;
            return ret;
        } else if (**fmt != '%') {
            (*fmt)++;                  /* normal character */
            ret.suffixlen++;
        } else {
            if ((*fmt)[1] == '%') {
                (*fmt) += 2;           /* just advance one extra */
                ret.suffixlen++;
            } else {
                assert((*fmt)[1] == '|');
                (*fmt) += 2;           /* advance to start of next option */
                return ret;
            }
        }
    }
}
442 
format_string_inner(const char * fmt,int nescape,unsigned long index,const void * t)443 char *format_string_inner(const char *fmt, int nescape,
444                           unsigned long index, const void *t)
445 {
446     int maxlen;
447     char *ret = NULL, *p = NULL;
448     char *path = NULL, *q = NULL;
449     char pathsep = trie_pathsep(t);
450     int maxpathlen = trie_maxpathlen(t);
451     int charindex;
452 
453     while (fmt) {
454         struct format_option opt = get_format_option(&fmt);
455         if (index && !opt.fmttype)
456             continue; /* option is only good for the root, which this isn't */
457 
458         maxlen = opt.prefixlen + opt.suffixlen + 1;
459         switch (opt.fmttype) {
460           case 'n':
461             maxlen += 40;              /* generous length for an integer */
462             break;
463           case 'p':
464             maxlen += 3*maxpathlen;    /* might have to escape everything */
465             break;
466         }
467         ret = snewn(maxlen, char);
468         p = ret;
469         while (opt.prefixlen-- > 0) {
470             if ((*p++ = *opt.prefix++) == '%')
471                 opt.prefix++;
472         }
473         switch (opt.fmttype) {
474           case 'n':
475             p += sprintf(p, "%lu", index);
476             break;
477           case 'p':
478             path = snewn(1+trie_maxpathlen(t), char);
479             if (opt.shorten_path) {
480                 trie_getpath(t, 0, path);
481                 q = path + strlen(path);
482                 trie_getpath(t, index, path);
483                 if (*q == pathsep)
484                     q++;
485             } else {
486                 trie_getpath(t, index, path);
487                 q = path;
488             }
489             charindex = 0;
490             while (*q) {
491                 char c = *q++;
492                 if (c == pathsep && opt.translate_pathsep) {
493                     *p++ = '/';
494                     charindex = 0;
495                 } else if (charindex < nescape ||
496                            (!isalnum((unsigned char)c) &&
497                             ((charindex == 0 && c=='.') ||
498                              !strchr("-.@_", c)))) {
499                     p += sprintf(p, "=%02X", (unsigned char)c);
500                     charindex++;
501                 } else {
502                     *p++ = c;
503                     charindex++;
504                 }
505             }
506             sfree(path);
507             break;
508         }
509         while (opt.suffixlen-- > 0) {
510             if ((*p++ = *opt.suffix++) == '%')
511                 opt.suffix++;
512         }
513         *p = '\0';
514         assert(p - ret < maxlen);
515         return ret;
516     }
517     assert(!"Getting here implies an incomplete set of formats");
518 }
519 
parse_path(const void * t,const char * path,const char * fmt,unsigned long * index)520 int parse_path(const void *t, const char *path,
521                const char *fmt, unsigned long *index)
522 {
523     int len = strlen(path);
524     int midlen;
525     const char *p, *q;
526     char *r;
527     char pathsep = trie_pathsep(t);
528 
529     while (fmt) {
530         struct format_option opt = get_format_option(&fmt);
531 
532         /*
533          * Check prefix and suffix.
534          */
535         midlen = len - opt.prefixlen - opt.suffixlen;
536         if (midlen < 0)
537             continue;                  /* prefix and suffix don't even fit */
538 
539         p = path;
540         while (opt.prefixlen > 0) {
541             char c = *opt.prefix++;
542             if (c == '%')
543                 opt.prefix++;
544             if (*p != c)
545                 break;
546             p++;
547             opt.prefixlen--;
548         }
549         if (opt.prefixlen > 0)
550             continue;                  /* prefix didn't match */
551 
552         q = path + len - opt.suffixlen;
553         while (opt.suffixlen > 0) {
554             char c = *opt.suffix++;
555             if (c == '%')
556                 opt.suffix++;
557             if (*q != c)
558                 break;
559             q++;
560             opt.suffixlen--;
561         }
562         if (opt.suffixlen > 0)
563             continue;                  /* suffix didn't match */
564 
565         /*
566          * Check the data in between. p points at it, and it's midlen
567          * characters long.
568          */
569         if (opt.fmttype == '\0') {
570             if (midlen == 0) {
571                 /*
572                  * Successful match against a root format.
573                  */
574                 *index = 0;
575                 return 1;
576             }
577         } else if (opt.fmttype == 'n') {
578             *index = 0;
579             while (midlen > 0) {
580                 if (*p >= '0' && *p <= '9')
581                     *index = *index * 10 + (*p - '0');
582                 else
583                     break;
584                 midlen--;
585                 p++;
586             }
587             if (midlen == 0) {
588                 /*
589                  * Successful match against a numeric format.
590                  */
591                 return 1;
592             }
593         } else {
594             assert(opt.fmttype == 'p');
595 
596             int maxoutlen = trie_maxpathlen(t) + 1;
597             int maxinlen = midlen + 1;
598             char triepath[maxinlen+maxoutlen];
599 
600             if (opt.shorten_path) {
601                 trie_getpath(t, 0, triepath);
602                 r = triepath + strlen(triepath);
603                 if (r > triepath && r[-1] != pathsep)
604                     *r++ = pathsep;
605             } else {
606                 r = triepath;
607             }
608 
609             while (midlen > 0) {
610                 if (*p == '/' && opt.translate_pathsep) {
611                     *r++ = pathsep;
612                     p++;
613                     midlen--;
614                 } else if (*p == '=') {
615                     /*
616                      * We intentionally do not check whether the
617                      * escaped character _should_ have been escaped
618                      * according to the rules in html_format_path.
619                      *
620                      * All clients of this parsing function, after a
621                      * successful parse, call html_format_path to find
622                      * the canonical URI for the same index and return
623                      * an HTTP redirect if the provided URI was not
624                      * exactly equal to that canonical form. This is
625                      * critical when the correction involves adding or
626                      * removing a trailing slash (because then
627                      * relative hrefs on the generated page can be
628                      * computed with respect to the canonical URI
629                      * instead of having to remember what the actual
630                      * URI was), but also has the useful effect that
631                      * if a user attempts to type in (guess) a URI by
632                      * hand they don't have to remember the escaping
633                      * rules - as long as they type _something_ that
634                      * this code can parse into a recognisable
635                      * pathname, it will be automatically 301ed into
636                      * the canonical form.
637                      */
638                     if (midlen < 3 ||
639                         !isxdigit((unsigned char)p[1]) ||
640                         !isxdigit((unsigned char)p[2]))
641                         break;         /* faulty escape encoding */
642                     char x[3];
643                     unsigned cval;
644                     x[0] = p[1];
645                     x[1] = p[2];
646                     x[2] = '\0';
647                     sscanf(x, "%x", &cval);
648                     *r++ = cval;
649                     p += 3;
650                     midlen -= 3;
651                 } else {
652                     *r++ = *p;
653                     p++;
654                     midlen--;
655                 }
656             }
657             if (midlen > 0)
658                 continue;      /* something went wrong in that loop */
659             assert(r - triepath < maxinlen+maxoutlen);
660             *r = '\0';
661 
662             unsigned long gotidx = trie_before(t, triepath);
663             if (gotidx >= trie_count(t))
664                 continue;              /* index out of range */
665             char retpath[1+maxoutlen];
666             trie_getpath(t, gotidx, retpath);
667             if (strcmp(triepath, retpath))
668                 continue;           /* exact path not found in trie */
669             if (!index_has_root(t, gotidx))
670                 continue;              /* path is not a directory */
671 
672             /*
673              * Successful path-based match.
674              */
675             *index = gotidx;
676             return 1;
677         }
678     }
679 
680     return 0;                    /* no match from any format option */
681 }
682 
/*
 * Produce the URI for trie entry 'index' under format string 'fmt',
 * making sure that parsing the result gives the same index back.
 *
 * The plainly formatted string may not round-trip if a pathname is
 * valid in two formats: for instance, if you use '-H -d max' to
 * generate a static HTML dump from scanning a directory which has a
 * subdir called 'index', the top-level file and the one for that
 * subdir might both want to be called index.html. In that case the
 * string is formatted again with more and more leading characters
 * escaped ('=69ndex.html', then '=69=6edex.html', ...) until it
 * parses back to the right index.
 *
 * Returns a newly allocated string.
 */
char *format_string(const char *fmt, unsigned long index, const void *t)
{
    /* First attempt: nothing escaped beyond the usual rules. */
    char *candidate = format_string_inner(fmt, 0, index, t);
    int nescape;

    for (nescape = 1;; nescape++) {
        unsigned long parsed;
        int matched = parse_path(t, candidate, fmt, &parsed);

        assert(matched != 0);
        if (parsed == index)
            return candidate;          /* path now parses successfully */

        /*
         * It came back as some other index, so escape one more
         * leading character and try again.
         */
        char *retry = format_string_inner(fmt, nescape, index, t);
        assert(strcmp(retry, candidate)); /* if nescape gets too big, give up */
        sfree(candidate);
        candidate = retry;
    }
}
727 
html_format_path(const void * t,const struct html_config * cfg,unsigned long index)728 char *html_format_path(const void *t, const struct html_config *cfg,
729                        unsigned long index)
730 {
731     return format_string(cfg->uriformat, index, t);
732 }
733 
html_parse_path(const void * t,const char * path,const struct html_config * cfg,unsigned long * index)734 int html_parse_path(const void *t, const char *path,
735                     const struct html_config *cfg, unsigned long *index)
736 {
737     return parse_path(t, path, cfg->uriformat, index);
738 }
739 
make_href(const char * source,const char * target)740 char *make_href(const char *source, const char *target)
741 {
742     /*
743      * We insist that both source and target URIs start with a /, or
744      * else we won't be reliably able to construct relative hrefs
745      * between them (e.g. because we've got a suffix on the end of
746      * some CGI pathname that this function doesn't know the final
747      * component of).
748      */
749     assert(*source == '/');
750     assert(*target == '/');
751 
752     /*
753      * Find the last / in source. Everything up to but not including
754      * that is the directory to which the output href will be
755      * relative. We enforce by assertion that there must be a /
756      * somewhere in source, or else we can't construct a relative href
757      * at all
758      */
759     const char *sourceend = strrchr(source, '/');
760     assert(sourceend != NULL);
761 
762     /*
763      * See how far the target URI agrees with the source one, up to
764      * and including that /.
765      */
766     const char *s = source, *t = target;
767     while (s <= sourceend && *s == *t)
768         s++, t++;
769 
770     /*
771      * We're only interested in agreement of complete path components,
772      * so back off until we're sitting just after a shared /.
773      */
774     while (s > source && s[-1] != '/')
775         s--, t--;
776     assert(s > source);
777 
778     /*
779      * Now we need some number of levels of "../" to get from source
780      * to here, and then we just replicate the rest of 'target'.
781      */
782     int levels = 0;
783     while (s <= sourceend) {
784         if (*s == '/')
785             levels++;
786         s++;
787     }
788     int len = 3*levels + strlen(t);
789     if (len == 0) {
790         /* One last special case: if target has no tail _and_ we
791          * haven't written out any "../". */
792         return dupstr("./");
793     } else {
794         char *ret = snewn(len+1, char);
795         char *p = ret;
796         while (levels-- > 0) {
797             *p++ = '.';
798             *p++ = '.';
799             *p++ = '/';
800         }
801         strcpy(p, t);
802         return ret;
803     }
804 }
805 
806 #define PIXEL_SIZE 600		       /* FIXME: configurability? */
write_report_line(struct html * ctx,struct vector * vec)807 static void write_report_line(struct html *ctx, struct vector *vec)
808 {
809     unsigned long long size, asize, divisor;
810     double display_size;
811     int pix, newpix;
812     int i;
813     const char *unitsfmt;
814 
815     /*
816      * A line with literally zero space usage should not be
817      * printed at all if it's a link to a subdirectory (since it
818      * probably means the whole thing was excluded by some
819      * --exclude-path wildcard). If it's [files] or the top-level
820      * line, though, we must always print _something_, and in that
821      * case we must fiddle about to prevent divisions by zero in
822      * the code below.
823      */
824     if (!vec->sizes[MAXCOLOUR] && !vec->essential)
825 	return;
826     divisor = ctx->totalsize;
827     if (!divisor) {
828 	divisor = 1;
829     }
830 
831     /*
832      * Find the total size of this subdirectory.
833      */
834     size = vec->sizes[MAXCOLOUR];
835     compute_display_size(size, &unitsfmt, &display_size);
836     htprintf(ctx, "<tr>\n"
837               "<td style=\"padding: 0.2em; text-align: right\">");
838     htprintf(ctx, unitsfmt, display_size);
839     htprintf(ctx, "</td>\n");
840 
841     /*
842      * Generate a colour bar.
843      */
844     htprintf(ctx, "<td style=\"padding: 0.2em\">\n");
845     begin_colour_bar(ctx);
846     pix = 0;
847     for (i = 0; i <= MAXCOLOUR; i++) {
848 	asize = vec->sizes[i];
849 	newpix = asize * PIXEL_SIZE / divisor;
850 	add_to_colour_bar(ctx, i, newpix - pix);
851 	pix = newpix;
852     }
853     add_to_colour_bar(ctx, -1, PIXEL_SIZE - pix);
854     end_colour_bar(ctx);
855     htprintf(ctx, "</td>\n");
856 
857     /*
858      * Output size as a percentage of totalsize.
859      */
860     htprintf(ctx, "<td style=\"padding: 0.2em; text-align: right\">"
861 	     "%.2f%%</td>\n", (double)size / divisor * 100.0);
862 
863     /*
864      * Output a subdirectory marker.
865      */
866     htprintf(ctx, "<td style=\"padding: 0.2em\">");
867     if (vec->name) {
868 	bool doing_href = false;
869 
870 	if (ctx->uriformat && vec->want_href) {
871 	    char *targeturi = format_string(ctx->uriformat, vec->index,
872                                             ctx->t);
873             char *link = make_href(ctx->oururi, targeturi);
874 	    htprintf(ctx, "<a href=\"%s\">", link);
875             sfree(link);
876             sfree(targeturi);
877 	    doing_href = true;
878 	}
879 	if (vec->literal)
880 	    htprintf(ctx, "<code>");
881 	htescape(ctx, vec->name, strlen(vec->name), 1);
882 	if (vec->literal)
883 	    htprintf(ctx, "</code>");
884 	if (doing_href)
885 	    htprintf(ctx, "</a>");
886     }
887     htprintf(ctx, "</td>\n</tr>\n");
888 }
889 
strcmptrailingpathsep(const char * a,const char * b)890 int strcmptrailingpathsep(const char *a, const char *b)
891 {
892     while (*a == *b && *a)
893 	a++, b++;
894 
895     if ((*a == pathsep && !a[1] && !*b) ||
896 	(*b == pathsep && !b[1] && !*a))
897 	return 0;
898 
899     return (int)(unsigned char)*a - (int)(unsigned char)*b;
900 }
901 
html_query(const void * t,unsigned long index,const struct html_config * cfg,bool downlink)902 char *html_query(const void *t, unsigned long index,
903 		 const struct html_config *cfg, bool downlink)
904 {
905     struct html actx, *ctx = &actx;
906     char *path, *path2, *p, *q;
907     char agebuf1[80], agebuf2[80];
908     size_t pathlen, subdirpos;
909     unsigned long index2;
910     int i;
911     struct vector **vecs;
912     int nvecs, vecsize;
913     unsigned long xi1, xi2, xj1, xj2;
914 
915     if (index >= trie_count(t))
916 	return NULL;
917 
918     ctx->buf = NULL;
919     ctx->buflen = ctx->bufsize = 0;
920     ctx->t = t;
921     ctx->uriformat = cfg->uriformat;
922     htprintf(ctx, "<html>\n");
923 
924     path = snewn(1+trie_maxpathlen(t), char);
925     ctx->path2 = path2 = snewn(1+trie_maxpathlen(t), char);
926     if (cfg->uriformat)
927         ctx->oururi = format_string(cfg->uriformat, index, t);
928     else
929         ctx->oururi = NULL;
930 
931     /*
932      * HEAD section.
933      */
934     htprintf(ctx, "<head>\n");
935     trie_getpath(t, index, path);
936     htprintf(ctx, "<title>");
937     htescape(ctx, cfg->html_title, strlen(cfg->html_title), 0);
938     htprintf(ctx, ": ");
939     htescape(ctx, path, strlen(path), 0);
940     htprintf(ctx, "</title>\n");
941     htprintf(ctx, "</head>\n");
942 
943     /*
944      * Begin BODY section.
945      */
946     htprintf(ctx, "<body>\n");
947     htprintf(ctx, "<h3 align=center>Disk space breakdown by"
948 	     " last-access time</h3>\n");
949 
950     /*
951      * Show the pathname we're centred on, with hyperlinks to
952      * parent directories where available.
953      */
954     htprintf(ctx, "<p align=center>\n<code>");
955     q = path;
956     for (p = strchr(path, pathsep); p && p[1]; p = strchr(p, pathsep)) {
957 	int doing_href = 0;
958 	char c, *zp;
959 
960 	/*
961 	 * See if this path prefix exists in the trie. If so,
962 	 * generate a hyperlink.
963 	 */
964 	zp = p;
965 	if (p == path)		       /* special case for "/" at start */
966 	    zp++;
967 
968 	p++;
969 
970 	c = *zp;
971 	*zp = '\0';
972 	index2 = trie_before(t, path);
973 	trie_getpath(t, index2, path2);
974 	if (!strcmptrailingpathsep(path, path2) && cfg->uriformat) {
975 	    char *targeturi = format_string(cfg->uriformat, index2, t);
976             char *link = make_href(ctx->oururi, targeturi);
977 	    htprintf(ctx, "<a href=\"%s\">", link);
978             sfree(link);
979             sfree(targeturi);
980 	    doing_href = 1;
981 	}
982 	*zp = c;
983 	htescape(ctx, q, zp - q, 1);
984 	if (doing_href)
985 	    htprintf(ctx, "</a>");
986 	htescape(ctx, zp, p - zp, 1);
987 	q = p;
988     }
989     htescape(ctx, q, strlen(q), 1);
990     htprintf(ctx, "</code>\n");
991 
992     /*
993      * Decide on the age limit of our colour coding, establish the
994      * colour thresholds, and write out a key.
995      */
996     ctx->now = time(NULL);
997     if (cfg->autoage) {
998 	ctx->oldest = index_order_stat(t, 0.05);
999 	ctx->newest = index_order_stat(t, 1.0);
1000 	ctx->oldest = round_and_format_age(ctx, ctx->oldest, agebuf1, -1);
1001 	ctx->newest = round_and_format_age(ctx, ctx->newest, agebuf2, +1);
1002     } else {
1003 	ctx->oldest = cfg->oldest;
1004 	ctx->newest = cfg->newest;
1005 	ctx->oldest = round_and_format_age(ctx, ctx->oldest, agebuf1, 0);
1006 	ctx->newest = round_and_format_age(ctx, ctx->newest, agebuf2, 0);
1007     }
1008     for (i = 0; i < MAXCOLOUR; i++) {
1009 	ctx->thresholds[i] =
1010 	    ctx->oldest + (ctx->newest - ctx->oldest) * i / (MAXCOLOUR-1);
1011     }
1012     for (i = 0; i <= MAXCOLOUR; i++) {
1013 	char buf[80];
1014 
1015 	if (i == 0) {
1016 	    strcpy(buf, "&gt; ");
1017 	    round_and_format_age(ctx, ctx->thresholds[0], buf+5, 0);
1018 	} else if (i == MAXCOLOUR) {
1019 	    strcpy(buf, "&lt; ");
1020 	    round_and_format_age(ctx, ctx->thresholds[MAXCOLOUR-1], buf+5, 0);
1021 	} else {
1022 	    unsigned long long midrange =
1023 		(ctx->thresholds[i-1] + ctx->thresholds[i]) / 2;
1024 	    round_and_format_age(ctx, midrange, buf, 0);
1025 	}
1026 
1027 	ctx->titletexts[i] = dupstr(buf);
1028     }
1029     htprintf(ctx, "<p align=center>Key to colour coding (mouse over for more detail):\n");
1030     htprintf(ctx, "<p align=center style=\"padding: 0; margin-top:0.4em; "
1031 	     "margin-bottom:1em\">");
1032     begin_colour_bar(ctx);
1033     htprintf(ctx, "<td style=\"padding-right:1em\">%s</td>\n", agebuf1);
1034     for (i = 0; i < MAXCOLOUR; i++)
1035 	add_to_colour_bar(ctx, i, 1);
1036     htprintf(ctx, "<td style=\"padding-left:1em\">%s</td>\n", agebuf2);
1037     end_colour_bar(ctx);
1038 
1039     /*
1040      * Begin the main table.
1041      */
1042     htprintf(ctx, "<p align=center>\n<table style=\"margin:0; border:0\">\n");
1043 
1044     /*
1045      * Find the total size of our entire subdirectory. We'll use
1046      * that as the scale for all the colour bars in this report.
1047      */
1048     get_indices(t, path, &xi1, &xi2);
1049     ctx->totalsize = fetch_size(t, xi1, xi2, ULLONG_MAX);
1050 
1051     /*
1052      * Generate a report line for the whole subdirectory.
1053      */
1054     vecsize = 64;
1055     vecs = snewn(vecsize, struct vector *);
1056     nvecs = 1;
1057     vecs[0] = make_vector(ctx, path, false, true, NULL, false);
1058     print_heading(ctx, "Overall");
1059     write_report_line(ctx, vecs[0]);
1060 
1061     /*
1062      * Now generate report lines for all its children, and the
1063      * files contained in it.
1064      */
1065     print_heading(ctx, "Subdirectories");
1066 
1067     if (cfg->showfiles) {
1068         /* Every file directly in this directory is going to end up in
1069          * its own entry in the loop below, and then it'll be
1070          * subtracted from vecs[0]. So vecs[0] will end up tracking
1071          * the size of the _directory inode_ only. */
1072         vecs[0]->name = dupstr("[directory]");
1073     } else {
1074         /* Otherwise, vecs[0] will track everything that wasn't part
1075          * of a subdirectory, which includes the directory inode but
1076          * also all the files within it. Use this more general name. */
1077         vecs[0]->name = dupstr("[files]");
1078     }
1079     get_indices(t, path, &xi1, &xi2);
1080     xi1++;
1081     pathlen = strlen(path);
1082     subdirpos = pathlen + 1;
1083     if (pathlen > 0 && path[pathlen-1] == pathsep)
1084 	subdirpos--;
1085     while (xi1 < xi2) {
1086 	trie_getpath(t, xi1, path2);
1087 	get_indices(t, ctx->path2, &xj1, &xj2);
1088 	xi1 = xj2;
1089 	if (!cfg->showfiles && xj2 - xj1 <= 1)
1090 	    continue;		       /* skip individual files */
1091 	if (nvecs >= vecsize) {
1092 	    vecsize = nvecs * 3 / 2 + 64;
1093 	    vecs = sresize(vecs, vecsize, struct vector *);
1094 	}
1095 	assert(strlen(path2) > pathlen);
1096 	vecs[nvecs] = make_vector(ctx, path2, downlink && (xj2 - xj1 > 1),
1097                                   false, path2 + subdirpos, 1);
1098 	for (i = 0; i <= MAXCOLOUR; i++)
1099 	    vecs[0]->sizes[i] -= vecs[nvecs]->sizes[i];
1100 	nvecs++;
1101     }
1102 
1103     qsort(vecs, nvecs, sizeof(vecs[0]), vec_compare);
1104 
1105     for (i = 0; i < nvecs; i++)
1106         if (vecs[i]->sizes[MAXCOLOUR])
1107             write_report_line(ctx, vecs[i]);
1108 
1109     /*
1110      * Close the main table.
1111      */
1112     htprintf(ctx, "</table>\n");
1113 
1114     /*
1115      * Finish up and tidy up.
1116      */
1117     htprintf(ctx, "</body>\n");
1118     htprintf(ctx, "</html>\n");
1119     sfree(ctx->oururi);
1120     sfree(path2);
1121     sfree(path);
1122     for (i = 0; i < nvecs; i++) {
1123 	sfree(vecs[i]->name);
1124 	sfree(vecs[i]);
1125     }
1126     sfree(vecs);
1127 
1128     return ctx->buf;
1129 }
1130 
html_dump(const void * t,unsigned long index,unsigned long endindex,int maxdepth,const struct html_config * cfg,const char * pathprefix)1131 int html_dump(const void *t, unsigned long index, unsigned long endindex,
1132 	      int maxdepth, const struct html_config *cfg,
1133 	      const char *pathprefix)
1134 {
1135     /*
1136      * Determine the filename for this file.
1137      */
1138     assert(cfg->fileformat != NULL);
1139     char *filename = format_string(cfg->fileformat, index, t);
1140     char *path = dupfmt("%s%s", pathprefix, filename);
1141     sfree(filename);
1142 
1143     /*
1144      * Create the HTML itself. Don't write out downlinks from our
1145      * deepest level.
1146      */
1147     char *html = html_query(t, index, cfg, maxdepth != 0);
1148 
1149     /*
1150      * Write it out.
1151      */
1152     FILE *fp = fopen(path, "w");
1153     if (!fp) {
1154 	fprintf(stderr, "%s: %s: open: %s\n", PNAME, path, strerror(errno));
1155 	return 1;
1156     }
1157     if (fputs(html, fp) < 0) {
1158 	fprintf(stderr, "%s: %s: write: %s\n", PNAME, path, strerror(errno));
1159 	fclose(fp);
1160 	return 1;
1161     }
1162     if (fclose(fp) < 0) {
1163 	fprintf(stderr, "%s: %s: fclose: %s\n", PNAME, path, strerror(errno));
1164 	return 1;
1165     }
1166     sfree(path);
1167 
1168     /*
1169      * Recurse.
1170      */
1171     if (maxdepth != 0) {
1172 	unsigned long subindex, subendindex;
1173 	int newdepth = (maxdepth > 0 ? maxdepth - 1 : maxdepth);
1174 	char rpath[1+trie_maxpathlen(t)];
1175 
1176 	index++;
1177 	while (index < endindex) {
1178 	    trie_getpath(t, index, rpath);
1179 	    get_indices(t, rpath, &subindex, &subendindex);
1180 	    index = subendindex;
1181 	    if (subendindex - subindex > 1) {
1182 		if (html_dump(t, subindex, subendindex, newdepth,
1183 			      cfg, pathprefix))
1184 		    return 1;
1185 	    }
1186 	}
1187     }
1188     return 0;
1189 }
1190