/* Cache subsystem */

#ifdef HAVE_CONFIG_H
#include "config.h"
#endif

#include <string.h>

#include "elinks.h"

#include "bfu/dialog.h"
#include "cache/cache.h"
#include "cache/dialogs.h"
#include "config/options.h"
#include "main/main.h"
#include "main/object.h"
#include "network/connection.h"
#include "protocol/protocol.h"
#include "protocol/proxy.h"
#include "protocol/uri.h"
#include "util/error.h"
#include "util/memory.h"
#include "util/string.h"

/* The list of cache entries */
static INIT_LIST_HEAD(cache_entries);

static unsigned longlong cache_size;
static int id_counter = 1;

static void truncate_entry(struct cache_entry *cached, off_t offset, int final);

/* Change 0 to 1 to enable cache debugging features (redirect stderr to a file). */
#if 0
#define DEBUG_CACHE
#endif

#ifdef DEBUG_CACHE

#define dump_frag(frag, count) \
do { \
	DBG(" [%d] f=%p offset=%" OFF_T_FORMAT " length=%" OFF_T_FORMAT \
	    " real_length=%" OFF_T_FORMAT, \
	    count, frag, frag->offset, frag->length, frag->real_length); \
} while (0)

#define dump_frags(entry, comment) \
do { \
	struct fragment *frag; \
	int count = 0; \
 \
	DBG("%s: url=%s, cache_size=%li", comment, struri(entry->uri), cache_size); \
	foreach (frag, entry->frag) \
		dump_frag(frag, ++count); \
} while (0)

#else
#define dump_frags(entry, comment)
#endif /* DEBUG_CACHE */

unsigned longlong
get_cache_size(void)
{
	return cache_size;
}

int
get_cache_entry_count(void)
{
	return list_size(&cache_entries);
}

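/* Number of cache entries that are currently referenced (locked) by some
 * object. */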
int
get_cache_entry_used_count(void)
{
	struct cache_entry *cached;
	int i = 0;

	foreach (cached, cache_entries)
		i += is_object_used(cached);

	return i;
}

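/* Number of cache entries that are still being loaded, i.e. in use by a
 * connection. */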
int
get_cache_entry_loading_count(void)
{
	struct cache_entry *cached;
	int i = 0;

	foreach (cached, cache_entries)
		i += is_entry_used(cached);

	return i;
}

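/* Look up @uri among the valid cache entries. For proxy URIs the entry's
 * proxy_uri is compared instead. On a hit the entry is moved to the top of
 * the list so that recently used entries stay near the head. */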
struct cache_entry *
find_in_cache(struct uri *uri)
{
	struct cache_entry *cached;
	int proxy = (uri->protocol == PROTOCOL_PROXY);

	foreach (cached, cache_entries) {
		struct uri *c_uri;

		if (!cached->valid) continue;

		c_uri = proxy ? cached->proxy_uri : cached->uri;
		if (!compare_uri(c_uri, uri, URI_BASE))
			continue;

		move_to_top_of_list(cache_entries, cached);

		return cached;
	}

	return NULL;
}

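/* Return the cache entry for @uri, creating a new empty one if none exists
 * yet. Returns NULL only if allocation fails. */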
struct cache_entry *
get_cache_entry(struct uri *uri)
{
	struct cache_entry *cached = find_in_cache(uri);

	assertm(!uri->fragment, "Fragment in URI (%s)", struri(uri));

	if (cached) return cached;

	shrink_memory(0);

	cached = mem_calloc(1, sizeof(*cached));
	if (!cached) return NULL;

	cached->uri = get_proxied_uri(uri);
	if (!cached->uri) {
		mem_free(cached);
		return NULL;
	}

	cached->proxy_uri = get_proxy_uri(uri, NULL);
	if (!cached->proxy_uri) {
		done_uri(cached->uri);
		mem_free(cached);
		return NULL;
	}
	cached->incomplete = 1;
	cached->valid = 1;

	init_list(cached->frag);
	cached->id = id_counter++;
	object_nolock(cached, "cache_entry"); /* Debugging purpose. */

	cached->box_item = add_listbox_leaf(&cache_browser, NULL, cached);

	add_to_list(cache_entries, cached);

	return cached;
}

static int
cache_entry_has_expired(struct cache_entry *cached)
{
	timeval_T now;

	timeval_now(&now);

	return timeval_cmp(&cached->max_age, &now) <= 0;
}

struct cache_entry *
get_validated_cache_entry(struct uri *uri, enum cache_mode cache_mode)
{
	struct cache_entry *cached;

	/* We have to check if something should be reloaded */
	if (cache_mode > CACHE_MODE_NORMAL)
		return NULL;

	/* We only consider complete entries */
	cached = find_in_cache(uri);
	if (!cached || cached->incomplete)
		return NULL;

	/* Check if the entry can be deleted */
	/* FIXME: This does not make sense to me. Why should the usage pattern
	 * of the cache entry matter? Only reason I can think of is to avoid
	 * reloading when spawning a new tab which could potentially be a big
	 * penalty but shouldn't that be taken care of on a higher level?
	 * --jonas */
	if (is_object_used(cached)) {
#if 0
		/* Never use expired entries. */
		/* Disabled because it hurts usability too much. */
		if (cached->expire && cache_entry_has_expired(cached))
			return NULL;
#endif
		return cached;
	}

	/* A bit of a gray zone. Delete the entry if it has the strictest
	 * cache mode and we don't want the most aggressive mode, or we have
	 * to remove the redirect, or the entry expired. Please enlighten me.
	 * --jonas */
	if ((cached->cache_mode == CACHE_MODE_NEVER && cache_mode != CACHE_MODE_ALWAYS)
	    || (cached->redirect && !get_opt_bool("document.cache.cache_redirects"))
	    || (cached->expire && cache_entry_has_expired(cached))) {
		delete_cache_entry(cached);
		return NULL;
	}

	return cached;
}

int
cache_entry_is_valid(struct cache_entry *cached)
{
	struct cache_entry *valid_cached;

	foreach (valid_cached, cache_entries) {
		if (valid_cached == cached)
			return 1;
	}

	return 0;
}


struct cache_entry *
follow_cached_redirects(struct cache_entry *cached)
{
	int redirects = 0;

	while (cached) {
		if (!cached->redirect) {
			/* XXX: This is not quite true, but does that difference
			 * matter here? */
			return cached;
		}

		if (++redirects > MAX_REDIRECTS) break;

		cached = find_in_cache(cached->redirect);
	}

	return NULL;
}

struct cache_entry *
get_redirected_cache_entry(struct uri *uri)
{
	struct cache_entry *cached = find_in_cache(uri);

	return cached ? follow_cached_redirects(cached) : NULL;
}


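/* Adjust the entry's data_size and the global cache_size by @size, which may
 * be negative when fragments are freed or trimmed. */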
static inline void
enlarge_entry(struct cache_entry *cached, off_t size)
{
	cached->data_size += size;
	assertm(cached->data_size >= 0,
		"cache entry data_size underflow: %ld", cached->data_size);
	if_assert_failed { cached->data_size = 0; }

	cache_size += size;
	assertm(cache_size >= 0, "cache_size underflow: %ld", cache_size);
	if_assert_failed { cache_size = 0; }
}


#define CACHE_PAD(x) (((x) | 0x3fff) + 1)

/* One byte is reserved for data in struct fragment. */
#define FRAGSIZE(x) (sizeof(struct fragment) + (x) - 1)
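
/* For illustration: CACHE_PAD() rounds an allocation up to the next 16 KiB
 * boundary, e.g. CACHE_PAD(1) == 0x4000 and CACHE_PAD(0x4000) == 0x8000,
 * leaving slack so a fragment can usually grow in place (see add_fragment()).
 * FRAGSIZE() subtracts one byte because struct fragment already reserves a
 * single data byte at its end. */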

/* We store the fragments themselves in a private vault, safely separated from
 * the rest of memory structures. If we lived in the main libc memory pool, we
 * would trigger annoying pathological behaviour like artificially enlarging
 * the memory pool to 50M, then securing it with some stupid cookie record at
 * the top and then no matter how you flush the cache the data segment is still
 * 50M big.
 *
 * Cool, but we don't want that, so fragments (where the big data is stored)
 * live in their little mmap()ed worlds. There is some overhead, but if we
 * assume a single fragment per cache entry and a page size (mmap() allocation
 * granularity) of 4096, for a squad of ten 1kb documents this amounts to 30kb.
 * That's not *that* horrible when you realize that the freshmeat front page
 * takes 300kb in memory and we usually do not deal with documents so small
 * that the max. 4kb overhead would be visible there.
 *
 * The alternative would be of course to manage an entire custom memory pool,
 * but that is feasible only when we are able to resize it efficiently. We
 * aren't, except on Linux.
 *
 * Of course for all this to really completely prevent the pathological cases,
 * we need to stuff the rendered documents in too, because they seem to account
 * for the major memory bursts. */
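/* (That is, roughly ten times the 4096 - 1024 = 3072 bytes wasted per
 * document, or about 30kb, as a worked version of the figure above.) */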

static struct fragment *
frag_alloc(size_t size)
{
	struct fragment *f = mem_mmap_alloc(FRAGSIZE(size));

	if (!f) return NULL;
	memset(f, 0, FRAGSIZE(size));
	return f;
}

static struct fragment *
frag_realloc(struct fragment *f, size_t size)
{
	return mem_mmap_realloc(f, FRAGSIZE(f->real_length), FRAGSIZE(size));
}

static void
frag_free(struct fragment *f)
{
	mem_mmap_free(f, FRAGSIZE(f->real_length));
}


/* Concatenate overlapping fragments. */
static void
remove_overlaps(struct cache_entry *cached, struct fragment *f, int *trunc)
{
	off_t f_end_offset = f->offset + f->length;

	/* Iterate through all fragments we still overlap with. */
	while (list_has_next(cached->frag, f)
		&& f_end_offset > f->next->offset) {
		struct fragment *nf;
		off_t end_offset = f->next->offset + f->next->length;

		if (f_end_offset < end_offset) {
			/* We end before the end of the following fragment,
			 * though. So try to append the overlapping part of
			 * that fragment to us. */
			nf = frag_realloc(f, end_offset - f->offset);
			if (nf) {
				nf->prev->next = nf;
				nf->next->prev = nf;
				f = nf;

				if (memcmp(f->data + f->next->offset - f->offset,
					   f->next->data,
					   f->offset + f->length - f->next->offset))
					*trunc = 1;

				memcpy(f->data + f->length,
				       f->next->data + f_end_offset - f->next->offset,
				       end_offset - f_end_offset);

				enlarge_entry(cached, end_offset - f_end_offset);
				f->length = f->real_length = end_offset - f->offset;
			}

		} else {
			/* We will just discard this, it's a complete subset of
			 * our new fragment. */
			if (memcmp(f->data + f->next->offset - f->offset,
				   f->next->data,
				   f->next->length))
				*trunc = 1;
		}

		/* Remove the fragment, it influences our new one! */
		nf = f->next;
		enlarge_entry(cached, -nf->length);
		del_from_list(nf);
		frag_free(nf);
	}
}

/* Note that this function is maybe overcommented, but I'm certainly not
 * unhappy about that. */
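/* Returns -1 if a new fragment could not be allocated, 0 if there was nothing
 * to add (zero length) or the data fit inside an existing fragment without
 * growing it, and 1 if a fragment was enlarged or a new one was added. */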
int
add_fragment(struct cache_entry *cached, off_t offset,
	     const unsigned char *data, ssize_t length)
{
	struct fragment *f, *nf;
	int trunc = 0;
	off_t end_offset;

	if (!length) return 0;

	end_offset = offset + length;
	if (cached->length < end_offset)
		cached->length = end_offset;

	/* The id marks each entry and changes each time the entry is
	 * modified; it is used by the HTML renderer. */
	cached->id = id_counter++;

	/* Possibly insert the new data in the middle of an existing
	 * fragment. */
	foreach (f, cached->frag) {
		int ret = 0;
		off_t f_end_offset = f->offset + f->length;

		/* No intersection? */
		if (f->offset > offset) break;
		if (f_end_offset < offset) continue;

		if (end_offset > f_end_offset) {
			/* Overlap - we end further than the original
			 * fragment. */

			if (end_offset - f->offset <= f->real_length) {
				/* We fit here, so let's enlarge it by the delta
				 * of the old and new end.. */
				enlarge_entry(cached, end_offset - f_end_offset);
				/* ..and length is now the total length. */
				f->length = end_offset - f->offset;

				ret = 1; /* It was enlarged. */
			} else {
				/* We will reduce the fragment length only to
				 * the starting non-intersecting size and add a
				 * new fragment directly after this one. */
				f->length = offset - f->offset;
				f = f->next;
				break;
			}

		} /* else we are a subset of the original fragment. */

		/* Copy the stuff over there. */
		memcpy(f->data + offset - f->offset, data, length);

		remove_overlaps(cached, f, &trunc);

		/* We truncate the entry even if the data contents are the
		 * same as what we have in the fragment, because that does
		 * not mean that what is going to follow won't differ. This
		 * is a serious problem when rendering an HTML frame with
		 * onload snippets - we "guess" the rest of the document here,
		 * interpret the snippet, then it turns out in the real
		 * document the snippet is different and we are in trouble.
		 *
		 * Debugging this took me about 1.5 days (really); the diff
		 * with all the debugging print commands amounted to about
		 * 20kb (gdb wasn't much use since it stalled the download,
		 * de facto eliminating the bad behaviour). */
		truncate_entry(cached, end_offset, 0);

		dump_frags(cached, "add_fragment");

		return ret;
	}

	/* Make up a new fragment. */
	nf = frag_alloc(CACHE_PAD(length));
	if (!nf) return -1;

	nf->offset = offset;
	nf->length = length;
	nf->real_length = CACHE_PAD(length);
	memcpy(nf->data, data, length);
	add_at_pos(f->prev, nf);

	enlarge_entry(cached, length);

	remove_overlaps(cached, nf, &trunc);
	if (trunc) truncate_entry(cached, end_offset, 0);

	dump_frags(cached, "add_fragment");

	return 1;
}

/* Try to defragment the cache entry. Defragmentation will not be possible
 * if there is a gap in the fragments; if we have bytes 1-100 in one fragment
 * and bytes 201-300 in the second, we must leave those two fragments separate
 * so that the fragment for bytes 101-200 can later be inserted. However,
 * if we have the fragments for bytes 1-100, 101-200, and 201-300, we will
 * catenate them into one new fragment and replace the original fragments
 * with that new fragment.
 *
 * If there are no fragments, return NULL. If there is no fragment with byte 1,
 * return NULL. Otherwise, return the first fragment, whether or not it was
 * possible to fully defragment the entry. */
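/* For illustration (in terms of offsets):
 *   before: [0..100) [100..250) [250..300) [350..400)
 *   after:  [0..300) [350..400)
 * and the returned fragment is the new one covering [0..300). */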
struct fragment *
get_cache_fragment(struct cache_entry *cached)
{
	struct fragment *first_frag, *adj_frag, *frag, *new_frag;
	int new_frag_len;

	if (list_empty(cached->frag))
		return NULL;

	first_frag = cached->frag.next;
	if (first_frag->offset)
		return NULL;

	/* Only one fragment so no defragmentation is needed */
	if (list_is_singleton(cached->frag))
		return first_frag;

	/* Find the first pair of fragments with a gap in between. Only
	 * fragments up to the first gap can be defragmented. */
	for (adj_frag = first_frag->next; adj_frag != (void *) &cached->frag;
	     adj_frag = adj_frag->next) {
		long gap = adj_frag->offset
			    - (adj_frag->prev->offset + adj_frag->prev->length);

		if (gap > 0) break;
		if (gap == 0) continue;

		INTERNAL("fragments overlap");
		return NULL;
	}

	/* There is a gap between the first two fragments, so we can't
	 * defragment anything. */
	if (adj_frag == first_frag->next)
		return first_frag;

	/* Calculate the length of the defragmented fragment. */
	for (new_frag_len = 0, frag = first_frag;
	     frag != adj_frag;
	     frag = frag->next)
		new_frag_len += frag->length;

	/* XXX: If the defragmentation fails because of an allocation failure,
	 * fall back to returning the first fragment and pretend all is well. */
	/* FIXME: Is this terribly brain-dead? It corresponds to the semantics
	 * of the code this extended version of the old defrag_entry() is
	 * supposed to replace. --jonas */
	new_frag = frag_alloc(new_frag_len);
	if (!new_frag)
		return first_frag->length ? first_frag : NULL;

	new_frag->length = new_frag_len;
	new_frag->real_length = new_frag_len;

	for (new_frag_len = 0, frag = first_frag;
	     frag != adj_frag;
	     frag = frag->next) {
		struct fragment *tmp = frag;

		memcpy(new_frag->data + new_frag_len, frag->data, frag->length);
		new_frag_len += frag->length;

		frag = frag->prev;
		del_from_list(tmp);
		frag_free(tmp);
	}

	add_to_list(cached->frag, new_frag);

	dump_frags(cached, "get_cache_fragment");

	return new_frag;
}

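/* Free fragment @f and all fragments that follow it in the entry's list,
 * subtracting their sizes from the cache accounting. */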
static void
delete_fragment(struct cache_entry *cached, struct fragment *f)
{
	while ((void *) f != &cached->frag) {
		struct fragment *tmp = f->next;

		enlarge_entry(cached, -f->length);
		del_from_list(f);
		frag_free(f);
		f = tmp;
	}
}

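/* Cut the entry down to @offset bytes: shorten the fragment that crosses
 * @offset and delete every fragment at or after it. When @final is set, also
 * shrink the kept fragment's allocation to its new length. */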
static void
truncate_entry(struct cache_entry *cached, off_t offset, int final)
{
	struct fragment *f;

	if (cached->length > offset) {
		cached->length = offset;
		cached->incomplete = 1;
	}

	foreach (f, cached->frag) {
		off_t size = offset - f->offset;

		/* XXX: is zero length fragment really legal here ? --Zas */
		assert(f->length >= 0);

		if (size >= f->length) continue;

		if (size > 0) {
			enlarge_entry(cached, -(f->length - size));
			f->length = size;

			if (final) {
				struct fragment *nf;

				nf = frag_realloc(f, f->length);
				if (nf) {
					nf->next->prev = nf;
					nf->prev->next = nf;
					f = nf;
					f->real_length = f->length;
				}
			}

			f = f->next;
		}

		delete_fragment(cached, f);

		dump_frags(cached, "truncate_entry");
		return;
	}
}

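/* Release the data before @offset: fragments that end at or before it are
 * freed entirely and a fragment straddling it is trimmed from the front. */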
void
free_entry_to(struct cache_entry *cached, off_t offset)
{
	struct fragment *f;

	foreach (f, cached->frag) {
		if (f->offset + f->length <= offset) {
			struct fragment *tmp = f;

			enlarge_entry(cached, -f->length);
			f = f->prev;
			del_from_list(tmp);
			frag_free(tmp);
		} else if (f->offset < offset) {
			off_t size = offset - f->offset;

			enlarge_entry(cached, -size);
			f->length -= size;
			memmove(f->data, f->data + size, f->length);
			f->offset = offset;
		} else break;
	}
}

void
delete_entry_content(struct cache_entry *cached)
{
	enlarge_entry(cached, -cached->data_size);

	while (cached->frag.next != (void *) &cached->frag) {
		struct fragment *f = cached->frag.next;

		del_from_list(f);
		frag_free(f);
	}
	cached->id = id_counter++;
	cached->length = 0;
	cached->incomplete = 1;

	mem_free_set(&cached->last_modified, NULL);
	mem_free_set(&cached->etag, NULL);
}

static void
done_cache_entry(struct cache_entry *cached)
{
	assertm(!is_object_used(cached), "deleting locked cache entry");
	assertm(!is_entry_used(cached), "deleting loading cache entry");

	delete_entry_content(cached);

	if (cached->box_item) done_listbox_item(&cache_browser, cached->box_item);

	if (cached->uri) done_uri(cached->uri);
	if (cached->proxy_uri) done_uri(cached->proxy_uri);
	if (cached->redirect) done_uri(cached->redirect);

	mem_free_if(cached->head);
	mem_free_if(cached->content_type);
	mem_free_if(cached->last_modified);
	mem_free_if(cached->ssl_info);
	mem_free_if(cached->encoding_info);
	mem_free_if(cached->etag);

	mem_free(cached);
}

void
delete_cache_entry(struct cache_entry *cached)
{
	del_from_list(cached);

	done_cache_entry(cached);
}


void
normalize_cache_entry(struct cache_entry *cached, off_t truncate_length)
{
	if (truncate_length < 0)
		return;

	truncate_entry(cached, truncate_length, 1);
	cached->incomplete = 0;
	cached->preformatted = 0;
}


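/* Record a redirect from this cache entry to @location. @get says whether the
 * redirect should be followed with a GET request (in which case POST data is
 * not carried over), and a non-negative @incomplete value overwrites
 * cached->incomplete. Returns the new redirect URI, or NULL on failure. */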
struct uri *
redirect_cache(struct cache_entry *cached, unsigned char *location,
	       int get, int incomplete)
{
	unsigned char *uristring;

	/* XXX: I am a little puzzled whether we should only use the cache
	 * entry's URI if it is valid. Hopefully always using it won't hurt.
	 * Currently we handle directory redirects, where "/" should be
	 * appended, as a special case; dunno if join_urls() could be made to
	 * handle that. --jonas */
	/* XXX: We are assuming here that incomplete will only be zero when
	 * doing these fake redirects whose only purpose is to add an ending
	 * slash *cough* dirseparator to the end of the URI. */
	if (incomplete == 0 && location[0] == '/' && location[1] == 0) {
		/* To be sure, use get_uri_string() to get rid of post data */
		uristring = get_uri_string(cached->uri, URI_ORIGINAL);
		if (uristring) add_to_strn(&uristring, location);
	} else {
		uristring = join_urls(cached->uri, location);
	}

	if (!uristring) return NULL;

	/* Only add the post data if the redirect should not use the GET
	 * method. This is tied to the HTTP handling of the 303 and (if the
	 * protocol.http.bugs.broken_302_redirect option is enabled) the 302
	 * status code handling. */
	if (cached->uri->post
	    && !cached->redirect_get
	    && !get) {
		/* XXX: Add POST_CHAR and post data assuming URI components
		 * belong to one string. */

		/* To be certain we don't append post data twice in some
		 * conditions... --Zas */
		assert(!strchr(uristring, POST_CHAR));

		add_to_strn(&uristring, cached->uri->post - 1);
	}

	if (cached->redirect) done_uri(cached->redirect);
	cached->redirect = get_uri(uristring, 0);
	cached->redirect_get = get;
	if (incomplete >= 0) cached->incomplete = incomplete;

	mem_free(uristring);

	return cached->redirect;
}


void
garbage_collection(int whole)
{
	struct cache_entry *cached;
	/* We recompute cache_size when scanning cache entries, to ensure
	 * consistency. */
	unsigned longlong old_cache_size = 0;
	/* The maximal cache size tolerated by the user. Note that this is only
	 * the size of the "just stored" unused cache entries; used cache
	 * entries are not counted towards it. */
	unsigned longlong opt_cache_size = get_opt_long("document.cache.memory.size");
	/* The low-threshold cache size. Basically, when the cache size is
	 * higher than opt_cache_size, we free the cache so that there is no
	 * more than this value in the cache anymore. This is to make sure we
	 * aren't cleaning the cache too frequently when working with a lot of
	 * small cache entries, but rather free more and then let it grow a
	 * little more as well. */
	unsigned longlong gc_cache_size = opt_cache_size * MEMORY_CACHE_GC_PERCENT / 100;
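	/* For example, assuming MEMORY_CACHE_GC_PERCENT were 75 and the user
	 * set document.cache.memory.size to 1048576 bytes, a collection would
	 * keep shrinking unused entries until they total at most 786432
	 * bytes. */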
	/* The cache size we aim to reach. */
	unsigned longlong new_cache_size = cache_size;
#ifdef DEBUG_CACHE
	/* Whether we've hit a used (unfreeable) entry while collecting
	 * garbage. */
	int obstacle_entry = 0;
#endif

#ifdef DEBUG_CACHE
	DBG("gc whole=%d opt_cache_size=%ld gc_cache_size=%ld",
	      whole, opt_cache_size, gc_cache_size);
#endif

	if (!whole && cache_size <= opt_cache_size) return;


	/* Scanning cache, pass #1:
	 * Weed out the used cache entries from @new_cache_size, so that we
	 * will work only with the unused entries from then on. Also ensure
	 * that @cache_size is in sync. */

	foreach (cached, cache_entries) {
		old_cache_size += cached->data_size;

		if (!is_object_used(cached) && !is_entry_used(cached))
			continue;

		assertm(new_cache_size >= cached->data_size,
			"cache_size (%ld) underflow: subtracting %ld from %ld",
			cache_size, cached->data_size, new_cache_size);

		new_cache_size -= cached->data_size;

		if_assert_failed { new_cache_size = 0; }
	}

	assertm(old_cache_size == cache_size,
		"cache_size out of sync: %ld != (actual) %ld",
		cache_size, old_cache_size);
	if_assert_failed { cache_size = old_cache_size; }

	if (!whole && new_cache_size <= opt_cache_size) return;


	/* Scanning cache, pass #2:
	 * Mark potential targets for destruction, from the oldest to the
	 * newest. */

	foreachback (cached, cache_entries) {
		/* Have we shrunk enough already? */
		if (!whole && new_cache_size <= gc_cache_size)
			goto shrinked_enough;

		/* Skip used cache entries. */
		if (is_object_used(cached) || is_entry_used(cached)) {
#ifdef DEBUG_CACHE
			obstacle_entry = 1;
#endif
			cached->gc_target = 0;
			continue;
		}

		/* FIXME: Optionally take cached->max_age into consideration,
		 * but that will probably complicate things too much. We'd have
		 * to sort entries to prioritize removing the oldest ones. */

		assertm(new_cache_size >= cached->data_size,
			"cache_size (%ld) underflow: subtracting %ld from %ld",
			cache_size, cached->data_size, new_cache_size);

		/* Mark me for destruction, sir. */
		cached->gc_target = 1;
		new_cache_size -= cached->data_size;

		if_assert_failed { new_cache_size = 0; }
	}

	/* If we'd free the whole cache... */
	assertm(new_cache_size == 0,
		"cache_size (%ld) overflow: %ld",
		cache_size, new_cache_size);
	if_assert_failed { new_cache_size = 0; }

shrinked_enough:


	/* Now turn around and start walking in the opposite direction. */
	cached = cached->next;

	/* Something is strange when we decided all is ok before dropping any
	 * cache entry. */
	if ((void *) cached == &cache_entries) return;


	if (!whole) {
		struct cache_entry *entry;

		/* Scanning cache, pass #3:
		 * Walk back in the cache and unmark the cache entries which
		 * could still fit into the cache. */

		/* This makes sense when the newest entry is HUGE and, after
		 * it, there are just plenty of tiny entries. By this point,
		 * all the tiny entries would be marked for deletion even
		 * though it'd be enough to free the huge entry. This actually
		 * fixes that situation. */

		for (entry = cached; (void *) entry != &cache_entries; entry = entry->next) {
			unsigned longlong newer_cache_size = new_cache_size + entry->data_size;

			if (newer_cache_size > gc_cache_size)
				continue;

			new_cache_size = newer_cache_size;
			entry->gc_target = 0;
		}
	}


	/* Scanning cache, pass #4:
	 * Destroy the marked entries. So sad, but that's life, bro'. */

	for (; (void *) cached != &cache_entries; ) {
		cached = cached->next;
		if (cached->prev->gc_target)
			delete_cache_entry(cached->prev);
	}


#ifdef DEBUG_CACHE
	if ((whole || !obstacle_entry) && cache_size > gc_cache_size) {
		DBG("garbage collection doesn't work, cache size %ld > %ld, "
		      "document.cache.memory.size set to: %ld bytes",
		      cache_size, gc_cache_size,
		      get_opt_long("document.cache.memory.size"));
	}
#endif
}