1 /* -*- Mode: C; tab-width: 8; indent-tabs-mode: t; c-basic-offset: 8 -*- */
2 /*
3  * soup-cache.c
4  *
5  * Copyright (C) 2009, 2010 Igalia S.L.
6  *
7  * This library is free software; you can redistribute it and/or
8  * modify it under the terms of the GNU Library General Public
9  * License as published by the Free Software Foundation; either
10  * version 2 of the License, or (at your option) any later version.
11  *
12  * This library is distributed in the hope that it will be useful,
13  * but WITHOUT ANY WARRANTY; without even the implied warranty of
14  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
15  * Library General Public License for more details.
16  *
17  * You should have received a copy of the GNU Library General Public License
18  * along with this library; see the file COPYING.LIB.  If not, write to
19  * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
20  * Boston, MA 02110-1301, USA.
21  */
22 
23 /* TODO:
24  * - Need to hook the feature in the sync SoupSession.
25  * - Need more tests.
26  */
27 
28 #ifdef HAVE_CONFIG_H
29 #include <config.h>
30 #endif
31 
32 #include <string.h>
33 #include <glib/gstdio.h>
34 
35 #include "soup-cache.h"
36 #include "soup-body-input-stream.h"
37 #include "soup-cache-client-input-stream.h"
38 #include "soup-cache-input-stream.h"
39 #include "soup-cache-private.h"
40 #include "soup-content-processor.h"
41 #include "soup-message-private.h"
42 #include "soup.h"
43 #include "soup-message-private.h"
44 
45 /**
46  * SECTION:soup-cache
47  * @short_description: Caching support
48  *
49  * #SoupCache implements a file-based cache for HTTP resources.
50  */
51 
/* Default SoupSessionFeature interface vtable. It is looked up in
 * soup_cache_session_feature_init() and used by attach() to chain up.
 */
52 static SoupSessionFeatureInterface *soup_cache_default_feature_interface;
53 static void soup_cache_session_feature_init (SoupSessionFeatureInterface *feature_interface, gpointer interface_data);
54 
/* Default SoupContentProcessor interface vtable. It is looked up in
 * soup_cache_content_processor_init().
 */
55 static SoupContentProcessorInterface *soup_cache_default_content_processor_interface;
56 static void soup_cache_content_processor_init (SoupContentProcessorInterface *interface, gpointer interface_data);
57 
/* Default upper bound for the total cache size (50 * 1024 * 1024).
 * The expansion is parenthesized so that the macro keeps its value
 * when it is used inside a larger expression (division, modulo...).
 */
#define DEFAULT_MAX_SIZE (50 * 1024 * 1024)
#define MAX_ENTRY_DATA_PERCENTAGE 10 /* Percentage of the total size
	                                of the cache that can be
	                                filled by a single entry */
62 
63 /*
64  * Version 2: cache is now saved in soup.cache2. Added the version
65  * number to the beginning of the file.
66  *
67  * Version 3: added HTTP status code to the cache entries.
68  *
69  * Version 4: replaced several types.
70  *   - freshness_lifetime,corrected_initial_age,response_time: time_t -> guint32
71  *   - status_code: guint -> guint16
72  *   - hits: guint -> guint32
73  *
74  * Version 5: key is no longer stored on disk as it can be easily
75  * built from the URI. Apart from that some fields in the
76  * SoupCacheEntry have changed:
77  *   - entry key is now a uint32 instead of a (char *).
78  *   - added uri, used to check for collisions
79  *   - removed filename, it's built from the entry key.
80  */
/* Current version of the cache format; see the version history in
 * the comment right above.
 */
81 #define SOUP_CACHE_CURRENT_VERSION 5
82 
/* File names: the old one predates version 2, the second one is the
 * name used since version 2 (see the version history).
 */
83 #define OLD_SOUP_CACHE_FILE "soup.cache"
84 #define SOUP_CACHE_FILE "soup.cache2"
85 
/* GVariant format strings. Each one is built on top of the previous
 * one: a pair of header strings, a per-entry tuple that ends with an
 * array of those pairs, and a top-level tuple holding an array of
 * per-entry tuples.
 */
86 #define SOUP_CACHE_HEADERS_FORMAT "{ss}"
87 #define SOUP_CACHE_PHEADERS_FORMAT "(sbuuuuuqa" SOUP_CACHE_HEADERS_FORMAT ")"
88 #define SOUP_CACHE_ENTRIES_FORMAT "(qa" SOUP_CACHE_PHEADERS_FORMAT ")"
89 
90 /* Basically the same format as above, except that the strings are
91    prepended with &. This way the GVariant returns a pointer to the
92    data instead of duplicating the string */
93 #define SOUP_CACHE_DECODE_HEADERS_FORMAT "{&s&s}"
94 
95 
/* In-memory description of one cached resource. */
96 typedef struct _SoupCacheEntry {
97 	guint32 key;			/* hash of @uri, filled in by soup_cache_entry_insert(); also names the file on disk */
98 	char *uri;			/* URI string of the message; compared on lookup to detect key collisions */
99 	guint32 freshness_lifetime;	/* computed by soup_cache_entry_set_freshness() */
100 	gboolean must_revalidate;	/* set from the Cache-Control directives */
101 	gsize length;			/* bytes written for the body; subtracted from the cache size on removal */
102 	guint32 corrected_initial_age;	/* computed in soup_cache_entry_new() */
103 	guint32 response_time;		/* time at which the response headers were received */
104 	gboolean dirty;			/* TRUE while the body is still being written to the cache */
105 	gboolean being_validated;	/* cleared by soup_cache_send_response(); NOTE(review): set outside the code shown here */
106 	SoupMessageHeaders *headers;	/* copy of the end-to-end response headers */
107 	guint32 hits;			/* first sort criterion of the LRU list */
108 	GCancellable *cancellable;	/* created when caching starts; cancelled if removal is requested while dirty */
109 	guint16 status_code;		/* HTTP status of the stored response */
110 } SoupCacheEntry;
111 
/* Private instance data of SoupCache. */
112 struct _SoupCachePrivate {
113 	char *cache_dir;		/* directory in which the cached files are created */
114 	GHashTable *cache;		/* entry key (stored as a pointer) -> SoupCacheEntry */
115 	guint n_pending;		/* entries whose body is being written right now */
116 	SoupSession *session;		/* session the feature was attached to; not referenced here */
117 	SoupCacheType cache_type;	/* compared against SOUP_CACHE_SHARED when parsing Cache-Control */
118 	guint size;			/* approximate size of the stored bodies */
119 	guint max_size;			/* limit for @size */
120 	guint max_entry_data_size; /* Computed value. Here for performance reasons */
121 	GList *lru_start;		/* head of the list of entries used to pick eviction candidates */
122 };
123 
/* GObject property identifiers of SoupCache. */
124 enum {
125 	PROP_0,			/* placeholder so that real property ids start at 1 */
126 	PROP_CACHE_DIR,
127 	PROP_CACHE_TYPE
128 };
129 
/* Registers the SoupCache type: a GObject with private data that
 * implements both the SoupSessionFeature and the SoupContentProcessor
 * interfaces.
 */
130 G_DEFINE_TYPE_WITH_CODE (SoupCache, soup_cache, G_TYPE_OBJECT,
131                          G_ADD_PRIVATE (SoupCache)
132 			 G_IMPLEMENT_INTERFACE (SOUP_TYPE_SESSION_FEATURE,
133 						soup_cache_session_feature_init)
134 			 G_IMPLEMENT_INTERFACE (SOUP_TYPE_CONTENT_PROCESSOR,
135 						soup_cache_content_processor_init))
136 
/* Forward declarations of helpers that are used before their definition. */
137 static gboolean soup_cache_entry_remove (SoupCache *cache, SoupCacheEntry *entry, gboolean purge);
138 static void make_room_for_new_entry (SoupCache *cache, guint length_to_add);
139 static gboolean cache_accepts_entries_of_size (SoupCache *cache, guint length_to_add);
140 
141 static GFile *
get_file_from_entry(SoupCache * cache,SoupCacheEntry * entry)142 get_file_from_entry (SoupCache *cache, SoupCacheEntry *entry)
143 {
144 	char *filename = g_strdup_printf ("%s%s%u", cache->priv->cache_dir,
145 					  G_DIR_SEPARATOR_S, (guint) entry->key);
146 	GFile *file = g_file_new_for_path (filename);
147 	g_free (filename);
148 
149 	return file;
150 }
151 
152 static SoupCacheability
get_cacheability(SoupCache * cache,SoupMessage * msg)153 get_cacheability (SoupCache *cache, SoupMessage *msg)
154 {
155 	SoupCacheability cacheability;
156 	const char *cache_control, *content_type;
157 	gboolean has_max_age = FALSE;
158 
159 	/* 1. The request method must be cacheable */
160 	if (msg->method == SOUP_METHOD_GET)
161 		cacheability = SOUP_CACHE_CACHEABLE;
162 	else if (msg->method == SOUP_METHOD_HEAD ||
163 		 msg->method == SOUP_METHOD_TRACE ||
164 		 msg->method == SOUP_METHOD_CONNECT)
165 		return SOUP_CACHE_UNCACHEABLE;
166 	else
167 		return (SOUP_CACHE_UNCACHEABLE | SOUP_CACHE_INVALIDATES);
168 
169 	content_type = soup_message_headers_get_content_type (msg->response_headers, NULL);
170 	if (content_type && !g_ascii_strcasecmp (content_type, "multipart/x-mixed-replace"))
171 		return SOUP_CACHE_UNCACHEABLE;
172 
173 	cache_control = soup_message_headers_get_list (msg->response_headers, "Cache-Control");
174 	if (cache_control && *cache_control) {
175 		GHashTable *hash;
176 		SoupCachePrivate *priv = soup_cache_get_instance_private (cache);
177 
178 		hash = soup_header_parse_param_list (cache_control);
179 
180 		/* Shared caches MUST NOT store private resources */
181 		if (priv->cache_type == SOUP_CACHE_SHARED) {
182 			if (g_hash_table_lookup_extended (hash, "private", NULL, NULL)) {
183 				soup_header_free_param_list (hash);
184 				return SOUP_CACHE_UNCACHEABLE;
185 			}
186 		}
187 
188 		/* 2. The 'no-store' cache directive does not appear in the
189 		 * headers
190 		 */
191 		if (g_hash_table_lookup_extended (hash, "no-store", NULL, NULL)) {
192 			soup_header_free_param_list (hash);
193 			return SOUP_CACHE_UNCACHEABLE;
194 		}
195 
196 		if (g_hash_table_lookup_extended (hash, "max-age", NULL, NULL))
197 			has_max_age = TRUE;
198 
199 		/* This does not appear in section 2.1, but I think it makes
200 		 * sense to check it too?
201 		 */
202 		if (g_hash_table_lookup_extended (hash, "no-cache", NULL, NULL)) {
203 			soup_header_free_param_list (hash);
204 			return SOUP_CACHE_UNCACHEABLE;
205 		}
206 
207 		soup_header_free_param_list (hash);
208 	}
209 
210 	/* Section 13.9 */
211 	if ((soup_message_get_uri (msg))->query &&
212 	    !soup_message_headers_get_one (msg->response_headers, "Expires") &&
213 	    !has_max_age)
214 		return SOUP_CACHE_UNCACHEABLE;
215 
216 	switch (msg->status_code) {
217 	case SOUP_STATUS_PARTIAL_CONTENT:
218 		/* We don't cache partial responses, but they only
219 		 * invalidate cached full responses if the headers
220 		 * don't match.
221 		 */
222 		cacheability = SOUP_CACHE_UNCACHEABLE;
223 		break;
224 
225 	case SOUP_STATUS_NOT_MODIFIED:
226 		/* A 304 response validates an existing cache entry */
227 		cacheability = SOUP_CACHE_VALIDATES;
228 		break;
229 
230 	case SOUP_STATUS_MULTIPLE_CHOICES:
231 	case SOUP_STATUS_MOVED_PERMANENTLY:
232 	case SOUP_STATUS_GONE:
233 		/* FIXME: cacheable unless indicated otherwise */
234 		cacheability = SOUP_CACHE_UNCACHEABLE;
235 		break;
236 
237 	case SOUP_STATUS_FOUND:
238 	case SOUP_STATUS_TEMPORARY_REDIRECT:
239 		/* FIXME: cacheable if explicitly indicated */
240 		cacheability = SOUP_CACHE_UNCACHEABLE;
241 		break;
242 
243 	case SOUP_STATUS_SEE_OTHER:
244 	case SOUP_STATUS_FORBIDDEN:
245 	case SOUP_STATUS_NOT_FOUND:
246 	case SOUP_STATUS_METHOD_NOT_ALLOWED:
247 		return (SOUP_CACHE_UNCACHEABLE | SOUP_CACHE_INVALIDATES);
248 
249 	default:
250 		/* Any 5xx status or any 4xx status not handled above
251 		 * is uncacheable but doesn't break the cache.
252 		 */
253 		if ((msg->status_code >= SOUP_STATUS_BAD_REQUEST &&
254 		     msg->status_code <= SOUP_STATUS_FAILED_DEPENDENCY) ||
255 		    msg->status_code >= SOUP_STATUS_INTERNAL_SERVER_ERROR)
256 			return SOUP_CACHE_UNCACHEABLE;
257 
258 		/* An unrecognized 2xx, 3xx, or 4xx response breaks
259 		 * the cache.
260 		 */
261 		if ((msg->status_code > SOUP_STATUS_PARTIAL_CONTENT &&
262 		     msg->status_code < SOUP_STATUS_MULTIPLE_CHOICES) ||
263 		    (msg->status_code > SOUP_STATUS_TEMPORARY_REDIRECT &&
264 		     msg->status_code < SOUP_STATUS_INTERNAL_SERVER_ERROR))
265 			return (SOUP_CACHE_UNCACHEABLE | SOUP_CACHE_INVALIDATES);
266 		break;
267 	}
268 
269 	return cacheability;
270 }
271 
272 /* NOTE: this function deletes the file pointed by the file argument
273  * and also unref's the GFile object representing it.
274  */
275 static void
soup_cache_entry_free(SoupCacheEntry * entry)276 soup_cache_entry_free (SoupCacheEntry *entry)
277 {
278 	g_free (entry->uri);
279 	g_clear_pointer (&entry->headers, soup_message_headers_free);
280 	g_clear_object (&entry->cancellable);
281 
282 	g_slice_free (SoupCacheEntry, entry);
283 }
284 
285 static void
copy_headers(const char * name,const char * value,SoupMessageHeaders * headers)286 copy_headers (const char *name, const char *value, SoupMessageHeaders *headers)
287 {
288 	soup_message_headers_append (headers, name, value);
289 }
290 
291 static void
remove_headers(const char * name,const char * value,SoupMessageHeaders * headers)292 remove_headers (const char *name, const char *value, SoupMessageHeaders *headers)
293 {
294 	soup_message_headers_remove (headers, name);
295 }
296 
297 static char *hop_by_hop_headers[] = {"Connection", "Keep-Alive", "Proxy-Authenticate", "Proxy-Authorization", "TE", "Trailer", "Transfer-Encoding", "Upgrade"};
298 
299 static void
copy_end_to_end_headers(SoupMessageHeaders * source,SoupMessageHeaders * destination)300 copy_end_to_end_headers (SoupMessageHeaders *source, SoupMessageHeaders *destination)
301 {
302 	int i;
303 
304 	soup_message_headers_foreach (source, (SoupMessageHeadersForeachFunc) copy_headers, destination);
305 	for (i = 0; i < G_N_ELEMENTS (hop_by_hop_headers); i++)
306 		soup_message_headers_remove (destination, hop_by_hop_headers[i]);
307 	soup_message_headers_clean_connection_headers (destination);
308 }
309 
310 static guint
soup_cache_entry_get_current_age(SoupCacheEntry * entry)311 soup_cache_entry_get_current_age (SoupCacheEntry *entry)
312 {
313 	time_t now = time (NULL);
314 	time_t resident_time;
315 
316 	resident_time = now - entry->response_time;
317 	return entry->corrected_initial_age + resident_time;
318 }
319 
320 static gboolean
soup_cache_entry_is_fresh_enough(SoupCacheEntry * entry,gint min_fresh)321 soup_cache_entry_is_fresh_enough (SoupCacheEntry *entry, gint min_fresh)
322 {
323 	guint limit = (min_fresh == -1) ? soup_cache_entry_get_current_age (entry) : (guint) min_fresh;
324 	return entry->freshness_lifetime > limit;
325 }
326 
327 static inline guint32
get_cache_key_from_uri(const char * uri)328 get_cache_key_from_uri (const char *uri)
329 {
330 	return (guint32) g_str_hash (uri);
331 }
332 
333 static void
soup_cache_entry_set_freshness(SoupCacheEntry * entry,SoupMessage * msg,SoupCache * cache)334 soup_cache_entry_set_freshness (SoupCacheEntry *entry, SoupMessage *msg, SoupCache *cache)
335 {
336 	const char *cache_control;
337 	const char *expires, *date, *last_modified;
338 
339 	/* Reset these values. We have to do this to ensure that
340 	 * revalidations overwrite previous values for the headers.
341 	 */
342 	entry->must_revalidate = FALSE;
343 	entry->freshness_lifetime = 0;
344 
345 	cache_control = soup_message_headers_get_list (entry->headers, "Cache-Control");
346 	if (cache_control && *cache_control) {
347 		const char *max_age, *s_maxage;
348 		gint64 freshness_lifetime = 0;
349 		GHashTable *hash;
350 		SoupCachePrivate *priv = soup_cache_get_instance_private (cache);
351 
352 		hash = soup_header_parse_param_list (cache_control);
353 
354 		/* Should we re-validate the entry when it goes stale */
355 		entry->must_revalidate = g_hash_table_lookup_extended (hash, "must-revalidate", NULL, NULL);
356 
357 		/* Section 2.3.1 */
358 		if (priv->cache_type == SOUP_CACHE_SHARED) {
359 			s_maxage = g_hash_table_lookup (hash, "s-maxage");
360 			if (s_maxage) {
361 				freshness_lifetime = g_ascii_strtoll (s_maxage, NULL, 10);
362 				if (freshness_lifetime) {
363 					/* Implies proxy-revalidate. TODO: is it true? */
364 					entry->must_revalidate = TRUE;
365 					soup_header_free_param_list (hash);
366 					return;
367 				}
368 			}
369 		}
370 
371 		/* If 'max-age' cache directive is present, use that */
372 		max_age = g_hash_table_lookup (hash, "max-age");
373 		if (max_age)
374 			freshness_lifetime = g_ascii_strtoll (max_age, NULL, 10);
375 
376 		if (freshness_lifetime) {
377 			entry->freshness_lifetime = (guint32) MIN (freshness_lifetime, G_MAXUINT32);
378 			soup_header_free_param_list (hash);
379 			return;
380 		}
381 
382 		soup_header_free_param_list (hash);
383 	}
384 
385 	/* If the 'Expires' response header is present, use its value
386 	 * minus the value of the 'Date' response header
387 	 */
388 	expires = soup_message_headers_get_one (entry->headers, "Expires");
389 	date = soup_message_headers_get_one (entry->headers, "Date");
390 	if (expires && date) {
391 		SoupDate *expires_d, *date_d;
392 		time_t expires_t, date_t;
393 
394 		expires_d = soup_date_new_from_string (expires);
395 		if (expires_d) {
396 			date_d = soup_date_new_from_string (date);
397 
398 			expires_t = soup_date_to_time_t (expires_d);
399 			date_t = soup_date_to_time_t (date_d);
400 
401 			soup_date_free (expires_d);
402 			soup_date_free (date_d);
403 
404 			if (expires_t && date_t) {
405 				entry->freshness_lifetime = (guint32) MAX (expires_t - date_t, 0);
406 				return;
407 			}
408 		} else {
409 			/* If Expires is not a valid date we should
410 			   treat it as already expired, see section
411 			   3.3 */
412 			entry->freshness_lifetime = 0;
413 			return;
414 		}
415 	}
416 
417 	/* Otherwise an heuristic may be used */
418 
419 	/* Heuristics MUST NOT be used with stored responses with
420 	   these status codes (section 2.3.1.1) */
421 	if (entry->status_code != SOUP_STATUS_OK &&
422 	    entry->status_code != SOUP_STATUS_NON_AUTHORITATIVE &&
423 	    entry->status_code != SOUP_STATUS_PARTIAL_CONTENT &&
424 	    entry->status_code != SOUP_STATUS_MULTIPLE_CHOICES &&
425 	    entry->status_code != SOUP_STATUS_MOVED_PERMANENTLY &&
426 	    entry->status_code != SOUP_STATUS_GONE)
427 		goto expire;
428 
429 	/* TODO: attach warning 113 if response's current_age is more
430 	   than 24h (section 2.3.1.1) when using heuristics */
431 
432 	/* Last-Modified based heuristic */
433 	last_modified = soup_message_headers_get_one (entry->headers, "Last-Modified");
434 	if (last_modified) {
435 		SoupDate *soup_date;
436 		time_t now, last_modified_t;
437 
438 		soup_date = soup_date_new_from_string (last_modified);
439 		last_modified_t = soup_date_to_time_t (soup_date);
440 		now = time (NULL);
441 
442 #define HEURISTIC_FACTOR 0.1 /* From Section 2.3.1.1 */
443 
444 		entry->freshness_lifetime = MAX (0, (now - last_modified_t) * HEURISTIC_FACTOR);
445 		soup_date_free (soup_date);
446 	}
447 
448 	return;
449 
450  expire:
451 	/* If all else fails, make the entry expire immediately */
452 	entry->freshness_lifetime = 0;
453 }
454 
455 static SoupCacheEntry *
soup_cache_entry_new(SoupCache * cache,SoupMessage * msg,time_t request_time,time_t response_time)456 soup_cache_entry_new (SoupCache *cache, SoupMessage *msg, time_t request_time, time_t response_time)
457 {
458 	SoupCacheEntry *entry;
459 	const char *date;
460 
461 	entry = g_slice_new0 (SoupCacheEntry);
462 	entry->dirty = FALSE;
463 	entry->being_validated = FALSE;
464 	entry->status_code = msg->status_code;
465 	entry->response_time = response_time;
466 	entry->uri = soup_uri_to_string (soup_message_get_uri (msg), FALSE);
467 
468 	/* Headers */
469 	entry->headers = soup_message_headers_new (SOUP_MESSAGE_HEADERS_RESPONSE);
470 	copy_end_to_end_headers (msg->response_headers, entry->headers);
471 
472 	/* LRU list */
473 	entry->hits = 0;
474 
475 	/* Section 2.3.1, Freshness Lifetime */
476 	soup_cache_entry_set_freshness (entry, msg, cache);
477 
478 	/* Section 2.3.2, Calculating Age */
479 	date = soup_message_headers_get_one (entry->headers, "Date");
480 
481 	if (date) {
482 		SoupDate *soup_date;
483 		const char *age;
484 		time_t date_value, apparent_age, corrected_received_age, response_delay, age_value = 0;
485 
486 		soup_date = soup_date_new_from_string (date);
487 		date_value = soup_date_to_time_t (soup_date);
488 		soup_date_free (soup_date);
489 
490 		age = soup_message_headers_get_one (entry->headers, "Age");
491 		if (age)
492 			age_value = g_ascii_strtoll (age, NULL, 10);
493 
494 		apparent_age = MAX (0, entry->response_time - date_value);
495 		corrected_received_age = MAX (apparent_age, age_value);
496 		response_delay = entry->response_time - request_time;
497 		entry->corrected_initial_age = corrected_received_age + response_delay;
498 	} else {
499 		/* Is this correct ? */
500 		entry->corrected_initial_age = time (NULL);
501 	}
502 
503 	return entry;
504 }
505 
506 static gboolean
soup_cache_entry_remove(SoupCache * cache,SoupCacheEntry * entry,gboolean purge)507 soup_cache_entry_remove (SoupCache *cache, SoupCacheEntry *entry, gboolean purge)
508 {
509 	GList *lru_item;
510 
511 	if (entry->dirty) {
512 		g_cancellable_cancel (entry->cancellable);
513 		return FALSE;
514 	}
515 
516 	g_assert (!entry->dirty);
517 	g_assert (g_list_length (cache->priv->lru_start) == g_hash_table_size (cache->priv->cache));
518 
519 	if (!g_hash_table_remove (cache->priv->cache, GUINT_TO_POINTER (entry->key)))
520 		return FALSE;
521 
522 	/* Remove from LRU */
523 	lru_item = g_list_find (cache->priv->lru_start, entry);
524 	cache->priv->lru_start = g_list_delete_link (cache->priv->lru_start, lru_item);
525 
526 	/* Adjust cache size */
527 	cache->priv->size -= entry->length;
528 
529 	g_assert (g_list_length (cache->priv->lru_start) == g_hash_table_size (cache->priv->cache));
530 
531 	/* Free resources */
532 	if (purge) {
533 		GFile *file = get_file_from_entry (cache, entry);
534 		g_file_delete (file, NULL, NULL);
535 		g_object_unref (file);
536 	}
537 	soup_cache_entry_free (entry);
538 
539 	return TRUE;
540 }
541 
542 static gint
lru_compare_func(gconstpointer a,gconstpointer b)543 lru_compare_func (gconstpointer a, gconstpointer b)
544 {
545 	SoupCacheEntry *entry_a = (SoupCacheEntry *)a;
546 	SoupCacheEntry *entry_b = (SoupCacheEntry *)b;
547 
548 	/* The rationale of this sorting func is
549 	 *
550 	 * 1. sort by hits -> LRU algorithm, then
551 	 *
552 	 * 2. sort by freshness lifetime, we better discard first
553 	 * entries that are close to expire
554 	 *
555 	 * 3. sort by size, replace first small size resources as they
556 	 * are cheaper to download
557 	 */
558 
559 	/* Sort by hits */
560 	if (entry_a->hits != entry_b->hits)
561 		return entry_a->hits - entry_b->hits;
562 
563 	/* Sort by freshness_lifetime */
564 	if (entry_a->freshness_lifetime != entry_b->freshness_lifetime)
565 		return entry_a->freshness_lifetime - entry_b->freshness_lifetime;
566 
567 	/* Sort by size */
568 	return entry_a->length - entry_b->length;
569 }
570 
571 static gboolean
cache_accepts_entries_of_size(SoupCache * cache,guint length_to_add)572 cache_accepts_entries_of_size (SoupCache *cache, guint length_to_add)
573 {
574 	/* We could add here some more heuristics. TODO: review how
575 	   this is done by other HTTP caches */
576 
577 	return length_to_add <= cache->priv->max_entry_data_size;
578 }
579 
580 static void
make_room_for_new_entry(SoupCache * cache,guint length_to_add)581 make_room_for_new_entry (SoupCache *cache, guint length_to_add)
582 {
583 	GList *lru_entry = cache->priv->lru_start;
584 
585 	/* Check that there is enough room for the new entry. This is
586 	   an approximation as we're not working out the size of the
587 	   cache file or the size of the headers for performance
588 	   reasons. TODO: check if that would be really that expensive */
589 
590 	while (lru_entry &&
591 	       (length_to_add + cache->priv->size > cache->priv->max_size)) {
592 		SoupCacheEntry *old_entry = (SoupCacheEntry *)lru_entry->data;
593 
594 		/* Discard entries. Once cancelled resources will be
595 		 * freed in close_ready_cb
596 		 */
597 		if (soup_cache_entry_remove (cache, old_entry, TRUE))
598 			lru_entry = cache->priv->lru_start;
599 		else
600 			lru_entry = g_list_next (lru_entry);
601 	}
602 }
603 
604 static gboolean
soup_cache_entry_insert(SoupCache * cache,SoupCacheEntry * entry,gboolean sort)605 soup_cache_entry_insert (SoupCache *cache,
606 			 SoupCacheEntry *entry,
607 			 gboolean sort)
608 {
609 	guint length_to_add = 0;
610 	SoupCacheEntry *old_entry;
611 
612 	/* Fill the key */
613 	entry->key = get_cache_key_from_uri ((const char *) entry->uri);
614 
615 	if (soup_message_headers_get_encoding (entry->headers) == SOUP_ENCODING_CONTENT_LENGTH)
616 		length_to_add = soup_message_headers_get_content_length (entry->headers);
617 
618 	/* Check if we are going to store the resource depending on its size */
619 	if (length_to_add) {
620 		if (!cache_accepts_entries_of_size (cache, length_to_add))
621 			return FALSE;
622 
623 		/* Make room for new entry if needed */
624 		make_room_for_new_entry (cache, length_to_add);
625 	}
626 
627 	/* Remove any previous entry */
628 	if ((old_entry = g_hash_table_lookup (cache->priv->cache, GUINT_TO_POINTER (entry->key))) != NULL) {
629 		if (!soup_cache_entry_remove (cache, old_entry, TRUE))
630 			return FALSE;
631 	}
632 
633 	/* Add to hash table */
634 	g_hash_table_insert (cache->priv->cache, GUINT_TO_POINTER (entry->key), entry);
635 
636 	/* Compute new cache size */
637 	cache->priv->size += length_to_add;
638 
639 	/* Update LRU */
640 	if (sort)
641 		cache->priv->lru_start = g_list_insert_sorted (cache->priv->lru_start, entry, lru_compare_func);
642 	else
643 		cache->priv->lru_start = g_list_prepend (cache->priv->lru_start, entry);
644 
645 	g_assert (g_list_length (cache->priv->lru_start) == g_hash_table_size (cache->priv->cache));
646 
647 	return TRUE;
648 }
649 
650 static SoupCacheEntry*
soup_cache_entry_lookup(SoupCache * cache,SoupMessage * msg)651 soup_cache_entry_lookup (SoupCache *cache,
652 			 SoupMessage *msg)
653 {
654 	SoupCacheEntry *entry;
655 	guint32 key;
656 	char *uri = NULL;
657 
658 	uri = soup_uri_to_string (soup_message_get_uri (msg), FALSE);
659 	key = get_cache_key_from_uri ((const char *) uri);
660 
661 	entry = g_hash_table_lookup (cache->priv->cache, GUINT_TO_POINTER (key));
662 
663 	if (entry != NULL && (strcmp (entry->uri, uri) != 0))
664 		entry = NULL;
665 
666 	g_free (uri);
667 	return entry;
668 }
669 
670 GInputStream *
soup_cache_send_response(SoupCache * cache,SoupMessage * msg)671 soup_cache_send_response (SoupCache *cache, SoupMessage *msg)
672 {
673 	SoupCacheEntry *entry;
674 	GInputStream *file_stream, *body_stream, *cache_stream, *client_stream;
675 	GFile *file;
676 
677 	g_return_val_if_fail (SOUP_IS_CACHE (cache), NULL);
678 	g_return_val_if_fail (SOUP_IS_MESSAGE (msg), NULL);
679 
680 	entry = soup_cache_entry_lookup (cache, msg);
681 	g_return_val_if_fail (entry, NULL);
682 
683 	file = get_file_from_entry (cache, entry);
684 	file_stream = G_INPUT_STREAM (g_file_read (file, NULL, NULL));
685 	g_object_unref (file);
686 
687 	/* Do not change the original message if there is no resource */
688 	if (!file_stream)
689 		return NULL;
690 
691 	body_stream = soup_body_input_stream_new (file_stream, SOUP_ENCODING_CONTENT_LENGTH, entry->length);
692 	g_object_unref (file_stream);
693 
694 	if (!body_stream)
695 		return NULL;
696 
697 	/* If we are told to send a response from cache any validation
698 	   in course is over by now */
699 	entry->being_validated = FALSE;
700 
701 	/* Message starting */
702 	soup_message_starting (msg);
703 
704 	/* Status */
705 	soup_message_set_status (msg, entry->status_code);
706 
707 	/* Headers */
708 	copy_end_to_end_headers (entry->headers, msg->response_headers);
709 
710 	/* Create the cache stream. */
711 	soup_message_disable_feature (msg, SOUP_TYPE_CACHE);
712 	cache_stream = soup_message_setup_body_istream (body_stream, msg,
713 							cache->priv->session,
714 							SOUP_STAGE_ENTITY_BODY);
715 	g_object_unref (body_stream);
716 
717 	client_stream = soup_cache_client_input_stream_new (cache_stream);
718 	g_object_unref (cache_stream);
719 
720 	return client_stream;
721 }
722 
723 static void
msg_got_headers_cb(SoupMessage * msg,gpointer user_data)724 msg_got_headers_cb (SoupMessage *msg, gpointer user_data)
725 {
726 	g_object_set_data (G_OBJECT (msg), "response-time", GINT_TO_POINTER (time (NULL)));
727 	g_signal_handlers_disconnect_by_func (msg, msg_got_headers_cb, user_data);
728 }
729 
730 static void
msg_starting_cb(SoupMessage * msg,gpointer user_data)731 msg_starting_cb (SoupMessage *msg, gpointer user_data)
732 {
733 	g_object_set_data (G_OBJECT (msg), "request-time", GINT_TO_POINTER (time (NULL)));
734 	g_signal_connect (msg, "got-headers", G_CALLBACK (msg_got_headers_cb), user_data);
735 	g_signal_handlers_disconnect_by_func (msg, msg_starting_cb, user_data);
736 }
737 
738 static void
request_queued(SoupSessionFeature * feature,SoupSession * session,SoupMessage * msg)739 request_queued (SoupSessionFeature *feature, SoupSession *session, SoupMessage *msg)
740 {
741 	g_signal_connect (msg, "starting", G_CALLBACK (msg_starting_cb), feature);
742 }
743 
744 static void
attach(SoupSessionFeature * feature,SoupSession * session)745 attach (SoupSessionFeature *feature, SoupSession *session)
746 {
747 	SoupCache *cache = SOUP_CACHE (feature);
748 	cache->priv->session = session;
749 
750 	soup_cache_default_feature_interface->attach (feature, session);
751 }
752 
753 static void
soup_cache_session_feature_init(SoupSessionFeatureInterface * feature_interface,gpointer interface_data)754 soup_cache_session_feature_init (SoupSessionFeatureInterface *feature_interface,
755 					gpointer interface_data)
756 {
757 	soup_cache_default_feature_interface =
758 		g_type_default_interface_peek (SOUP_TYPE_SESSION_FEATURE);
759 
760 	feature_interface->attach = attach;
761 	feature_interface->request_queued = request_queued;
762 }
763 
/* User data of the "caching-finished" handler: the cache (a reference
 * is held while the helper exists) and the entry being written.
 */
764 typedef struct {
765 	SoupCache *cache;
766 	SoupCacheEntry *entry;
767 } StreamHelper;
768 
769 static void
istream_caching_finished(SoupCacheInputStream * istream,gsize bytes_written,GError * error,gpointer user_data)770 istream_caching_finished (SoupCacheInputStream *istream,
771 			  gsize                 bytes_written,
772 			  GError               *error,
773 			  gpointer              user_data)
774 {
775 	StreamHelper *helper = (StreamHelper *) user_data;
776 	SoupCache *cache = helper->cache;
777 	SoupCacheEntry *entry = helper->entry;
778 
779 	--cache->priv->n_pending;
780 
781 	entry->dirty = FALSE;
782 	entry->length = bytes_written;
783 	g_clear_object (&entry->cancellable);
784 
785 	if (error) {
786 		/* Update cache size */
787 		if (soup_message_headers_get_encoding (entry->headers) == SOUP_ENCODING_CONTENT_LENGTH)
788 			cache->priv->size -= soup_message_headers_get_content_length (entry->headers);
789 
790 		soup_cache_entry_remove (cache, entry, TRUE);
791 		helper->entry = entry = NULL;
792 		goto cleanup;
793 	}
794 
795 	if (soup_message_headers_get_encoding (entry->headers) != SOUP_ENCODING_CONTENT_LENGTH) {
796 
797 		if (cache_accepts_entries_of_size (cache, entry->length)) {
798 			make_room_for_new_entry (cache, entry->length);
799 			cache->priv->size += entry->length;
800 		} else {
801 			soup_cache_entry_remove (cache, entry, TRUE);
802 			helper->entry = entry = NULL;
803 		}
804 	}
805 
806  cleanup:
807 	g_object_unref (helper->cache);
808 	g_slice_free (StreamHelper, helper);
809 }
810 
811 static GInputStream*
soup_cache_content_processor_wrap_input(SoupContentProcessor * processor,GInputStream * base_stream,SoupMessage * msg,GError ** error)812 soup_cache_content_processor_wrap_input (SoupContentProcessor *processor,
813 					 GInputStream *base_stream,
814 					 SoupMessage *msg,
815 					 GError **error)
816 {
817 	SoupCache *cache = (SoupCache*) processor;
818 	SoupCacheEntry *entry;
819 	SoupCacheability cacheability;
820 	GInputStream *istream;
821 	GFile *file;
822 	StreamHelper *helper;
823 	time_t request_time, response_time;
824 
825 	/* First of all, check if we should cache the resource. */
826 	cacheability = soup_cache_get_cacheability (cache, msg);
827 	entry = soup_cache_entry_lookup (cache, msg);
828 
829 	if (cacheability & SOUP_CACHE_INVALIDATES) {
830 		if (entry)
831 			soup_cache_entry_remove (cache, entry, TRUE);
832 		return NULL;
833 	}
834 
835 	if (cacheability & SOUP_CACHE_VALIDATES) {
836 		/* It's possible to get a CACHE_VALIDATES with no
837 		 * entry in the hash table. This could happen if for
838 		 * example the soup client is the one creating the
839 		 * conditional request.
840 		 */
841 		if (entry)
842 			soup_cache_update_from_conditional_request (cache, msg);
843 		return NULL;
844 	}
845 
846 	if (!(cacheability & SOUP_CACHE_CACHEABLE))
847 		return NULL;
848 
849 	/* Check if we are already caching this resource */
850 	if (entry && (entry->dirty || entry->being_validated))
851 		return NULL;
852 
853 	/* Create a new entry, deleting any old one if present */
854 	if (entry)
855 		soup_cache_entry_remove (cache, entry, TRUE);
856 
857 	request_time = GPOINTER_TO_INT (g_object_get_data (G_OBJECT (msg), "request-time"));
858 	response_time = GPOINTER_TO_INT (g_object_get_data (G_OBJECT (msg), "response-time"));
859 	entry = soup_cache_entry_new (cache, msg, request_time, response_time);
860 	entry->hits = 1;
861 	entry->dirty = TRUE;
862 
863 	/* Do not continue if it can not be stored */
864 	if (!soup_cache_entry_insert (cache, entry, TRUE)) {
865 		soup_cache_entry_free (entry);
866 		return NULL;
867 	}
868 
869 	entry->cancellable = g_cancellable_new ();
870 	++cache->priv->n_pending;
871 
872 	helper = g_slice_new (StreamHelper);
873 	helper->cache = g_object_ref (cache);
874 	helper->entry = entry;
875 
876 	file = get_file_from_entry (cache, entry);
877 	istream = soup_cache_input_stream_new (base_stream, file);
878 	g_object_unref (file);
879 
880 	g_signal_connect (istream, "caching-finished", G_CALLBACK (istream_caching_finished), helper);
881 
882 	return istream;
883 }
884 
885 static void
soup_cache_content_processor_init(SoupContentProcessorInterface * processor_interface,gpointer interface_data)886 soup_cache_content_processor_init (SoupContentProcessorInterface *processor_interface,
887 				   gpointer interface_data)
888 {
889 	soup_cache_default_content_processor_interface =
890 		g_type_default_interface_peek (SOUP_TYPE_CONTENT_PROCESSOR);
891 
892 	processor_interface->processing_stage = SOUP_STAGE_ENTITY_BODY;
893 	processor_interface->wrap_input = soup_cache_content_processor_wrap_input;
894 }
895 
896 static void
soup_cache_init(SoupCache * cache)897 soup_cache_init (SoupCache *cache)
898 {
899 	SoupCachePrivate *priv;
900 
901 	priv = cache->priv = soup_cache_get_instance_private (cache);
902 
903 	priv->cache = g_hash_table_new (g_direct_hash, g_direct_equal);
904 	/* LRU */
905 	priv->lru_start = NULL;
906 
907 	/* */
908 	priv->n_pending = 0;
909 
910 	/* Cache size */
911 	priv->max_size = DEFAULT_MAX_SIZE;
912 	priv->max_entry_data_size = priv->max_size / MAX_ENTRY_DATA_PERCENTAGE;
913 	priv->size = 0;
914 }
915 
916 static void
remove_cache_item(gpointer data,gpointer user_data)917 remove_cache_item (gpointer data,
918 		   gpointer user_data)
919 {
920 	soup_cache_entry_remove ((SoupCache *) user_data, (SoupCacheEntry *) data, FALSE);
921 }
922 
923 static void
soup_cache_finalize(GObject * object)924 soup_cache_finalize (GObject *object)
925 {
926 	SoupCachePrivate *priv;
927 	GList *entries;
928 
929 	priv = SOUP_CACHE (object)->priv;
930 
931 	/* Cannot use g_hash_table_foreach as callbacks must not modify the hash table */
932 	entries = g_hash_table_get_values (priv->cache);
933 	g_list_foreach (entries, remove_cache_item, object);
934 	g_list_free (entries);
935 
936 	g_hash_table_destroy (priv->cache);
937 	g_free (priv->cache_dir);
938 
939 	g_list_free (priv->lru_start);
940 
941 	G_OBJECT_CLASS (soup_cache_parent_class)->finalize (object);
942 }
943 
944 static void
soup_cache_set_property(GObject * object,guint prop_id,const GValue * value,GParamSpec * pspec)945 soup_cache_set_property (GObject *object, guint prop_id,
946 				const GValue *value, GParamSpec *pspec)
947 {
948 	SoupCachePrivate *priv = SOUP_CACHE (object)->priv;
949 
950 	switch (prop_id) {
951 	case PROP_CACHE_DIR:
952 		g_assert (!priv->cache_dir);
953 
954 		priv->cache_dir = g_value_dup_string (value);
955 
956 		if (!priv->cache_dir)
957 			/* Set a default cache dir, different for each user */
958 			priv->cache_dir = g_build_filename (g_get_user_cache_dir (),
959 							    "httpcache",
960 							    NULL);
961 
962 		/* Create directory if it does not exist */
963 		if (!g_file_test (priv->cache_dir, G_FILE_TEST_EXISTS | G_FILE_TEST_IS_DIR))
964 			g_mkdir_with_parents (priv->cache_dir, 0700);
965 		break;
966 	case PROP_CACHE_TYPE:
967 		priv->cache_type = g_value_get_enum (value);
968 		/* TODO: clear private entries and issue a warning if moving to shared? */
969 		break;
970 	default:
971 		G_OBJECT_WARN_INVALID_PROPERTY_ID (object, prop_id, pspec);
972 		break;
973 	}
974 }
975 
976 static void
soup_cache_get_property(GObject * object,guint prop_id,GValue * value,GParamSpec * pspec)977 soup_cache_get_property (GObject *object, guint prop_id,
978 			 GValue *value, GParamSpec *pspec)
979 {
980 	SoupCachePrivate *priv = SOUP_CACHE (object)->priv;
981 
982 	switch (prop_id) {
983 	case PROP_CACHE_DIR:
984 		g_value_set_string (value, priv->cache_dir);
985 		break;
986 	case PROP_CACHE_TYPE:
987 		g_value_set_enum (value, priv->cache_type);
988 		break;
989 	default:
990 		G_OBJECT_WARN_INVALID_PROPERTY_ID (object, prop_id, pspec);
991 		break;
992 	}
993 }
994 
995 static void
soup_cache_class_init(SoupCacheClass * cache_class)996 soup_cache_class_init (SoupCacheClass *cache_class)
997 {
998 	GObjectClass *gobject_class = (GObjectClass *)cache_class;
999 
1000 	gobject_class->finalize = soup_cache_finalize;
1001 	gobject_class->set_property = soup_cache_set_property;
1002 	gobject_class->get_property = soup_cache_get_property;
1003 
1004 	cache_class->get_cacheability = get_cacheability;
1005 
1006 	g_object_class_install_property (gobject_class, PROP_CACHE_DIR,
1007 					 g_param_spec_string ("cache-dir",
1008 							      "Cache directory",
1009 							      "The directory to store the cache files",
1010 							      NULL,
1011 							      G_PARAM_READWRITE | G_PARAM_CONSTRUCT_ONLY |
1012 							      G_PARAM_STATIC_STRINGS));
1013 
1014 	g_object_class_install_property (gobject_class, PROP_CACHE_TYPE,
1015 					 g_param_spec_enum ("cache-type",
1016 							    "Cache type",
1017 							    "Whether the cache is private or shared",
1018 							    SOUP_TYPE_CACHE_TYPE,
1019 							    SOUP_CACHE_SINGLE_USER,
1020 							    G_PARAM_READWRITE | G_PARAM_CONSTRUCT_ONLY |
1021 							    G_PARAM_STATIC_STRINGS));
1022 }
1023 
1024 /**
1025  * SoupCacheType:
1026  * @SOUP_CACHE_SINGLE_USER: a single-user cache
1027  * @SOUP_CACHE_SHARED: a shared cache
1028  *
1029  * The type of cache; this affects what kinds of responses will be
1030  * saved.
1031  *
1032  * Since: 2.34
1033  */
1034 
1035 /**
1036  * soup_cache_new:
1037  * @cache_dir: (allow-none): the directory to store the cached data, or %NULL
1038  *   to use the default one. Note that since the cache isn't safe to access for
1039  *   multiple processes at once, and the default directory isn't namespaced by
1040  *   process, clients are strongly discouraged from passing %NULL.
1041  * @cache_type: the #SoupCacheType of the cache
1042  *
1043  * Creates a new #SoupCache.
1044  *
1045  * Returns: a new #SoupCache
1046  *
1047  * Since: 2.34
1048  */
1049 SoupCache *
soup_cache_new(const char * cache_dir,SoupCacheType cache_type)1050 soup_cache_new (const char *cache_dir, SoupCacheType cache_type)
1051 {
1052 	return g_object_new (SOUP_TYPE_CACHE,
1053 			     "cache-dir", cache_dir,
1054 			     "cache-type", cache_type,
1055 			     NULL);
1056 }
1057 
1058 /**
1059  * soup_cache_has_response:
1060  * @cache: a #SoupCache
1061  * @msg: a #SoupMessage
1062  *
1063  * This function calculates whether the @cache object has a proper
1064  * response for the request @msg given the flags both in the request
1065  * and the cached reply and the time ellapsed since it was cached.
1066  *
1067  * Returns: whether or not the @cache has a valid response for @msg
1068  *
1069  * Since: 2.34
1070  */
1071 SoupCacheResponse
soup_cache_has_response(SoupCache * cache,SoupMessage * msg)1072 soup_cache_has_response (SoupCache *cache, SoupMessage *msg)
1073 {
1074 	SoupCacheEntry *entry;
1075 	const char *cache_control;
1076 	gpointer value;
1077 	int max_age, max_stale, min_fresh;
1078 	GList *lru_item, *item;
1079 
1080 	entry = soup_cache_entry_lookup (cache, msg);
1081 
1082 	/* 1. The presented Request-URI and that of stored response
1083 	 * match
1084 	 */
1085 	if (!entry)
1086 		return SOUP_CACHE_RESPONSE_STALE;
1087 
1088 	/* Increase hit count. Take sorting into account */
1089 	entry->hits++;
1090 	lru_item = g_list_find (cache->priv->lru_start, entry);
1091 	item = lru_item;
1092 	while (item->next && lru_compare_func (item->data, item->next->data) > 0)
1093 		item = g_list_next (item);
1094 
1095 	if (item != lru_item) {
1096 		cache->priv->lru_start = g_list_remove_link (cache->priv->lru_start, lru_item);
1097 		item = g_list_insert_sorted (item, lru_item->data, lru_compare_func);
1098 		g_list_free (lru_item);
1099 	}
1100 
1101 	if (entry->dirty || entry->being_validated)
1102 		return SOUP_CACHE_RESPONSE_STALE;
1103 
1104 	/* 2. The request method associated with the stored response
1105 	 *  allows it to be used for the presented request
1106 	 */
1107 
1108 	/* In practice this means we only return our resource for GET,
1109 	 * cacheability for other methods is a TODO in the RFC
1110 	 * (TODO: although we could return the headers for HEAD
1111 	 * probably).
1112 	 */
1113 	if (msg->method != SOUP_METHOD_GET)
1114 		return SOUP_CACHE_RESPONSE_STALE;
1115 
1116 	/* 3. Selecting request-headers nominated by the stored
1117 	 * response (if any) match those presented.
1118 	 */
1119 
1120 	/* TODO */
1121 
1122 	/* 4. The request is a conditional request issued by the client.
1123 	 */
1124 	if (soup_message_headers_get_one (msg->request_headers, "If-Modified-Since") ||
1125 	    soup_message_headers_get_list (msg->request_headers, "If-None-Match"))
1126 		return SOUP_CACHE_RESPONSE_STALE;
1127 
1128 	/* 5. The presented request and stored response are free from
1129 	 * directives that would prevent its use.
1130 	 */
1131 
1132 	max_age = max_stale = min_fresh = -1;
1133 
1134 	/* For HTTP 1.0 compatibility. RFC2616 section 14.9.4
1135 	 */
1136 	if (soup_message_headers_header_contains (msg->request_headers, "Pragma", "no-cache"))
1137 		return SOUP_CACHE_RESPONSE_STALE;
1138 
1139 	cache_control = soup_message_headers_get_list (msg->request_headers, "Cache-Control");
1140 	if (cache_control && *cache_control) {
1141 		GHashTable *hash = soup_header_parse_param_list (cache_control);
1142 
1143 		if (g_hash_table_lookup_extended (hash, "no-store", NULL, NULL)) {
1144 			soup_header_free_param_list (hash);
1145 			return SOUP_CACHE_RESPONSE_STALE;
1146 		}
1147 
1148 		if (g_hash_table_lookup_extended (hash, "no-cache", NULL, NULL)) {
1149 			soup_header_free_param_list (hash);
1150 			return SOUP_CACHE_RESPONSE_STALE;
1151 		}
1152 
1153 		if (g_hash_table_lookup_extended (hash, "max-age", NULL, &value) && value) {
1154 			max_age = (int)MIN (g_ascii_strtoll (value, NULL, 10), G_MAXINT32);
1155 			/* Forcing cache revalidaton
1156 			 */
1157 			if (!max_age) {
1158 				soup_header_free_param_list (hash);
1159 				return SOUP_CACHE_RESPONSE_NEEDS_VALIDATION;
1160 			}
1161 		}
1162 
1163 		/* max-stale can have no value set, we need to use _extended */
1164 		if (g_hash_table_lookup_extended (hash, "max-stale", NULL, &value)) {
1165 			if (value)
1166 				max_stale = (int)MIN (g_ascii_strtoll (value, NULL, 10), G_MAXINT32);
1167 			else
1168 				max_stale = G_MAXINT32;
1169 		}
1170 
1171 		value = g_hash_table_lookup (hash, "min-fresh");
1172 		if (value)
1173 			min_fresh = (int)MIN (g_ascii_strtoll (value, NULL, 10), G_MAXINT32);
1174 
1175 		soup_header_free_param_list (hash);
1176 
1177 		if (max_age > 0) {
1178 			guint current_age = soup_cache_entry_get_current_age (entry);
1179 
1180 			/* If we are over max-age and max-stale is not
1181 			   set, do not use the value from the cache
1182 			   without validation */
1183 			if ((guint) max_age <= current_age && max_stale == -1)
1184 				return SOUP_CACHE_RESPONSE_NEEDS_VALIDATION;
1185 		}
1186 	}
1187 
1188 	/* 6. The stored response is either: fresh, allowed to be
1189 	 * served stale or succesfully validated
1190 	 */
1191 	if (!soup_cache_entry_is_fresh_enough (entry, min_fresh)) {
1192 		/* Not fresh, can it be served stale? */
1193 
1194 		/* When the must-revalidate directive is present in a
1195 		 * response received by a cache, that cache MUST NOT
1196 		 * use the entry after it becomes stale
1197 		 */
1198 		/* TODO consider also proxy-revalidate & s-maxage */
1199 		if (entry->must_revalidate)
1200 			return SOUP_CACHE_RESPONSE_NEEDS_VALIDATION;
1201 
1202 		if (max_stale != -1) {
1203 			/* G_MAXINT32 means we accept any staleness */
1204 			if (max_stale == G_MAXINT32)
1205 				return SOUP_CACHE_RESPONSE_FRESH;
1206 
1207 			if ((soup_cache_entry_get_current_age (entry) - entry->freshness_lifetime) <= (guint) max_stale)
1208 				return SOUP_CACHE_RESPONSE_FRESH;
1209 		}
1210 
1211 		return SOUP_CACHE_RESPONSE_NEEDS_VALIDATION;
1212 	}
1213 
1214 	return SOUP_CACHE_RESPONSE_FRESH;
1215 }
1216 
1217 /**
1218  * soup_cache_get_cacheability:
1219  * @cache: a #SoupCache
1220  * @msg: a #SoupMessage
1221  *
1222  * Calculates whether the @msg can be cached or not.
1223  *
1224  * Returns: a #SoupCacheability value indicating whether the @msg can be cached or not.
1225  *
1226  * Since: 2.34
1227  */
1228 SoupCacheability
soup_cache_get_cacheability(SoupCache * cache,SoupMessage * msg)1229 soup_cache_get_cacheability (SoupCache *cache, SoupMessage *msg)
1230 {
1231 	g_return_val_if_fail (SOUP_IS_CACHE (cache), SOUP_CACHE_UNCACHEABLE);
1232 	g_return_val_if_fail (SOUP_IS_MESSAGE (msg), SOUP_CACHE_UNCACHEABLE);
1233 
1234 	return SOUP_CACHE_GET_CLASS (cache)->get_cacheability (cache, msg);
1235 }
1236 
1237 static gboolean
force_flush_timeout(gpointer data)1238 force_flush_timeout (gpointer data)
1239 {
1240 	gboolean *forced = (gboolean *)data;
1241 	*forced = TRUE;
1242 
1243 	return FALSE;
1244 }
1245 
1246 /**
1247  * soup_cache_flush:
1248  * @cache: a #SoupCache
1249  *
1250  * This function will force all pending writes in the @cache to be
1251  * committed to disk. For doing so it will iterate the #GMainContext
1252  * associated with @cache's session as long as needed.
1253  *
1254  * Contrast with soup_cache_dump(), which writes out the cache index
1255  * file.
1256  *
1257  * Since: 2.34
1258  */
1259 void
soup_cache_flush(SoupCache * cache)1260 soup_cache_flush (SoupCache *cache)
1261 {
1262 	GMainContext *async_context;
1263 	SoupSession *session;
1264 	GSource *timeout;
1265 	gboolean forced = FALSE;
1266 
1267 	g_return_if_fail (SOUP_IS_CACHE (cache));
1268 
1269 	session = cache->priv->session;
1270 	g_return_if_fail (SOUP_IS_SESSION (session));
1271 	async_context = soup_session_get_async_context (session);
1272 
1273 	/* We give cache 10 secs to finish */
1274 	timeout = soup_add_timeout (async_context, 10000, force_flush_timeout, &forced);
1275 
1276 	while (!forced && cache->priv->n_pending > 0)
1277 		g_main_context_iteration (async_context, FALSE);
1278 
1279 	if (!forced)
1280 		g_source_destroy (timeout);
1281 	else
1282 		g_warning ("Cache flush finished despite %d pending requests", cache->priv->n_pending);
1283 }
1284 
1285 typedef void (* SoupCacheForeachFileFunc) (SoupCache *cache, const char *name, gpointer user_data);
1286 
1287 static void
soup_cache_foreach_file(SoupCache * cache,SoupCacheForeachFileFunc func,gpointer user_data)1288 soup_cache_foreach_file (SoupCache *cache, SoupCacheForeachFileFunc func, gpointer user_data)
1289 {
1290 	GDir *dir;
1291 	const char *name;
1292 	SoupCachePrivate *priv = cache->priv;
1293 
1294 	dir = g_dir_open (priv->cache_dir, 0, NULL);
1295 	while ((name = g_dir_read_name (dir))) {
1296 		if (g_str_has_prefix (name, "soup."))
1297 		    continue;
1298 
1299 		func (cache, name, user_data);
1300 	}
1301 	g_dir_close (dir);
1302 }
1303 
1304 static void
clear_cache_item(gpointer data,gpointer user_data)1305 clear_cache_item (gpointer data,
1306 		  gpointer user_data)
1307 {
1308 	soup_cache_entry_remove ((SoupCache *) user_data, (SoupCacheEntry *) data, TRUE);
1309 }
1310 
1311 static void
delete_cache_file(SoupCache * cache,const char * name,gpointer user_data)1312 delete_cache_file (SoupCache *cache, const char *name, gpointer user_data)
1313 {
1314 	gchar *path;
1315 
1316 	path = g_build_filename (cache->priv->cache_dir, name, NULL);
1317 	g_unlink (path);
1318 	g_free (path);
1319 }
1320 
1321 static void
clear_cache_files(SoupCache * cache)1322 clear_cache_files (SoupCache *cache)
1323 {
1324 	soup_cache_foreach_file (cache, delete_cache_file, NULL);
1325 }
1326 
1327 /**
1328  * soup_cache_clear:
1329  * @cache: a #SoupCache
1330  *
1331  * Will remove all entries in the @cache plus all the cache files.
1332  *
1333  * Since: 2.34
1334  */
1335 void
soup_cache_clear(SoupCache * cache)1336 soup_cache_clear (SoupCache *cache)
1337 {
1338 	GList *entries;
1339 
1340 	g_return_if_fail (SOUP_IS_CACHE (cache));
1341 	g_return_if_fail (cache->priv->cache);
1342 
1343 	/* Cannot use g_hash_table_foreach as callbacks must not modify the hash table */
1344 	entries = g_hash_table_get_values (cache->priv->cache);
1345 	g_list_foreach (entries, clear_cache_item, cache);
1346 	g_list_free (entries);
1347 
1348 	/* Remove also any file not associated with a cache entry. */
1349 	clear_cache_files (cache);
1350 }
1351 
1352 SoupMessage *
soup_cache_generate_conditional_request(SoupCache * cache,SoupMessage * original)1353 soup_cache_generate_conditional_request (SoupCache *cache, SoupMessage *original)
1354 {
1355 	SoupMessage *msg;
1356 	SoupURI *uri;
1357 	SoupCacheEntry *entry;
1358 	const char *last_modified, *etag;
1359 	GList *disabled_features, *f;
1360 
1361 	g_return_val_if_fail (SOUP_IS_CACHE (cache), NULL);
1362 	g_return_val_if_fail (SOUP_IS_MESSAGE (original), NULL);
1363 
1364 	/* Add the validator entries in the header from the cached data */
1365 	entry = soup_cache_entry_lookup (cache, original);
1366 	g_return_val_if_fail (entry, NULL);
1367 
1368 	last_modified = soup_message_headers_get_one (entry->headers, "Last-Modified");
1369 	etag = soup_message_headers_get_one (entry->headers, "ETag");
1370 
1371 	if (!last_modified && !etag)
1372 		return NULL;
1373 
1374 	entry->being_validated = TRUE;
1375 
1376 	/* Copy the data we need from the original message */
1377 	uri = soup_message_get_uri (original);
1378 	msg = soup_message_new_from_uri (original->method, uri);
1379 	soup_message_set_flags (msg, soup_message_get_flags (original));
1380 	soup_message_disable_feature (msg, SOUP_TYPE_CACHE);
1381 
1382 	soup_message_headers_foreach (original->request_headers,
1383 				      (SoupMessageHeadersForeachFunc)copy_headers,
1384 				      msg->request_headers);
1385 
1386 	disabled_features = soup_message_get_disabled_features (original);
1387 	for (f = disabled_features; f; f = f->next)
1388 		soup_message_disable_feature (msg, (GType) GPOINTER_TO_SIZE (f->data));
1389 	g_list_free (disabled_features);
1390 
1391 	if (last_modified)
1392 		soup_message_headers_append (msg->request_headers,
1393 					     "If-Modified-Since",
1394 					     last_modified);
1395 	if (etag)
1396 		soup_message_headers_append (msg->request_headers,
1397 					     "If-None-Match",
1398 					     etag);
1399 
1400 	return msg;
1401 }
1402 
1403 void
soup_cache_cancel_conditional_request(SoupCache * cache,SoupMessage * msg)1404 soup_cache_cancel_conditional_request (SoupCache   *cache,
1405 				       SoupMessage *msg)
1406 {
1407 	SoupCacheEntry *entry;
1408 
1409 	entry = soup_cache_entry_lookup (cache, msg);
1410 	if (entry)
1411 		entry->being_validated = FALSE;
1412 
1413 	soup_session_cancel_message (cache->priv->session, msg, SOUP_STATUS_CANCELLED);
1414 }
1415 
1416 void
soup_cache_update_from_conditional_request(SoupCache * cache,SoupMessage * msg)1417 soup_cache_update_from_conditional_request (SoupCache   *cache,
1418 					    SoupMessage *msg)
1419 {
1420 	SoupCacheEntry *entry = soup_cache_entry_lookup (cache, msg);
1421 	if (!entry)
1422 		return;
1423 
1424 	entry->being_validated = FALSE;
1425 
1426 	if (msg->status_code == SOUP_STATUS_NOT_MODIFIED) {
1427 		soup_message_headers_foreach (msg->response_headers,
1428 					      (SoupMessageHeadersForeachFunc) remove_headers,
1429 					      entry->headers);
1430 		copy_end_to_end_headers (msg->response_headers, entry->headers);
1431 
1432 		soup_cache_entry_set_freshness (entry, msg, cache);
1433 	}
1434 }
1435 
1436 static void
pack_entry(gpointer data,gpointer user_data)1437 pack_entry (gpointer data,
1438 	    gpointer user_data)
1439 {
1440 	SoupCacheEntry *entry = (SoupCacheEntry *) data;
1441 	SoupMessageHeadersIter iter;
1442 	const char *header_key, *header_value;
1443 	GVariantBuilder *entries_builder = (GVariantBuilder *)user_data;
1444 
1445 	/* Do not store non-consolidated entries */
1446 	if (entry->dirty || !entry->key)
1447 		return;
1448 
1449 	g_variant_builder_open (entries_builder, G_VARIANT_TYPE (SOUP_CACHE_PHEADERS_FORMAT));
1450 	g_variant_builder_add (entries_builder, "s", entry->uri);
1451 	g_variant_builder_add (entries_builder, "b", entry->must_revalidate);
1452 	g_variant_builder_add (entries_builder, "u", entry->freshness_lifetime);
1453 	g_variant_builder_add (entries_builder, "u", entry->corrected_initial_age);
1454 	g_variant_builder_add (entries_builder, "u", entry->response_time);
1455 	g_variant_builder_add (entries_builder, "u", entry->hits);
1456 	g_variant_builder_add (entries_builder, "u", entry->length);
1457 	g_variant_builder_add (entries_builder, "q", entry->status_code);
1458 
1459 	/* Pack headers */
1460 	g_variant_builder_open (entries_builder, G_VARIANT_TYPE ("a" SOUP_CACHE_HEADERS_FORMAT));
1461 	soup_message_headers_iter_init (&iter, entry->headers);
1462 	while (soup_message_headers_iter_next (&iter, &header_key, &header_value)) {
1463 		if (g_utf8_validate (header_value, -1, NULL))
1464 			g_variant_builder_add (entries_builder, SOUP_CACHE_HEADERS_FORMAT,
1465 					       header_key, header_value);
1466 	}
1467 	g_variant_builder_close (entries_builder); /* "a" SOUP_CACHE_HEADERS_FORMAT */
1468 	g_variant_builder_close (entries_builder); /* SOUP_CACHE_PHEADERS_FORMAT */
1469 }
1470 
1471 /**
1472  * soup_cache_dump:
1473  * @cache: a #SoupCache
1474  *
1475  * Synchronously writes the cache index out to disk. Contrast with
1476  * soup_cache_flush(), which writes pending cache
1477  * <emphasis>entries</emphasis> to disk.
1478  *
1479  * You must call this before exiting if you want your cache data to
1480  * persist between sessions.
1481  *
1482  * Since: 2.34.
1483  */
1484 void
soup_cache_dump(SoupCache * cache)1485 soup_cache_dump (SoupCache *cache)
1486 {
1487 	SoupCachePrivate *priv = soup_cache_get_instance_private (cache);
1488 	char *filename;
1489 	GVariantBuilder entries_builder;
1490 	GVariant *cache_variant;
1491 
1492 	if (!g_list_length (cache->priv->lru_start))
1493 		return;
1494 
1495 	/* Create the builder and iterate over all entries */
1496 	g_variant_builder_init (&entries_builder, G_VARIANT_TYPE (SOUP_CACHE_ENTRIES_FORMAT));
1497 	g_variant_builder_add (&entries_builder, "q", SOUP_CACHE_CURRENT_VERSION);
1498 	g_variant_builder_open (&entries_builder, G_VARIANT_TYPE ("a" SOUP_CACHE_PHEADERS_FORMAT));
1499 	g_list_foreach (cache->priv->lru_start, pack_entry, &entries_builder);
1500 	g_variant_builder_close (&entries_builder);
1501 
1502 	/* Serialize and dump */
1503 	cache_variant = g_variant_builder_end (&entries_builder);
1504 	g_variant_ref_sink (cache_variant);
1505 	filename = g_build_filename (priv->cache_dir, SOUP_CACHE_FILE, NULL);
1506 	g_file_set_contents (filename, (const char *) g_variant_get_data (cache_variant),
1507 			     g_variant_get_size (cache_variant), NULL);
1508 	g_free (filename);
1509 	g_variant_unref (cache_variant);
1510 }
1511 
1512 static inline guint32
get_key_from_cache_filename(const char * name)1513 get_key_from_cache_filename (const char *name)
1514 {
1515 	guint64 key;
1516 
1517 	key = g_ascii_strtoull (name, NULL, 10);
1518 	return key ? (guint32)key : 0;
1519 }
1520 
1521 static void
insert_cache_file(SoupCache * cache,const char * name,GHashTable * leaked_entries)1522 insert_cache_file (SoupCache *cache, const char *name, GHashTable *leaked_entries)
1523 {
1524 	gchar *path;
1525 
1526 	path = g_build_filename (cache->priv->cache_dir, name, NULL);
1527 	if (g_file_test (path, G_FILE_TEST_IS_REGULAR)) {
1528 		guint32 key = get_key_from_cache_filename (name);
1529 
1530 		if (key) {
1531 			g_hash_table_insert (leaked_entries, GUINT_TO_POINTER (key), path);
1532 			return;
1533 		}
1534 	}
1535 	g_free (path);
1536 }
1537 
1538 /**
1539  * soup_cache_load:
1540  * @cache: a #SoupCache
1541  *
1542  * Loads the contents of @cache's index into memory.
1543  *
1544  * Since: 2.34
1545  */
1546 void
soup_cache_load(SoupCache * cache)1547 soup_cache_load (SoupCache *cache)
1548 {
1549 	gboolean must_revalidate;
1550 	guint32 freshness_lifetime, hits;
1551 	guint32 corrected_initial_age, response_time;
1552 	char *url, *filename = NULL, *contents = NULL;
1553 	GVariant *cache_variant;
1554 	GVariantIter *entries_iter = NULL, *headers_iter = NULL;
1555 	gsize length;
1556 	SoupCacheEntry *entry;
1557 	SoupCachePrivate *priv = cache->priv;
1558 	guint16 version, status_code;
1559 	GHashTable *leaked_entries = NULL;
1560 	GHashTableIter iter;
1561 	gpointer value;
1562 
1563 	filename = g_build_filename (priv->cache_dir, SOUP_CACHE_FILE, NULL);
1564 	if (!g_file_get_contents (filename, &contents, &length, NULL)) {
1565 		g_free (filename);
1566 		g_free (contents);
1567 		clear_cache_files (cache);
1568 		return;
1569 	}
1570 	g_free (filename);
1571 
1572 	cache_variant = g_variant_new_from_data (G_VARIANT_TYPE (SOUP_CACHE_ENTRIES_FORMAT),
1573 						 (const char *) contents, length, FALSE, g_free, contents);
1574 	g_variant_get (cache_variant, SOUP_CACHE_ENTRIES_FORMAT, &version, &entries_iter);
1575 	if (version != SOUP_CACHE_CURRENT_VERSION) {
1576 		g_variant_iter_free (entries_iter);
1577 		g_variant_unref (cache_variant);
1578 		clear_cache_files (cache);
1579 		return;
1580 	}
1581 
1582 	leaked_entries = g_hash_table_new_full (g_direct_hash, g_direct_equal, NULL, g_free);
1583 	soup_cache_foreach_file (cache, (SoupCacheForeachFileFunc)insert_cache_file, leaked_entries);
1584 
1585 	while (g_variant_iter_loop (entries_iter, SOUP_CACHE_PHEADERS_FORMAT,
1586 				    &url, &must_revalidate, &freshness_lifetime, &corrected_initial_age,
1587 				    &response_time, &hits, &length, &status_code,
1588 				    &headers_iter)) {
1589 		const char *header_key, *header_value;
1590 		SoupMessageHeaders *headers;
1591 		SoupMessageHeadersIter soup_headers_iter;
1592 
1593 		/* SoupMessage Headers */
1594 		headers = soup_message_headers_new (SOUP_MESSAGE_HEADERS_RESPONSE);
1595 		while (g_variant_iter_loop (headers_iter, SOUP_CACHE_HEADERS_FORMAT, &header_key, &header_value))
1596 			if (*header_key && *header_value)
1597 				soup_message_headers_append (headers, header_key, header_value);
1598 
1599 		/* Check that we have headers */
1600 		soup_message_headers_iter_init (&soup_headers_iter, headers);
1601 		if (!soup_message_headers_iter_next (&soup_headers_iter, &header_key, &header_value)) {
1602 			soup_message_headers_free (headers);
1603 			continue;
1604 		}
1605 
1606 		/* Insert in cache */
1607 		entry = g_slice_new0 (SoupCacheEntry);
1608 		entry->uri = g_strdup (url);
1609 		entry->must_revalidate = must_revalidate;
1610 		entry->freshness_lifetime = freshness_lifetime;
1611 		entry->corrected_initial_age = corrected_initial_age;
1612 		entry->response_time = response_time;
1613 		entry->hits = hits;
1614 		entry->length = length;
1615 		entry->headers = headers;
1616 		entry->status_code = status_code;
1617 
1618 		if (!soup_cache_entry_insert (cache, entry, FALSE))
1619 			soup_cache_entry_free (entry);
1620 		else
1621 			g_hash_table_remove (leaked_entries, GUINT_TO_POINTER (entry->key));
1622 	}
1623 
1624 	/* Remove the leaked files */
1625 	g_hash_table_iter_init (&iter, leaked_entries);
1626 	while (g_hash_table_iter_next (&iter, NULL, &value))
1627 		g_unlink ((char *)value);
1628 	g_hash_table_destroy (leaked_entries);
1629 
1630 	cache->priv->lru_start = g_list_reverse (cache->priv->lru_start);
1631 
1632 	/* frees */
1633 	g_variant_iter_free (entries_iter);
1634 	g_variant_unref (cache_variant);
1635 }
1636 
1637 /**
1638  * soup_cache_set_max_size:
1639  * @cache: a #SoupCache
1640  * @max_size: the maximum size of the cache, in bytes
1641  *
1642  * Sets the maximum size of the cache.
1643  *
1644  * Since: 2.34
1645  */
1646 void
soup_cache_set_max_size(SoupCache * cache,guint max_size)1647 soup_cache_set_max_size (SoupCache *cache,
1648 			 guint      max_size)
1649 {
1650 	cache->priv->max_size = max_size;
1651 	cache->priv->max_entry_data_size = cache->priv->max_size / MAX_ENTRY_DATA_PERCENTAGE;
1652 }
1653 
1654 /**
1655  * soup_cache_get_max_size:
1656  * @cache: a #SoupCache
1657  *
1658  * Gets the maximum size of the cache.
1659  *
1660  * Return value: the maximum size of the cache, in bytes.
1661  *
1662  * Since: 2.34
1663  */
1664 guint
soup_cache_get_max_size(SoupCache * cache)1665 soup_cache_get_max_size (SoupCache *cache)
1666 {
1667 	return cache->priv->max_size;
1668 }
1669