1
2 // -*- mode: c++; c-basic-offset:4 -*-
3
4 // This file is part of libdap, A C++ implementation of the OPeNDAP Data
5 // Access Protocol.
6
7 // Copyright (c) 2002,2003 OPeNDAP, Inc.
8 // Author: James Gallagher <jgallagher@opendap.org>
9 //
10 // This library is free software; you can redistribute it and/or
11 // modify it under the terms of the GNU Lesser General Public
12 // License as published by the Free Software Foundation; either
13 // version 2.1 of the License, or (at your option) any later version.
14 //
15 // This library is distributed in the hope that it will be useful,
16 // but WITHOUT ANY WARRANTY; without even the implied warranty of
17 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
18 // Lesser General Public License for more details.
19 //
20 // You should have received a copy of the GNU Lesser General Public
21 // License along with this library; if not, write to the Free Software
22 // Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
23 //
24 // You can contact OPeNDAP, Inc. at PO Box 112, Saunderstown, RI. 02874-0112.
25
26 #include "config.h"
27
28 // #define DODS_DEBUG
29
30 // TODO: Remove unneeded includes.
31
32 #include <pthread.h>
33 #include <limits.h>
34 #include <unistd.h> // for stat
35 #include <sys/types.h> // for stat and mkdir
36 #include <sys/stat.h>
37
38 #include <cstring>
39 #include <cerrno>
40
41 #include <iostream>
42 #include <sstream>
43 #include <algorithm>
44 #include <iterator>
45 #include <set>
46
47 #include "Error.h"
48 #include "InternalErr.h"
49 #include "ResponseTooBigErr.h"
50 #ifndef WIN32
51 #include "SignalHandler.h"
52 #endif
53 #include "HTTPCacheInterruptHandler.h"
54 #include "HTTPCacheTable.h"
55 #include "HTTPCacheMacros.h"
56
57 #include "util_mit.h"
58 #include "debug.h"
59
60 #ifdef WIN32
61 #include <direct.h>
62 #include <time.h>
63 #include <fcntl.h>
64 #define MKDIR(a,b) _mkdir((a))
65 #define REMOVE(a) do { \
66 int s = remove((a)); \
67 if (s != 0) \
68 throw InternalErr(__FILE__, __LINE__, "Cache error; could not remove file: " + long_to_string(s)); \
69 } while(0);
70 #define MKSTEMP(a) _open(_mktemp((a)),_O_CREAT,_S_IREAD|_S_IWRITE)
71 #define DIR_SEPARATOR_CHAR '\\'
72 #define DIR_SEPARATOR_STR "\\"
73 #else
74 #define MKDIR(a,b) mkdir((a), (b))
75 #define MKSTEMP(a) mkstemp((a))
76 #define DIR_SEPARATOR_CHAR '/'
77 #define DIR_SEPARATOR_STR "/"
78 #endif
79
80 #define CACHE_META ".meta"
81 #define CACHE_INDEX ".index"
82 #define CACHE_EMPTY_ETAG "@cache@"
83
84 #define NO_LM_EXPIRATION 24*3600 // 24 hours
85 #define MAX_LM_EXPIRATION 48*3600 // Max expiration from LM
86
87 // If using LM to find the expiration then take 10% and no more than
88 // MAX_LM_EXPIRATION.
89 #ifndef LM_EXPIRATION
90 #define LM_EXPIRATION(t) (min((MAX_LM_EXPIRATION), static_cast<int>((t) / 10)))
91 #endif
92
93 const int CACHE_TABLE_SIZE = 1499;
94
95 using namespace std;
96
97 namespace libdap {
98
99 /** Compute the hash value for a URL.
100 @param url
101 @return An integer hash code between 0 and CACHE_TABLE_SIZE. */
102 int
get_hash(const string & url)103 get_hash(const string &url)
104 {
105 int hash = 0;
106
107 for (const char *ptr = url.c_str(); *ptr; ptr++)
108 hash = (int)((hash * 3 + (*(unsigned char *)ptr)) % CACHE_TABLE_SIZE);
109
110 return hash;
111 }
112
HTTPCacheTable(const string & cache_root,int block_size)113 HTTPCacheTable::HTTPCacheTable(const string &cache_root, int block_size) :
114 d_cache_root(cache_root), d_block_size(block_size), d_current_size(0), d_new_entries(0)
115 {
116 d_cache_index = cache_root + CACHE_INDEX;
117
118 d_cache_table = new CacheEntries*[CACHE_TABLE_SIZE];
119
120 // Initialize the cache table.
121 for (int i = 0; i < CACHE_TABLE_SIZE; ++i)
122 d_cache_table[i] = 0;
123
124 cache_index_read();
125 }
126
127 /** Called by for_each inside ~HTTPCache().
128 @param e The cache entry to delete. */
129
130 static inline void
delete_cache_entry(HTTPCacheTable::CacheEntry * e)131 delete_cache_entry(HTTPCacheTable::CacheEntry *e)
132 {
133 DBG2(cerr << "Deleting CacheEntry: " << e << endl);
134 delete e;
135 }
136
~HTTPCacheTable()137 HTTPCacheTable::~HTTPCacheTable()
138 {
139 for (int i = 0; i < CACHE_TABLE_SIZE; ++i) {
140 HTTPCacheTable::CacheEntries *cp = get_cache_table()[i];
141 if (cp) {
142 // delete each entry
143 for_each(cp->begin(), cp->end(), delete_cache_entry);
144
145 // now delete the vector that held the entries
146 delete get_cache_table()[i];
147 get_cache_table()[i] = 0;
148 }
149 }
150
151 delete[] d_cache_table;
152 }
153
154 /** Functor which deletes and nulls a single CacheEntry if it has expired.
155 This functor is called by expired_gc which then uses the
156 erase(remove(...) ...) idiom to really remove all the vector entries that
157 belonged to the deleted CacheEntry objects.
158
159 @see expired_gc. */
160
161 class DeleteExpired : public unary_function<HTTPCacheTable::CacheEntry *&, void> {
162 time_t d_time;
163 HTTPCacheTable &d_table;
164
165 public:
DeleteExpired(HTTPCacheTable & table,time_t t)166 DeleteExpired(HTTPCacheTable &table, time_t t) :
167 d_time(t), d_table(table) {
168 if (!t)
169 d_time = time(0); // 0 == now
170 }
171
operator ()(HTTPCacheTable::CacheEntry * & e)172 void operator()(HTTPCacheTable::CacheEntry *&e) {
173 if (e && !e->readers && (e->freshness_lifetime
174 < (e->corrected_initial_age + (d_time - e->response_time)))) {
175 DBG(cerr << "Deleting expired cache entry: " << e->url << endl);
176 d_table.remove_cache_entry(e);
177 delete e; e = 0;
178 }
179 }
180 };
181
182 // @param time base deletes againt this time, defaults to 0 (now)
delete_expired_entries(time_t time)183 void HTTPCacheTable::delete_expired_entries(time_t time) {
184 // Walk through and delete all the expired entries.
185 for (int cnt = 0; cnt < CACHE_TABLE_SIZE; cnt++) {
186 HTTPCacheTable::CacheEntries *slot = get_cache_table()[cnt];
187 if (slot) {
188 for_each(slot->begin(), slot->end(), DeleteExpired(*this, time));
189 slot->erase(remove(slot->begin(), slot->end(),
190 static_cast<HTTPCacheTable::CacheEntry *>(0)), slot->end());
191 }
192 }
193 }
194
195 /** Functor which deletes and nulls a single CacheEntry which has less than
196 or equal to \c hits hits or if it is larger than the cache's
197 max_entry_size property.
198
199 @see hits_gc. */
200
201 class DeleteByHits : public unary_function<HTTPCacheTable::CacheEntry *&, void> {
202 HTTPCacheTable &d_table;
203 int d_hits;
204
205 public:
DeleteByHits(HTTPCacheTable & table,int hits)206 DeleteByHits(HTTPCacheTable &table, int hits) :
207 d_table(table), d_hits(hits) {
208 }
209
operator ()(HTTPCacheTable::CacheEntry * & e)210 void operator()(HTTPCacheTable::CacheEntry *&e) {
211 if (e && !e->readers && e->hits <= d_hits) {
212 DBG(cerr << "Deleting cache entry: " << e->url << endl);
213 d_table.remove_cache_entry(e);
214 delete e; e = 0;
215 }
216 }
217 };
218
219 void
delete_by_hits(int hits)220 HTTPCacheTable::delete_by_hits(int hits) {
221 for (int cnt = 0; cnt < CACHE_TABLE_SIZE; cnt++) {
222 if (get_cache_table()[cnt]) {
223 HTTPCacheTable::CacheEntries *slot = get_cache_table()[cnt];
224 for_each(slot->begin(), slot->end(), DeleteByHits(*this, hits));
225 slot->erase(remove(slot->begin(), slot->end(),
226 static_cast<HTTPCacheTable::CacheEntry*>(0)),
227 slot->end());
228
229 }
230 }
231 }
232
233 /** Functor which deletes and nulls a single CacheEntry which is larger than
234 a given size.
235 @see hits_gc. */
236
237 class DeleteBySize : public unary_function<HTTPCacheTable::CacheEntry *&, void> {
238 HTTPCacheTable &d_table;
239 unsigned int d_size;
240
241 public:
DeleteBySize(HTTPCacheTable & table,unsigned int size)242 DeleteBySize(HTTPCacheTable &table, unsigned int size) :
243 d_table(table), d_size(size) {
244 }
245
operator ()(HTTPCacheTable::CacheEntry * & e)246 void operator()(HTTPCacheTable::CacheEntry *&e) {
247 if (e && !e->readers && e->size > d_size) {
248 DBG(cerr << "Deleting cache entry: " << e->url << endl);
249 d_table.remove_cache_entry(e);
250 delete e; e = 0;
251 }
252 }
253 };
254
delete_by_size(unsigned int size)255 void HTTPCacheTable::delete_by_size(unsigned int size) {
256 for (int cnt = 0; cnt < CACHE_TABLE_SIZE; cnt++) {
257 if (get_cache_table()[cnt]) {
258 HTTPCacheTable::CacheEntries *slot = get_cache_table()[cnt];
259 for_each(slot->begin(), slot->end(), DeleteBySize(*this, size));
260 slot->erase(remove(slot->begin(), slot->end(),
261 static_cast<HTTPCacheTable::CacheEntry*>(0)),
262 slot->end());
263
264 }
265 }
266 }
267
268 /** @name Cache Index
269
270 These methods manage the cache's index file. Each cache holds an index
271 file named \c .index which stores the cache's state information. */
272
273 //@{
274
275 /** Remove the cache index file.
276
277 A private method.
278
279 @return True if the file was deleted, otherwise false. */
280
281 bool
cache_index_delete()282 HTTPCacheTable::cache_index_delete()
283 {
284 d_new_entries = 0;
285
286 return (REMOVE_BOOL(d_cache_index.c_str()) == 0);
287 }
288
289 /** Read the saved set of cached entries from disk. Consistency between the
290 in-memory cache and the index is maintained by only reading the index
291 file when the HTTPCache object is created!
292
293 A private method.
294
295 @return True when a cache index was found and read, false otherwise. */
296
297 bool
cache_index_read()298 HTTPCacheTable::cache_index_read()
299 {
300 FILE *fp = fopen(d_cache_index.c_str(), "r");
301 // If the cache index can't be opened that's OK; start with an empty
302 // cache. 09/05/02 jhrg
303 if (!fp) {
304 return false;
305 }
306
307 char line[1024];
308 while (!feof(fp) && fgets(line, 1024, fp)) {
309 add_entry_to_cache_table(cache_index_parse_line(line));
310 DBG2(cerr << line << endl);
311 }
312
313 int res = fclose(fp) ;
314 if (res) {
315 DBG(cerr << "HTTPCache::cache_index_read - Failed to close " << (void *)fp << endl);
316 }
317
318 d_new_entries = 0;
319
320 return true;
321 }
322
323 /** Parse one line of the index file.
324
325 A private method.
326
327 @param line A single line from the \c .index file.
328 @return A CacheEntry initialized with the information from \c line. */
329
330 HTTPCacheTable::CacheEntry *
cache_index_parse_line(const char * line)331 HTTPCacheTable::cache_index_parse_line(const char *line)
332 {
333 // Read the line and create the cache object
334 HTTPCacheTable::CacheEntry *entry = new HTTPCacheTable::CacheEntry;
335 istringstream iss(line);
336 iss >> entry->url;
337 iss >> entry->cachename;
338
339 iss >> entry->etag;
340 if (entry->etag == CACHE_EMPTY_ETAG)
341 entry->etag = "";
342
343 iss >> entry->lm;
344 iss >> entry->expires;
345 iss >> entry->size;
346 iss >> entry->range; // range is not used. 10/02/02 jhrg
347
348 iss >> entry->hash;
349 iss >> entry->hits;
350 iss >> entry->freshness_lifetime;
351 iss >> entry->response_time;
352 iss >> entry->corrected_initial_age;
353
354 iss >> entry->must_revalidate;
355
356 return entry;
357 }
358
359 /** Functor which writes a single CacheEntry to the \c .index file. */
360
361 class WriteOneCacheEntry :
362 public unary_function<HTTPCacheTable::CacheEntry *, void>
363 {
364
365 FILE *d_fp;
366
367 public:
WriteOneCacheEntry(FILE * fp)368 WriteOneCacheEntry(FILE *fp) : d_fp(fp)
369 {}
370
operator ()(HTTPCacheTable::CacheEntry * e)371 void operator()(HTTPCacheTable::CacheEntry *e)
372 {
373 if (e && fprintf(d_fp,
374 "%s %s %s %ld %ld %ld %c %d %d %ld %ld %ld %c\r\n",
375 e->url.c_str(),
376 e->cachename.c_str(),
377 e->etag == "" ? CACHE_EMPTY_ETAG : e->etag.c_str(),
378 (long)(e->lm),
379 (long)(e->expires),
380 e->size,
381 e->range ? '1' : '0', // not used. 10/02/02 jhrg
382 e->hash,
383 e->hits,
384 (long)(e->freshness_lifetime),
385 (long)(e->response_time),
386 (long)(e->corrected_initial_age),
387 e->must_revalidate ? '1' : '0') < 0)
388 throw Error(internal_error, "Cache Index. Error writing cache index\n");
389 }
390 };
391
392 /** Walk through the list of cached objects and write the cache index file to
393 disk. If the file does not exist, it is created. If the file does exist,
394 it is overwritten. As a side effect, zero the new_entries counter.
395
396 A private method.
397
398 @exception Error Thrown if the index file cannot be opened for writing.
399 @note The HTTPCache destructor calls this method and silently ignores
400 this exception. */
401 void
cache_index_write()402 HTTPCacheTable::cache_index_write()
403 {
404 DBG(cerr << "Cache Index. Writing index " << d_cache_index << endl);
405
406 // Open the file for writing.
407 FILE * fp = NULL;
408 if ((fp = fopen(d_cache_index.c_str(), "wb")) == NULL) {
409 throw Error(string("Cache Index. Can't open `") + d_cache_index
410 + string("' for writing"));
411 }
412
413 // Walk through the list and write it out. The format is really
414 // simple as we keep it all in ASCII.
415
416 for (int cnt = 0; cnt < CACHE_TABLE_SIZE; cnt++) {
417 HTTPCacheTable::CacheEntries *cp = get_cache_table()[cnt];
418 if (cp)
419 for_each(cp->begin(), cp->end(), WriteOneCacheEntry(fp));
420 }
421
422 /* Done writing */
423 int res = fclose(fp);
424 if (res) {
425 DBG(cerr << "HTTPCache::cache_index_write - Failed to close "
426 << (void *)fp << endl);
427 }
428
429 d_new_entries = 0;
430 }
431
432 //@} End of the cache index methods.
433 /** Create the directory path for cache file. The cache uses a set of
434 directories within d_cache_root to store individual responses. The name
435 of the directory that holds a given response is the value returned by the
436 get_hash() function (i.e., it's a number). If the directory exists, this
437 method does nothing.
438
439 A private method.
440
441 @param hash The hash value (i.e., directory name). An integer between 0
442 and CACHE_TABLE_SIZE (See HTTPCache.h).
443 @return The pathname to the directory (even if it already existed).
444 @exception Error Thrown if the directory cannot be created.*/
445
446 string
create_hash_directory(int hash)447 HTTPCacheTable::create_hash_directory(int hash)
448 {
449 #if 0
450 struct stat stat_info;
451 ostringstream path;
452
453 path << d_cache_root << hash;
454 string p = path.str();
455
456 if (stat(p.c_str(), &stat_info) == -1) {
457 DBG2(cerr << "Cache....... Create dir " << p << endl);
458 if (MKDIR(p.c_str(), 0777) < 0) {
459 DBG2(cerr << "Cache....... Can't create..." << endl);
460 throw Error("Could not create cache slot to hold response! Check the write permissions on your disk cache directory. Cache root: " + d_cache_root + ".");
461 }
462 }
463 else {
464 DBG2(cerr << "Cache....... Directory " << p << " already exists"
465 << endl);
466 }
467
468 return p;
469 #endif
470
471 ostringstream path;
472 path << d_cache_root << hash;
473
474 // Save the mask
475 mode_t mask = umask(0);
476
477 // Ignore the error if the directory exists
478 errno = 0;
479 if (mkdir(path.str().c_str(), 0777) < 0 && errno != EEXIST) {
480 umask(mask);
481 throw Error(internal_error, "Could not create the directory for the cache at '" + path.str() + "' (" + strerror(errno) + ").");
482 }
483
484 // Restore themask
485 umask(mask);
486
487 return path.str();
488 }
489
490 /** Create the directory for this url (using the hash value from get_hash())
491 and a file within that directory to hold the response's information. The
492 cache name and cache_body_fd fields of \c entry are updated.
493
494 mkstemp opens the file it creates, which is a good thing but it makes
495 tracking resources hard for the HTTPCache object (because an exception
496 might cause a file descriptor resource leak). So I close that file
497 descriptor here.
498
499 A private method.
500
501 @param entry The cache entry object to operate on.
502 @exception Error If the file for the response's body cannot be created. */
503
504 void
create_location(HTTPCacheTable::CacheEntry * entry)505 HTTPCacheTable::create_location(HTTPCacheTable::CacheEntry *entry)
506 {
507 string hash_dir = create_hash_directory(entry->hash);
508 #ifdef WIN32
509 hash_dir += "\\dodsXXXXXX";
510 #else
511 hash_dir += "/dodsXXXXXX"; // mkstemp uses six characters.
512 #endif
513
514 // mkstemp uses the storage passed to it; must be writable and local.
515 // char *templat = new char[hash_dir.size() + 1];
516 vector<char> templat(hash_dir.size() + 1);
517 strncpy(&templat[0], hash_dir.c_str(), hash_dir.size() + 1);
518
519 // Open truncated for update. NB: mkstemp() returns a file descriptor.
520 // man mkstemp says "... The file is opened with the O_EXCL flag,
521 // guaranteeing that when mkstemp returns successfully we are the only
522 // user." 09/19/02 jhrg
523 #ifndef WIN32
524 // Make sure that temp files are accessible only by the owner.
525 umask(077);
526 #endif
527 int fd = MKSTEMP(&templat[0]); // fd mode is 666 or 600 (Unix)
528 if (fd < 0) {
529 // delete[] templat; templat = 0;
530 // close(fd); Calling close() when fd is < 0 is a bad idea! jhrg 7/2/15
531 throw Error(internal_error, "The HTTP Cache could not create a file to hold the response; it will not be cached.");
532 }
533
534 entry->cachename = &templat[0];
535 // delete[] templat; templat = 0;
536 close(fd);
537 }
538
539
540 /** compute real disk space for an entry. */
541 static inline int
entry_disk_space(int size,unsigned int block_size)542 entry_disk_space(int size, unsigned int block_size)
543 {
544 unsigned int num_of_blocks = (size + block_size) / block_size;
545
546 DBG(cerr << "size: " << size << ", block_size: " << block_size
547 << ", num_of_blocks: " << num_of_blocks << endl);
548
549 return num_of_blocks * block_size;
550 }
551
552 /** @name Methods to manipulate instances of CacheEntry. */
553
554 //@{
555
556 /** Add a CacheEntry to the cache table. As each entry is read, load it into
557 the in-memory cache table and update the HTTPCache's current_size. The
558 later is used by the garbage collection method.
559
560 @param entry The CacheEntry instance to add. */
561 void
add_entry_to_cache_table(CacheEntry * entry)562 HTTPCacheTable::add_entry_to_cache_table(CacheEntry *entry)
563 {
564 int hash = entry->hash;
565 if (hash > CACHE_TABLE_SIZE-1 || hash < 0)
566 throw InternalErr(__FILE__, __LINE__, "Hash value too large!");
567
568 if (!d_cache_table[hash])
569 d_cache_table[hash] = new CacheEntries;
570
571 d_cache_table[hash]->push_back(entry);
572
573 DBG(cerr << "add_entry_to_cache_table, current_size: " << d_current_size
574 << ", entry->size: " << entry->size << ", block size: " << d_block_size
575 << endl);
576
577 d_current_size += entry_disk_space(entry->size, d_block_size);
578
579 DBG(cerr << "add_entry_to_cache_table, current_size: " << d_current_size << endl);
580
581 increment_new_entries();
582 }
583
584 /** Get a pointer to a CacheEntry from the cache table.
585
586 @param url Look for this URL. */
587 HTTPCacheTable::CacheEntry *
get_locked_entry_from_cache_table(const string & url)588 HTTPCacheTable::get_locked_entry_from_cache_table(const string &url) /*const*/
589 {
590 return get_locked_entry_from_cache_table(get_hash(url), url);
591 }
592
593 /** Get a pointer to a CacheEntry from the cache table. Providing a way to
594 pass the hash code into this method makes it easier to test for correct
595 behavior when two entries collide. 10/07/02 jhrg
596
597 @param hash The hash code for \c url.
598 @param url Look for this URL.
599 @return The matching CacheEntry instance or NULL if none was found. */
600 HTTPCacheTable::CacheEntry *
get_locked_entry_from_cache_table(int hash,const string & url)601 HTTPCacheTable::get_locked_entry_from_cache_table(int hash, const string &url) /*const*/
602 {
603 DBG(cerr << "url: " << url << "; hash: " << hash << endl);
604 DBG(cerr << "d_cache_table: " << hex << d_cache_table << dec << endl);
605 if (d_cache_table[hash]) {
606 CacheEntries *cp = d_cache_table[hash];
607 for (CacheEntriesIter i = cp->begin(); i != cp->end(); ++i) {
608 // Must test *i because perform_garbage_collection may have
609 // removed this entry; the CacheEntry will then be null.
610 if ((*i) && (*i)->url == url) {
611 (*i)->lock_read_response(); // Lock the response
612 return *i;
613 }
614 }
615 }
616
617 return 0;
618 }
619
620 /** Get a pointer to a CacheEntry from the cache table. Providing a way to
621 pass the hash code into this method makes it easier to test for correct
622 behavior when two entries collide. 10/07/02 jhrg
623
624 @param url Look for this URL.
625 @return The matching CacheEntry instance or NULL if none was found. */
626 HTTPCacheTable::CacheEntry *
get_write_locked_entry_from_cache_table(const string & url)627 HTTPCacheTable::get_write_locked_entry_from_cache_table(const string &url)
628 {
629 int hash = get_hash(url);
630 if (d_cache_table[hash]) {
631 CacheEntries *cp = d_cache_table[hash];
632 for (CacheEntriesIter i = cp->begin(); i != cp->end(); ++i) {
633 // Must test *i because perform_garbage_collection may have
634 // removed this entry; the CacheEntry will then be null.
635 if ((*i) && (*i)->url == url) {
636 (*i)->lock_write_response(); // Lock the response
637 return *i;
638 }
639 }
640 }
641
642 return 0;
643 }
644
645 /** Remove a CacheEntry. This means delete the entry's files on disk and free
646 the CacheEntry object. The caller should null the entry's pointer in the
647 cache_table. The total size of the cache is decremented once the entry is
648 deleted.
649
650 @param entry The CacheEntry to delete.
651 @exception InternalErr Thrown if \c entry is in use. */
652 void
remove_cache_entry(HTTPCacheTable::CacheEntry * entry)653 HTTPCacheTable::remove_cache_entry(HTTPCacheTable::CacheEntry *entry)
654 {
655 // This should never happen; all calls to this method are protected by
656 // the caller, hence the InternalErr.
657 if (entry->readers)
658 throw InternalErr(__FILE__, __LINE__, "Tried to delete a cache entry that is in use.");
659
660 REMOVE(entry->cachename.c_str());
661 REMOVE(string(entry->cachename + CACHE_META).c_str());
662
663 DBG(cerr << "remove_cache_entry, current_size: " << get_current_size() << endl);
664
665 unsigned int eds = entry_disk_space(entry->size, get_block_size());
666 set_current_size((eds > get_current_size()) ? 0 : get_current_size() - eds);
667
668 DBG(cerr << "remove_cache_entry, current_size: " << get_current_size() << endl);
669 }
670
671 /** Functor which deletes and nulls a CacheEntry if the given entry matches
672 the url. */
673 class DeleteCacheEntry: public unary_function<HTTPCacheTable::CacheEntry *&, void>
674 {
675 string d_url;
676 HTTPCacheTable *d_cache_table;
677
678 public:
DeleteCacheEntry(HTTPCacheTable * c,const string & url)679 DeleteCacheEntry(HTTPCacheTable *c, const string &url)
680 : d_url(url), d_cache_table(c)
681 {}
682
operator ()(HTTPCacheTable::CacheEntry * & e)683 void operator()(HTTPCacheTable::CacheEntry *&e)
684 {
685 if (e && e->url == d_url) {
686 e->lock_write_response();
687 d_cache_table->remove_cache_entry(e);
688 e->unlock_write_response();
689 delete e; e = 0;
690 }
691 }
692 };
693
694 /** Find the CacheEntry for the given url and remove both its information in
695 the persistent store and the entry in d_cache_table. If \c url is not in
696 the cache, this method does nothing.
697
698 @param url Remove this URL's entry.
699 @exception InternalErr Thrown if the CacheEntry for \c url is locked. */
700 void
remove_entry_from_cache_table(const string & url)701 HTTPCacheTable::remove_entry_from_cache_table(const string &url)
702 {
703 int hash = get_hash(url);
704 if (d_cache_table[hash]) {
705 CacheEntries *cp = d_cache_table[hash];
706 for_each(cp->begin(), cp->end(), DeleteCacheEntry(this, url));
707 cp->erase(remove(cp->begin(), cp->end(), static_cast<HTTPCacheTable::CacheEntry*>(0)),
708 cp->end());
709 }
710 }
711
712 /** Functor to delete and null all unlocked HTTPCacheTable::CacheEntry objects. */
713
714 class DeleteUnlockedCacheEntry: public unary_function<HTTPCacheTable::CacheEntry *&, void> {
715 HTTPCacheTable &d_table;
716
717 public:
DeleteUnlockedCacheEntry(HTTPCacheTable & t)718 DeleteUnlockedCacheEntry(HTTPCacheTable &t) :
719 d_table(t)
720 {
721 }
operator ()(HTTPCacheTable::CacheEntry * & e)722 void operator()(HTTPCacheTable::CacheEntry *&e)
723 {
724 if (e) {
725 d_table.remove_cache_entry(e);
726 delete e;
727 e = 0;
728 }
729 }
730 };
731
delete_all_entries()732 void HTTPCacheTable::delete_all_entries()
733 {
734 // Walk through the cache table and, for every entry in the cache, delete
735 // it on disk and in the cache table.
736 for (int cnt = 0; cnt < CACHE_TABLE_SIZE; cnt++) {
737 HTTPCacheTable::CacheEntries *slot = get_cache_table()[cnt];
738 if (slot) {
739 for_each(slot->begin(), slot->end(), DeleteUnlockedCacheEntry(*this));
740 slot->erase(remove(slot->begin(), slot->end(), static_cast<HTTPCacheTable::CacheEntry *> (0)), slot->end());
741 }
742 }
743
744 cache_index_delete();
745 }
746
747 /** Calculate the corrected_initial_age of the object. We use the time when
748 this function is called as the response_time as this is when we have
749 received the complete response. This may cause a delay if the response
750 header is very big but should not cause any incorrect behavior.
751
752 A private method.
753
754 @param entry The CacheEntry object.
755 @param default_expiration The default value of the cached object's
756 expiration time.
757 @param request_time When was the request made? I think this value must be
758 passed into the method that calls this method... */
759
760 void
calculate_time(HTTPCacheTable::CacheEntry * entry,int default_expiration,time_t request_time)761 HTTPCacheTable::calculate_time(HTTPCacheTable::CacheEntry *entry, int default_expiration, time_t request_time)
762 {
763 entry->response_time = time(NULL);
764 time_t apparent_age = max(0, static_cast<int>(entry->response_time - entry->date));
765 time_t corrected_received_age = max(apparent_age, entry->age);
766 time_t response_delay = entry->response_time - request_time;
767 entry->corrected_initial_age = corrected_received_age + response_delay;
768
769 // Estimate an expires time using the max-age and expires time. If we
770 // don't have an explicit expires time then set it to 10% of the LM date
771 // (although max 24 h). If no LM date is available then use 24 hours.
772 time_t freshness_lifetime = entry->max_age;
773 if (freshness_lifetime < 0) {
774 if (entry->expires < 0) {
775 if (entry->lm < 0) {
776 freshness_lifetime = default_expiration;
777 }
778 else {
779 freshness_lifetime = LM_EXPIRATION(entry->date - entry->lm);
780 }
781 }
782 else
783 freshness_lifetime = entry->expires - entry->date;
784 }
785
786 entry->freshness_lifetime = max(0, static_cast<int>(freshness_lifetime));
787
788 DBG2(cerr << "Cache....... Received Age " << entry->age
789 << ", corrected " << entry->corrected_initial_age
790 << ", freshness lifetime " << entry->freshness_lifetime << endl);
791 }
792
793 /** Parse various headers from the vector (which can be retrieved from
794 libcurl once a response is received) and load the CacheEntry object with
795 values. This method should only be called with headers from a response
796 (it should not be used to parse request headers).
797
798 A private method.
799
800 @param entry Store values from the headers here.
801 @param max_entry_size DO not cache entries larger than this.
802 @param headers A vector of header lines. */
803
parse_headers(HTTPCacheTable::CacheEntry * entry,unsigned long max_entry_size,const vector<string> & headers)804 void HTTPCacheTable::parse_headers(HTTPCacheTable::CacheEntry *entry, unsigned long max_entry_size,
805 const vector<string> &headers)
806 {
807 vector<string>::const_iterator i;
808 for (i = headers.begin(); i != headers.end(); ++i) {
809 // skip a blank header.
810 if ((*i).empty())
811 continue;
812
813 string::size_type colon = (*i).find(':');
814
815 // skip a header with no colon in it.
816 if (colon == string::npos)
817 continue;
818
819 string header = (*i).substr(0, (*i).find(':'));
820 string value = (*i).substr((*i).find(": ") + 2);
821 DBG2(cerr << "Header: " << header << endl);DBG2(cerr << "Value: " << value << endl);
822
823 if (header == "ETag") {
824 entry->etag = value;
825 }
826 else if (header == "Last-Modified") {
827 entry->lm = parse_time(value.c_str());
828 }
829 else if (header == "Expires") {
830 entry->expires = parse_time(value.c_str());
831 }
832 else if (header == "Date") {
833 entry->date = parse_time(value.c_str());
834 }
835 else if (header == "Age") {
836 entry->age = parse_time(value.c_str());
837 }
838 else if (header == "Content-Length") {
839 unsigned long clength = strtoul(value.c_str(), 0, 0);
840 if (clength > max_entry_size)
841 entry->set_no_cache(true);
842 }
843 else if (header == "Cache-Control") {
844 // Ignored Cache-Control values: public, private, no-transform,
845 // proxy-revalidate, s-max-age. These are used by shared caches.
846 // See section 14.9 of RFC 2612. 10/02/02 jhrg
847 if (value == "no-cache" || value == "no-store")
848 // Note that we *can* store a 'no-store' response in volatile
849 // memory according to RFC 2616 (section 14.9.2) but those
850 // will be rare coming from DAP servers. 10/02/02 jhrg
851 entry->set_no_cache(true);
852 else if (value == "must-revalidate")
853 entry->must_revalidate = true;
854 else if (value.find("max-age") != string::npos) {
855 string max_age = value.substr(value.find("=" + 1));
856 entry->max_age = parse_time(max_age.c_str());
857 }
858 }
859 }
860 }
861
862 //@} End of the CacheEntry methods.
863
864 // @TODO Change name to record locked response
bind_entry_to_data(HTTPCacheTable::CacheEntry * entry,FILE * body)865 void HTTPCacheTable::bind_entry_to_data(HTTPCacheTable::CacheEntry *entry, FILE *body) {
866 entry->hits++; // Mark hit
867 d_locked_entries[body] = entry; // record lock, see release_cached_r...
868 }
869
uncouple_entry_from_data(FILE * body)870 void HTTPCacheTable::uncouple_entry_from_data(FILE *body) {
871
872 HTTPCacheTable::CacheEntry *entry = d_locked_entries[body];
873 if (!entry)
874 throw InternalErr("There is no cache entry for the response given.");
875
876 d_locked_entries.erase(body);
877 entry->unlock_read_response();
878
879 if (entry->readers < 0)
880 throw InternalErr("An unlocked entry was released");
881 }
882
is_locked_read_responses()883 bool HTTPCacheTable::is_locked_read_responses() {
884 return !d_locked_entries.empty();
885 }
886
887 } // namespace libdap
888