1 
2 // -*- mode: c++; c-basic-offset:4 -*-
3 
4 // This file is part of libdap, A C++ implementation of the OPeNDAP Data
5 // Access Protocol.
6 
7 // Copyright (c) 2002,2003 OPeNDAP, Inc.
8 // Author: James Gallagher <jgallagher@opendap.org>
9 //
10 // This library is free software; you can redistribute it and/or
11 // modify it under the terms of the GNU Lesser General Public
12 // License as published by the Free Software Foundation; either
13 // version 2.1 of the License, or (at your option) any later version.
14 //
15 // This library is distributed in the hope that it will be useful,
16 // but WITHOUT ANY WARRANTY; without even the implied warranty of
17 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
18 // Lesser General Public License for more details.
19 //
20 // You should have received a copy of the GNU Lesser General Public
21 // License along with this library; if not, write to the Free Software
22 // Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA
23 //
24 // You can contact OPeNDAP, Inc. at PO Box 112, Saunderstown, RI. 02874-0112.
25 
26 #include "config.h"
27 
28 // #define DODS_DEBUG
29 
30 // TODO: Remove unneeded includes.
31 
32 #include <pthread.h>
33 #include <limits.h>
34 #include <unistd.h>   // for stat
35 #include <sys/types.h>  // for stat and mkdir
36 #include <sys/stat.h>
37 
38 #include <cstring>
39 #include <cerrno>
40 
41 #include <iostream>
42 #include <sstream>
43 #include <algorithm>
44 #include <iterator>
45 #include <set>
46 
47 #include "Error.h"
48 #include "InternalErr.h"
49 #include "ResponseTooBigErr.h"
50 #ifndef WIN32
51 #include "SignalHandler.h"
52 #endif
53 #include "HTTPCacheInterruptHandler.h"
54 #include "HTTPCacheTable.h"
55 #include "HTTPCacheMacros.h"
56 
57 #include "util_mit.h"
58 #include "debug.h"
59 
60 #ifdef WIN32
61 #include <direct.h>
62 #include <time.h>
63 #include <fcntl.h>
64 #define MKDIR(a,b) _mkdir((a))
65 #define REMOVE(a) do { \
66 		int s = remove((a)); \
67 		if (s != 0) \
68 			throw InternalErr(__FILE__, __LINE__, "Cache error; could not remove file: " + long_to_string(s)); \
69 	} while(0);
70 #define MKSTEMP(a) _open(_mktemp((a)),_O_CREAT,_S_IREAD|_S_IWRITE)
71 #define DIR_SEPARATOR_CHAR '\\'
72 #define DIR_SEPARATOR_STR "\\"
73 #else
74 #define MKDIR(a,b) mkdir((a), (b))
75 #define MKSTEMP(a) mkstemp((a))
76 #define DIR_SEPARATOR_CHAR '/'
77 #define DIR_SEPARATOR_STR "/"
78 #endif
79 
80 #define CACHE_META ".meta"
81 #define CACHE_INDEX ".index"
82 #define CACHE_EMPTY_ETAG "@cache@"
83 
84 #define NO_LM_EXPIRATION 24*3600 // 24 hours
85 #define MAX_LM_EXPIRATION 48*3600 // Max expiration from LM
86 
87 // If using LM to find the expiration then take 10% and no more than
88 // MAX_LM_EXPIRATION.
89 #ifndef LM_EXPIRATION
90 #define LM_EXPIRATION(t) (min((MAX_LM_EXPIRATION), static_cast<int>((t) / 10)))
91 #endif
92 
93 const int CACHE_TABLE_SIZE = 1499;
94 
95 using namespace std;
96 
97 namespace libdap {
98 
99 /** Compute the hash value for a URL.
100     @param url
101     @return An integer hash code between 0 and CACHE_TABLE_SIZE. */
102 int
get_hash(const string & url)103 get_hash(const string &url)
104 {
105     int hash = 0;
106 
107     for (const char *ptr = url.c_str(); *ptr; ptr++)
108         hash = (int)((hash * 3 + (*(unsigned char *)ptr)) % CACHE_TABLE_SIZE);
109 
110     return hash;
111 }
112 
HTTPCacheTable(const string & cache_root,int block_size)113 HTTPCacheTable::HTTPCacheTable(const string &cache_root, int block_size) :
114     d_cache_root(cache_root), d_block_size(block_size), d_current_size(0), d_new_entries(0)
115 {
116     d_cache_index = cache_root + CACHE_INDEX;
117 
118     d_cache_table = new CacheEntries*[CACHE_TABLE_SIZE];
119 
120     // Initialize the cache table.
121     for (int i = 0; i < CACHE_TABLE_SIZE; ++i)
122 	d_cache_table[i] = 0;
123 
124     cache_index_read();
125 }
126 
127 /** Called by for_each inside ~HTTPCache().
128     @param e The cache entry to delete. */
129 
130 static inline void
delete_cache_entry(HTTPCacheTable::CacheEntry * e)131 delete_cache_entry(HTTPCacheTable::CacheEntry *e)
132 {
133     DBG2(cerr << "Deleting CacheEntry: " << e << endl);
134     delete e;
135 }
136 
~HTTPCacheTable()137 HTTPCacheTable::~HTTPCacheTable()
138 {
139     for (int i = 0; i < CACHE_TABLE_SIZE; ++i) {
140         HTTPCacheTable::CacheEntries *cp = get_cache_table()[i];
141         if (cp) {
142             // delete each entry
143             for_each(cp->begin(), cp->end(), delete_cache_entry);
144 
145             // now delete the vector that held the entries
146             delete get_cache_table()[i];
147             get_cache_table()[i] = 0;
148         }
149     }
150 
151     delete[] d_cache_table;
152 }
153 
154 /** Functor which deletes and nulls a single CacheEntry if it has expired.
155     This functor is called by expired_gc which then uses the
156     erase(remove(...) ...) idiom to really remove all the vector entries that
157     belonged to the deleted CacheEntry objects.
158 
159     @see expired_gc. */
160 
161 class DeleteExpired : public unary_function<HTTPCacheTable::CacheEntry *&, void> {
162 	time_t d_time;
163 	HTTPCacheTable &d_table;
164 
165 public:
DeleteExpired(HTTPCacheTable & table,time_t t)166 	DeleteExpired(HTTPCacheTable &table, time_t t) :
167 		d_time(t), d_table(table) {
168 		if (!t)
169 			d_time = time(0); // 0 == now
170 	}
171 
operator ()(HTTPCacheTable::CacheEntry * & e)172 	void operator()(HTTPCacheTable::CacheEntry *&e) {
173 		if (e && !e->readers && (e->freshness_lifetime
174 				< (e->corrected_initial_age + (d_time - e->response_time)))) {
175 			DBG(cerr << "Deleting expired cache entry: " << e->url << endl);
176 			d_table.remove_cache_entry(e);
177 			delete e; e = 0;
178 		}
179 	}
180 };
181 
182 // @param time base deletes againt this time, defaults to 0 (now)
delete_expired_entries(time_t time)183 void HTTPCacheTable::delete_expired_entries(time_t time) {
184 	// Walk through and delete all the expired entries.
185 	for (int cnt = 0; cnt < CACHE_TABLE_SIZE; cnt++) {
186 		HTTPCacheTable::CacheEntries *slot = get_cache_table()[cnt];
187 		if (slot) {
188 			for_each(slot->begin(), slot->end(), DeleteExpired(*this, time));
189 			slot->erase(remove(slot->begin(), slot->end(),
190 					static_cast<HTTPCacheTable::CacheEntry *>(0)), slot->end());
191 		}
192 	}
193 }
194 
195 /** Functor which deletes and nulls a single CacheEntry which has less than
196     or equal to \c hits hits or if it is larger than the cache's
197     max_entry_size property.
198 
199     @see hits_gc. */
200 
201 class DeleteByHits : public unary_function<HTTPCacheTable::CacheEntry *&, void> {
202 	HTTPCacheTable &d_table;
203 	int d_hits;
204 
205 public:
DeleteByHits(HTTPCacheTable & table,int hits)206 	DeleteByHits(HTTPCacheTable &table, int hits) :
207 		d_table(table), d_hits(hits) {
208 	}
209 
operator ()(HTTPCacheTable::CacheEntry * & e)210 	void operator()(HTTPCacheTable::CacheEntry *&e) {
211 		if (e && !e->readers && e->hits <= d_hits) {
212 			DBG(cerr << "Deleting cache entry: " << e->url << endl);
213 			d_table.remove_cache_entry(e);
214 			delete e; e = 0;
215 		}
216 	}
217 };
218 
219 void
delete_by_hits(int hits)220 HTTPCacheTable::delete_by_hits(int hits) {
221     for (int cnt = 0; cnt < CACHE_TABLE_SIZE; cnt++) {
222         if (get_cache_table()[cnt]) {
223             HTTPCacheTable::CacheEntries *slot = get_cache_table()[cnt];
224             for_each(slot->begin(), slot->end(), DeleteByHits(*this, hits));
225             slot->erase(remove(slot->begin(), slot->end(),
226                                static_cast<HTTPCacheTable::CacheEntry*>(0)),
227                         slot->end());
228 
229         }
230     }
231 }
232 
233 /** Functor which deletes and nulls a single CacheEntry which is larger than
234     a given size.
235     @see hits_gc. */
236 
237 class DeleteBySize : public unary_function<HTTPCacheTable::CacheEntry *&, void> {
238 	HTTPCacheTable &d_table;
239 	unsigned int d_size;
240 
241 public:
DeleteBySize(HTTPCacheTable & table,unsigned int size)242 	DeleteBySize(HTTPCacheTable &table, unsigned int size) :
243 		d_table(table), d_size(size) {
244 	}
245 
operator ()(HTTPCacheTable::CacheEntry * & e)246 	void operator()(HTTPCacheTable::CacheEntry *&e) {
247 		if (e && !e->readers && e->size > d_size) {
248 			DBG(cerr << "Deleting cache entry: " << e->url << endl);
249 			d_table.remove_cache_entry(e);
250 			delete e; e = 0;
251 		}
252 	}
253 };
254 
delete_by_size(unsigned int size)255 void HTTPCacheTable::delete_by_size(unsigned int size) {
256     for (int cnt = 0; cnt < CACHE_TABLE_SIZE; cnt++) {
257         if (get_cache_table()[cnt]) {
258             HTTPCacheTable::CacheEntries *slot = get_cache_table()[cnt];
259             for_each(slot->begin(), slot->end(), DeleteBySize(*this, size));
260             slot->erase(remove(slot->begin(), slot->end(),
261                                static_cast<HTTPCacheTable::CacheEntry*>(0)),
262                         slot->end());
263 
264         }
265     }
266 }
267 
268 /** @name Cache Index
269 
270     These methods manage the cache's index file. Each cache holds an index
271     file named \c .index which stores the cache's state information. */
272 
273 //@{
274 
275 /** Remove the cache index file.
276 
277     A private method.
278 
279     @return True if the file was deleted, otherwise false. */
280 
281 bool
cache_index_delete()282 HTTPCacheTable::cache_index_delete()
283 {
284 	d_new_entries = 0;
285 
286     return (REMOVE_BOOL(d_cache_index.c_str()) == 0);
287 }
288 
289 /** Read the saved set of cached entries from disk. Consistency between the
290     in-memory cache and the index is maintained by only reading the index
291     file when the HTTPCache object is created!
292 
293     A private method.
294 
295     @return True when a cache index was found and read, false otherwise. */
296 
297 bool
cache_index_read()298 HTTPCacheTable::cache_index_read()
299 {
300     FILE *fp = fopen(d_cache_index.c_str(), "r");
301     // If the cache index can't be opened that's OK; start with an empty
302     // cache. 09/05/02 jhrg
303     if (!fp) {
304         return false;
305     }
306 
307     char line[1024];
308     while (!feof(fp) && fgets(line, 1024, fp)) {
309     	add_entry_to_cache_table(cache_index_parse_line(line));
310         DBG2(cerr << line << endl);
311     }
312 
313     int res = fclose(fp) ;
314     if (res) {
315         DBG(cerr << "HTTPCache::cache_index_read - Failed to close " << (void *)fp << endl);
316     }
317 
318     d_new_entries = 0;
319 
320     return true;
321 }
322 
323 /** Parse one line of the index file.
324 
325     A private method.
326 
327     @param line A single line from the \c .index file.
328     @return A CacheEntry initialized with the information from \c line. */
329 
330 HTTPCacheTable::CacheEntry *
cache_index_parse_line(const char * line)331 HTTPCacheTable::cache_index_parse_line(const char *line)
332 {
333     // Read the line and create the cache object
334 	HTTPCacheTable::CacheEntry *entry = new HTTPCacheTable::CacheEntry;
335     istringstream iss(line);
336     iss >> entry->url;
337     iss >> entry->cachename;
338 
339     iss >> entry->etag;
340     if (entry->etag == CACHE_EMPTY_ETAG)
341         entry->etag = "";
342 
343     iss >> entry->lm;
344     iss >> entry->expires;
345     iss >> entry->size;
346     iss >> entry->range; // range is not used. 10/02/02 jhrg
347 
348     iss >> entry->hash;
349     iss >> entry->hits;
350     iss >> entry->freshness_lifetime;
351     iss >> entry->response_time;
352     iss >> entry->corrected_initial_age;
353 
354     iss >> entry->must_revalidate;
355 
356     return entry;
357 }
358 
359 /** Functor which writes a single CacheEntry to the \c .index file. */
360 
361 class WriteOneCacheEntry :
362 	public unary_function<HTTPCacheTable::CacheEntry *, void>
363 {
364 
365     FILE *d_fp;
366 
367 public:
WriteOneCacheEntry(FILE * fp)368     WriteOneCacheEntry(FILE *fp) : d_fp(fp)
369     {}
370 
operator ()(HTTPCacheTable::CacheEntry * e)371     void operator()(HTTPCacheTable::CacheEntry *e)
372     {
373         if (e && fprintf(d_fp,
374                          "%s %s %s %ld %ld %ld %c %d %d %ld %ld %ld %c\r\n",
375                          e->url.c_str(),
376                          e->cachename.c_str(),
377                          e->etag == "" ? CACHE_EMPTY_ETAG : e->etag.c_str(),
378                          (long)(e->lm),
379                          (long)(e->expires),
380                          e->size,
381                          e->range ? '1' : '0', // not used. 10/02/02 jhrg
382                          e->hash,
383                          e->hits,
384                          (long)(e->freshness_lifetime),
385                          (long)(e->response_time),
386                          (long)(e->corrected_initial_age),
387                          e->must_revalidate ? '1' : '0') < 0)
388             throw Error(internal_error, "Cache Index. Error writing cache index\n");
389     }
390 };
391 
392 /** Walk through the list of cached objects and write the cache index file to
393     disk. If the file does not exist, it is created. If the file does exist,
394     it is overwritten. As a side effect, zero the new_entries counter.
395 
396     A private method.
397 
398     @exception Error Thrown if the index file cannot be opened for writing.
399     @note The HTTPCache destructor calls this method and silently ignores
400     this exception. */
401 void
cache_index_write()402 HTTPCacheTable::cache_index_write()
403 {
404     DBG(cerr << "Cache Index. Writing index " << d_cache_index << endl);
405 
406     // Open the file for writing.
407     FILE * fp = NULL;
408     if ((fp = fopen(d_cache_index.c_str(), "wb")) == NULL) {
409         throw Error(string("Cache Index. Can't open `") + d_cache_index
410                     + string("' for writing"));
411     }
412 
413     // Walk through the list and write it out. The format is really
414     // simple as we keep it all in ASCII.
415 
416     for (int cnt = 0; cnt < CACHE_TABLE_SIZE; cnt++) {
417         HTTPCacheTable::CacheEntries *cp = get_cache_table()[cnt];
418         if (cp)
419             for_each(cp->begin(), cp->end(), WriteOneCacheEntry(fp));
420     }
421 
422     /* Done writing */
423     int res = fclose(fp);
424     if (res) {
425         DBG(cerr << "HTTPCache::cache_index_write - Failed to close "
426             << (void *)fp << endl);
427     }
428 
429     d_new_entries = 0;
430 }
431 
432 //@} End of the cache index methods.
433 /** Create the directory path for cache file. The cache uses a set of
434     directories within d_cache_root to store individual responses. The name
435     of the directory that holds a given response is the value returned by the
436     get_hash() function (i.e., it's a number). If the directory exists, this
437     method does nothing.
438 
439     A private method.
440 
441     @param hash The hash value (i.e., directory name). An integer between 0
442     and CACHE_TABLE_SIZE (See HTTPCache.h).
443     @return The pathname to the directory (even if it already existed).
444     @exception Error Thrown if the directory cannot be created.*/
445 
446 string
create_hash_directory(int hash)447 HTTPCacheTable::create_hash_directory(int hash)
448 {
449 #if 0
450     struct stat stat_info;
451     ostringstream path;
452 
453     path << d_cache_root << hash;
454     string p = path.str();
455 
456     if (stat(p.c_str(), &stat_info) == -1) {
457         DBG2(cerr << "Cache....... Create dir " << p << endl);
458         if (MKDIR(p.c_str(), 0777) < 0) {
459             DBG2(cerr << "Cache....... Can't create..." << endl);
460             throw Error("Could not create cache slot to hold response! Check the write permissions on your disk cache directory. Cache root: " + d_cache_root + ".");
461         }
462     }
463     else {
464         DBG2(cerr << "Cache....... Directory " << p << " already exists"
465              << endl);
466     }
467 
468     return p;
469 #endif
470 
471     ostringstream path;
472     path << d_cache_root << hash;
473 
474     // Save the mask
475     mode_t mask = umask(0);
476 
477     // Ignore the error if the directory exists
478     errno = 0;
479     if (mkdir(path.str().c_str(), 0777) < 0 && errno != EEXIST) {
480         umask(mask);
481         throw Error(internal_error, "Could not create the directory for the cache at '" + path.str() + "' (" + strerror(errno) + ").");
482     }
483 
484     // Restore themask
485     umask(mask);
486 
487     return path.str();
488 }
489 
490 /** Create the directory for this url (using the hash value from get_hash())
491     and a file within that directory to hold the response's information. The
492     cache name and cache_body_fd fields of \c entry are updated.
493 
494     mkstemp opens the file it creates, which is a good thing but it makes
495     tracking resources hard for the HTTPCache object (because an exception
496     might cause a file descriptor resource leak). So I close that file
497     descriptor here.
498 
499     A private method.
500 
501     @param entry The cache entry object to operate on.
502     @exception Error If the file for the response's body cannot be created. */
503 
504 void
create_location(HTTPCacheTable::CacheEntry * entry)505 HTTPCacheTable::create_location(HTTPCacheTable::CacheEntry *entry)
506 {
507     string hash_dir = create_hash_directory(entry->hash);
508 #ifdef WIN32
509     hash_dir += "\\dodsXXXXXX";
510 #else
511     hash_dir += "/dodsXXXXXX"; // mkstemp uses six characters.
512 #endif
513 
514     // mkstemp uses the storage passed to it; must be writable and local.
515     // char *templat = new char[hash_dir.size() + 1];
516     vector<char> templat(hash_dir.size() + 1);
517     strncpy(&templat[0], hash_dir.c_str(), hash_dir.size() + 1);
518 
519     // Open truncated for update. NB: mkstemp() returns a file descriptor.
520     // man mkstemp says "... The file is opened with the O_EXCL flag,
521     // guaranteeing that when mkstemp returns successfully we are the only
522     // user." 09/19/02 jhrg
523 #ifndef WIN32
524     // Make sure that temp files are accessible only by the owner.
525     umask(077);
526 #endif
527     int fd = MKSTEMP(&templat[0]); // fd mode is 666 or 600 (Unix)
528     if (fd < 0) {
529         // delete[] templat; templat = 0;
530         // close(fd); Calling close() when fd is < 0 is a bad idea! jhrg 7/2/15
531         throw Error(internal_error, "The HTTP Cache could not create a file to hold the response; it will not be cached.");
532     }
533 
534     entry->cachename = &templat[0];
535     // delete[] templat; templat = 0;
536     close(fd);
537 }
538 
539 
540 /** compute real disk space for an entry. */
541 static inline int
entry_disk_space(int size,unsigned int block_size)542 entry_disk_space(int size, unsigned int block_size)
543 {
544     unsigned int num_of_blocks = (size + block_size) / block_size;
545 
546     DBG(cerr << "size: " << size << ", block_size: " << block_size
547         << ", num_of_blocks: " << num_of_blocks << endl);
548 
549     return num_of_blocks * block_size;
550 }
551 
552 /** @name Methods to manipulate instances of CacheEntry. */
553 
554 //@{
555 
556 /** Add a CacheEntry to the cache table. As each entry is read, load it into
557     the in-memory cache table and update the HTTPCache's current_size. The
558     later is used by the garbage collection method.
559 
560     @param entry The CacheEntry instance to add. */
561 void
add_entry_to_cache_table(CacheEntry * entry)562 HTTPCacheTable::add_entry_to_cache_table(CacheEntry *entry)
563 {
564     int hash = entry->hash;
565     if (hash > CACHE_TABLE_SIZE-1 || hash < 0)
566         throw InternalErr(__FILE__, __LINE__, "Hash value too large!");
567 
568     if (!d_cache_table[hash])
569         d_cache_table[hash] = new CacheEntries;
570 
571     d_cache_table[hash]->push_back(entry);
572 
573     DBG(cerr << "add_entry_to_cache_table, current_size: " << d_current_size
574         << ", entry->size: " << entry->size << ", block size: " << d_block_size
575         << endl);
576 
577     d_current_size += entry_disk_space(entry->size, d_block_size);
578 
579     DBG(cerr << "add_entry_to_cache_table, current_size: " << d_current_size << endl);
580 
581     increment_new_entries();
582 }
583 
584 /** Get a pointer to a CacheEntry from the cache table.
585 
586     @param url Look for this URL. */
587 HTTPCacheTable::CacheEntry *
get_locked_entry_from_cache_table(const string & url)588 HTTPCacheTable::get_locked_entry_from_cache_table(const string &url) /*const*/
589 {
590     return get_locked_entry_from_cache_table(get_hash(url), url);
591 }
592 
593 /** Get a pointer to a CacheEntry from the cache table. Providing a way to
594     pass the hash code into this method makes it easier to test for correct
595     behavior when two entries collide. 10/07/02 jhrg
596 
597     @param hash The hash code for \c url.
598     @param url Look for this URL.
599     @return The matching CacheEntry instance or NULL if none was found. */
600 HTTPCacheTable::CacheEntry *
get_locked_entry_from_cache_table(int hash,const string & url)601 HTTPCacheTable::get_locked_entry_from_cache_table(int hash, const string &url) /*const*/
602 {
603     DBG(cerr << "url: " << url << "; hash: " << hash << endl);
604     DBG(cerr << "d_cache_table: " << hex << d_cache_table << dec << endl);
605     if (d_cache_table[hash]) {
606 	CacheEntries *cp = d_cache_table[hash];
607 	for (CacheEntriesIter i = cp->begin(); i != cp->end(); ++i) {
608 	    // Must test *i because perform_garbage_collection may have
609 	    // removed this entry; the CacheEntry will then be null.
610 	    if ((*i) && (*i)->url == url) {
611 		(*i)->lock_read_response(); // Lock the response
612 		return *i;
613 	    }
614 	}
615     }
616 
617     return 0;
618 }
619 
620 /** Get a pointer to a CacheEntry from the cache table. Providing a way to
621     pass the hash code into this method makes it easier to test for correct
622     behavior when two entries collide. 10/07/02 jhrg
623 
624     @param url Look for this URL.
625     @return The matching CacheEntry instance or NULL if none was found. */
626 HTTPCacheTable::CacheEntry *
get_write_locked_entry_from_cache_table(const string & url)627 HTTPCacheTable::get_write_locked_entry_from_cache_table(const string &url)
628 {
629 	int hash = get_hash(url);
630     if (d_cache_table[hash]) {
631         CacheEntries *cp = d_cache_table[hash];
632         for (CacheEntriesIter i = cp->begin(); i != cp->end(); ++i) {
633             // Must test *i because perform_garbage_collection may have
634             // removed this entry; the CacheEntry will then be null.
635             if ((*i) && (*i)->url == url) {
636             	(*i)->lock_write_response();	// Lock the response
637             	return *i;
638             }
639         }
640     }
641 
642     return 0;
643 }
644 
645 /** Remove a CacheEntry. This means delete the entry's files on disk and free
646     the CacheEntry object. The caller should null the entry's pointer in the
647     cache_table. The total size of the cache is decremented once the entry is
648     deleted.
649 
650     @param entry The CacheEntry to delete.
651     @exception InternalErr Thrown if \c entry is in use. */
652 void
remove_cache_entry(HTTPCacheTable::CacheEntry * entry)653 HTTPCacheTable::remove_cache_entry(HTTPCacheTable::CacheEntry *entry)
654 {
655     // This should never happen; all calls to this method are protected by
656     // the caller, hence the InternalErr.
657     if (entry->readers)
658         throw InternalErr(__FILE__, __LINE__, "Tried to delete a cache entry that is in use.");
659 
660     REMOVE(entry->cachename.c_str());
661     REMOVE(string(entry->cachename + CACHE_META).c_str());
662 
663     DBG(cerr << "remove_cache_entry, current_size: " << get_current_size() << endl);
664 
665     unsigned int eds = entry_disk_space(entry->size, get_block_size());
666     set_current_size((eds > get_current_size()) ? 0 : get_current_size() - eds);
667 
668     DBG(cerr << "remove_cache_entry, current_size: " << get_current_size() << endl);
669 }
670 
671 /** Functor which deletes and nulls a CacheEntry if the given entry matches
672     the url. */
673 class DeleteCacheEntry: public unary_function<HTTPCacheTable::CacheEntry *&, void>
674 {
675     string d_url;
676     HTTPCacheTable *d_cache_table;
677 
678 public:
DeleteCacheEntry(HTTPCacheTable * c,const string & url)679     DeleteCacheEntry(HTTPCacheTable *c, const string &url)
680             : d_url(url), d_cache_table(c)
681     {}
682 
operator ()(HTTPCacheTable::CacheEntry * & e)683     void operator()(HTTPCacheTable::CacheEntry *&e)
684     {
685         if (e && e->url == d_url) {
686         	e->lock_write_response();
687             d_cache_table->remove_cache_entry(e);
688         	e->unlock_write_response();
689             delete e; e = 0;
690         }
691     }
692 };
693 
694 /** Find the CacheEntry for the given url and remove both its information in
695     the persistent store and the entry in d_cache_table. If \c url is not in
696     the cache, this method does nothing.
697 
698     @param url Remove this URL's entry.
699     @exception InternalErr Thrown if the CacheEntry for \c url is locked. */
700 void
remove_entry_from_cache_table(const string & url)701 HTTPCacheTable::remove_entry_from_cache_table(const string &url)
702 {
703     int hash = get_hash(url);
704     if (d_cache_table[hash]) {
705         CacheEntries *cp = d_cache_table[hash];
706         for_each(cp->begin(), cp->end(), DeleteCacheEntry(this, url));
707         cp->erase(remove(cp->begin(), cp->end(), static_cast<HTTPCacheTable::CacheEntry*>(0)),
708                   cp->end());
709     }
710 }
711 
712 /** Functor to delete and null all unlocked HTTPCacheTable::CacheEntry objects. */
713 
714 class DeleteUnlockedCacheEntry: public unary_function<HTTPCacheTable::CacheEntry *&, void> {
715     HTTPCacheTable &d_table;
716 
717 public:
DeleteUnlockedCacheEntry(HTTPCacheTable & t)718     DeleteUnlockedCacheEntry(HTTPCacheTable &t) :
719 	d_table(t)
720     {
721     }
operator ()(HTTPCacheTable::CacheEntry * & e)722     void operator()(HTTPCacheTable::CacheEntry *&e)
723     {
724 	if (e) {
725 	    d_table.remove_cache_entry(e);
726 	    delete e;
727 	    e = 0;
728 	}
729     }
730 };
731 
delete_all_entries()732 void HTTPCacheTable::delete_all_entries()
733 {
734     // Walk through the cache table and, for every entry in the cache, delete
735     // it on disk and in the cache table.
736     for (int cnt = 0; cnt < CACHE_TABLE_SIZE; cnt++) {
737 	HTTPCacheTable::CacheEntries *slot = get_cache_table()[cnt];
738 	if (slot) {
739 	    for_each(slot->begin(), slot->end(), DeleteUnlockedCacheEntry(*this));
740 	    slot->erase(remove(slot->begin(), slot->end(), static_cast<HTTPCacheTable::CacheEntry *> (0)), slot->end());
741 	}
742     }
743 
744     cache_index_delete();
745 }
746 
747 /** Calculate the corrected_initial_age of the object. We use the time when
748     this function is called as the response_time as this is when we have
749     received the complete response. This may cause a delay if the response
750     header is very big but should not cause any incorrect behavior.
751 
752     A private method.
753 
754     @param entry The CacheEntry object.
755     @param default_expiration The default value of the cached object's
756     expiration time.
757     @param request_time When was the request made? I think this value must be
758     passed into the method that calls this method... */
759 
760 void
calculate_time(HTTPCacheTable::CacheEntry * entry,int default_expiration,time_t request_time)761 HTTPCacheTable::calculate_time(HTTPCacheTable::CacheEntry *entry, int default_expiration, time_t request_time)
762 {
763     entry->response_time = time(NULL);
764     time_t apparent_age = max(0, static_cast<int>(entry->response_time - entry->date));
765     time_t corrected_received_age = max(apparent_age, entry->age);
766     time_t response_delay = entry->response_time - request_time;
767     entry->corrected_initial_age = corrected_received_age + response_delay;
768 
769     // Estimate an expires time using the max-age and expires time. If we
770     // don't have an explicit expires time then set it to 10% of the LM date
771     // (although max 24 h). If no LM date is available then use 24 hours.
772     time_t freshness_lifetime = entry->max_age;
773     if (freshness_lifetime < 0) {
774         if (entry->expires < 0) {
775             if (entry->lm < 0) {
776                 freshness_lifetime = default_expiration;
777             }
778             else {
779                 freshness_lifetime = LM_EXPIRATION(entry->date - entry->lm);
780             }
781         }
782         else
783             freshness_lifetime = entry->expires - entry->date;
784     }
785 
786     entry->freshness_lifetime = max(0, static_cast<int>(freshness_lifetime));
787 
788     DBG2(cerr << "Cache....... Received Age " << entry->age
789          << ", corrected " << entry->corrected_initial_age
790          << ", freshness lifetime " << entry->freshness_lifetime << endl);
791 }
792 
793 /** Parse various headers from the vector (which can be retrieved from
794     libcurl once a response is received) and load the CacheEntry object with
795     values. This method should only be called with headers from a response
796     (it should not be used to parse request headers).
797 
798     A private method.
799 
800     @param entry Store values from the headers here.
801     @param max_entry_size DO not cache entries larger than this.
802     @param headers A vector of header lines. */
803 
parse_headers(HTTPCacheTable::CacheEntry * entry,unsigned long max_entry_size,const vector<string> & headers)804 void HTTPCacheTable::parse_headers(HTTPCacheTable::CacheEntry *entry, unsigned long max_entry_size,
805 	const vector<string> &headers)
806 {
807     vector<string>::const_iterator i;
808     for (i = headers.begin(); i != headers.end(); ++i) {
809 	// skip a blank header.
810 	if ((*i).empty())
811 	    continue;
812 
813 	string::size_type colon = (*i).find(':');
814 
815 	// skip a header with no colon in it.
816 	if (colon == string::npos)
817 	    continue;
818 
819 	string header = (*i).substr(0, (*i).find(':'));
820 	string value = (*i).substr((*i).find(": ") + 2);
821 	DBG2(cerr << "Header: " << header << endl);DBG2(cerr << "Value: " << value << endl);
822 
823 	if (header == "ETag") {
824 	    entry->etag = value;
825 	}
826 	else if (header == "Last-Modified") {
827 	    entry->lm = parse_time(value.c_str());
828 	}
829 	else if (header == "Expires") {
830 	    entry->expires = parse_time(value.c_str());
831 	}
832 	else if (header == "Date") {
833 	    entry->date = parse_time(value.c_str());
834 	}
835 	else if (header == "Age") {
836 	    entry->age = parse_time(value.c_str());
837 	}
838 	else if (header == "Content-Length") {
839 	    unsigned long clength = strtoul(value.c_str(), 0, 0);
840 	    if (clength > max_entry_size)
841 		entry->set_no_cache(true);
842 	}
843 	else if (header == "Cache-Control") {
844 	    // Ignored Cache-Control values: public, private, no-transform,
845 	    // proxy-revalidate, s-max-age. These are used by shared caches.
846 	    // See section 14.9 of RFC 2612. 10/02/02 jhrg
847 	    if (value == "no-cache" || value == "no-store")
848 		// Note that we *can* store a 'no-store' response in volatile
849 		// memory according to RFC 2616 (section 14.9.2) but those
850 		// will be rare coming from DAP servers. 10/02/02 jhrg
851 		entry->set_no_cache(true);
852 	    else if (value == "must-revalidate")
853 		entry->must_revalidate = true;
854 	    else if (value.find("max-age") != string::npos) {
855 		string max_age = value.substr(value.find("=" + 1));
856 		entry->max_age = parse_time(max_age.c_str());
857 	    }
858 	}
859     }
860 }
861 
862 //@} End of the CacheEntry methods.
863 
864 // @TODO Change name to record locked response
bind_entry_to_data(HTTPCacheTable::CacheEntry * entry,FILE * body)865 void HTTPCacheTable::bind_entry_to_data(HTTPCacheTable::CacheEntry *entry, FILE *body) {
866 	entry->hits++;  // Mark hit
867     d_locked_entries[body] = entry; // record lock, see release_cached_r...
868 }
869 
uncouple_entry_from_data(FILE * body)870 void HTTPCacheTable::uncouple_entry_from_data(FILE *body) {
871 
872     HTTPCacheTable::CacheEntry *entry = d_locked_entries[body];
873     if (!entry)
874         throw InternalErr("There is no cache entry for the response given.");
875 
876     d_locked_entries.erase(body);
877     entry->unlock_read_response();
878 
879     if (entry->readers < 0)
880         throw InternalErr("An unlocked entry was released");
881 }
882 
is_locked_read_responses()883 bool HTTPCacheTable::is_locked_read_responses() {
884 	return !d_locked_entries.empty();
885 }
886 
887 } // namespace libdap
888