1 
2 // -*- mode: c++; c-basic-offset:4 -*-
3 
4 // This file is part of libdap, A C++ implementation of the OPeNDAP Data
5 // Access Protocol.
6 
7 // Copyright (c) 2002,2003 OPeNDAP, Inc.
8 // Author: James Gallagher <jgallagher@opendap.org>
9 //
10 // This library is free software; you can redistribute it and/or
11 // modify it under the terms of the GNU Lesser General Public
12 // License as published by the Free Software Foundation; either
13 // version 2.1 of the License, or (at your option) any later version.
14 //
15 // This library is distributed in the hope that it will be useful,
16 // but WITHOUT ANY WARRANTY; without even the implied warranty of
17 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
18 // Lesser General Public License for more details.
19 //
20 // You should have received a copy of the GNU Lesser General Public
21 // License along with this library; if not, write to the Free Software
22 // Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA
23 //
24 // You can contact OPeNDAP, Inc. at PO Box 112, Saunderstown, RI. 02874-0112.
25 
26 #include "config.h"
27 
28 // #define DODS_DEBUG
29 // #define DODS_DEBUG2
30 #undef USE_GETENV
31 
32 #include <pthread.h>
33 #include <limits.h>
34 #include <unistd.h>   // for stat
35 #include <sys/types.h>  // for stat and mkdir
36 #include <sys/stat.h>
37 
38 #include <cstring>
39 #include <cerrno>
40 
41 #include <iostream>
42 #include <sstream>
43 #include <algorithm>
44 #include <iterator>
45 #include <set>
46 
47 #include "Error.h"
48 #include "InternalErr.h"
49 #include "ResponseTooBigErr.h"
50 #ifndef WIN32
51 #include "SignalHandler.h"
52 #endif
53 #include "HTTPCacheInterruptHandler.h"
54 #include "HTTPCacheTable.h"
55 #include "HTTPCache.h"
56 #include "HTTPCacheMacros.h"
57 #include "SignalHandlerRegisteredErr.h"
58 
59 #include "util_mit.h"
60 #include "debug.h"
61 
62 using namespace std;
63 
64 namespace libdap {
65 
66 HTTPCache *HTTPCache::_instance = 0;
67 
// instance_mutex is used to ensure that only one instance is created.
// That is, it protects the body of the HTTPCache::instance() method. This
// mutex is initialized from within the static function once_init_routine(),
// which is invoked through pthread_once() with once_block as the once-control
// variable, so the initialization happens exactly once. All of this ensures
// that no matter how many threads call the instance() method, only one
// instance is ever made.
75 static pthread_mutex_t instance_mutex;
76 static pthread_once_t once_block = PTHREAD_ONCE_INIT;
77 
78 
// Lifetime, in seconds, assumed for a response that carries no expiration
// information. The expansion is parenthesized so the macro can be used
// safely inside a larger expression (e.g. x / NO_LM_EXPIRATION).
#define NO_LM_EXPIRATION (24*3600) // 24 hours

#define DUMP_FREQUENCY 10 // Dump index every x loads

// Sizes below are expressed in megabytes unless multiplied by MEGA.
#define MEGA 0x100000L
#define CACHE_TOTAL_SIZE 20 // Default cache size is 20M
#define CACHE_FOLDER_PCT 10 // 10% of cache size for metainfo etc.
#define CACHE_GC_PCT 10  // 10% of cache size free after GC
#define MIN_CACHE_TOTAL_SIZE 5 // 5M Min cache size
#define MAX_CACHE_ENTRY_SIZE 3 // 3M Max size of single cached entry
89 
90 static void
once_init_routine()91 once_init_routine()
92 {
93     int status;
94     status = INIT(&instance_mutex);
95 
96     if (status != 0)
97         throw InternalErr(__FILE__, __LINE__, "Could not initialize the HTTP Cache mutex. Exiting.");
98 }
99 
100 /** Get a pointer to the HTTP 1.1 compliant cache. If not already
101     instantiated, this creates an instance of the HTTP cache object and
102     initializes it to use \c cache_root as the location of the persistent
103     store. If there's an index (\c .index) file in that directory, it is read
104     as part of the initialization. If the cache has already been initialized,
105     this method returns a pointer to that instance. Note HTTPCache uses the
106     singleton pattern; A process may have only one instance of this object.
107     Also note that HTTPCache is MT-safe. However, if the \c force parameter
108     is set to true, it may be possible for two or more processes to access
109     the persistent store at the same time resulting in undefined behavior.
110 
111     Default values: is_cache_enabled(): true, is_cache_protected(): false,
112     is_expire_ignored(): false, the total size of the cache is 20M, 2M of that
113     is reserved for response headers, during GC the cache is reduced to at
114     least 18M (total size - 10% of the total size), and the max size for an
115     individual entry is 3M. It is possible to change the size of the cache,
116     but not to make it smaller than 5M. If expiration information is not sent
117     with a response, it is assumed to expire in 24 hours.
118 
119     @param cache_root The fully qualified pathname of the directory which
120     will hold the cache data (i.e., the persistent store).
121     @param force Force access to the persistent store if true. By default
122     false. Use this only if you're sure no one else is using the same cache
123     root! This is included so that programs may use a cache that was
124     left in an inconsistent state.
125     @return A pointer to the HTTPCache object.
126     @exception Error thrown if the cache root cannot set. */
127 
128 HTTPCache *
instance(const string & cache_root,bool force)129 HTTPCache::instance(const string &cache_root, bool force)
130 {
131     int status = pthread_once(&once_block, once_init_routine);
132     if (status != 0)
133         throw InternalErr(__FILE__, __LINE__, "Could not initialize the HTTP Cache mutex. Exiting.");
134 
135     LOCK(&instance_mutex);
136 
137     DBG(cerr << "Entering instance(); (" << hex << _instance << dec << ")" << "... ");
138 
139     try {
140         if (!_instance) {
141             _instance = new HTTPCache(cache_root, force);
142 
143             DBG(cerr << "New instance: " << _instance << ", cache root: "
144                 << _instance->d_cache_root << endl);
145 
146             atexit(delete_instance);
147 
148 #ifndef WIN32
149             // Register the interrupt handler. If we've already registered
150             // one, barf. If this becomes a problem, hack SignalHandler so
151             // that we can chain these handlers... 02/10/04 jhrg
152             //
153             // Technically we're leaking memory here. However, since this
154             // class is a singleton, we know that only three objects will
155             // ever be created and they will all exist until the process
156             // exits. We can let this slide... 02/12/04 jhrg
157             EventHandler *old_eh = SignalHandler::instance()->register_handler(SIGINT, new HTTPCacheInterruptHandler, true);
158             if (old_eh) {
159                 SignalHandler::instance()->register_handler(SIGINT, old_eh);
160                 throw SignalHandlerRegisteredErr(
161                     "Could not register event handler for SIGINT without superseding an existing one.");
162             }
163 
164             old_eh = SignalHandler::instance()->register_handler(SIGPIPE, new HTTPCacheInterruptHandler, true);
165             if (old_eh) {
166                 SignalHandler::instance()->register_handler(SIGPIPE, old_eh);
167                 throw SignalHandlerRegisteredErr(
168                     "Could not register event handler for SIGPIPE without superseding an existing one.");
169             }
170 
171             old_eh = SignalHandler::instance()->register_handler(SIGTERM, new HTTPCacheInterruptHandler, true);
172             if (old_eh) {
173                 SignalHandler::instance()->register_handler(SIGTERM, old_eh);
174                 throw SignalHandlerRegisteredErr(
175                     "Could not register event handler for SIGTERM without superseding an existing one.");
176             }
177 #endif
178         }
179     }
180     catch (...) {
181         DBG2(cerr << "The constructor threw an Error!" << endl);
182         UNLOCK(&instance_mutex);
183         throw;
184     }
185 
186     UNLOCK(&instance_mutex);
187     DBGN(cerr << "returning " << hex << _instance << dec << endl);
188 
189     return _instance;
190 }
191 
192 /** This static method is called using atexit(). It deletes the singleton;
193     see ~HTTPCache for all that implies. */
194 
195 void
delete_instance()196 HTTPCache::delete_instance()
197 {
198     DBG(cerr << "Entering delete_instance()..." << endl);
199 
200     if (HTTPCache::_instance) {
201         DBG(cerr << "Deleting the cache: " << HTTPCache::_instance << endl);
202         delete HTTPCache::_instance;
203         HTTPCache::_instance = 0;
204 
205         //Now remove the signal handlers
206         delete SignalHandler::instance()->remove_handler(SIGINT);
207         delete SignalHandler::instance()->remove_handler(SIGPIPE);
208         delete SignalHandler::instance()->remove_handler(SIGTERM);
209     }
210 
211     DBG(cerr << "Exiting delete_instance()" << endl);
212 }
213 
214 /** Create an instance of the HTTP 1.1 compliant cache. This initializes the
215     both the cache root and the path to the index file. It then reads the
216     cache index file if one is present.
217 
218     A private method.
219 
220     @note This assumes that the cache directory structure should be created!
221     @param cache_root The fully qualified pathname of the directory which
222     will hold the cache data.
223     @param force Force access to the persistent store!
224     @exception Error Thrown if the single user/process lock for the
225     persistent store cannot be obtained.
226     @see cache_index_read */
227 
HTTPCache(string cache_root,bool force)228 HTTPCache::HTTPCache(string cache_root, bool force) :
229         d_locked_open_file(0),
230         d_cache_enabled(false),
231         d_cache_protected(false),
232 
233         d_cache_disconnected(DISCONNECT_NONE),
234 
235         d_expire_ignored(false),
236         d_always_validate(false),
237         d_total_size(CACHE_TOTAL_SIZE * MEGA),
238         d_folder_size(CACHE_TOTAL_SIZE / CACHE_FOLDER_PCT),
239         d_gc_buffer(CACHE_TOTAL_SIZE / CACHE_GC_PCT),
240         d_max_entry_size(MAX_CACHE_ENTRY_SIZE * MEGA),
241         d_default_expiration(NO_LM_EXPIRATION),
242         d_max_age(-1),
243         d_max_stale(-1),
244         d_min_fresh(-1),
245         d_http_cache_table(0)
246 {
247     DBG(cerr << "Entering the constructor for " << this << "... ");
248 #if 0
249 	int status = pthread_once(&once_block, once_init_routine);
250 	if (status != 0)
251 		throw InternalErr(__FILE__, __LINE__, "Could not initialize the HTTP Cache mutex. Exiting.");
252 #endif
253 	INIT(&d_cache_mutex);
254 
255 	// This used to throw an Error object if we could not get the
256 	// single user lock. However, that results in an invalid object. It's
257 	// better to have an instance that has default values. If we cannot get
258 	// the lock, make sure to set the cache as *disabled*. 03/12/03 jhrg
259 	//
260 	// I fixed this block so that the cache root is set before we try to get
261 	// the single user lock. That was the fix for bug #661. To make that
262 	// work, I had to move the call to create_cache_root out of
263 	// set_cache_root(). 09/08/03 jhrg
264 
265 	set_cache_root(cache_root);
266 	int block_size;
267 
268 	if (!get_single_user_lock(force))
269 	    throw Error(internal_error, "Could not get single user lock for the cache");
270 
271 #ifdef WIN32
272 	//  Windows is unable to provide us this information.  4096 appears
273 	//  a best guess.  It is likely to be in the range [2048, 8192] on
274 	//  windows, but will the level of truth of that statement vary over
275 	//  time ?
276 	block_size = 4096;
277 #else
278 	struct stat s;
279 	if (stat(cache_root.c_str(), &s) == 0)
280 		block_size = s.st_blksize;
281 	else
282 		throw Error(internal_error, "Could not set file system block size.");
283 #endif
284 	d_http_cache_table = new HTTPCacheTable(d_cache_root, block_size);
285 	d_cache_enabled = true;
286 
287 	DBGN(cerr << "exiting" << endl);
288 }
289 
290 /** Destroy an instance of HTTPCache. This writes the cache index and frees
291     the in-memory cache table structure. The persistent cache (the response
292     headers and bodies and the index file) are not removed. To remove those,
293     either erase the directory that contains the cache using a file system
294     command or use the purge_cache() method (which leaves the cache directory
295     structure in place but removes all the cached information).
296 
297     This class uses the singleton pattern. Clients should \e never call this
298     method. The HTTPCache::instance() method arranges to call the
299     HTTPCache::delete_instance() using \c atexit(). If delete is called more
300     than once, the result will likely be an index file that is corrupt. */
301 
~HTTPCache()302 HTTPCache::~HTTPCache()
303 {
304     DBG(cerr << "Entering the destructor for " << this << "... ");
305 
306     try {
307         if (startGC())
308             perform_garbage_collection();
309 
310         d_http_cache_table->cache_index_write();
311     }
312     catch (Error &e) {
313         // If the cache index cannot be written, we've got problems. However,
314         // unless we're debugging, still free up the cache table in memory.
315         // How should we let users know they cache index is not being
316         // written?? 10/03/02 jhrg
317         DBG(cerr << e.get_error_message() << endl);
318     }
319 
320     delete d_http_cache_table;
321 
322     release_single_user_lock();
323 
324     DBGN(cerr << "exiting destructor." << endl);
325     DESTROY(&d_cache_mutex);
326 }
327 
328 
329 /** @name Garbage collection
330     These private methods manage the garbage collection tasks for the cache. */
331 //@{
332 
333 /** Enough removed from cache? A private method.
334     @return True if enough has been removed from the cache. */
335 
336 bool
stopGC() const337 HTTPCache::stopGC() const
338 {
339     return (d_http_cache_table->get_current_size() + d_folder_size < d_total_size - d_gc_buffer);
340 }
341 
342 /** Is there too much in the cache. A private method.
343 
344     @todo Modify this method so that it does not count locked entries. See
345     the note for hits_gc().
346     @return True if garbage collection should be performed. */
347 
348 bool
startGC() const349 HTTPCache::startGC() const
350 {
351     DBG(cerr << "startGC, current_size: " << d_http_cache_table->get_current_size() << endl);
352     return (d_http_cache_table->get_current_size() + d_folder_size > d_total_size);
353 }
354 
355 /** Perform garbage collection on the cache. First, all expired responses are
356     removed. Then, if the size of the cache is still too large, the cache is
357     scanned for responses larger than the max_entry_size property. At the
358     same time, responses are removed based on the number of cache hits. This
359     process continues until the size of the cache has been reduced to 90% of
360     the max_size property value. Once the garbage collection is complete,
361     update the index file. Note that locked entries are not removed!
362 
363     A private method.
364 
365     @see stopGC
366     @see expired_gc
367     @see hits_gc */
368 
369 void
perform_garbage_collection()370 HTTPCache::perform_garbage_collection()
371 {
372     DBG(cerr << "Performing garbage collection" << endl);
373 
374     // Remove all the expired responses.
375     expired_gc();
376 
377     // Remove entries larger than max_entry_size.
378     too_big_gc();
379 
380     // Remove entries starting with zero hits, 1, ..., until stopGC()
381     // returns true.
382     hits_gc();
383 }
384 
385 /** Scan the current cache table and remove anything that has expired. Don't
386     remove locked entries.
387 
388     A private method. */
389 
390 void
expired_gc()391 HTTPCache::expired_gc()
392 {
393     if (!d_expire_ignored) {
394         d_http_cache_table->delete_expired_entries();
395     }
396 }
397 
398 /** Scan the cache for entires that are larger than max_entry_size. Also
399     start removing entires with low hit counts. Start looking for entries
400     with zero hits, then one, and so on. Stop when the method stopGC returns
401     true. Locked entries are never removed.
402 
403     @note Potential infinite loop. What if more than 80% of the cache holds
404     entries that are locked? One solution is to modify startGC() so that it
405     does not count locked entries.
406 
407     @todo Change this method to that it looks at the oldest entries first,
408     using the CacheEntry::date to determine entry age. Using the current
409     algorithm it's possible to remove the latest entry which is probably not
410     what we want.
411 
412     A private method. */
413 
414 void
hits_gc()415 HTTPCache::hits_gc()
416 {
417     int hits = 0;
418 
419     if (startGC()) {
420 		while (!stopGC()) {
421 			d_http_cache_table->delete_by_hits(hits);
422 			hits++;
423 		}
424 	}
425 }
426 
427 /** Scan the current cache table and remove anything that has is too big.
428  	Don't remove locked entries.
429 
430     A private method. */
too_big_gc()431 void HTTPCache::too_big_gc() {
432 	if (startGC())
433 		d_http_cache_table->delete_by_size(d_max_entry_size);
434 }
435 
436 //@} End of the garbage collection methods.
437 
438 /** Lock the persistent store part of the cache. Return true if the cache lock
439     was acquired, false otherwise. This is a single user cache, so it
440     requires locking at the process level.
441 
442     A private method.
443 
444     @param force If True force access to the persistent store. False by
445     default.
446     @return True if the cache was locked for our use, False otherwise. */
447 
get_single_user_lock(bool force)448 bool HTTPCache::get_single_user_lock(bool force)
449 {
450     if (!d_locked_open_file) {
451 	FILE * fp = NULL;
452 
453 	try {
454 	    // It's OK to call create_cache_root if the directory already
455 	    // exists.
456 	    create_cache_root(d_cache_root);
457 	}
458 	catch (Error &e) {
459 	    // We need to catch and return false because this method is
460 	    // called from a ctor and throwing at this point will result in a
461 	    // partially constructed object. 01/22/04 jhrg
462 	    DBG(cerr << "Failure to create the cache root" << endl);
463 	    return false;
464 	}
465 
466 	// Try to read the lock file. If we can open for reading, it exists.
467 	string lock = d_cache_root + CACHE_LOCK;
468 	if ((fp = fopen(lock.c_str(), "r")) != NULL) {
469 	    int res = fclose(fp);
470 	    if (res) {
471 		DBG(cerr << "Failed to close " << (void *)fp << endl);
472 	    }
473 	    if (force)
474 		REMOVE(lock.c_str());
475 	    else
476 		return false;
477 	}
478 
479 	if ((fp = fopen(lock.c_str(), "w")) == NULL) {
480 	    DBG(cerr << "Could not open for write access" << endl);
481 	    return false;
482 	}
483 
484 	d_locked_open_file = fp;
485 	return true;
486     }
487 
488     DBG(cerr << "locked_open_file is true" << endl);
489     return false;
490 }
491 
492 /** Release the single user (process) lock. A private method. */
493 
494 void
release_single_user_lock()495 HTTPCache::release_single_user_lock()
496 {
497     if (d_locked_open_file) {
498         int res = fclose(d_locked_open_file);
499         if (res) {
500             DBG(cerr << "Failed to close " << (void *)d_locked_open_file << endl) ;
501         }
502         d_locked_open_file = 0;
503     }
504 
505     string lock = d_cache_root + CACHE_LOCK;
506     REMOVE(lock.c_str());
507 }
508 
509 /** @name Accessors and Mutators for various properties. */
510 //@{
511 
512 /** Get the current cache root directory.
513     @return A string that contains the cache root directory. */
514 
515 string
get_cache_root() const516 HTTPCache::get_cache_root() const
517 {
518     return d_cache_root;
519 }
520 
521 
522 /** Create the cache's root directory. This is the persistent store used by
523     the cache. Paths must always end in DIR_SEPARATOR_CHAR.
524 
525     A private method.
526 
527     @param cache_root The pathname to the desired cache root directory.
528     @exception Error Thrown if the given pathname cannot be created. */
529 
530 void
create_cache_root(const string & cache_root)531 HTTPCache::create_cache_root(const string &cache_root)
532 {
533 #ifdef WIN32
534     string::size_type cur = cache_root[1] == ':' ? 3 : 1;
535     typedef int mode_t;
536 
537     while ((cur = cache_root.find(DIR_SEPARATOR_CHAR, cur)) != string::npos) {
538         string dir = cache_root.substr(0, cur);
539         struct stat stat_info;
540         if (stat(dir.c_str(), &stat_info) == -1) {
541             DBG2(cerr << "Cache....... Creating " << dir << endl);
542             mode_t mask = UMASK(0);
543             if (MKDIR(dir.c_str(), 0777) < 0) {
544                 DBG2(cerr << "Error: can't create." << endl);
545                 UMASK(mask);
546                 throw Error(string("Could not create the directory for the cache. Failed when building path at ") + dir + string("."));
547             }
548             UMASK(mask);
549         }
550         else {
551             DBG2(cerr << "Cache....... Found " << dir << endl);
552         }
553         cur++;
554     }
555 #else
556     // OSX and Linux
557 
558     // Save the mask
559     mode_t mask = umask(0);
560 
561     // Ignore the error if the directory exists
562     errno = 0;
563     if (mkdir(cache_root.c_str(), 0777) < 0 && errno != EEXIST) {
564         umask(mask);
565         throw Error("Could not create the directory for the cache at '" + cache_root + "' (" + strerror(errno) + ").");
566     }
567 
568     // Restore themask
569     umask(mask);
570 
571 #endif
572 }
573 
574 /** Set the cache's root directory to the given path. If no path is given,
575     look at the DODS_CACHE, TMP and TEMP environment variables (in that
576     order) to guess at a good location. If those are all NULL, use \c /tmp.
577     If the cache root directory cannot be created, throw an exception.
578 
579     Note that in most cases callers should look for this path in the user's
580     .dodsrc file.
581 
582     A private method.
583 
584     @see RCReader
585     @param root Set the cache root to this pathname. Defaults to "".
586     @exception Error Thrown if the path can neither be deduced nor created. */
587 
588 void
set_cache_root(const string & root)589 HTTPCache::set_cache_root(const string &root)
590 {
591     if (root != "") {
592         d_cache_root = root;
593         // cache root should end in /.
594         if (d_cache_root[d_cache_root.size()-1] != DIR_SEPARATOR_CHAR)
595             d_cache_root += DIR_SEPARATOR_CHAR;
596     }
597     else {
598         // If no cache root has been indicated then look for a suitable
599         // location.
600 #ifdef USE_GETENV
601         char * cr = (char *) getenv("DODS_CACHE");
602         if (!cr) cr = (char *) getenv("TMP");
603         if (!cr) cr = (char *) getenv("TEMP");
604         if (!cr) cr = (char*)CACHE_LOCATION;
605         d_cache_root = cr;
606 #else
607         d_cache_root = CACHE_LOCATION;
608 #endif
609 
610         if (d_cache_root[d_cache_root.size()-1] != DIR_SEPARATOR_CHAR)
611             d_cache_root += DIR_SEPARATOR_CHAR;
612 
613         d_cache_root += CACHE_ROOT;
614     }
615 
616     // Test d_hhtp_cache_table because this method can be called before that
617     // instance is created and also can be called later to change the cache
618     // root. jhrg 05.14.08
619     if (d_http_cache_table)
620     	d_http_cache_table->set_cache_root(d_cache_root);
621 }
622 
623 /** Enable or disable the cache. The cache can be temporarily suspended using
624     the enable/disable property. This does not prevent the cache from being
625     enabled/disable at a later point in time.
626 
627     Default: yes
628 
629     This method locks the class' interface.
630 
631     @param mode True if the cache should be enabled, False if it should be
632     disabled. */
633 
634 void
set_cache_enabled(bool mode)635 HTTPCache::set_cache_enabled(bool mode)
636 {
637     lock_cache_interface();
638 
639     d_cache_enabled = mode;
640 
641     unlock_cache_interface();
642 }
643 
644 /** Is the cache currently enabled? */
645 
646 bool
is_cache_enabled() const647 HTTPCache::is_cache_enabled() const
648 {
649     DBG2(cerr << "In HTTPCache::is_cache_enabled: (" << d_cache_enabled << ")"
650          << endl);
651     return d_cache_enabled;
652 }
653 
654 /** Set the cache's disconnected property. The cache can operate either
655     disconnected from the network or using a proxy cache (but tell that proxy
656     not to use the network).
657 
658     This method locks the class' interface.
659 
660     @param mode One of DISCONNECT_NONE, DISCONNECT_NORMAL or
661     DISCONNECT_EXTERNAL.
662     @see CacheDIsconnectedMode */
663 void
set_cache_disconnected(CacheDisconnectedMode mode)664 HTTPCache::set_cache_disconnected(CacheDisconnectedMode mode)
665 {
666     lock_cache_interface();
667 
668     d_cache_disconnected = mode;
669 
670     unlock_cache_interface();
671 }
672 
673 /** Get the cache's disconnected mode property. */
674 
675 CacheDisconnectedMode
get_cache_disconnected() const676 HTTPCache::get_cache_disconnected() const
677 {
678     return d_cache_disconnected;
679 }
680 
681 /** How should the cache handle the Expires header?
682     Default: no
683 
684     This method locks the class' interface.
685 
686     @param mode True if a responses Expires header should be ignored, False
687     otherwise. */
688 
689 void
set_expire_ignored(bool mode)690 HTTPCache::set_expire_ignored(bool mode)
691 {
692     lock_cache_interface();
693 
694     d_expire_ignored = mode;
695 
696     unlock_cache_interface();
697 }
698 
699 /* Is the cache ignoring Expires headers returned with responses that have
700    been cached? */
701 
702 bool
is_expire_ignored() const703 HTTPCache::is_expire_ignored() const
704 {
705     return d_expire_ignored;
706 }
707 
708 /** Cache size management. The default cache size is 20M. The minimum size is
709     5M in order not to get into weird problems while writing the cache. The
710     size is indicated in Mega bytes. Note that reducing the size of the cache
711     may trigger a garbage collection operation.
712 
713     @note The maximum cache size is UINT_MAX bytes (usually 4294967295 for
714     32-bit computers). If \e size is larger the value will be truncated to
715     the value of that constant. It seems pretty unlikely that will happen
716     given that the parameter is an unsigned long. This is a fix for bug 689
717     which was reported when the parameter type was signed.
718 
719     This method locks the class' interface.
720 
721     @param size The maximum size of the cache in megabytes. */
722 
723 void
set_max_size(unsigned long size)724 HTTPCache::set_max_size(unsigned long size)
725 {
726     lock_cache_interface();
727 
728     try {
729         unsigned long new_size = size < MIN_CACHE_TOTAL_SIZE ?
730                                  MIN_CACHE_TOTAL_SIZE * MEGA : size * MEGA;
731         unsigned long old_size = d_total_size;
732         d_total_size = new_size;
733         d_folder_size = d_total_size / CACHE_FOLDER_PCT;
734         d_gc_buffer = d_total_size / CACHE_GC_PCT;
735 
736         if (new_size < old_size && startGC()) {
737             perform_garbage_collection();
738             d_http_cache_table->cache_index_write();
739         }
740     }
741     catch (...) {
742         unlock_cache_interface();
743         DBGN(cerr << "Unlocking interface." << endl);
744         throw;
745     }
746 
747     DBG2(cerr << "Cache....... Total cache size: " << d_total_size
748          << " with " << d_folder_size
749          << " bytes for meta information and folders and at least "
750          << d_gc_buffer << " bytes free after every gc" << endl);
751 
752     unlock_cache_interface();
753 }
754 
755 /** How big is the cache? The value returned is the size in megabytes. */
756 
757 unsigned long
get_max_size() const758 HTTPCache::get_max_size() const
759 {
760     return d_total_size / MEGA;
761 }
762 
763 /** Set the maximum size for a single entry in the cache.
764 
765     Default: 3M
766 
767     This method locks the class' interface.
768 
769     @param size The size in megabytes. */
770 
771 void
set_max_entry_size(unsigned long size)772 HTTPCache::set_max_entry_size(unsigned long size)
773 {
774     lock_cache_interface();
775 
776     try {
777         unsigned long new_size = size * MEGA;
778         if (new_size > 0 && new_size < d_total_size - d_folder_size) {
779             unsigned long old_size = d_max_entry_size;
780             d_max_entry_size = new_size;
781             if (new_size < old_size && startGC()) {
782                 perform_garbage_collection();
783                 d_http_cache_table->cache_index_write();
784             }
785         }
786     }
787     catch (...) {
788         unlock_cache_interface();
789         throw;
790     }
791 
792     DBG2(cerr << "Cache...... Max entry cache size is "
793          << d_max_entry_size << endl);
794 
795     unlock_cache_interface();
796 }
797 
798 /** Get the maximum size of an individual entry in the cache.
799 
800     @return The maximum size in megabytes. */
801 
802 unsigned long
get_max_entry_size() const803 HTTPCache::get_max_entry_size() const
804 {
805     return d_max_entry_size / MEGA;
806 }
807 
808 /** Set the default expiration time. Use the <i>default expiration</i>
809     property to determine when a cached response becomes stale if the
810     response lacks the information necessary to compute a specific value.
811 
812     Default: 24 hours (86,400 seconds)
813 
814     This method locks the class' interface.
815 
816     @param exp_time The time in seconds. */
817 
818 void
set_default_expiration(const int exp_time)819 HTTPCache::set_default_expiration(const int exp_time)
820 {
821     lock_cache_interface();
822 
823     d_default_expiration = exp_time;
824 
825     unlock_cache_interface();
826 }
827 
828 /** Get the default expiration time used by the cache. */
829 
830 int
get_default_expiration() const831 HTTPCache::get_default_expiration() const
832 {
833     return d_default_expiration;
834 }
835 
836 /** Should every cache entry be validated?
837     @param validate True if every cache entry should be validated before
838     being used. */
839 
840 void
set_always_validate(bool validate)841 HTTPCache::set_always_validate(bool validate)
842 {
843     d_always_validate = validate;
844 }
845 
846 /** Should every cache entry be validated before each use?
847     @return True if all cache entries require validation. */
848 
849 bool
get_always_validate() const850 HTTPCache::get_always_validate() const
851 {
852     return d_always_validate;
853 }
854 
855 /** Set the request Cache-Control headers. If a request must be satisfied
856     using HTTP, these headers should be included in request since they might
857     be pertinent to a proxy cache.
858 
859     Ignored headers: no-transform, only-if-cached. These headers are not used
860     by HTTPCache and are not recorded. However, if present in the vector
861     passed to this method, they will be present in the vector returned by
862     get_cache_control.
863 
864     This method locks the class' interface.
865 
866     @param cc A vector of strings, each string holds one Cache-Control
867     header.
868     @exception InternalErr Thrown if one of the strings in \c cc does not
869     start with 'Cache-Control: '. */
870 
871 void
set_cache_control(const vector<string> & cc)872 HTTPCache::set_cache_control(const vector<string> &cc)
873 {
874     lock_cache_interface();
875 
876     try {
877         d_cache_control = cc;
878 
879         vector<string>::const_iterator i;
880         for (i = cc.begin(); i != cc.end(); ++i) {
881             string header = (*i).substr(0, (*i).find(':'));
882             string value = (*i).substr((*i).find(": ") + 2);
883             if (header != "Cache-Control") {
884                 throw InternalErr(__FILE__, __LINE__, "Expected cache control header not found.");
885             }
886             else {
887                 if (value == "no-cache" || value == "no-store")
888                     d_cache_enabled = false;
889                 else if (value.find("max-age") != string::npos) {
890                     string max_age = value.substr(value.find("=" + 1));
891                     d_max_age = parse_time(max_age.c_str());
892                 }
893                 else if (value == "max-stale")
894                     d_max_stale = 0; // indicates will take anything;
895                 else if (value.find("max-stale") != string::npos) {
896                     string max_stale = value.substr(value.find("=" + 1));
897                     d_max_stale = parse_time(max_stale.c_str());
898                 }
899                 else if (value.find("min-fresh") != string::npos) {
900                     string min_fresh = value.substr(value.find("=" + 1));
901                     d_min_fresh = parse_time(min_fresh.c_str());
902                 }
903             }
904         }
905     }
906     catch (...) {
907         unlock_cache_interface();
908         throw;
909     }
910 
911     unlock_cache_interface();
912 }
913 
914 
915 /** Get the Cache-Control headers.
916 
917     @return A vector of strings, one string for each header. */
918 
919 vector<string>
get_cache_control()920 HTTPCache::get_cache_control()
921 {
922     return d_cache_control;
923 }
924 
925 //@}
926 
927 /** Look in the cache for the given \c url. Is it in the cache table?
928 
929     This method locks the class' interface.
930 
931 	@todo Remove this is broken.
932     @param url The url to look for.
933     @return True if \c url is found, otherwise False. */
934 
935 bool
is_url_in_cache(const string & url)936 HTTPCache::is_url_in_cache(const string &url)
937 {
938     DBG(cerr << "Is this url in the cache? (" << url << ")" << endl);
939 
940     HTTPCacheTable::CacheEntry *entry = d_http_cache_table->get_locked_entry_from_cache_table(url);
941     bool status = entry != 0;
942     if (entry) {
943         entry->unlock_read_response();
944     }
945     return  status;
946 }
947 
/** Is the header a hop by hop header? If so, we're not supposed to store it
    in the cache. See RFC 2616, Section 13.5.1.

    The test is a case-sensitive substring search over the complete header
    line, so a hop by hop header name that appears anywhere in \c header
    (including its value) counts as a match.

    @param header One complete header line.
    @return True if the header is, otherwise False. */

bool
is_hop_by_hop_header(const std::string &header)
{
    static const char *const hop_by_hop_names[] = {
        "Connection",
        "Keep-Alive",
        "Proxy-Authenticate",
        "Proxy-Authorization",
        "Transfer-Encoding",
        "Upgrade"
    };
    const std::string::size_type name_count =
        sizeof(hop_by_hop_names) / sizeof(hop_by_hop_names[0]);

    for (std::string::size_type k = 0; k < name_count; ++k) {
        if (header.find(hop_by_hop_names[k]) != std::string::npos)
            return true;
    }

    return false;
}
963 
964 /** Dump the headers out to the meta data file. The file is truncated if it
965     already exists.
966 
967     @todo This code could be replaced with STL/iostream stuff.
968 
969     A private method.
970 
971     @param cachename Base name of file for meta data.
972     @param headers A vector of strings, one header per string.
973     @exception InternalErr Thrown if the file cannot be opened. */
974 
975 void
write_metadata(const string & cachename,const vector<string> & headers)976 HTTPCache::write_metadata(const string &cachename, const vector<string> &headers)
977 {
978     string fname = cachename + CACHE_META;
979     d_open_files.push_back(fname);
980 
981     FILE *dest = fopen(fname.c_str(), "w");
982     if (!dest) {
983         throw InternalErr(__FILE__, __LINE__,
984                           "Could not open named cache entry file.");
985     }
986 
987     vector<string>::const_iterator i;
988     for (i = headers.begin(); i != headers.end(); ++i) {
989         if (!is_hop_by_hop_header(*i)) {
990             int s = fwrite((*i).c_str(), (*i).size(), 1, dest);
991             if (s != 1) {
992                 fclose(dest);
993             	throw InternalErr(__FILE__, __LINE__, "could not write header: '" + (*i) + "' " + long_to_string(s));
994             }
995             s = fwrite("\n", 1, 1, dest);
996             if (s != 1) {
997                 fclose(dest);
998             	throw InternalErr(__FILE__, __LINE__, "could not write header: " + long_to_string(s));
999             }
1000         }
1001     }
1002 
1003     int res = fclose(dest);
1004     if (res) {
1005         DBG(cerr << "HTTPCache::write_metadata - Failed to close "
1006             << dest << endl);
1007     }
1008 
1009     d_open_files.pop_back();
1010 }
1011 
1012 /** Read headers from a .meta.
1013 
1014     @todo This code could be replaced with STL/iostream code.
1015 
1016     A private method.
1017 
1018     @param cachename The name of the file in the persistent store.
1019     @param headers The headers are returned using this parameter.
1020     @exception InternalErr Thrown if the file cannot be opened. */
1021 
1022 void
read_metadata(const string & cachename,vector<string> & headers)1023 HTTPCache::read_metadata(const string &cachename, vector<string> &headers)
1024 {
1025     FILE *md = fopen(string(cachename + CACHE_META).c_str(), "r");
1026     if (!md) {
1027         throw InternalErr(__FILE__, __LINE__,
1028                           "Could not open named cache entry meta data file.");
1029     }
1030 
1031     char line[1024];
1032     while (!feof(md) && fgets(line, 1024, md)) {
1033         line[min(1024, static_cast<int>(strlen(line)))-1] = '\0'; // erase newline
1034         headers.push_back(string(line));
1035     }
1036 
1037     int res = fclose(md);
1038     if (res) {
1039         DBG(cerr << "HTTPCache::read_metadata - Failed to close "
1040             << md << endl);
1041     }
1042 }
1043 
1044 /** Write the body of the HTTP response to the cache.
1045 
1046     This method used to throw ResponseTooBig if any response was larger than
1047     max_entry_size. I've disabled that since perform_garbage_collection will
1048     remove any such entry if it's causing problems. Note that if
1049     parse_headers finds a Content-Length header that indicates a response is
1050     too big, the response won't be cached. The idea here is that once we've
1051     already written a bunch of bytes to the cache, we might as well continue.
1052     If it overflows the cache, perform_garbage_collection() will remove it.
1053 
1054     A private method.
1055 
1056     @param cachename Write data to this file.
1057     @param src Read data from this stream.
1058     @return The total number of bytes written.
1059     @exception InternalErr Thrown if the file cannot be opened or if an I/O
1060     error was detected.
1061     @exception ResponseTooBig Thrown if the response was found to be bigger
1062     than the max_entry_size property. This is not longer thrown. 10/11/02
1063     jhrg */
1064 
1065 int
write_body(const string & cachename,const FILE * src)1066 HTTPCache::write_body(const string &cachename, const FILE *src)
1067 {
1068     d_open_files.push_back(cachename);
1069 
1070     FILE *dest = fopen(cachename.c_str(), "wb");
1071     if (!dest) {
1072         throw InternalErr(__FILE__, __LINE__,
1073                           "Could not open named cache entry file.");
1074     }
1075 
1076     // Read and write in 1k blocks; an attempt at doing this efficiently.
1077     // 09/30/02 jhrg
1078     char line[1024];
1079     size_t n;
1080     int total = 0;
1081     while ((n = fread(line, 1, 1024, const_cast<FILE *>(src))) > 0) {
1082         total += fwrite(line, 1, n, dest);
1083         DBG2(sleep(3));
1084     }
1085 
1086     if (ferror(const_cast<FILE *>(src)) || ferror(dest)) {
1087         int res = fclose(dest);
1088         res = res & unlink(cachename.c_str());
1089         if (res) {
1090             DBG(cerr << "HTTPCache::write_body - Failed to close/unlink "
1091                 << dest << endl);
1092         }
1093         throw InternalErr(__FILE__, __LINE__,
1094                           "I/O error transferring data to the cache.");
1095     }
1096 
1097     rewind(const_cast<FILE *>(src));
1098 
1099     int res = fclose(dest);
1100     if (res) {
1101         DBG(cerr << "HTTPCache::write_body - Failed to close "
1102             << dest << endl);
1103     }
1104 
1105     d_open_files.pop_back();
1106 
1107     return total;
1108 }
1109 
1110 /** Get a pointer to file that contains the body of a cached response. The
1111     returned FILE* can be used both for reading and for writing.
1112 
1113     A private method.
1114 
1115     @param cachename The name of the file that holds the response body.
1116     @exception InternalErr Thrown if the file cannot be opened. */
1117 
1118 FILE *
open_body(const string & cachename)1119 HTTPCache::open_body(const string &cachename)
1120 {
1121     DBG(cerr << "cachename: " << cachename << endl);
1122 
1123     FILE *src = fopen(cachename.c_str(), "rb"); // Read only
1124     if (!src)
1125 	throw InternalErr(__FILE__, __LINE__, "Could not open cache file.");
1126 
1127     return src;
1128 }
1129 
1130 /** Add a new response to the cache, or replace an existing cached response
1131     with new data. This method returns True if the information for \c url was
1132     added to the cache. A response might not be cache-able; in that case this
1133     method returns false. (For example, the response might contain the
1134     'Cache-Control: no-cache' header.)
1135 
1136     Note that the FILE *body is rewound so that the caller can re-read it
1137     without using fseek or rewind.
1138 
1139     If a response for \c url is already present in the cache, it will be
1140     replaced by the new headers and body. To update a response in the cache
1141     with new meta data, use update_response().
1142 
1143     This method locks the class' interface.
1144 
1145     @param url A string which holds the request URL.
1146     @param request_time The time when the request was made, in seconds since
1147     1 Jan 1970.
1148     @param headers A vector of strings which hold the response headers.
1149     @param body A FILE * to a file which holds the response body.
1150     @return True if the response was cached, False if the response could not
1151     be cached.
1152     @exception InternalErr Thrown if there was a I/O error while writing to
1153     the persistent store. */
1154 
1155 bool
cache_response(const string & url,time_t request_time,const vector<string> & headers,const FILE * body)1156 HTTPCache::cache_response(const string &url, time_t request_time,
1157                           const vector<string> &headers, const FILE *body)
1158 {
1159     lock_cache_interface();
1160 
1161     DBG(cerr << "Caching url: " << url << "." << endl);
1162 
1163     try {
1164         // If this is not an http or https URL, don't cache.
1165         if (url.find("http:") == string::npos &&
1166             url.find("https:") == string::npos) {
1167             unlock_cache_interface();
1168             return false;
1169         }
1170 
1171         // This does nothing if url is not already in the cache. It's
1172         // more efficient to do this than to first check and see if the entry
1173         // exists. 10/10/02 jhrg
1174         d_http_cache_table->remove_entry_from_cache_table(url);
1175 
1176         HTTPCacheTable::CacheEntry *entry = new HTTPCacheTable::CacheEntry(url);
1177         entry->lock_write_response();
1178 
1179         try {
1180             d_http_cache_table->parse_headers(entry, d_max_entry_size, headers); // etag, lm, date, age, expires, max_age.
1181             if (entry->is_no_cache()) {
1182                 DBG(cerr << "Not cache-able; deleting HTTPCacheTable::CacheEntry: " << entry
1183                     << "(" << url << ")" << endl);
1184                 entry->unlock_write_response();
1185                 delete entry; entry = 0;
1186                 unlock_cache_interface();
1187                 return false;
1188             }
1189 
1190             // corrected_initial_age, freshness_lifetime, response_time.
1191             d_http_cache_table->calculate_time(entry, d_default_expiration, request_time);
1192 
1193             d_http_cache_table->create_location(entry); // cachename, cache_body_fd
1194             // move these write function to cache table
1195             entry->set_size(write_body(entry->get_cachename(), body));
1196             write_metadata(entry->get_cachename(), headers);
1197             d_http_cache_table->add_entry_to_cache_table(entry);
1198             entry->unlock_write_response();
1199         }
1200         catch (ResponseTooBigErr &e) {
1201             // Oops. Bummer. Clean up and exit.
1202             DBG(cerr << e.get_error_message() << endl);
1203             REMOVE(entry->get_cachename().c_str());
1204             REMOVE(string(entry->get_cachename() + CACHE_META).c_str());
1205             DBG(cerr << "Too big; deleting HTTPCacheTable::CacheEntry: " << entry << "(" << url
1206                 << ")" << endl);
1207             entry->unlock_write_response();
1208             delete entry; entry = 0;
1209             unlock_cache_interface();
1210             return false;
1211         }
1212 
1213         if (d_http_cache_table->get_new_entries() > DUMP_FREQUENCY) {
1214             if (startGC())
1215                 perform_garbage_collection();
1216 
1217             d_http_cache_table->cache_index_write(); // resets new_entries
1218         }
1219     }
1220     catch (...) {
1221         unlock_cache_interface();
1222         throw;
1223     }
1224 
1225     unlock_cache_interface();
1226 
1227     return true;
1228 }
1229 
1230 /** Build the headers to send along with a GET request to make that request
1231     conditional. This method examines the headers for a given response in the
1232     cache and formulates the correct headers for a valid HTTP 1.1 conditional
1233     GET request. See RFC 2616, Section 13.3.4.
1234 
1235     Rules: If an ETag is present, it must be used. Use If-None-Match. If a
1236     Last-Modified header is present, use it. Use If-Modified-Since. If both
1237     are present, use both (this means that HTTP 1.0 daemons are more likely
1238     to work). If a Last-Modified header is not present, use the value of the
1239     Cache-Control max-age or Expires header(s). Note that a 'Cache-Control:
1240     max-age' header overrides an Expires header (Sec 14.9.3).
1241 
1242     This method locks the cache interface and the cache entry.
1243 
1244     @param url Get the HTTPCacheTable::CacheEntry for this URL.
1245     @return A vector of strings, one request header per string.
1246     @exception Error Thrown if the \e url is not in the cache. */
1247 
1248 vector<string>
get_conditional_request_headers(const string & url)1249 HTTPCache::get_conditional_request_headers(const string &url)
1250 {
1251     lock_cache_interface();
1252 
1253     HTTPCacheTable::CacheEntry *entry = 0;
1254     vector<string> headers;
1255 
1256     DBG(cerr << "Getting conditional request headers for " << url << endl);
1257 
1258     try {
1259         entry = d_http_cache_table->get_locked_entry_from_cache_table(url);
1260         if (!entry)
1261             throw Error(internal_error, "There is no cache entry for the URL: " + url);
1262 
1263         if (entry->get_etag() != "")
1264             headers.push_back(string("If-None-Match: ") + entry->get_etag());
1265 
1266         if (entry->get_lm() > 0) {
1267         	time_t lm = entry->get_lm();
1268             headers.push_back(string("If-Modified-Since: ")
1269                               + date_time_str(&lm));
1270         }
1271         else if (entry->get_max_age() > 0) {
1272         	time_t max_age = entry->get_max_age();
1273             headers.push_back(string("If-Modified-Since: ")
1274                               + date_time_str(&max_age));
1275         }
1276         else if (entry->get_expires() > 0) {
1277         	time_t expires = entry->get_expires();
1278             headers.push_back(string("If-Modified-Since: ")
1279                               + date_time_str(&expires));
1280         }
1281         entry->unlock_read_response();
1282         unlock_cache_interface();
1283     }
1284     catch (...) {
1285 	unlock_cache_interface();
1286 	if (entry) {
1287 	    entry->unlock_read_response();
1288 	}
1289 	throw;
1290     }
1291 
1292     return headers;
1293 }
1294 
/** Functor/Predicate which orders two MIME headers based on the header name
    only (discounting the value). The name is the text before the first ':';
    a string without a ':' is treated as being all name. The comparison is
    case sensitive.

    This used to derive from std::binary_function, which was deprecated in
    C++11 and removed in C++17. The member typedefs that base class supplied
    are declared directly so the functor's interface is unchanged. */

struct HeaderLess
{
    typedef const std::string &first_argument_type;
    typedef const std::string &second_argument_type;
    typedef bool result_type;

    bool operator()(const std::string &s1, const std::string &s2) const {
        // Compare the two name prefixes in place; a length of npos (no ':'
        // found) means "to the end of the string".
        return s1.compare(0, s1.find(':'), s2, 0, s2.find(':')) < 0;
    }
};
1304 
1305 /** Update the meta data for a response already in the cache. This method
1306     provides a way to merge response headers returned from a conditional GET
1307     request, for the given URL, with those already present.
1308 
1309     This method locks the class' interface and the cache entry.
1310 
1311     @param url Update the meta data for this cache entry.
1312     @param request_time The time (Unix time, seconds since 1 Jan 1970) that
1313     the conditional request was made.
1314     @param headers New headers, one header per string, returned in the
1315     response.
1316     @exception Error Thrown if the \c url is not in the cache. */
1317 
1318 void
update_response(const string & url,time_t request_time,const vector<string> & headers)1319 HTTPCache::update_response(const string &url, time_t request_time,
1320                            const vector<string> &headers)
1321 {
1322     lock_cache_interface();
1323 
1324     HTTPCacheTable::CacheEntry *entry = 0;
1325     DBG(cerr << "Updating the response headers for: " << url << endl);
1326 
1327     try {
1328         entry = d_http_cache_table->get_write_locked_entry_from_cache_table(url);
1329         if (!entry)
1330             throw Error(internal_error, "There is no cache entry for the URL: " + url);
1331 
1332         // Merge the new headers with the exiting HTTPCacheTable::CacheEntry object.
1333         d_http_cache_table->parse_headers(entry, d_max_entry_size, headers);
1334 
1335         // Update corrected_initial_age, freshness_lifetime, response_time.
1336         d_http_cache_table->calculate_time(entry, d_default_expiration, request_time);
1337 
1338         // Merge the new headers with those in the persistent store. How:
1339         // Load the new headers into a set, then merge the old headers. Since
1340         // set<> ignores duplicates, old headers with the same name as a new
1341         // header will got into the bit bucket. Define a special compare
1342         // functor to make sure that headers are compared using only their
1343         // name and not their value too.
1344         set<string, HeaderLess> merged_headers;
1345 
1346         // Load in the new headers
1347         copy(headers.begin(), headers.end(),
1348              inserter(merged_headers, merged_headers.begin()));
1349 
1350         // Get the old headers and load them in.
1351         vector<string> old_headers;
1352         read_metadata(entry->get_cachename(), old_headers);
1353         copy(old_headers.begin(), old_headers.end(),
1354              inserter(merged_headers, merged_headers.begin()));
1355 
1356         // Read the values back out. Use reverse iterators with back_inserter
1357         // to preserve header order. NB: vector<> does not support push_front
1358         // so we can't use front_inserter(). 01/09/03 jhrg
1359         vector<string> result;
1360         copy(merged_headers.rbegin(), merged_headers.rend(),
1361              back_inserter(result));
1362 
1363         write_metadata(entry->get_cachename(), result);
1364         entry->unlock_write_response();
1365         unlock_cache_interface();
1366     }
1367     catch (...) {
1368         if (entry) {
1369             entry->unlock_read_response();
1370         }
1371         unlock_cache_interface();
1372         throw;
1373     }
1374 }
1375 
1376 /** Look in the cache and return the status (validity) of the cached
1377     response. This method should be used to determine if a cached response
1378     requires validation.
1379 
1380     This method locks the class' interface and the cache entry.
1381 
1382     @param url Find the cached response associated with this URL.
1383     @return True indicates that the response can be used, False indicates
1384     that it must first be validated.
1385     @exception Error Thrown if the URL's response is not in the cache. */
1386 
1387 bool
is_url_valid(const string & url)1388 HTTPCache::is_url_valid(const string &url)
1389 {
1390     lock_cache_interface();
1391 
1392     bool freshness;
1393     HTTPCacheTable::CacheEntry *entry = 0;
1394 
1395     DBG(cerr << "Is this URL valid? (" << url << ")" << endl);
1396 
1397     try {
1398         if (d_always_validate) {
1399             unlock_cache_interface();
1400             return false;  // force re-validation.
1401         }
1402 
1403         entry = d_http_cache_table->get_locked_entry_from_cache_table(url);
1404         if (!entry)
1405             throw Error(internal_error, "There is no cache entry for the URL: " + url);
1406 
1407         // If we supported range requests, we'd need code here to check if
1408         // there was only a partial response in the cache. 10/02/02 jhrg
1409 
1410         // In case this entry is of type "must-revalidate" then we consider it
1411         // invalid.
1412         if (entry->get_must_revalidate()) {
1413             entry->unlock_read_response();
1414             unlock_cache_interface();
1415             return false;
1416         }
1417 
1418         time_t resident_time = time(NULL) - entry->get_response_time();
1419         time_t current_age = entry->get_corrected_initial_age() + resident_time;
1420 
1421         // Check that the max-age, max-stale, and min-fresh directives
1422         // given in the request cache control header is followed.
1423         if (d_max_age >= 0 && current_age > d_max_age) {
1424             DBG(cerr << "Cache....... Max-age validation" << endl);
1425             entry->unlock_read_response();
1426             unlock_cache_interface();
1427             return false;
1428         }
1429         if (d_min_fresh >= 0
1430             && entry->get_freshness_lifetime() < current_age + d_min_fresh) {
1431             DBG(cerr << "Cache....... Min-fresh validation" << endl);
1432             entry->unlock_read_response();
1433             unlock_cache_interface();
1434             return false;
1435         }
1436 
1437         freshness = (entry->get_freshness_lifetime()
1438                      + (d_max_stale >= 0 ? d_max_stale : 0) > current_age);
1439         entry->unlock_read_response();
1440         unlock_cache_interface();
1441     }
1442     catch (...) {
1443     	if (entry) {
1444     	    entry->unlock_read_response();
1445     	}
1446     	unlock_cache_interface();
1447         throw;
1448     }
1449 
1450     return freshness;
1451 }
1452 
1453 /** Get information from the cache. For a given URL, get the headers, cache
1454     object name and body
1455     stored in the cache. Note that this method increments the hit counter for
1456     <code>url</code>'s entry and \e locks that entry. To release the lock,
1457     the method release_cached_response() \e must be called. Methods that
1458     block on a locked entry are: get_conditional_request_headers(),
1459     update_response() and is_url_valid(). In addition, purge_cache() throws
1460     Error if it's called and any entries are locked. The garbage collection
1461     system will not reclaim locked entries (but works fine when some entries
1462     are locked).
1463 
1464     This method locks the class' interface.
1465 
1466     This method does \e not check to see that the response is valid, just
1467     that it is in the cache. To see if a cached response is valid, use
1468     is_url_valid(). The FILE* returned can be used for both reading and
1469     writing. The latter allows a client to update the body of a cached
1470     response without having to first dump it all to a separate file and then
1471     copy it into the cache (using cache_response()).
1472 
1473     @param url Get response information for this URL.
1474     @param headers Return the response headers in this parameter
1475     @param cacheName A value-result parameter; the name of the cache file
1476     @return A FILE * to the response body.
1477     @exception Error Thrown if the URL's response is not in the cache.
1478     @exception InternalErr Thrown if the persistent store cannot be opened. */
1479 
get_cached_response(const string & url,vector<string> & headers,string & cacheName)1480 FILE * HTTPCache::get_cached_response(const string &url,
1481 		vector<string> &headers, string &cacheName) {
1482     lock_cache_interface();
1483 
1484     FILE *body = 0;
1485     HTTPCacheTable::CacheEntry *entry = 0;
1486 
1487     DBG(cerr << "Getting the cached response for " << url << endl);
1488 
1489     try {
1490         entry = d_http_cache_table->get_locked_entry_from_cache_table(url);
1491         if (!entry) {
1492         	unlock_cache_interface();
1493         	return 0;
1494         }
1495 
1496         cacheName = entry->get_cachename();
1497         read_metadata(entry->get_cachename(), headers);
1498 
1499         DBG(cerr << "Headers just read from cache: " << endl);
1500         DBGN(copy(headers.begin(), headers.end(), ostream_iterator<string>(cerr, "\n")));
1501 
1502         body = open_body(entry->get_cachename());
1503 
1504         DBG(cerr << "Returning: " << url << " from the cache." << endl);
1505 
1506         d_http_cache_table->bind_entry_to_data(entry, body);
1507     }
1508     catch (...) {
1509     	// Why make this unlock operation conditional on entry?
1510         if (entry)
1511         	unlock_cache_interface();
1512         if (body != 0)
1513             fclose(body);
1514         throw;
1515     }
1516 
1517     unlock_cache_interface();
1518 
1519     return body;
1520 }
1521 
1522 /** Get information from the cache. This is a convenience method that calls
1523  	the three parameter version of get_cache_response().
1524 
1525     This method locks the class' interface.
1526 
1527     @param url Get response information for this URL.
1528     @param headers Return the response headers in this parameter
1529     @return A FILE * to the response body.
1530     @exception Error Thrown if the URL's response is not in the cache.
1531     @exception InternalErr Thrown if the persistent store cannot be opened. */
1532 
1533 FILE *
get_cached_response(const string & url,vector<string> & headers)1534 HTTPCache::get_cached_response(const string &url, vector<string> &headers)
1535 {
1536 	string discard_name;
1537 	return get_cached_response(url, headers, discard_name);
1538 }
1539 
1540 /** Get a pointer to a cached response body. This is a convenience method that
1541  	calls the three parameter version of get_cache_response().
1542 
1543     This method locks the class' interface.
1544 
1545     @param url Find the body associated with this URL.
1546     @return A FILE* that points to the response body.
1547     @exception Error Thrown if the URL is not in the cache.
1548     @exception InternalErr Thrown if an I/O error is detected. */
1549 
1550 FILE *
get_cached_response(const string & url)1551 HTTPCache::get_cached_response(const string &url)
1552 {
1553 	string discard_name;
1554 	vector<string> discard_headers;
1555 	return get_cached_response(url, discard_headers, discard_name);
1556 }
1557 
1558 /** Call this method to inform the cache that a particular response is no
1559     longer in use. When a response is accessed using get_cached_response(), it
1560     is locked so that updates and removal (e.g., by the garbage collector)
1561     are not possible. Calling this method frees that lock.
1562 
1563     This method locks the class' interface.
1564 
1565     @param body Release the lock on the response information associated with
1566     this FILE *.
1567     @exception Error Thrown if \c body does not belong to an entry in the
1568     cache or if the entry was already released. */
1569 
1570 void
release_cached_response(FILE * body)1571 HTTPCache::release_cached_response(FILE *body)
1572 {
1573     lock_cache_interface();
1574 
1575     try {
1576     	// fclose(body); This results in a seg fault on linux jhrg 8/27/13
1577     	d_http_cache_table->uncouple_entry_from_data(body);
1578     }
1579     catch (...) {
1580         unlock_cache_interface();
1581         throw;
1582     }
1583 
1584     unlock_cache_interface();
1585 }
1586 
1587 /** Purge both the in-memory cache table and the contents of the cache on
1588     disk. This method deletes every entry in the persistent store but leaves
1589     the structure intact. The client of HTTPCache is responsible for making
1590     sure that all threads have released any responses they pulled from the
1591     cache. If this method is called when a response is still in use, it will
1592     throw an Error object and not purge the cache.
1593 
1594     This method locks the class' interface.
1595 
1596     @exception Error Thrown if an attempt is made to purge the cache when
1597     an entry is still in use. */
1598 
1599 void
purge_cache()1600 HTTPCache::purge_cache()
1601 {
1602     lock_cache_interface();
1603 
1604     try {
1605         if (d_http_cache_table->is_locked_read_responses())
1606             throw Error(internal_error, "Attempt to purge the cache with entries in use.");
1607 
1608         d_http_cache_table->delete_all_entries();
1609     }
1610     catch (...) {
1611         unlock_cache_interface();
1612         throw;
1613     }
1614 
1615     unlock_cache_interface();
1616 }
1617 
1618 } // namespace libdap
1619