1
2 // -*- mode: c++; c-basic-offset:4 -*-
3
4 // This file is part of libdap, A C++ implementation of the OPeNDAP Data
5 // Access Protocol.
6
7 // Copyright (c) 2002,2003 OPeNDAP, Inc.
8 // Author: James Gallagher <jgallagher@opendap.org>
9 //
10 // This library is free software; you can redistribute it and/or
11 // modify it under the terms of the GNU Lesser General Public
12 // License as published by the Free Software Foundation; either
13 // version 2.1 of the License, or (at your option) any later version.
14 //
15 // This library is distributed in the hope that it will be useful,
16 // but WITHOUT ANY WARRANTY; without even the implied warranty of
17 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
18 // Lesser General Public License for more details.
19 //
20 // You should have received a copy of the GNU Lesser General Public
21 // License along with this library; if not, write to the Free Software
22 // Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
23 //
24 // You can contact OPeNDAP, Inc. at PO Box 112, Saunderstown, RI. 02874-0112.
25
26 #include "config.h"
27
28 // #define DODS_DEBUG
29 // #define DODS_DEBUG2
30 #undef USE_GETENV
31
32 #include <pthread.h>
33 #include <limits.h>
34 #include <unistd.h> // for stat
35 #include <sys/types.h> // for stat and mkdir
36 #include <sys/stat.h>
37
38 #include <cstring>
39 #include <cerrno>
40
41 #include <iostream>
42 #include <sstream>
43 #include <algorithm>
44 #include <iterator>
45 #include <set>
46
47 #include "Error.h"
48 #include "InternalErr.h"
49 #include "ResponseTooBigErr.h"
50 #ifndef WIN32
51 #include "SignalHandler.h"
52 #endif
53 #include "HTTPCacheInterruptHandler.h"
54 #include "HTTPCacheTable.h"
55 #include "HTTPCache.h"
56 #include "HTTPCacheMacros.h"
57 #include "SignalHandlerRegisteredErr.h"
58
59 #include "util_mit.h"
60 #include "debug.h"
61
62 using namespace std;
63
64 namespace libdap {
65
66 HTTPCache *HTTPCache::_instance = 0;
67
// instance_mutex is used to ensure that only one instance is created.
// That is, it protects the body of the HTTPCache::instance() method. This
// mutex is initialized from within the static function once_init_routine(),
// and the call to that function is made using pthread_once(), where the
// once-control variable once_block guarantees the routine runs only once.
// All of this ensures that no matter how many threads call the instance()
// method, only one instance is ever made.
75 static pthread_mutex_t instance_mutex;
76 static pthread_once_t once_block = PTHREAD_ONCE_INIT;
77
78
// Default expiration, in seconds, used to initialize d_default_expiration.
// The expansion is parenthesized so that the macro behaves as a single value
// inside larger expressions (e.g. x / NO_LM_EXPIRATION).
#define NO_LM_EXPIRATION (24*3600) // 24 hours

#define DUMP_FREQUENCY 10 // Dump index every x loads

#define MEGA 0x100000L // Bytes per megabyte
#define CACHE_TOTAL_SIZE 20 // Default cache size is 20M
#define CACHE_FOLDER_PCT 10 // Divisor: 1/10 (10%) of cache size for metainfo etc.
#define CACHE_GC_PCT 10 // Divisor: 1/10 (10%) of cache size free after GC
#define MIN_CACHE_TOTAL_SIZE 5 // 5M Min cache size
#define MAX_CACHE_ENTRY_SIZE 3 // 3M Max size of single cached entry
89
90 static void
once_init_routine()91 once_init_routine()
92 {
93 int status;
94 status = INIT(&instance_mutex);
95
96 if (status != 0)
97 throw InternalErr(__FILE__, __LINE__, "Could not initialize the HTTP Cache mutex. Exiting.");
98 }
99
100 /** Get a pointer to the HTTP 1.1 compliant cache. If not already
101 instantiated, this creates an instance of the HTTP cache object and
102 initializes it to use \c cache_root as the location of the persistent
103 store. If there's an index (\c .index) file in that directory, it is read
104 as part of the initialization. If the cache has already been initialized,
105 this method returns a pointer to that instance. Note HTTPCache uses the
106 singleton pattern; A process may have only one instance of this object.
107 Also note that HTTPCache is MT-safe. However, if the \c force parameter
108 is set to true, it may be possible for two or more processes to access
109 the persistent store at the same time resulting in undefined behavior.
110
111 Default values: is_cache_enabled(): true, is_cache_protected(): false,
112 is_expire_ignored(): false, the total size of the cache is 20M, 2M of that
113 is reserved for response headers, during GC the cache is reduced to at
114 least 18M (total size - 10% of the total size), and the max size for an
115 individual entry is 3M. It is possible to change the size of the cache,
116 but not to make it smaller than 5M. If expiration information is not sent
117 with a response, it is assumed to expire in 24 hours.
118
119 @param cache_root The fully qualified pathname of the directory which
120 will hold the cache data (i.e., the persistent store).
121 @param force Force access to the persistent store if true. By default
122 false. Use this only if you're sure no one else is using the same cache
123 root! This is included so that programs may use a cache that was
124 left in an inconsistent state.
125 @return A pointer to the HTTPCache object.
126 @exception Error thrown if the cache root cannot set. */
127
128 HTTPCache *
instance(const string & cache_root,bool force)129 HTTPCache::instance(const string &cache_root, bool force)
130 {
131 int status = pthread_once(&once_block, once_init_routine);
132 if (status != 0)
133 throw InternalErr(__FILE__, __LINE__, "Could not initialize the HTTP Cache mutex. Exiting.");
134
135 LOCK(&instance_mutex);
136
137 DBG(cerr << "Entering instance(); (" << hex << _instance << dec << ")" << "... ");
138
139 try {
140 if (!_instance) {
141 _instance = new HTTPCache(cache_root, force);
142
143 DBG(cerr << "New instance: " << _instance << ", cache root: "
144 << _instance->d_cache_root << endl);
145
146 atexit(delete_instance);
147
148 #ifndef WIN32
149 // Register the interrupt handler. If we've already registered
150 // one, barf. If this becomes a problem, hack SignalHandler so
151 // that we can chain these handlers... 02/10/04 jhrg
152 //
153 // Technically we're leaking memory here. However, since this
154 // class is a singleton, we know that only three objects will
155 // ever be created and they will all exist until the process
156 // exits. We can let this slide... 02/12/04 jhrg
157 EventHandler *old_eh = SignalHandler::instance()->register_handler(SIGINT, new HTTPCacheInterruptHandler, true);
158 if (old_eh) {
159 SignalHandler::instance()->register_handler(SIGINT, old_eh);
160 throw SignalHandlerRegisteredErr(
161 "Could not register event handler for SIGINT without superseding an existing one.");
162 }
163
164 old_eh = SignalHandler::instance()->register_handler(SIGPIPE, new HTTPCacheInterruptHandler, true);
165 if (old_eh) {
166 SignalHandler::instance()->register_handler(SIGPIPE, old_eh);
167 throw SignalHandlerRegisteredErr(
168 "Could not register event handler for SIGPIPE without superseding an existing one.");
169 }
170
171 old_eh = SignalHandler::instance()->register_handler(SIGTERM, new HTTPCacheInterruptHandler, true);
172 if (old_eh) {
173 SignalHandler::instance()->register_handler(SIGTERM, old_eh);
174 throw SignalHandlerRegisteredErr(
175 "Could not register event handler for SIGTERM without superseding an existing one.");
176 }
177 #endif
178 }
179 }
180 catch (...) {
181 DBG2(cerr << "The constructor threw an Error!" << endl);
182 UNLOCK(&instance_mutex);
183 throw;
184 }
185
186 UNLOCK(&instance_mutex);
187 DBGN(cerr << "returning " << hex << _instance << dec << endl);
188
189 return _instance;
190 }
191
192 /** This static method is called using atexit(). It deletes the singleton;
193 see ~HTTPCache for all that implies. */
194
195 void
delete_instance()196 HTTPCache::delete_instance()
197 {
198 DBG(cerr << "Entering delete_instance()..." << endl);
199
200 if (HTTPCache::_instance) {
201 DBG(cerr << "Deleting the cache: " << HTTPCache::_instance << endl);
202 delete HTTPCache::_instance;
203 HTTPCache::_instance = 0;
204
205 //Now remove the signal handlers
206 delete SignalHandler::instance()->remove_handler(SIGINT);
207 delete SignalHandler::instance()->remove_handler(SIGPIPE);
208 delete SignalHandler::instance()->remove_handler(SIGTERM);
209 }
210
211 DBG(cerr << "Exiting delete_instance()" << endl);
212 }
213
214 /** Create an instance of the HTTP 1.1 compliant cache. This initializes the
215 both the cache root and the path to the index file. It then reads the
216 cache index file if one is present.
217
218 A private method.
219
220 @note This assumes that the cache directory structure should be created!
221 @param cache_root The fully qualified pathname of the directory which
222 will hold the cache data.
223 @param force Force access to the persistent store!
224 @exception Error Thrown if the single user/process lock for the
225 persistent store cannot be obtained.
226 @see cache_index_read */
227
HTTPCache(string cache_root,bool force)228 HTTPCache::HTTPCache(string cache_root, bool force) :
229 d_locked_open_file(0),
230 d_cache_enabled(false),
231 d_cache_protected(false),
232
233 d_cache_disconnected(DISCONNECT_NONE),
234
235 d_expire_ignored(false),
236 d_always_validate(false),
237 d_total_size(CACHE_TOTAL_SIZE * MEGA),
238 d_folder_size(CACHE_TOTAL_SIZE / CACHE_FOLDER_PCT),
239 d_gc_buffer(CACHE_TOTAL_SIZE / CACHE_GC_PCT),
240 d_max_entry_size(MAX_CACHE_ENTRY_SIZE * MEGA),
241 d_default_expiration(NO_LM_EXPIRATION),
242 d_max_age(-1),
243 d_max_stale(-1),
244 d_min_fresh(-1),
245 d_http_cache_table(0)
246 {
247 DBG(cerr << "Entering the constructor for " << this << "... ");
248 #if 0
249 int status = pthread_once(&once_block, once_init_routine);
250 if (status != 0)
251 throw InternalErr(__FILE__, __LINE__, "Could not initialize the HTTP Cache mutex. Exiting.");
252 #endif
253 INIT(&d_cache_mutex);
254
255 // This used to throw an Error object if we could not get the
256 // single user lock. However, that results in an invalid object. It's
257 // better to have an instance that has default values. If we cannot get
258 // the lock, make sure to set the cache as *disabled*. 03/12/03 jhrg
259 //
260 // I fixed this block so that the cache root is set before we try to get
261 // the single user lock. That was the fix for bug #661. To make that
262 // work, I had to move the call to create_cache_root out of
263 // set_cache_root(). 09/08/03 jhrg
264
265 set_cache_root(cache_root);
266 int block_size;
267
268 if (!get_single_user_lock(force))
269 throw Error(internal_error, "Could not get single user lock for the cache");
270
271 #ifdef WIN32
272 // Windows is unable to provide us this information. 4096 appears
273 // a best guess. It is likely to be in the range [2048, 8192] on
274 // windows, but will the level of truth of that statement vary over
275 // time ?
276 block_size = 4096;
277 #else
278 struct stat s;
279 if (stat(cache_root.c_str(), &s) == 0)
280 block_size = s.st_blksize;
281 else
282 throw Error(internal_error, "Could not set file system block size.");
283 #endif
284 d_http_cache_table = new HTTPCacheTable(d_cache_root, block_size);
285 d_cache_enabled = true;
286
287 DBGN(cerr << "exiting" << endl);
288 }
289
290 /** Destroy an instance of HTTPCache. This writes the cache index and frees
291 the in-memory cache table structure. The persistent cache (the response
292 headers and bodies and the index file) are not removed. To remove those,
293 either erase the directory that contains the cache using a file system
294 command or use the purge_cache() method (which leaves the cache directory
295 structure in place but removes all the cached information).
296
297 This class uses the singleton pattern. Clients should \e never call this
298 method. The HTTPCache::instance() method arranges to call the
299 HTTPCache::delete_instance() using \c atexit(). If delete is called more
300 than once, the result will likely be an index file that is corrupt. */
301
~HTTPCache()302 HTTPCache::~HTTPCache()
303 {
304 DBG(cerr << "Entering the destructor for " << this << "... ");
305
306 try {
307 if (startGC())
308 perform_garbage_collection();
309
310 d_http_cache_table->cache_index_write();
311 }
312 catch (Error &e) {
313 // If the cache index cannot be written, we've got problems. However,
314 // unless we're debugging, still free up the cache table in memory.
315 // How should we let users know they cache index is not being
316 // written?? 10/03/02 jhrg
317 DBG(cerr << e.get_error_message() << endl);
318 }
319
320 delete d_http_cache_table;
321
322 release_single_user_lock();
323
324 DBGN(cerr << "exiting destructor." << endl);
325 DESTROY(&d_cache_mutex);
326 }
327
328
329 /** @name Garbage collection
330 These private methods manage the garbage collection tasks for the cache. */
331 //@{
332
333 /** Enough removed from cache? A private method.
334 @return True if enough has been removed from the cache. */
335
336 bool
stopGC() const337 HTTPCache::stopGC() const
338 {
339 return (d_http_cache_table->get_current_size() + d_folder_size < d_total_size - d_gc_buffer);
340 }
341
342 /** Is there too much in the cache. A private method.
343
344 @todo Modify this method so that it does not count locked entries. See
345 the note for hits_gc().
346 @return True if garbage collection should be performed. */
347
348 bool
startGC() const349 HTTPCache::startGC() const
350 {
351 DBG(cerr << "startGC, current_size: " << d_http_cache_table->get_current_size() << endl);
352 return (d_http_cache_table->get_current_size() + d_folder_size > d_total_size);
353 }
354
355 /** Perform garbage collection on the cache. First, all expired responses are
356 removed. Then, if the size of the cache is still too large, the cache is
357 scanned for responses larger than the max_entry_size property. At the
358 same time, responses are removed based on the number of cache hits. This
359 process continues until the size of the cache has been reduced to 90% of
360 the max_size property value. Once the garbage collection is complete,
361 update the index file. Note that locked entries are not removed!
362
363 A private method.
364
365 @see stopGC
366 @see expired_gc
367 @see hits_gc */
368
369 void
perform_garbage_collection()370 HTTPCache::perform_garbage_collection()
371 {
372 DBG(cerr << "Performing garbage collection" << endl);
373
374 // Remove all the expired responses.
375 expired_gc();
376
377 // Remove entries larger than max_entry_size.
378 too_big_gc();
379
380 // Remove entries starting with zero hits, 1, ..., until stopGC()
381 // returns true.
382 hits_gc();
383 }
384
385 /** Scan the current cache table and remove anything that has expired. Don't
386 remove locked entries.
387
388 A private method. */
389
390 void
expired_gc()391 HTTPCache::expired_gc()
392 {
393 if (!d_expire_ignored) {
394 d_http_cache_table->delete_expired_entries();
395 }
396 }
397
398 /** Scan the cache for entires that are larger than max_entry_size. Also
399 start removing entires with low hit counts. Start looking for entries
400 with zero hits, then one, and so on. Stop when the method stopGC returns
401 true. Locked entries are never removed.
402
403 @note Potential infinite loop. What if more than 80% of the cache holds
404 entries that are locked? One solution is to modify startGC() so that it
405 does not count locked entries.
406
407 @todo Change this method to that it looks at the oldest entries first,
408 using the CacheEntry::date to determine entry age. Using the current
409 algorithm it's possible to remove the latest entry which is probably not
410 what we want.
411
412 A private method. */
413
414 void
hits_gc()415 HTTPCache::hits_gc()
416 {
417 int hits = 0;
418
419 if (startGC()) {
420 while (!stopGC()) {
421 d_http_cache_table->delete_by_hits(hits);
422 hits++;
423 }
424 }
425 }
426
427 /** Scan the current cache table and remove anything that has is too big.
428 Don't remove locked entries.
429
430 A private method. */
too_big_gc()431 void HTTPCache::too_big_gc() {
432 if (startGC())
433 d_http_cache_table->delete_by_size(d_max_entry_size);
434 }
435
436 //@} End of the garbage collection methods.
437
438 /** Lock the persistent store part of the cache. Return true if the cache lock
439 was acquired, false otherwise. This is a single user cache, so it
440 requires locking at the process level.
441
442 A private method.
443
444 @param force If True force access to the persistent store. False by
445 default.
446 @return True if the cache was locked for our use, False otherwise. */
447
get_single_user_lock(bool force)448 bool HTTPCache::get_single_user_lock(bool force)
449 {
450 if (!d_locked_open_file) {
451 FILE * fp = NULL;
452
453 try {
454 // It's OK to call create_cache_root if the directory already
455 // exists.
456 create_cache_root(d_cache_root);
457 }
458 catch (Error &e) {
459 // We need to catch and return false because this method is
460 // called from a ctor and throwing at this point will result in a
461 // partially constructed object. 01/22/04 jhrg
462 DBG(cerr << "Failure to create the cache root" << endl);
463 return false;
464 }
465
466 // Try to read the lock file. If we can open for reading, it exists.
467 string lock = d_cache_root + CACHE_LOCK;
468 if ((fp = fopen(lock.c_str(), "r")) != NULL) {
469 int res = fclose(fp);
470 if (res) {
471 DBG(cerr << "Failed to close " << (void *)fp << endl);
472 }
473 if (force)
474 REMOVE(lock.c_str());
475 else
476 return false;
477 }
478
479 if ((fp = fopen(lock.c_str(), "w")) == NULL) {
480 DBG(cerr << "Could not open for write access" << endl);
481 return false;
482 }
483
484 d_locked_open_file = fp;
485 return true;
486 }
487
488 DBG(cerr << "locked_open_file is true" << endl);
489 return false;
490 }
491
492 /** Release the single user (process) lock. A private method. */
493
494 void
release_single_user_lock()495 HTTPCache::release_single_user_lock()
496 {
497 if (d_locked_open_file) {
498 int res = fclose(d_locked_open_file);
499 if (res) {
500 DBG(cerr << "Failed to close " << (void *)d_locked_open_file << endl) ;
501 }
502 d_locked_open_file = 0;
503 }
504
505 string lock = d_cache_root + CACHE_LOCK;
506 REMOVE(lock.c_str());
507 }
508
509 /** @name Accessors and Mutators for various properties. */
510 //@{
511
512 /** Get the current cache root directory.
513 @return A string that contains the cache root directory. */
514
515 string
get_cache_root() const516 HTTPCache::get_cache_root() const
517 {
518 return d_cache_root;
519 }
520
521
522 /** Create the cache's root directory. This is the persistent store used by
523 the cache. Paths must always end in DIR_SEPARATOR_CHAR.
524
525 A private method.
526
527 @param cache_root The pathname to the desired cache root directory.
528 @exception Error Thrown if the given pathname cannot be created. */
529
530 void
create_cache_root(const string & cache_root)531 HTTPCache::create_cache_root(const string &cache_root)
532 {
533 #ifdef WIN32
534 string::size_type cur = cache_root[1] == ':' ? 3 : 1;
535 typedef int mode_t;
536
537 while ((cur = cache_root.find(DIR_SEPARATOR_CHAR, cur)) != string::npos) {
538 string dir = cache_root.substr(0, cur);
539 struct stat stat_info;
540 if (stat(dir.c_str(), &stat_info) == -1) {
541 DBG2(cerr << "Cache....... Creating " << dir << endl);
542 mode_t mask = UMASK(0);
543 if (MKDIR(dir.c_str(), 0777) < 0) {
544 DBG2(cerr << "Error: can't create." << endl);
545 UMASK(mask);
546 throw Error(string("Could not create the directory for the cache. Failed when building path at ") + dir + string("."));
547 }
548 UMASK(mask);
549 }
550 else {
551 DBG2(cerr << "Cache....... Found " << dir << endl);
552 }
553 cur++;
554 }
555 #else
556 // OSX and Linux
557
558 // Save the mask
559 mode_t mask = umask(0);
560
561 // Ignore the error if the directory exists
562 errno = 0;
563 if (mkdir(cache_root.c_str(), 0777) < 0 && errno != EEXIST) {
564 umask(mask);
565 throw Error("Could not create the directory for the cache at '" + cache_root + "' (" + strerror(errno) + ").");
566 }
567
568 // Restore themask
569 umask(mask);
570
571 #endif
572 }
573
574 /** Set the cache's root directory to the given path. If no path is given,
575 look at the DODS_CACHE, TMP and TEMP environment variables (in that
576 order) to guess at a good location. If those are all NULL, use \c /tmp.
577 If the cache root directory cannot be created, throw an exception.
578
579 Note that in most cases callers should look for this path in the user's
580 .dodsrc file.
581
582 A private method.
583
584 @see RCReader
585 @param root Set the cache root to this pathname. Defaults to "".
586 @exception Error Thrown if the path can neither be deduced nor created. */
587
588 void
set_cache_root(const string & root)589 HTTPCache::set_cache_root(const string &root)
590 {
591 if (root != "") {
592 d_cache_root = root;
593 // cache root should end in /.
594 if (d_cache_root[d_cache_root.size()-1] != DIR_SEPARATOR_CHAR)
595 d_cache_root += DIR_SEPARATOR_CHAR;
596 }
597 else {
598 // If no cache root has been indicated then look for a suitable
599 // location.
600 #ifdef USE_GETENV
601 char * cr = (char *) getenv("DODS_CACHE");
602 if (!cr) cr = (char *) getenv("TMP");
603 if (!cr) cr = (char *) getenv("TEMP");
604 if (!cr) cr = (char*)CACHE_LOCATION;
605 d_cache_root = cr;
606 #else
607 d_cache_root = CACHE_LOCATION;
608 #endif
609
610 if (d_cache_root[d_cache_root.size()-1] != DIR_SEPARATOR_CHAR)
611 d_cache_root += DIR_SEPARATOR_CHAR;
612
613 d_cache_root += CACHE_ROOT;
614 }
615
616 // Test d_hhtp_cache_table because this method can be called before that
617 // instance is created and also can be called later to change the cache
618 // root. jhrg 05.14.08
619 if (d_http_cache_table)
620 d_http_cache_table->set_cache_root(d_cache_root);
621 }
622
623 /** Enable or disable the cache. The cache can be temporarily suspended using
624 the enable/disable property. This does not prevent the cache from being
625 enabled/disable at a later point in time.
626
627 Default: yes
628
629 This method locks the class' interface.
630
631 @param mode True if the cache should be enabled, False if it should be
632 disabled. */
633
634 void
set_cache_enabled(bool mode)635 HTTPCache::set_cache_enabled(bool mode)
636 {
637 lock_cache_interface();
638
639 d_cache_enabled = mode;
640
641 unlock_cache_interface();
642 }
643
644 /** Is the cache currently enabled? */
645
646 bool
is_cache_enabled() const647 HTTPCache::is_cache_enabled() const
648 {
649 DBG2(cerr << "In HTTPCache::is_cache_enabled: (" << d_cache_enabled << ")"
650 << endl);
651 return d_cache_enabled;
652 }
653
654 /** Set the cache's disconnected property. The cache can operate either
655 disconnected from the network or using a proxy cache (but tell that proxy
656 not to use the network).
657
658 This method locks the class' interface.
659
660 @param mode One of DISCONNECT_NONE, DISCONNECT_NORMAL or
661 DISCONNECT_EXTERNAL.
662 @see CacheDIsconnectedMode */
663 void
set_cache_disconnected(CacheDisconnectedMode mode)664 HTTPCache::set_cache_disconnected(CacheDisconnectedMode mode)
665 {
666 lock_cache_interface();
667
668 d_cache_disconnected = mode;
669
670 unlock_cache_interface();
671 }
672
673 /** Get the cache's disconnected mode property. */
674
675 CacheDisconnectedMode
get_cache_disconnected() const676 HTTPCache::get_cache_disconnected() const
677 {
678 return d_cache_disconnected;
679 }
680
681 /** How should the cache handle the Expires header?
682 Default: no
683
684 This method locks the class' interface.
685
686 @param mode True if a responses Expires header should be ignored, False
687 otherwise. */
688
689 void
set_expire_ignored(bool mode)690 HTTPCache::set_expire_ignored(bool mode)
691 {
692 lock_cache_interface();
693
694 d_expire_ignored = mode;
695
696 unlock_cache_interface();
697 }
698
699 /* Is the cache ignoring Expires headers returned with responses that have
700 been cached? */
701
702 bool
is_expire_ignored() const703 HTTPCache::is_expire_ignored() const
704 {
705 return d_expire_ignored;
706 }
707
708 /** Cache size management. The default cache size is 20M. The minimum size is
709 5M in order not to get into weird problems while writing the cache. The
710 size is indicated in Mega bytes. Note that reducing the size of the cache
711 may trigger a garbage collection operation.
712
713 @note The maximum cache size is UINT_MAX bytes (usually 4294967295 for
714 32-bit computers). If \e size is larger the value will be truncated to
715 the value of that constant. It seems pretty unlikely that will happen
716 given that the parameter is an unsigned long. This is a fix for bug 689
717 which was reported when the parameter type was signed.
718
719 This method locks the class' interface.
720
721 @param size The maximum size of the cache in megabytes. */
722
723 void
set_max_size(unsigned long size)724 HTTPCache::set_max_size(unsigned long size)
725 {
726 lock_cache_interface();
727
728 try {
729 unsigned long new_size = size < MIN_CACHE_TOTAL_SIZE ?
730 MIN_CACHE_TOTAL_SIZE * MEGA : size * MEGA;
731 unsigned long old_size = d_total_size;
732 d_total_size = new_size;
733 d_folder_size = d_total_size / CACHE_FOLDER_PCT;
734 d_gc_buffer = d_total_size / CACHE_GC_PCT;
735
736 if (new_size < old_size && startGC()) {
737 perform_garbage_collection();
738 d_http_cache_table->cache_index_write();
739 }
740 }
741 catch (...) {
742 unlock_cache_interface();
743 DBGN(cerr << "Unlocking interface." << endl);
744 throw;
745 }
746
747 DBG2(cerr << "Cache....... Total cache size: " << d_total_size
748 << " with " << d_folder_size
749 << " bytes for meta information and folders and at least "
750 << d_gc_buffer << " bytes free after every gc" << endl);
751
752 unlock_cache_interface();
753 }
754
755 /** How big is the cache? The value returned is the size in megabytes. */
756
757 unsigned long
get_max_size() const758 HTTPCache::get_max_size() const
759 {
760 return d_total_size / MEGA;
761 }
762
763 /** Set the maximum size for a single entry in the cache.
764
765 Default: 3M
766
767 This method locks the class' interface.
768
769 @param size The size in megabytes. */
770
771 void
set_max_entry_size(unsigned long size)772 HTTPCache::set_max_entry_size(unsigned long size)
773 {
774 lock_cache_interface();
775
776 try {
777 unsigned long new_size = size * MEGA;
778 if (new_size > 0 && new_size < d_total_size - d_folder_size) {
779 unsigned long old_size = d_max_entry_size;
780 d_max_entry_size = new_size;
781 if (new_size < old_size && startGC()) {
782 perform_garbage_collection();
783 d_http_cache_table->cache_index_write();
784 }
785 }
786 }
787 catch (...) {
788 unlock_cache_interface();
789 throw;
790 }
791
792 DBG2(cerr << "Cache...... Max entry cache size is "
793 << d_max_entry_size << endl);
794
795 unlock_cache_interface();
796 }
797
798 /** Get the maximum size of an individual entry in the cache.
799
800 @return The maximum size in megabytes. */
801
802 unsigned long
get_max_entry_size() const803 HTTPCache::get_max_entry_size() const
804 {
805 return d_max_entry_size / MEGA;
806 }
807
808 /** Set the default expiration time. Use the <i>default expiration</i>
809 property to determine when a cached response becomes stale if the
810 response lacks the information necessary to compute a specific value.
811
812 Default: 24 hours (86,400 seconds)
813
814 This method locks the class' interface.
815
816 @param exp_time The time in seconds. */
817
818 void
set_default_expiration(const int exp_time)819 HTTPCache::set_default_expiration(const int exp_time)
820 {
821 lock_cache_interface();
822
823 d_default_expiration = exp_time;
824
825 unlock_cache_interface();
826 }
827
828 /** Get the default expiration time used by the cache. */
829
830 int
get_default_expiration() const831 HTTPCache::get_default_expiration() const
832 {
833 return d_default_expiration;
834 }
835
836 /** Should every cache entry be validated?
837 @param validate True if every cache entry should be validated before
838 being used. */
839
840 void
set_always_validate(bool validate)841 HTTPCache::set_always_validate(bool validate)
842 {
843 d_always_validate = validate;
844 }
845
846 /** Should every cache entry be validated before each use?
847 @return True if all cache entries require validation. */
848
849 bool
get_always_validate() const850 HTTPCache::get_always_validate() const
851 {
852 return d_always_validate;
853 }
854
855 /** Set the request Cache-Control headers. If a request must be satisfied
856 using HTTP, these headers should be included in request since they might
857 be pertinent to a proxy cache.
858
859 Ignored headers: no-transform, only-if-cached. These headers are not used
860 by HTTPCache and are not recorded. However, if present in the vector
861 passed to this method, they will be present in the vector returned by
862 get_cache_control.
863
864 This method locks the class' interface.
865
866 @param cc A vector of strings, each string holds one Cache-Control
867 header.
868 @exception InternalErr Thrown if one of the strings in \c cc does not
869 start with 'Cache-Control: '. */
870
871 void
set_cache_control(const vector<string> & cc)872 HTTPCache::set_cache_control(const vector<string> &cc)
873 {
874 lock_cache_interface();
875
876 try {
877 d_cache_control = cc;
878
879 vector<string>::const_iterator i;
880 for (i = cc.begin(); i != cc.end(); ++i) {
881 string header = (*i).substr(0, (*i).find(':'));
882 string value = (*i).substr((*i).find(": ") + 2);
883 if (header != "Cache-Control") {
884 throw InternalErr(__FILE__, __LINE__, "Expected cache control header not found.");
885 }
886 else {
887 if (value == "no-cache" || value == "no-store")
888 d_cache_enabled = false;
889 else if (value.find("max-age") != string::npos) {
890 string max_age = value.substr(value.find("=" + 1));
891 d_max_age = parse_time(max_age.c_str());
892 }
893 else if (value == "max-stale")
894 d_max_stale = 0; // indicates will take anything;
895 else if (value.find("max-stale") != string::npos) {
896 string max_stale = value.substr(value.find("=" + 1));
897 d_max_stale = parse_time(max_stale.c_str());
898 }
899 else if (value.find("min-fresh") != string::npos) {
900 string min_fresh = value.substr(value.find("=" + 1));
901 d_min_fresh = parse_time(min_fresh.c_str());
902 }
903 }
904 }
905 }
906 catch (...) {
907 unlock_cache_interface();
908 throw;
909 }
910
911 unlock_cache_interface();
912 }
913
914
915 /** Get the Cache-Control headers.
916
917 @return A vector of strings, one string for each header. */
918
919 vector<string>
get_cache_control()920 HTTPCache::get_cache_control()
921 {
922 return d_cache_control;
923 }
924
925 //@}
926
927 /** Look in the cache for the given \c url. Is it in the cache table?
928
929 This method locks the class' interface.
930
931 @todo Remove this is broken.
932 @param url The url to look for.
933 @return True if \c url is found, otherwise False. */
934
935 bool
is_url_in_cache(const string & url)936 HTTPCache::is_url_in_cache(const string &url)
937 {
938 DBG(cerr << "Is this url in the cache? (" << url << ")" << endl);
939
940 HTTPCacheTable::CacheEntry *entry = d_http_cache_table->get_locked_entry_from_cache_table(url);
941 bool status = entry != 0;
942 if (entry) {
943 entry->unlock_read_response();
944 }
945 return status;
946 }
947
/** Is the header a hop by hop header? If so, we're not supposed to store it
    in the cache. See RFC 2616, Section 13.5.1.

    The test is a case-sensitive substring search: the header is reported as
    hop by hop if any of the names in the table below occurs anywhere in
    \c header.

    @param header One complete header line.
    @return True if the header is hop by hop, otherwise False. */

bool
is_hop_by_hop_header(const std::string &header)
{
    static const char *const hop_by_hop_names[] = {
        "Connection",
        "Keep-Alive",
        "Proxy-Authenticate",
        "Proxy-Authorization",
        "Transfer-Encoding",
        "Upgrade"
    };
    const std::size_t name_count = sizeof(hop_by_hop_names) / sizeof(hop_by_hop_names[0]);

    for (std::size_t k = 0; k < name_count; ++k) {
        if (header.find(hop_by_hop_names[k]) != std::string::npos)
            return true;
    }

    return false;
}
963
964 /** Dump the headers out to the meta data file. The file is truncated if it
965 already exists.
966
967 @todo This code could be replaced with STL/iostream stuff.
968
969 A private method.
970
971 @param cachename Base name of file for meta data.
972 @param headers A vector of strings, one header per string.
973 @exception InternalErr Thrown if the file cannot be opened. */
974
975 void
write_metadata(const string & cachename,const vector<string> & headers)976 HTTPCache::write_metadata(const string &cachename, const vector<string> &headers)
977 {
978 string fname = cachename + CACHE_META;
979 d_open_files.push_back(fname);
980
981 FILE *dest = fopen(fname.c_str(), "w");
982 if (!dest) {
983 throw InternalErr(__FILE__, __LINE__,
984 "Could not open named cache entry file.");
985 }
986
987 vector<string>::const_iterator i;
988 for (i = headers.begin(); i != headers.end(); ++i) {
989 if (!is_hop_by_hop_header(*i)) {
990 int s = fwrite((*i).c_str(), (*i).size(), 1, dest);
991 if (s != 1) {
992 fclose(dest);
993 throw InternalErr(__FILE__, __LINE__, "could not write header: '" + (*i) + "' " + long_to_string(s));
994 }
995 s = fwrite("\n", 1, 1, dest);
996 if (s != 1) {
997 fclose(dest);
998 throw InternalErr(__FILE__, __LINE__, "could not write header: " + long_to_string(s));
999 }
1000 }
1001 }
1002
1003 int res = fclose(dest);
1004 if (res) {
1005 DBG(cerr << "HTTPCache::write_metadata - Failed to close "
1006 << dest << endl);
1007 }
1008
1009 d_open_files.pop_back();
1010 }
1011
1012 /** Read headers from a .meta.
1013
1014 @todo This code could be replaced with STL/iostream code.
1015
1016 A private method.
1017
1018 @param cachename The name of the file in the persistent store.
1019 @param headers The headers are returned using this parameter.
1020 @exception InternalErr Thrown if the file cannot be opened. */
1021
1022 void
read_metadata(const string & cachename,vector<string> & headers)1023 HTTPCache::read_metadata(const string &cachename, vector<string> &headers)
1024 {
1025 FILE *md = fopen(string(cachename + CACHE_META).c_str(), "r");
1026 if (!md) {
1027 throw InternalErr(__FILE__, __LINE__,
1028 "Could not open named cache entry meta data file.");
1029 }
1030
1031 char line[1024];
1032 while (!feof(md) && fgets(line, 1024, md)) {
1033 line[min(1024, static_cast<int>(strlen(line)))-1] = '\0'; // erase newline
1034 headers.push_back(string(line));
1035 }
1036
1037 int res = fclose(md);
1038 if (res) {
1039 DBG(cerr << "HTTPCache::read_metadata - Failed to close "
1040 << md << endl);
1041 }
1042 }
1043
1044 /** Write the body of the HTTP response to the cache.
1045
1046 This method used to throw ResponseTooBig if any response was larger than
1047 max_entry_size. I've disabled that since perform_garbage_collection will
1048 remove any such entry if it's causing problems. Note that if
1049 parse_headers finds a Content-Length header that indicates a response is
1050 too big, the response won't be cached. The idea here is that once we've
1051 already written a bunch of bytes to the cache, we might as well continue.
1052 If it overflows the cache, perform_garbage_collection() will remove it.
1053
1054 A private method.
1055
1056 @param cachename Write data to this file.
1057 @param src Read data from this stream.
1058 @return The total number of bytes written.
1059 @exception InternalErr Thrown if the file cannot be opened or if an I/O
1060 error was detected.
1061 @exception ResponseTooBig Thrown if the response was found to be bigger
1062 than the max_entry_size property. This is not longer thrown. 10/11/02
1063 jhrg */
1064
1065 int
write_body(const string & cachename,const FILE * src)1066 HTTPCache::write_body(const string &cachename, const FILE *src)
1067 {
1068 d_open_files.push_back(cachename);
1069
1070 FILE *dest = fopen(cachename.c_str(), "wb");
1071 if (!dest) {
1072 throw InternalErr(__FILE__, __LINE__,
1073 "Could not open named cache entry file.");
1074 }
1075
1076 // Read and write in 1k blocks; an attempt at doing this efficiently.
1077 // 09/30/02 jhrg
1078 char line[1024];
1079 size_t n;
1080 int total = 0;
1081 while ((n = fread(line, 1, 1024, const_cast<FILE *>(src))) > 0) {
1082 total += fwrite(line, 1, n, dest);
1083 DBG2(sleep(3));
1084 }
1085
1086 if (ferror(const_cast<FILE *>(src)) || ferror(dest)) {
1087 int res = fclose(dest);
1088 res = res & unlink(cachename.c_str());
1089 if (res) {
1090 DBG(cerr << "HTTPCache::write_body - Failed to close/unlink "
1091 << dest << endl);
1092 }
1093 throw InternalErr(__FILE__, __LINE__,
1094 "I/O error transferring data to the cache.");
1095 }
1096
1097 rewind(const_cast<FILE *>(src));
1098
1099 int res = fclose(dest);
1100 if (res) {
1101 DBG(cerr << "HTTPCache::write_body - Failed to close "
1102 << dest << endl);
1103 }
1104
1105 d_open_files.pop_back();
1106
1107 return total;
1108 }
1109
1110 /** Get a pointer to file that contains the body of a cached response. The
1111 returned FILE* can be used both for reading and for writing.
1112
1113 A private method.
1114
1115 @param cachename The name of the file that holds the response body.
1116 @exception InternalErr Thrown if the file cannot be opened. */
1117
1118 FILE *
open_body(const string & cachename)1119 HTTPCache::open_body(const string &cachename)
1120 {
1121 DBG(cerr << "cachename: " << cachename << endl);
1122
1123 FILE *src = fopen(cachename.c_str(), "rb"); // Read only
1124 if (!src)
1125 throw InternalErr(__FILE__, __LINE__, "Could not open cache file.");
1126
1127 return src;
1128 }
1129
1130 /** Add a new response to the cache, or replace an existing cached response
1131 with new data. This method returns True if the information for \c url was
1132 added to the cache. A response might not be cache-able; in that case this
1133 method returns false. (For example, the response might contain the
1134 'Cache-Control: no-cache' header.)
1135
1136 Note that the FILE *body is rewound so that the caller can re-read it
1137 without using fseek or rewind.
1138
1139 If a response for \c url is already present in the cache, it will be
1140 replaced by the new headers and body. To update a response in the cache
1141 with new meta data, use update_response().
1142
1143 This method locks the class' interface.
1144
1145 @param url A string which holds the request URL.
1146 @param request_time The time when the request was made, in seconds since
1147 1 Jan 1970.
1148 @param headers A vector of strings which hold the response headers.
1149 @param body A FILE * to a file which holds the response body.
1150 @return True if the response was cached, False if the response could not
1151 be cached.
1152 @exception InternalErr Thrown if there was a I/O error while writing to
1153 the persistent store. */
1154
1155 bool
cache_response(const string & url,time_t request_time,const vector<string> & headers,const FILE * body)1156 HTTPCache::cache_response(const string &url, time_t request_time,
1157 const vector<string> &headers, const FILE *body)
1158 {
1159 lock_cache_interface();
1160
1161 DBG(cerr << "Caching url: " << url << "." << endl);
1162
1163 try {
1164 // If this is not an http or https URL, don't cache.
1165 if (url.find("http:") == string::npos &&
1166 url.find("https:") == string::npos) {
1167 unlock_cache_interface();
1168 return false;
1169 }
1170
1171 // This does nothing if url is not already in the cache. It's
1172 // more efficient to do this than to first check and see if the entry
1173 // exists. 10/10/02 jhrg
1174 d_http_cache_table->remove_entry_from_cache_table(url);
1175
1176 HTTPCacheTable::CacheEntry *entry = new HTTPCacheTable::CacheEntry(url);
1177 entry->lock_write_response();
1178
1179 try {
1180 d_http_cache_table->parse_headers(entry, d_max_entry_size, headers); // etag, lm, date, age, expires, max_age.
1181 if (entry->is_no_cache()) {
1182 DBG(cerr << "Not cache-able; deleting HTTPCacheTable::CacheEntry: " << entry
1183 << "(" << url << ")" << endl);
1184 entry->unlock_write_response();
1185 delete entry; entry = 0;
1186 unlock_cache_interface();
1187 return false;
1188 }
1189
1190 // corrected_initial_age, freshness_lifetime, response_time.
1191 d_http_cache_table->calculate_time(entry, d_default_expiration, request_time);
1192
1193 d_http_cache_table->create_location(entry); // cachename, cache_body_fd
1194 // move these write function to cache table
1195 entry->set_size(write_body(entry->get_cachename(), body));
1196 write_metadata(entry->get_cachename(), headers);
1197 d_http_cache_table->add_entry_to_cache_table(entry);
1198 entry->unlock_write_response();
1199 }
1200 catch (ResponseTooBigErr &e) {
1201 // Oops. Bummer. Clean up and exit.
1202 DBG(cerr << e.get_error_message() << endl);
1203 REMOVE(entry->get_cachename().c_str());
1204 REMOVE(string(entry->get_cachename() + CACHE_META).c_str());
1205 DBG(cerr << "Too big; deleting HTTPCacheTable::CacheEntry: " << entry << "(" << url
1206 << ")" << endl);
1207 entry->unlock_write_response();
1208 delete entry; entry = 0;
1209 unlock_cache_interface();
1210 return false;
1211 }
1212
1213 if (d_http_cache_table->get_new_entries() > DUMP_FREQUENCY) {
1214 if (startGC())
1215 perform_garbage_collection();
1216
1217 d_http_cache_table->cache_index_write(); // resets new_entries
1218 }
1219 }
1220 catch (...) {
1221 unlock_cache_interface();
1222 throw;
1223 }
1224
1225 unlock_cache_interface();
1226
1227 return true;
1228 }
1229
1230 /** Build the headers to send along with a GET request to make that request
1231 conditional. This method examines the headers for a given response in the
1232 cache and formulates the correct headers for a valid HTTP 1.1 conditional
1233 GET request. See RFC 2616, Section 13.3.4.
1234
1235 Rules: If an ETag is present, it must be used. Use If-None-Match. If a
1236 Last-Modified header is present, use it. Use If-Modified-Since. If both
1237 are present, use both (this means that HTTP 1.0 daemons are more likely
1238 to work). If a Last-Modified header is not present, use the value of the
1239 Cache-Control max-age or Expires header(s). Note that a 'Cache-Control:
1240 max-age' header overrides an Expires header (Sec 14.9.3).
1241
1242 This method locks the cache interface and the cache entry.
1243
1244 @param url Get the HTTPCacheTable::CacheEntry for this URL.
1245 @return A vector of strings, one request header per string.
1246 @exception Error Thrown if the \e url is not in the cache. */
1247
1248 vector<string>
get_conditional_request_headers(const string & url)1249 HTTPCache::get_conditional_request_headers(const string &url)
1250 {
1251 lock_cache_interface();
1252
1253 HTTPCacheTable::CacheEntry *entry = 0;
1254 vector<string> headers;
1255
1256 DBG(cerr << "Getting conditional request headers for " << url << endl);
1257
1258 try {
1259 entry = d_http_cache_table->get_locked_entry_from_cache_table(url);
1260 if (!entry)
1261 throw Error(internal_error, "There is no cache entry for the URL: " + url);
1262
1263 if (entry->get_etag() != "")
1264 headers.push_back(string("If-None-Match: ") + entry->get_etag());
1265
1266 if (entry->get_lm() > 0) {
1267 time_t lm = entry->get_lm();
1268 headers.push_back(string("If-Modified-Since: ")
1269 + date_time_str(&lm));
1270 }
1271 else if (entry->get_max_age() > 0) {
1272 time_t max_age = entry->get_max_age();
1273 headers.push_back(string("If-Modified-Since: ")
1274 + date_time_str(&max_age));
1275 }
1276 else if (entry->get_expires() > 0) {
1277 time_t expires = entry->get_expires();
1278 headers.push_back(string("If-Modified-Since: ")
1279 + date_time_str(&expires));
1280 }
1281 entry->unlock_read_response();
1282 unlock_cache_interface();
1283 }
1284 catch (...) {
1285 unlock_cache_interface();
1286 if (entry) {
1287 entry->unlock_read_response();
1288 }
1289 throw;
1290 }
1291
1292 return headers;
1293 }
1294
/** Functor/Predicate which orders two MIME headers based on the header name
    only (discounting the value). The name is the text before the first ':';
    a string without a ':' is compared in its entirety.

    The nested typedefs are the ones std::binary_function used to supply.
    They are spelled out here because that base class is deprecated since
    C++11 and no longer part of the library in C++17. */

struct HeaderLess
{
    typedef const std::string &first_argument_type;
    typedef const std::string &second_argument_type;
    typedef bool result_type;

    bool operator()(const std::string &s1, const std::string &s2) const {
        // Compare the two name prefixes in place. A length of npos (no ':'
        // found) is clamped to the end of the string by compare().
        return s1.compare(0, s1.find(':'), s2, 0, s2.find(':')) < 0;
    }
};
1304
1305 /** Update the meta data for a response already in the cache. This method
1306 provides a way to merge response headers returned from a conditional GET
1307 request, for the given URL, with those already present.
1308
1309 This method locks the class' interface and the cache entry.
1310
1311 @param url Update the meta data for this cache entry.
1312 @param request_time The time (Unix time, seconds since 1 Jan 1970) that
1313 the conditional request was made.
1314 @param headers New headers, one header per string, returned in the
1315 response.
1316 @exception Error Thrown if the \c url is not in the cache. */
1317
1318 void
update_response(const string & url,time_t request_time,const vector<string> & headers)1319 HTTPCache::update_response(const string &url, time_t request_time,
1320 const vector<string> &headers)
1321 {
1322 lock_cache_interface();
1323
1324 HTTPCacheTable::CacheEntry *entry = 0;
1325 DBG(cerr << "Updating the response headers for: " << url << endl);
1326
1327 try {
1328 entry = d_http_cache_table->get_write_locked_entry_from_cache_table(url);
1329 if (!entry)
1330 throw Error(internal_error, "There is no cache entry for the URL: " + url);
1331
1332 // Merge the new headers with the exiting HTTPCacheTable::CacheEntry object.
1333 d_http_cache_table->parse_headers(entry, d_max_entry_size, headers);
1334
1335 // Update corrected_initial_age, freshness_lifetime, response_time.
1336 d_http_cache_table->calculate_time(entry, d_default_expiration, request_time);
1337
1338 // Merge the new headers with those in the persistent store. How:
1339 // Load the new headers into a set, then merge the old headers. Since
1340 // set<> ignores duplicates, old headers with the same name as a new
1341 // header will got into the bit bucket. Define a special compare
1342 // functor to make sure that headers are compared using only their
1343 // name and not their value too.
1344 set<string, HeaderLess> merged_headers;
1345
1346 // Load in the new headers
1347 copy(headers.begin(), headers.end(),
1348 inserter(merged_headers, merged_headers.begin()));
1349
1350 // Get the old headers and load them in.
1351 vector<string> old_headers;
1352 read_metadata(entry->get_cachename(), old_headers);
1353 copy(old_headers.begin(), old_headers.end(),
1354 inserter(merged_headers, merged_headers.begin()));
1355
1356 // Read the values back out. Use reverse iterators with back_inserter
1357 // to preserve header order. NB: vector<> does not support push_front
1358 // so we can't use front_inserter(). 01/09/03 jhrg
1359 vector<string> result;
1360 copy(merged_headers.rbegin(), merged_headers.rend(),
1361 back_inserter(result));
1362
1363 write_metadata(entry->get_cachename(), result);
1364 entry->unlock_write_response();
1365 unlock_cache_interface();
1366 }
1367 catch (...) {
1368 if (entry) {
1369 entry->unlock_read_response();
1370 }
1371 unlock_cache_interface();
1372 throw;
1373 }
1374 }
1375
1376 /** Look in the cache and return the status (validity) of the cached
1377 response. This method should be used to determine if a cached response
1378 requires validation.
1379
1380 This method locks the class' interface and the cache entry.
1381
1382 @param url Find the cached response associated with this URL.
1383 @return True indicates that the response can be used, False indicates
1384 that it must first be validated.
1385 @exception Error Thrown if the URL's response is not in the cache. */
1386
1387 bool
is_url_valid(const string & url)1388 HTTPCache::is_url_valid(const string &url)
1389 {
1390 lock_cache_interface();
1391
1392 bool freshness;
1393 HTTPCacheTable::CacheEntry *entry = 0;
1394
1395 DBG(cerr << "Is this URL valid? (" << url << ")" << endl);
1396
1397 try {
1398 if (d_always_validate) {
1399 unlock_cache_interface();
1400 return false; // force re-validation.
1401 }
1402
1403 entry = d_http_cache_table->get_locked_entry_from_cache_table(url);
1404 if (!entry)
1405 throw Error(internal_error, "There is no cache entry for the URL: " + url);
1406
1407 // If we supported range requests, we'd need code here to check if
1408 // there was only a partial response in the cache. 10/02/02 jhrg
1409
1410 // In case this entry is of type "must-revalidate" then we consider it
1411 // invalid.
1412 if (entry->get_must_revalidate()) {
1413 entry->unlock_read_response();
1414 unlock_cache_interface();
1415 return false;
1416 }
1417
1418 time_t resident_time = time(NULL) - entry->get_response_time();
1419 time_t current_age = entry->get_corrected_initial_age() + resident_time;
1420
1421 // Check that the max-age, max-stale, and min-fresh directives
1422 // given in the request cache control header is followed.
1423 if (d_max_age >= 0 && current_age > d_max_age) {
1424 DBG(cerr << "Cache....... Max-age validation" << endl);
1425 entry->unlock_read_response();
1426 unlock_cache_interface();
1427 return false;
1428 }
1429 if (d_min_fresh >= 0
1430 && entry->get_freshness_lifetime() < current_age + d_min_fresh) {
1431 DBG(cerr << "Cache....... Min-fresh validation" << endl);
1432 entry->unlock_read_response();
1433 unlock_cache_interface();
1434 return false;
1435 }
1436
1437 freshness = (entry->get_freshness_lifetime()
1438 + (d_max_stale >= 0 ? d_max_stale : 0) > current_age);
1439 entry->unlock_read_response();
1440 unlock_cache_interface();
1441 }
1442 catch (...) {
1443 if (entry) {
1444 entry->unlock_read_response();
1445 }
1446 unlock_cache_interface();
1447 throw;
1448 }
1449
1450 return freshness;
1451 }
1452
1453 /** Get information from the cache. For a given URL, get the headers, cache
1454 object name and body
1455 stored in the cache. Note that this method increments the hit counter for
1456 <code>url</code>'s entry and \e locks that entry. To release the lock,
1457 the method release_cached_response() \e must be called. Methods that
1458 block on a locked entry are: get_conditional_request_headers(),
1459 update_response() and is_url_valid(). In addition, purge_cache() throws
1460 Error if it's called and any entries are locked. The garbage collection
1461 system will not reclaim locked entries (but works fine when some entries
1462 are locked).
1463
1464 This method locks the class' interface.
1465
1466 This method does \e not check to see that the response is valid, just
1467 that it is in the cache. To see if a cached response is valid, use
1468 is_url_valid(). The FILE* returned can be used for both reading and
1469 writing. The latter allows a client to update the body of a cached
1470 response without having to first dump it all to a separate file and then
1471 copy it into the cache (using cache_response()).
1472
1473 @param url Get response information for this URL.
1474 @param headers Return the response headers in this parameter
1475 @param cacheName A value-result parameter; the name of the cache file
1476 @return A FILE * to the response body.
1477 @exception Error Thrown if the URL's response is not in the cache.
1478 @exception InternalErr Thrown if the persistent store cannot be opened. */
1479
get_cached_response(const string & url,vector<string> & headers,string & cacheName)1480 FILE * HTTPCache::get_cached_response(const string &url,
1481 vector<string> &headers, string &cacheName) {
1482 lock_cache_interface();
1483
1484 FILE *body = 0;
1485 HTTPCacheTable::CacheEntry *entry = 0;
1486
1487 DBG(cerr << "Getting the cached response for " << url << endl);
1488
1489 try {
1490 entry = d_http_cache_table->get_locked_entry_from_cache_table(url);
1491 if (!entry) {
1492 unlock_cache_interface();
1493 return 0;
1494 }
1495
1496 cacheName = entry->get_cachename();
1497 read_metadata(entry->get_cachename(), headers);
1498
1499 DBG(cerr << "Headers just read from cache: " << endl);
1500 DBGN(copy(headers.begin(), headers.end(), ostream_iterator<string>(cerr, "\n")));
1501
1502 body = open_body(entry->get_cachename());
1503
1504 DBG(cerr << "Returning: " << url << " from the cache." << endl);
1505
1506 d_http_cache_table->bind_entry_to_data(entry, body);
1507 }
1508 catch (...) {
1509 // Why make this unlock operation conditional on entry?
1510 if (entry)
1511 unlock_cache_interface();
1512 if (body != 0)
1513 fclose(body);
1514 throw;
1515 }
1516
1517 unlock_cache_interface();
1518
1519 return body;
1520 }
1521
1522 /** Get information from the cache. This is a convenience method that calls
1523 the three parameter version of get_cache_response().
1524
1525 This method locks the class' interface.
1526
1527 @param url Get response information for this URL.
1528 @param headers Return the response headers in this parameter
1529 @return A FILE * to the response body.
1530 @exception Error Thrown if the URL's response is not in the cache.
1531 @exception InternalErr Thrown if the persistent store cannot be opened. */
1532
1533 FILE *
get_cached_response(const string & url,vector<string> & headers)1534 HTTPCache::get_cached_response(const string &url, vector<string> &headers)
1535 {
1536 string discard_name;
1537 return get_cached_response(url, headers, discard_name);
1538 }
1539
1540 /** Get a pointer to a cached response body. This is a convenience method that
1541 calls the three parameter version of get_cache_response().
1542
1543 This method locks the class' interface.
1544
1545 @param url Find the body associated with this URL.
1546 @return A FILE* that points to the response body.
1547 @exception Error Thrown if the URL is not in the cache.
1548 @exception InternalErr Thrown if an I/O error is detected. */
1549
1550 FILE *
get_cached_response(const string & url)1551 HTTPCache::get_cached_response(const string &url)
1552 {
1553 string discard_name;
1554 vector<string> discard_headers;
1555 return get_cached_response(url, discard_headers, discard_name);
1556 }
1557
1558 /** Call this method to inform the cache that a particular response is no
1559 longer in use. When a response is accessed using get_cached_response(), it
1560 is locked so that updates and removal (e.g., by the garbage collector)
1561 are not possible. Calling this method frees that lock.
1562
1563 This method locks the class' interface.
1564
1565 @param body Release the lock on the response information associated with
1566 this FILE *.
1567 @exception Error Thrown if \c body does not belong to an entry in the
1568 cache or if the entry was already released. */
1569
1570 void
release_cached_response(FILE * body)1571 HTTPCache::release_cached_response(FILE *body)
1572 {
1573 lock_cache_interface();
1574
1575 try {
1576 // fclose(body); This results in a seg fault on linux jhrg 8/27/13
1577 d_http_cache_table->uncouple_entry_from_data(body);
1578 }
1579 catch (...) {
1580 unlock_cache_interface();
1581 throw;
1582 }
1583
1584 unlock_cache_interface();
1585 }
1586
1587 /** Purge both the in-memory cache table and the contents of the cache on
1588 disk. This method deletes every entry in the persistent store but leaves
1589 the structure intact. The client of HTTPCache is responsible for making
1590 sure that all threads have released any responses they pulled from the
1591 cache. If this method is called when a response is still in use, it will
1592 throw an Error object and not purge the cache.
1593
1594 This method locks the class' interface.
1595
1596 @exception Error Thrown if an attempt is made to purge the cache when
1597 an entry is still in use. */
1598
1599 void
purge_cache()1600 HTTPCache::purge_cache()
1601 {
1602 lock_cache_interface();
1603
1604 try {
1605 if (d_http_cache_table->is_locked_read_responses())
1606 throw Error(internal_error, "Attempt to purge the cache with entries in use.");
1607
1608 d_http_cache_table->delete_all_entries();
1609 }
1610 catch (...) {
1611 unlock_cache_interface();
1612 throw;
1613 }
1614
1615 unlock_cache_interface();
1616 }
1617
1618 } // namespace libdap
1619