1 2 // -*- mode: c++; c-basic-offset:4 -*- 3 4 // This file is part of libdap, A C++ implementation of the OPeNDAP Data 5 // Access Protocol. 6 7 // Copyright (c) 2002,2008 OPeNDAP, Inc. 8 // Author: James Gallagher <jgallagher@opendap.org> 9 // 10 // This library is free software; you can redistribute it and/or 11 // modify it under the terms of the GNU Lesser General Public 12 // License as published by the Free Software Foundation; either 13 // version 2.1 of the License, or (at your option) any later version. 14 // 15 // This library is distributed in the hope that it will be useful, 16 // but WITHOUT ANY WARRANTY; without even the implied warranty of 17 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 18 // Lesser General Public License for more details. 19 // 20 // You should have received a copy of the GNU Lesser General Public 21 // License along with this library; if not, write to the Free Software 22 // Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA 23 // 24 // You can contact OPeNDAP, Inc. at PO Box 112, Saunderstown, RI. 02874-0112. 25 26 #ifndef _http_cache_h 27 #define _http_cache_h 28 29 #include <pthread.h> 30 31 #ifdef WIN32 32 #include <io.h> // stat for win32? 09/05/02 jhrg 33 #endif 34 35 #include <string> 36 #include <vector> 37 #include <map> 38 39 #include "HTTPCacheTable.h" // included for macros 40 41 #include "HTTPCacheDisconnectedMode.h" 42 //using namespace std; 43 44 namespace libdap 45 { 46 47 class HTTPCacheTabe; 48 49 // This function is exported so the test code can use it too. 50 bool is_hop_by_hop_header(const string &header); 51 52 /** Implements a multi-process MT-safe HTTP 1.1 compliant (mostly) cache. 53 54 <i>Clients that run as users lacking a writable HOME directory MUST 55 disable this cache. Use Connect::set_cache_enable(false).</i> 56 57 The original design of this class was taken from the W3C libwww software, 58 written by Henrik Frystyk Nielsen, Copyright MIT 59 1995. See the file MIT_COPYRIGHT. This software is a complete rewrite in 60 C++ with additional features useful to the DODS and OPeNDAP projects. 61 62 This cache does not implement range checking. Partial responses should 63 not be cached (HFN's version did, but it doesn't mesh well with the DAP 64 for which this is being written). 65 66 The cache uses the local file system to store responses. If it is being 67 used in a MT application, care should be taken to ensure that the number 68 of available file descriptors is not exceeded. 69 70 In addition, when used in a MT program only one thread should use the 71 mutators to set property values. Even though the methods are robust WRT 72 MT software, having several threads change values of cache's properties 73 will lead to odd behavior on the part of the cache. Many of the public 74 methods lock access to the class' interface. This is noted in the 75 documentation for those methods. 76 77 Even though the public interface to the cache is typically locked when 78 accessed, an extra locking mechanism is in place for `entries' which are 79 accessed. If a thread accesses a entry, that response must be locked to 80 prevent it from being updated until the thread tells the cache that it's 81 no longer using it. The method get_cache_response() and 82 get_cache_response_body() both lock an entry; use 83 release_cache_response() to release the lock. Entries are locked using a 84 combination of a counter and a mutex. The following methods block when 85 called on a locked entry: is_url_valid(), 86 get_conditional_request_headers(), update_response(). (The locking scheme 87 could be modified so that a distinction is made between reading from and 88 writing to an entry. In this case is_url_valid() and 89 get_conditional_request_headers() would only lock when an entry is in use 90 for writing. But I haven't done that.) 91 92 @todo Update documentation: get_cache_response() now also serves as 93 is_url_in_cache() and is_url_valid() should only be called after a locked 94 cached response is accessed using get_cahced_response(). These lock the 95 cache for reading. The methods cache_response() and update_response() 96 lock an entry for writing. 97 98 @todo Check that the lock-for-write and lock-for-read work together since 99 it's possible that an entry in use might have a stream of readers and never 100 free the 'read-lock' thus blocking a writer. 101 102 @author James Gallagher <jgallagher@opendap.org> */ 103 class HTTPCache 104 { 105 private: 106 string d_cache_root; 107 FILE *d_locked_open_file; // Lock for single process use. 108 109 bool d_cache_enabled; 110 bool d_cache_protected; 111 CacheDisconnectedMode d_cache_disconnected; 112 bool d_expire_ignored; 113 bool d_always_validate; 114 115 unsigned long d_total_size; // How much can we store? 116 unsigned long d_folder_size; // How much of that is meta data? 117 unsigned long d_gc_buffer; // How much memory needed as buffer? 118 unsigned long d_max_entry_size; // Max individual entry size. 119 int d_default_expiration; 120 121 vector<string> d_cache_control; 122 // these are values read from a request-directive Cache-Control header. 123 // Not to be confused with values read from the response or a cached 124 // response (e.g., CacheEntry has a max_age field, too). These fields are 125 // set when the set_cache_control method is called. 126 time_t d_max_age; 127 time_t d_max_stale; // -1: not set, 0:any response, >0 max time. 128 time_t d_min_fresh; 129 130 // Lock non-const methods (also ones that use the STL). 131 pthread_mutex_t d_cache_mutex; 132 133 HTTPCacheTable *d_http_cache_table; 134 135 // d_open_files is used by the interrupt handler to clean up 136 vector<string> d_open_files; 137 138 static HTTPCache *_instance; 139 140 friend class HTTPCacheTest; // Unit tests 141 friend class HTTPConnectTest; 142 143 friend class HTTPCacheInterruptHandler; 144 145 // Private methods 146 HTTPCache(const HTTPCache &); 147 HTTPCache(); 148 HTTPCache &operator=(const HTTPCache &); 149 150 HTTPCache(string cache_root, bool force); 151 152 static void delete_instance(); // Run by atexit (hence static) 153 154 void set_cache_root(const string &root = ""); 155 void create_cache_root(const string &cache_root); 156 157 // These will go away when the cache can be used by multiple processes. 158 bool get_single_user_lock(bool force = false); 159 void release_single_user_lock(); 160 161 bool is_url_in_cache(const string &url); 162 163 // I made these four methods so they could be tested by HTTPCacheTest. 164 // Otherwise they would be static functions. jhrg 10/01/02 165 void write_metadata(const string &cachename, const vector<string> &headers); 166 void read_metadata(const string &cachename, vector<string> &headers); 167 int write_body(const string &cachename, const FILE *src); 168 FILE *open_body(const string &cachename); 169 170 bool stopGC() const; 171 bool startGC() const; 172 173 void perform_garbage_collection(); 174 void too_big_gc(); 175 void expired_gc(); 176 void hits_gc(); 177 178 public: 179 static HTTPCache *instance(const string &cache_root, bool force = false); 180 virtual ~HTTPCache(); 181 182 string get_cache_root() const; 183 184 void set_cache_enabled(bool mode); 185 bool is_cache_enabled() const; 186 187 void set_cache_disconnected(CacheDisconnectedMode mode); 188 CacheDisconnectedMode get_cache_disconnected() const; 189 190 void set_expire_ignored(bool mode); 191 bool is_expire_ignored() const; 192 193 void set_max_size(unsigned long size); 194 unsigned long get_max_size() const; 195 196 void set_max_entry_size(unsigned long size); 197 unsigned long get_max_entry_size() const; 198 199 void set_default_expiration(int exp_time); 200 int get_default_expiration() const; 201 202 void set_always_validate(bool validate); 203 bool get_always_validate() const; 204 205 void set_cache_control(const vector<string> &cc); 206 vector<string> get_cache_control(); 207 lock_cache_interface()208 void lock_cache_interface() { 209 DBG(cerr << "Locking interface... "); 210 LOCK(&d_cache_mutex); 211 DBGN(cerr << "Done" << endl); 212 } unlock_cache_interface()213 void unlock_cache_interface() { 214 DBG(cerr << "Unlocking interface... " ); 215 UNLOCK(&d_cache_mutex); 216 DBGN(cerr << "Done" << endl); 217 } 218 219 // This must lock for writing 220 bool cache_response(const string &url, time_t request_time, 221 const vector<string> &headers, const FILE *body); 222 void update_response(const string &url, time_t request_time, 223 const vector<string> &headers); 224 225 // This is separate from get_cached_response() because often an invalid 226 // cache entry just needs a header update. That is best left to the HTTP 227 // Connection code. 228 bool is_url_valid(const string &url); 229 230 // Lock these for reading 231 vector<string> get_conditional_request_headers(const string &url); 232 FILE *get_cached_response(const string &url, vector<string> &headers, 233 string &cacheName); 234 FILE *get_cached_response(const string &url, vector<string> &headers); 235 FILE *get_cached_response(const string &url); 236 237 void release_cached_response(FILE *response); 238 239 void purge_cache(); 240 }; 241 242 } // namespace libdap 243 244 #endif // _http_cache_h 245