1 
2 // -*- mode: c++; c-basic-offset:4 -*-
3 
4 // This file is part of libdap, A C++ implementation of the OPeNDAP Data
5 // Access Protocol.
6 
7 // Copyright (c) 2002,2008 OPeNDAP, Inc.
8 // Author: James Gallagher <jgallagher@opendap.org>
9 //
10 // This library is free software; you can redistribute it and/or
11 // modify it under the terms of the GNU Lesser General Public
12 // License as published by the Free Software Foundation; either
13 // version 2.1 of the License, or (at your option) any later version.
14 //
15 // This library is distributed in the hope that it will be useful,
16 // but WITHOUT ANY WARRANTY; without even the implied warranty of
17 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
18 // Lesser General Public License for more details.
19 //
20 // You should have received a copy of the GNU Lesser General Public
21 // License along with this library; if not, write to the Free Software
22 // Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA
23 //
24 // You can contact OPeNDAP, Inc. at PO Box 112, Saunderstown, RI. 02874-0112.
25 
26 #ifndef _http_cache_h
27 #define _http_cache_h
28 
29 #include <pthread.h>
30 
31 #ifdef WIN32
32 #include <io.h>   // stat for win32? 09/05/02 jhrg
33 #endif
34 
35 #include <string>
36 #include <vector>
37 #include <map>
38 
39 #include "HTTPCacheTable.h" // included for macros
40 
41 #include "HTTPCacheDisconnectedMode.h"
42 //using namespace std;
43 
44 namespace libdap
45 {
46 
// Forward declaration of the table type that HTTPCache holds by pointer
// (see HTTPCache::d_http_cache_table).
class HTTPCacheTable;
48 
// This function is exported so the test code can use it too.
// It takes one HTTP header by const reference and returns a bool; judging by
// the name, true presumably means the header is a hop-by-hop header. The
// definition lives out of line -- confirm the exact rule there.
bool is_hop_by_hop_header(const std::string &header);
51 
52 /** Implements a multi-process MT-safe HTTP 1.1 compliant (mostly) cache.
53 
54     <i>Clients that run as users lacking a writable HOME directory MUST
55     disable this cache. Use Connect::set_cache_enable(false).</i>
56 
57     The original design of this class was taken from the W3C libwww software,
58     written by Henrik Frystyk Nielsen, Copyright MIT
59     1995. See the file MIT_COPYRIGHT. This software is a complete rewrite in
60     C++ with additional features useful to the DODS and OPeNDAP projects.
61 
62     This cache does not implement range checking. Partial responses should
63     not be cached (HFN's version did, but it doesn't mesh well with the DAP
64     for which this is being written).
65 
66     The cache uses the local file system to store responses. If it is being
67     used in a MT application, care should be taken to ensure that the number
68     of available file descriptors is not exceeded.
69 
70     In addition, when used in a MT program only one thread should use the
71     mutators to set property values. Even though the methods are robust WRT
72     MT software, having several threads change values of cache's properties
73     will lead to odd behavior on the part of the cache. Many of the public
74     methods lock access to the class' interface. This is noted in the
75     documentation for those methods.
76 
    Even though the public interface to the cache is typically locked when
    accessed, an extra locking mechanism is in place for `entries' which are
    accessed. If a thread accesses an entry, that response must be locked to
    prevent it from being updated until the thread tells the cache that it's
    no longer using it. The methods get_cached_response() and
    get_cache_response_body() both lock an entry; use
    release_cached_response() to release the lock. Entries are locked using a
    combination of a counter and a mutex. The following methods block when
    called on a locked entry: is_url_valid(),
    get_conditional_request_headers(), update_response(). (The locking scheme
    could be modified so that a distinction is made between reading from and
    writing to an entry. In this case is_url_valid() and
    get_conditional_request_headers() would only lock when an entry is in use
    for writing. But I haven't done that.)
91 
	@todo Update documentation: get_cached_response() now also serves as
	is_url_in_cache() and is_url_valid() should only be called after a locked
	cached response is accessed using get_cached_response(). These lock the
	cache for reading. The methods cache_response() and update_response()
	lock an entry for writing.
97 
98 	@todo Check that the lock-for-write and lock-for-read work together since
99 	it's possible that an entry in use might have a stream of readers and never
100 	free the 'read-lock' thus blocking a writer.
101 
102     @author James Gallagher <jgallagher@opendap.org> */
103 class HTTPCache
104 {
105 private:
106     string d_cache_root;
107     FILE *d_locked_open_file; // Lock for single process use.
108 
109     bool d_cache_enabled;
110     bool d_cache_protected;
111     CacheDisconnectedMode d_cache_disconnected;
112     bool d_expire_ignored;
113     bool d_always_validate;
114 
115     unsigned long d_total_size; // How much can we store?
116     unsigned long d_folder_size; // How much of that is meta data?
117     unsigned long d_gc_buffer; // How much memory needed as buffer?
118     unsigned long d_max_entry_size; // Max individual entry size.
119     int d_default_expiration;
120 
121     vector<string> d_cache_control;
122     // these are values read from a request-directive Cache-Control header.
123     // Not to be confused with values read from the response or a cached
124     // response (e.g., CacheEntry has a max_age field, too). These fields are
125     // set when the set_cache_control method is called.
126     time_t d_max_age;
127     time_t d_max_stale;  // -1: not set, 0:any response, >0 max time.
128     time_t d_min_fresh;
129 
130     // Lock non-const methods (also ones that use the STL).
131     pthread_mutex_t d_cache_mutex;
132 
133     HTTPCacheTable *d_http_cache_table;
134 
135     // d_open_files is used by the interrupt handler to clean up
136     vector<string> d_open_files;
137 
138     static HTTPCache *_instance;
139 
140     friend class HTTPCacheTest; // Unit tests
141     friend class HTTPConnectTest;
142 
143     friend class HTTPCacheInterruptHandler;
144 
145     // Private methods
146     HTTPCache(const HTTPCache &);
147     HTTPCache();
148     HTTPCache &operator=(const HTTPCache &);
149 
150     HTTPCache(string cache_root, bool force);
151 
152     static void delete_instance(); // Run by atexit (hence static)
153 
154     void set_cache_root(const string &root = "");
155     void create_cache_root(const string &cache_root);
156 
157     // These will go away when the cache can be used by multiple processes.
158     bool get_single_user_lock(bool force = false);
159     void release_single_user_lock();
160 
161     bool is_url_in_cache(const string &url);
162 
163     // I made these four methods so they could be tested by HTTPCacheTest.
164     // Otherwise they would be static functions. jhrg 10/01/02
165     void write_metadata(const string &cachename, const vector<string> &headers);
166     void read_metadata(const string &cachename, vector<string> &headers);
167     int write_body(const string &cachename, const FILE *src);
168     FILE *open_body(const string &cachename);
169 
170     bool stopGC() const;
171     bool startGC() const;
172 
173     void perform_garbage_collection();
174     void too_big_gc();
175     void expired_gc();
176     void hits_gc();
177 
178 public:
179     static HTTPCache *instance(const string &cache_root, bool force = false);
180     virtual ~HTTPCache();
181 
182     string get_cache_root() const;
183 
184     void set_cache_enabled(bool mode);
185     bool is_cache_enabled() const;
186 
187     void set_cache_disconnected(CacheDisconnectedMode mode);
188     CacheDisconnectedMode get_cache_disconnected() const;
189 
190     void set_expire_ignored(bool mode);
191     bool is_expire_ignored() const;
192 
193     void set_max_size(unsigned long size);
194     unsigned long get_max_size() const;
195 
196     void set_max_entry_size(unsigned long size);
197     unsigned long get_max_entry_size() const;
198 
199     void set_default_expiration(int exp_time);
200     int get_default_expiration() const;
201 
202     void set_always_validate(bool validate);
203     bool get_always_validate() const;
204 
205     void set_cache_control(const vector<string> &cc);
206     vector<string> get_cache_control();
207 
lock_cache_interface()208     void lock_cache_interface() {
209     	DBG(cerr << "Locking interface... ");
210     	LOCK(&d_cache_mutex);
211     	DBGN(cerr << "Done" << endl);
212     }
unlock_cache_interface()213     void unlock_cache_interface() {
214     	DBG(cerr << "Unlocking interface... " );
215     	UNLOCK(&d_cache_mutex);
216     	DBGN(cerr << "Done" << endl);
217     }
218 
219     // This must lock for writing
220     bool cache_response(const string &url, time_t request_time,
221                         const vector<string> &headers, const FILE *body);
222     void update_response(const string &url, time_t request_time,
223                          const vector<string> &headers);
224 
225     // This is separate from get_cached_response() because often an invalid
226     // cache entry just needs a header update. That is best left to the HTTP
227     // Connection code.
228     bool is_url_valid(const string &url);
229 
230     // Lock these for reading
231     vector<string> get_conditional_request_headers(const string &url);
232     FILE *get_cached_response(const string &url, vector<string> &headers,
233 			      			  string &cacheName);
234     FILE *get_cached_response(const string &url, vector<string> &headers);
235     FILE *get_cached_response(const string &url);
236 
237     void release_cached_response(FILE *response);
238 
239     void purge_cache();
240 };
241 
242 } // namespace libdap
243 
244 #endif // _http_cache_h
245