1 /* Licensed to the Apache Software Foundation (ASF) under one or more
2  * contributor license agreements.  See the NOTICE file distributed with
3  * this work for additional information regarding copyright ownership.
4  * The ASF licenses this file to You under the Apache License, Version 2.0
5  * (the "License"); you may not use this file except in compliance with
6  * the License.  You may obtain a copy of the License at
7  *
8  *     http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #include "apr_lib.h"
18 #include "apr_file_io.h"
19 #include "apr_strings.h"
20 #include "mod_cache.h"
21 #include "mod_cache_disk.h"
22 #include "http_config.h"
23 #include "http_log.h"
24 #include "http_core.h"
25 #include "ap_provider.h"
26 #include "util_filter.h"
27 #include "util_script.h"
28 #include "util_charset.h"
29 
30 /*
31  * mod_cache_disk: Disk Based HTTP 1.1 Cache.
32  *
33  * Flow to Find the .data file:
34  *   Incoming client requests URI /foo/bar/baz
35  *   Generate <hash> off of /foo/bar/baz
36  *   Open <hash>.header
37  *   Read in <hash>.header file (may contain Format #1 or Format #2)
38  *   If format #1 (Contains a list of Vary Headers):
39  *      Use each header name (from .header) with our request values (headers_in) to
40  *      regenerate <hash> using HeaderName+HeaderValue+.../foo/bar/baz
41  *      re-read in <hash>.header (must be format #2)
42  *   read in <hash>.data
43  *
44  * Format #1:
45  *   apr_uint32_t format;
46  *   apr_time_t expire;
47  *   apr_array_t vary_headers (delimited by CRLF)
48  *
49  * Format #2:
50  *   disk_cache_info_t (first sizeof(apr_uint32_t) bytes is the format)
51  *   entity name (dobj->name) [length is in disk_cache_info_t->name_len]
52  *   r->headers_out (delimited by CRLF)
53  *   CRLF
54  *   r->headers_in (delimited by CRLF)
55  *   CRLF
56  */
57 
58 module AP_MODULE_DECLARE_DATA cache_disk_module;
59 
60 /* Forward declarations */
61 static int remove_entity(cache_handle_t *h);
62 static apr_status_t store_headers(cache_handle_t *h, request_rec *r, cache_info *i);
63 static apr_status_t store_body(cache_handle_t *h, request_rec *r, apr_bucket_brigade *in,
64                                apr_bucket_brigade *out);
65 static apr_status_t recall_headers(cache_handle_t *h, request_rec *r);
66 static apr_status_t recall_body(cache_handle_t *h, apr_pool_t *p, apr_bucket_brigade *bb);
67 static apr_status_t read_array(request_rec *r, apr_array_header_t* arr,
68                                apr_file_t *file);
69 
70 /*
71  * Local static functions
72  */
73 
/*
 * Build the path of the header file that belongs to a cache entry.
 *
 * The hashed file name is generated from `name` the first time it is needed
 * and remembered in dobj->hashfile; when dobj->hashfile is already set it is
 * reused and `name` is not consulted.  If dobj->prefix is set the file lives
 * in the directory "<prefix>" CACHE_VDIR_SUFFIX, otherwise directly below
 * conf->cache_root.  The result is allocated from pool `p`.
 */
static char *header_file(apr_pool_t *p, disk_cache_conf *conf,
                         disk_cache_object_t *dobj, const char *name)
{
    if (dobj->hashfile == NULL) {
        dobj->hashfile = ap_cache_generate_name(p, conf->dirlevels,
                                                conf->dirlength, name);
    }

    if (dobj->prefix == NULL) {
        return apr_pstrcat(p, conf->cache_root, "/", dobj->hashfile,
                           CACHE_HEADER_SUFFIX, NULL);
    }

    return apr_pstrcat(p, dobj->prefix, CACHE_VDIR_SUFFIX "/",
                       dobj->hashfile, CACHE_HEADER_SUFFIX, NULL);
}
91 
/*
 * Build the path of the data (body) file that belongs to a cache entry.
 *
 * Works like header_file(): dobj->hashfile is generated from `name` only
 * when it has not been set yet, and the file is placed either inside
 * "<prefix>" CACHE_VDIR_SUFFIX (when dobj->prefix is set) or directly below
 * conf->cache_root.  The result is allocated from pool `p`.
 */
static char *data_file(apr_pool_t *p, disk_cache_conf *conf,
                       disk_cache_object_t *dobj, const char *name)
{
    if (dobj->hashfile == NULL) {
        dobj->hashfile = ap_cache_generate_name(p, conf->dirlevels,
                                                conf->dirlength, name);
    }

    if (dobj->prefix == NULL) {
        return apr_pstrcat(p, conf->cache_root, "/", dobj->hashfile,
                           CACHE_DATA_SUFFIX, NULL);
    }

    return apr_pstrcat(p, dobj->prefix, CACHE_VDIR_SUFFIX "/",
                       dobj->hashfile, CACHE_DATA_SUFFIX, NULL);
}
109 
mkdir_structure(disk_cache_conf * conf,const char * file,apr_pool_t * pool)110 static apr_status_t mkdir_structure(disk_cache_conf *conf, const char *file, apr_pool_t *pool)
111 {
112     apr_status_t rv;
113     char *p;
114 
115     for (p = (char*)file + conf->cache_root_len + 1;;) {
116         p = strchr(p, '/');
117         if (!p)
118             break;
119         *p = '\0';
120 
121         rv = apr_dir_make(file,
122                           APR_UREAD|APR_UWRITE|APR_UEXECUTE, pool);
123         if (rv != APR_SUCCESS && !APR_STATUS_IS_EEXIST(rv)) {
124             return rv;
125         }
126         *p = '/';
127         ++p;
128     }
129     return APR_SUCCESS;
130 }
131 
132 /* htcacheclean may remove directories underneath us.
133  * So, we'll try renaming three times at a cost of 0.002 seconds.
134  */
safe_file_rename(disk_cache_conf * conf,const char * src,const char * dest,apr_pool_t * pool)135 static apr_status_t safe_file_rename(disk_cache_conf *conf,
136                                      const char *src, const char *dest,
137                                      apr_pool_t *pool)
138 {
139     apr_status_t rv;
140 
141     rv = apr_file_rename(src, dest, pool);
142 
143     if (rv != APR_SUCCESS) {
144         int i;
145 
146         for (i = 0; i < 2 && rv != APR_SUCCESS; i++) {
147             /* 1000 micro-seconds aka 0.001 seconds. */
148             apr_sleep(1000);
149 
150             rv = mkdir_structure(conf, dest, pool);
151             if (rv != APR_SUCCESS)
152                 continue;
153 
154             rv = apr_file_rename(src, dest, pool);
155         }
156     }
157 
158     return rv;
159 }
160 
file_cache_el_final(disk_cache_conf * conf,disk_cache_file_t * file,request_rec * r)161 static apr_status_t file_cache_el_final(disk_cache_conf *conf, disk_cache_file_t *file,
162                                         request_rec *r)
163 {
164     apr_status_t rv = APR_SUCCESS;
165 
166     /* This assumes that the tempfiles are on the same file system
167      * as the cache_root. If not, then we need a file copy/move
168      * rather than a rename.
169      */
170 
171     /* move the file over */
172     if (file->tempfd) {
173 
174         rv = safe_file_rename(conf, file->tempfile, file->file, file->pool);
175         if (rv != APR_SUCCESS) {
176             ap_log_rerror(APLOG_MARK, APLOG_WARNING, rv, r, APLOGNO(00699)
177                     "rename tempfile to file failed:"
178                     " %s -> %s", file->tempfile, file->file);
179             apr_file_remove(file->tempfile, file->pool);
180         }
181 
182         file->tempfd = NULL;
183     }
184 
185     return rv;
186 }
187 
file_cache_temp_cleanup(void * dummy)188 static apr_status_t file_cache_temp_cleanup(void *dummy) {
189     disk_cache_file_t *file = (disk_cache_file_t *)dummy;
190 
191     /* clean up the temporary file */
192     if (file->tempfd) {
193         apr_file_remove(file->tempfile, file->pool);
194         file->tempfd = NULL;
195     }
196     file->tempfile = NULL;
197     file->pool = NULL;
198 
199     return APR_SUCCESS;
200 }
201 
file_cache_create(disk_cache_conf * conf,disk_cache_file_t * file,apr_pool_t * pool)202 static apr_status_t file_cache_create(disk_cache_conf *conf, disk_cache_file_t *file,
203                                       apr_pool_t *pool)
204 {
205     file->pool = pool;
206     file->tempfile = apr_pstrcat(pool, conf->cache_root, AP_TEMPFILE, NULL);
207 
208     apr_pool_cleanup_register(pool, file, file_cache_temp_cleanup, apr_pool_cleanup_null);
209 
210     return APR_SUCCESS;
211 }
212 
213 /* These two functions get and put state information into the data
214  * file for an ap_cache_el, this state information will be read
215  * and written transparent to clients of this module
216  */
file_cache_recall_mydata(apr_file_t * fd,cache_info * info,disk_cache_object_t * dobj,request_rec * r)217 static int file_cache_recall_mydata(apr_file_t *fd, cache_info *info,
218                                     disk_cache_object_t *dobj, request_rec *r)
219 {
220     apr_status_t rv;
221     char *urlbuff;
222     apr_size_t len;
223 
224     /* read the data from the cache file */
225     len = sizeof(disk_cache_info_t);
226     rv = apr_file_read_full(fd, &dobj->disk_info, len, &len);
227     if (rv != APR_SUCCESS) {
228         return rv;
229     }
230 
231     /* Store it away so we can get it later. */
232     info->status = dobj->disk_info.status;
233     info->date = dobj->disk_info.date;
234     info->expire = dobj->disk_info.expire;
235     info->request_time = dobj->disk_info.request_time;
236     info->response_time = dobj->disk_info.response_time;
237 
238     memcpy(&info->control, &dobj->disk_info.control, sizeof(cache_control_t));
239 
240     /* Note that we could optimize this by conditionally doing the palloc
241      * depending upon the size. */
242     urlbuff = apr_palloc(r->pool, dobj->disk_info.name_len + 1);
243     len = dobj->disk_info.name_len;
244     rv = apr_file_read_full(fd, urlbuff, len, &len);
245     if (rv != APR_SUCCESS) {
246         return rv;
247     }
248     urlbuff[dobj->disk_info.name_len] = '\0';
249 
250     /* check that we have the same URL */
251     /* Would strncmp be correct? */
252     if (strcmp(urlbuff, dobj->name) != 0) {
253         return APR_EGENERAL;
254     }
255 
256     return APR_SUCCESS;
257 }
258 
/*
 * Build the cache key for a variant of a resource.
 *
 * For every header name in `varray` the name and the value found in
 * `headers` (or the empty string when the header is absent) are appended,
 * followed by `oldkey`.  The concatenation is allocated from pool `p`.
 *
 * TODO:
 *    - Handle multiple-value headers better. (sort them?)
 *    - Handle case-insensitive values better.
 *        This isn't the end of the world, since it just lowers the cache
 *        hit rate, but it would be nice to fix.
 *
 * The majority are case insensitive if they are values (encoding etc).
 * Most of rfc2616 is case insensitive on header contents.
 *
 * So the better solution may be to identify headers which should be
 * treated case-sensitive?
 *  HTTP URI's (3.2.3) [host and scheme are insensitive]
 *  HTTP method (5.1.1)
 *  HTTP-date values (3.3.1)
 *  3.7 Media Types [excerpt]
 *     The type, subtype, and parameter attribute names are case-
 *     insensitive. Parameter values might or might not be case-sensitive,
 *     depending on the semantics of the parameter name.
 *  14.20 Expect [excerpt]
 *     Comparison of expectation values is case-insensitive for unquoted
 *     tokens (including the 100-continue token), and is case-sensitive for
 *     quoted-string expectation-extensions.
 */
static const char* regen_key(apr_pool_t *p, apr_table_t *headers,
                             apr_array_header_t *varray, const char *oldkey)
{
    const char **names = (const char **) varray->elts;
    /* two pieces (name, value) per header plus one for the old key */
    int pieces = (varray->nelts * 2) + 1;
    struct iovec *vec = apr_palloc(p, sizeof(struct iovec) * pieces);
    struct iovec *slot = vec;
    int idx;

    for (idx = 0; idx < varray->nelts; idx++) {
        const char *value = apr_table_get(headers, names[idx]);

        if (value == NULL) {
            value = "";
        }

        slot->iov_base = (char*) names[idx];
        slot->iov_len = strlen(names[idx]);
        slot++;

        slot->iov_base = (char*) value;
        slot->iov_len = strlen(value);
        slot++;
    }

    slot->iov_base = (char*) oldkey;
    slot->iov_len = strlen(oldkey);

    return apr_pstrcatv(p, vec, pieces, NULL);
}
314 
/*
 * qsort() comparison callback for an array of C strings: both arguments
 * point at a string pointer, and the strings are ordered with strcmp().
 */
static int array_alphasort(const void *fn1, const void *fn2)
{
    const char *lhs = *(const char * const *) fn1;
    const char *rhs = *(const char * const *) fn2;

    return strcmp(lhs, rhs);
}
319 
/*
 * Split the list in `data` into items with ap_get_list_item(), append each
 * item to `arr`, and then sort the whole array alphabetically so that
 * "Vary: A, B" and "Vary: B, A" are stored the same.
 */
static void tokens_to_array(apr_pool_t *p, const char *data,
                            apr_array_header_t *arr)
{
    char *item;

    for (item = ap_get_list_item(p, &data);
         item != NULL;
         item = ap_get_list_item(p, &data)) {
        const char **slot = (const char **) apr_array_push(arr);

        *slot = item;
    }

    qsort((void *) arr->elts, arr->nelts,
          sizeof(char *), array_alphasort);
}
333 
334 /*
335  * Hook and mod_cache callback functions
336  */
/*
 * Set up a new cache entry for `key` so that headers and body can be
 * stored into it later.
 *
 * Declines when no cache root is configured, when the response is a
 * 206 Partial Content, or when the announced length `len` lies outside the
 * configured minimum/maximum file size (`len` is -1 when unknown, so it is
 * only a hint).  Otherwise a cache_object_t / disk_cache_object_t pair is
 * allocated from r->pool and attached to `h`, the three file descriptors
 * (hdrs, vary, data) are initialised on a fresh sub-pool, and the target
 * file names are computed.  The `bb` argument is not used here.
 *
 * Returns OK when the entity was set up, DECLINED otherwise.
 */
static int create_entity(cache_handle_t *h, request_rec *r, const char *key, apr_off_t len,
                         apr_bucket_brigade *bb)
{
    disk_cache_dir_conf *dir_cfg = ap_get_module_config(r->per_dir_config, &cache_disk_module);
    disk_cache_conf *srv_cfg = ap_get_module_config(r->server->module_config,
                                                    &cache_disk_module);
    cache_object_t *entry;
    disk_cache_object_t *disk;
    apr_pool_t *subpool;

    if (srv_cfg->cache_root == NULL) {
        return DECLINED;
    }

    /* we don't support caching of range requests (yet) */
    if (r->status == HTTP_PARTIAL_CONTENT) {
        ap_log_rerror(APLOG_MARK, APLOG_DEBUG, 0, r, APLOGNO(00700)
                "URL %s partial content response not cached",
                key);
        return DECLINED;
    }

    /* Note, len is -1 if unknown so don't trust it too hard */
    if (len > dir_cfg->maxfs) {
        ap_log_rerror(APLOG_MARK, APLOG_DEBUG, 0, r, APLOGNO(00701)
                "URL %s failed the size check "
                "(%" APR_OFF_T_FMT " > %" APR_OFF_T_FMT ")",
                key, len, dir_cfg->maxfs);
        return DECLINED;
    }
    if (len >= 0 && len < dir_cfg->minfs) {
        ap_log_rerror(APLOG_MARK, APLOG_DEBUG, 0, r, APLOGNO(00702)
                "URL %s failed the size check "
                "(%" APR_OFF_T_FMT " < %" APR_OFF_T_FMT ")",
                key, len, dir_cfg->minfs);
        return DECLINED;
    }

    /* Allocate the generic and the disk specific object and link them up */
    entry = apr_pcalloc(r->pool, sizeof(*entry));
    h->cache_obj = entry;
    disk = apr_pcalloc(r->pool, sizeof(*disk));
    entry->vobj = disk;

    entry->key = apr_pstrdup(r->pool, key);

    disk->name = entry->key;
    disk->prefix = NULL;
    /* Save the cache root */
    disk->root = apr_pstrndup(r->pool, srv_cfg->cache_root, srv_cfg->cache_root_len);
    disk->root_len = srv_cfg->cache_root_len;

    apr_pool_create(&subpool, r->pool);
    apr_pool_tag(subpool, "mod_cache (create_entity)");

    file_cache_create(srv_cfg, &disk->hdrs, subpool);
    file_cache_create(srv_cfg, &disk->vary, subpool);
    file_cache_create(srv_cfg, &disk->data, subpool);

    disk->data.file = data_file(r->pool, srv_cfg, disk, key);
    disk->hdrs.file = header_file(r->pool, srv_cfg, disk, key);
    disk->vary.file = header_file(r->pool, srv_cfg, disk, key);

    disk->disk_info.header_only = r->header_only;

    return OK;
}
402 
open_entity(cache_handle_t * h,request_rec * r,const char * key)403 static int open_entity(cache_handle_t *h, request_rec *r, const char *key)
404 {
405     apr_uint32_t format;
406     apr_size_t len;
407     const char *nkey;
408     apr_status_t rc;
409     static int error_logged = 0;
410     disk_cache_conf *conf = ap_get_module_config(r->server->module_config,
411                                                  &cache_disk_module);
412 #ifdef APR_SENDFILE_ENABLED
413     core_dir_config *coreconf = ap_get_core_module_config(r->per_dir_config);
414 #endif
415     apr_finfo_t finfo;
416     cache_object_t *obj;
417     cache_info *info;
418     disk_cache_object_t *dobj;
419     int flags;
420     apr_pool_t *pool;
421 
422     h->cache_obj = NULL;
423 
424     /* Look up entity keyed to 'url' */
425     if (conf->cache_root == NULL) {
426         if (!error_logged) {
427             error_logged = 1;
428             ap_log_rerror(APLOG_MARK, APLOG_ERR, 0, r, APLOGNO(00703)
429                     "Cannot cache files to disk without a CacheRoot specified.");
430         }
431         return DECLINED;
432     }
433 
434     /* Create and init the cache object */
435     obj = apr_pcalloc(r->pool, sizeof(cache_object_t));
436     dobj = apr_pcalloc(r->pool, sizeof(disk_cache_object_t));
437 
438     info = &(obj->info);
439 
440     /* Open the headers file */
441     dobj->prefix = NULL;
442 
443     /* Save the cache root */
444     dobj->root = apr_pstrndup(r->pool, conf->cache_root, conf->cache_root_len);
445     dobj->root_len = conf->cache_root_len;
446 
447     dobj->vary.file = header_file(r->pool, conf, dobj, key);
448     flags = APR_READ|APR_BINARY|APR_BUFFERED;
449     rc = apr_file_open(&dobj->vary.fd, dobj->vary.file, flags, 0, r->pool);
450     if (rc != APR_SUCCESS) {
451         return DECLINED;
452     }
453 
454     /* read the format from the cache file */
455     len = sizeof(format);
456     apr_file_read_full(dobj->vary.fd, &format, len, &len);
457 
458     if (format == VARY_FORMAT_VERSION) {
459         apr_array_header_t* varray;
460         apr_time_t expire;
461 
462         len = sizeof(expire);
463         apr_file_read_full(dobj->vary.fd, &expire, len, &len);
464 
465         varray = apr_array_make(r->pool, 5, sizeof(char*));
466         rc = read_array(r, varray, dobj->vary.fd);
467         if (rc != APR_SUCCESS) {
468             ap_log_rerror(APLOG_MARK, APLOG_ERR, rc, r, APLOGNO(00704)
469                     "Cannot parse vary header file: %s",
470                     dobj->vary.file);
471             apr_file_close(dobj->vary.fd);
472             return DECLINED;
473         }
474         apr_file_close(dobj->vary.fd);
475 
476         nkey = regen_key(r->pool, r->headers_in, varray, key);
477 
478         dobj->hashfile = NULL;
479         dobj->prefix = dobj->vary.file;
480         dobj->hdrs.file = header_file(r->pool, conf, dobj, nkey);
481 
482         flags = APR_READ|APR_BINARY|APR_BUFFERED;
483         rc = apr_file_open(&dobj->hdrs.fd, dobj->hdrs.file, flags, 0, r->pool);
484         if (rc != APR_SUCCESS) {
485             return DECLINED;
486         }
487     }
488     else if (format != DISK_FORMAT_VERSION) {
489         ap_log_rerror(APLOG_MARK, APLOG_ERR, 0, r, APLOGNO(00705)
490                 "File '%s' has a version mismatch. File had version: %d.",
491                 dobj->vary.file, format);
492         apr_file_close(dobj->vary.fd);
493         return DECLINED;
494     }
495     else {
496         apr_off_t offset = 0;
497 
498         /* oops, not vary as it turns out */
499         dobj->hdrs.fd = dobj->vary.fd;
500         dobj->vary.fd = NULL;
501         dobj->hdrs.file = dobj->vary.file;
502 
503         /* This wasn't a Vary Format file, so we must seek to the
504          * start of the file again, so that later reads work.
505          */
506         apr_file_seek(dobj->hdrs.fd, APR_SET, &offset);
507         nkey = key;
508     }
509 
510     obj->key = nkey;
511     dobj->key = nkey;
512     dobj->name = key;
513 
514     apr_pool_create(&pool, r->pool);
515     apr_pool_tag(pool, "mod_cache (open_entity)");
516 
517     file_cache_create(conf, &dobj->hdrs, pool);
518     file_cache_create(conf, &dobj->vary, pool);
519     file_cache_create(conf, &dobj->data, pool);
520 
521     dobj->data.file = data_file(r->pool, conf, dobj, nkey);
522 
523     /* Read the bytes to setup the cache_info fields */
524     rc = file_cache_recall_mydata(dobj->hdrs.fd, info, dobj, r);
525     if (rc != APR_SUCCESS) {
526         ap_log_rerror(APLOG_MARK, APLOG_ERR, rc, r, APLOGNO(00706)
527                 "Cannot read header file %s", dobj->hdrs.file);
528         apr_file_close(dobj->hdrs.fd);
529         return DECLINED;
530     }
531 
532     apr_file_close(dobj->hdrs.fd);
533 
534     /* Is this a cached HEAD request? */
535     if (dobj->disk_info.header_only && !r->header_only) {
536         ap_log_rerror(APLOG_MARK, APLOG_DEBUG, APR_SUCCESS, r, APLOGNO(00707)
537                 "HEAD request cached, non-HEAD requested, ignoring: %s",
538                 dobj->hdrs.file);
539         return DECLINED;
540     }
541 
542     /* Open the data file */
543     if (dobj->disk_info.has_body) {
544         flags = APR_READ | APR_BINARY;
545 #ifdef APR_SENDFILE_ENABLED
546         /* When we are in the quick handler we don't have the per-directory
547          * configuration, so this check only takes the global setting of
548          * the EnableSendFile directive into account.
549          */
550         flags |= AP_SENDFILE_ENABLED(coreconf->enable_sendfile);
551 #endif
552         rc = apr_file_open(&dobj->data.fd, dobj->data.file, flags, 0, r->pool);
553         if (rc != APR_SUCCESS) {
554             ap_log_rerror(APLOG_MARK, APLOG_ERR, rc, r, APLOGNO(00708)
555                     "Cannot open data file %s", dobj->data.file);
556             apr_file_close(dobj->hdrs.fd);
557             return DECLINED;
558         }
559 
560         rc = apr_file_info_get(&finfo, APR_FINFO_SIZE | APR_FINFO_IDENT,
561                 dobj->data.fd);
562         if (rc == APR_SUCCESS) {
563             dobj->file_size = finfo.size;
564         }
565 
566         /* Atomic check - does the body file belong to the header file? */
567         if (dobj->disk_info.inode == finfo.inode &&
568                 dobj->disk_info.device == finfo.device) {
569 
570             /* Initialize the cache_handle callback functions */
571             ap_log_rerror(APLOG_MARK, APLOG_DEBUG, 0, r, APLOGNO(00709)
572                     "Recalled cached URL info header %s", dobj->name);
573 
574             /* make the configuration stick */
575             h->cache_obj = obj;
576             obj->vobj = dobj;
577 
578             return OK;
579         }
580 
581     }
582     else {
583 
584         /* make the configuration stick */
585         h->cache_obj = obj;
586         obj->vobj = dobj;
587 
588         return OK;
589     }
590 
591     /* Oh dear, no luck matching header to the body */
592     ap_log_rerror(APLOG_MARK, APLOG_DEBUG, 0, r, APLOGNO(00710)
593             "Cached URL info header '%s' didn't match body, ignoring this entry",
594             dobj->name);
595 
596     return DECLINED;
597 }
598 
/*
 * Close whichever of the two handles of `file` (the regular fd first, then
 * the temporary fd) are open, and reset each closed handle to NULL.
 */
static void close_disk_cache_fd(disk_cache_file_t *file)
{
    apr_file_t **handles[2];
    int i;

    handles[0] = &file->fd;
    handles[1] = &file->tempfd;

    for (i = 0; i < 2; i++) {
        if (*handles[i] == NULL) {
            continue;
        }
        apr_file_close(*handles[i]);
        *handles[i] = NULL;
    }
}
610 
/*
 * Release an entity: close any open header, vary and data file handles and
 * detach the cache object from the handle so that the next use starts from
 * scratch.  Always returns OK.
 */
static int remove_entity(cache_handle_t *h)
{
    disk_cache_object_t *disk = (disk_cache_object_t *) h->cache_obj->vobj;
    disk_cache_file_t *members[3];
    int i;

    members[0] = &disk->hdrs;
    members[1] = &disk->vary;
    members[2] = &disk->data;

    for (i = 0; i < 3; i++) {
        close_disk_cache_fd(members[i]);
    }

    /* Null out the cache object pointer so next time we start from scratch  */
    h->cache_obj = NULL;
    return OK;
}
623 
/*
 * Delete the files of a cache entry from disk.
 *
 * The header file and then the data file are removed (a file that does not
 * exist is not an error); any other failure is logged and DECLINED is
 * returned.  Afterwards the directories between the entry and the cache
 * root are removed as far as possible.
 *
 * Returns DECLINED when there is no disk object or a file could not be
 * deleted, OK otherwise.
 */
static int remove_url(cache_handle_t *h, request_rec *r)
{
    apr_status_t status;
    disk_cache_object_t *disk;
    const char *entry_path;
    char *dir;
    char *sep;
    char *root_end;

    /* Get disk cache object from cache handle */
    disk = (disk_cache_object_t *) h->cache_obj->vobj;
    if (disk == NULL) {
        return DECLINED;
    }

    /* Delete headers file */
    if (disk->hdrs.file != NULL) {
        ap_log_rerror(APLOG_MARK, APLOG_DEBUG, 0, r, APLOGNO(00711)
                "Deleting %s from cache.", disk->hdrs.file);

        status = apr_file_remove(disk->hdrs.file, r->pool);
        if (status != APR_SUCCESS && !APR_STATUS_IS_ENOENT(status)) {
            /* Will only result in an output if httpd is started with -e debug.
             * For reason see log_error_core for the case s == NULL.
             */
            ap_log_rerror(APLOG_MARK, APLOG_DEBUG, status, r, APLOGNO(00712)
                    "Failed to delete headers file %s from cache.",
                    disk->hdrs.file);
            return DECLINED;
        }
    }

    /* Delete data file */
    if (disk->data.file != NULL) {
        ap_log_rerror(APLOG_MARK, APLOG_DEBUG, 0, r, APLOGNO(00713)
                "Deleting %s from cache.", disk->data.file);

        status = apr_file_remove(disk->data.file, r->pool);
        if (status != APR_SUCCESS && !APR_STATUS_IS_ENOENT(status)) {
            /* Will only result in an output if httpd is started with -e debug.
             * For reason see log_error_core for the case s == NULL.
             */
            ap_log_rerror(APLOG_MARK, APLOG_DEBUG, status, r, APLOGNO(00714)
                    "Failed to delete data file %s from cache.",
                    disk->data.file);
            return DECLINED;
        }
    }

    /* now delete directories as far as possible up to our cache root */
    if (disk->root == NULL) {
        return OK;
    }

    entry_path = disk->hdrs.file ? disk->hdrs.file : disk->data.file;
    if (entry_path == NULL) {
        return OK;
    }

    dir = apr_pstrdup(r->pool, entry_path);

    /* remove filename */
    sep = strrchr(dir, '/');
    *sep = '\0';

    /*
     * now walk our way back to the cache root, delete everything
     * in the way as far as possible
     *
     * Note: due to the way we constructed the file names in
     * header_file and data_file, we are guaranteed that the
     * cache_root is suffixed by at least one '/' which will be
     * turned into a terminating null by this loop.  Therefore,
     * we won't either delete or go above our cache root.
     */
    root_end = dir + disk->root_len;
    while (*root_end != '\0') {
        ap_log_rerror(APLOG_MARK, APLOG_DEBUG, 0, r, APLOGNO(00715)
                "Deleting directory %s from cache", dir);

        status = apr_dir_remove(dir, r->pool);
        if (status != APR_SUCCESS && !APR_STATUS_IS_ENOENT(status)) {
            break;
        }

        /* chop off the last remaining path component */
        sep = strrchr(root_end, '/');
        *sep = '\0';
    }

    return OK;
}
709 
read_array(request_rec * r,apr_array_header_t * arr,apr_file_t * file)710 static apr_status_t read_array(request_rec *r, apr_array_header_t* arr,
711                                apr_file_t *file)
712 {
713     char w[MAX_STRING_LEN];
714     int p;
715     apr_status_t rv;
716 
717     while (1) {
718         rv = apr_file_gets(w, MAX_STRING_LEN - 1, file);
719         if (rv != APR_SUCCESS) {
720             ap_log_rerror(APLOG_MARK, APLOG_ERR, 0, r, APLOGNO(00716)
721                           "Premature end of vary array.");
722             return rv;
723         }
724 
725         p = strlen(w);
726         if (p > 0 && w[p - 1] == '\n') {
727             if (p > 1 && w[p - 2] == CR) {
728                 w[p - 2] = '\0';
729             }
730             else {
731                 w[p - 1] = '\0';
732             }
733         }
734 
735         /* If we've finished reading the array, break out of the loop. */
736         if (w[0] == '\0') {
737             break;
738         }
739 
740        *((const char **) apr_array_push(arr)) = apr_pstrdup(r->pool, w);
741     }
742 
743     return APR_SUCCESS;
744 }
745 
store_array(apr_file_t * fd,apr_array_header_t * arr)746 static apr_status_t store_array(apr_file_t *fd, apr_array_header_t* arr)
747 {
748     int i;
749     apr_status_t rv;
750     struct iovec iov[2];
751     apr_size_t amt;
752     const char **elts;
753 
754     elts = (const char **) arr->elts;
755 
756     for (i = 0; i < arr->nelts; i++) {
757         iov[0].iov_base = (char*) elts[i];
758         iov[0].iov_len = strlen(elts[i]);
759         iov[1].iov_base = CRLF;
760         iov[1].iov_len = sizeof(CRLF) - 1;
761 
762         rv = apr_file_writev_full(fd, (const struct iovec *) &iov, 2, &amt);
763         if (rv != APR_SUCCESS) {
764             return rv;
765         }
766     }
767 
768     iov[0].iov_base = CRLF;
769     iov[0].iov_len = sizeof(CRLF) - 1;
770 
771     return apr_file_writev_full(fd, (const struct iovec *) &iov, 1, &amt);
772 }
773 
read_table(cache_handle_t * handle,request_rec * r,apr_table_t * table,apr_file_t * file)774 static apr_status_t read_table(cache_handle_t *handle, request_rec *r,
775                                apr_table_t *table, apr_file_t *file)
776 {
777     char w[MAX_STRING_LEN];
778     char *l;
779     int p;
780     apr_status_t rv;
781 
782     while (1) {
783 
784         /* ### What about APR_EOF? */
785         rv = apr_file_gets(w, MAX_STRING_LEN - 1, file);
786         if (rv != APR_SUCCESS) {
787             ap_log_rerror(APLOG_MARK, APLOG_ERR, 0, r, APLOGNO(00717)
788                           "Premature end of cache headers.");
789             return rv;
790         }
791 
792         /* Delete terminal (CR?)LF */
793 
794         p = strlen(w);
795         /* Indeed, the host's '\n':
796            '\012' for UNIX; '\015' for MacOS; '\025' for OS/390
797            -- whatever the script generates.
798         */
799         if (p > 0 && w[p - 1] == '\n') {
800             if (p > 1 && w[p - 2] == CR) {
801                 w[p - 2] = '\0';
802             }
803             else {
804                 w[p - 1] = '\0';
805             }
806         }
807 
808         /* If we've finished reading the headers, break out of the loop. */
809         if (w[0] == '\0') {
810             break;
811         }
812 
813 #if APR_CHARSET_EBCDIC
814         /* Chances are that we received an ASCII header text instead of
815          * the expected EBCDIC header lines. Try to auto-detect:
816          */
817         if (!(l = strchr(w, ':'))) {
818             int maybeASCII = 0, maybeEBCDIC = 0;
819             unsigned char *cp, native;
820             apr_size_t inbytes_left, outbytes_left;
821 
822             for (cp = w; *cp != '\0'; ++cp) {
823                 native = apr_xlate_conv_byte(ap_hdrs_from_ascii, *cp);
824                 if (apr_isprint(*cp) && !apr_isprint(native))
825                     ++maybeEBCDIC;
826                 if (!apr_isprint(*cp) && apr_isprint(native))
827                     ++maybeASCII;
828             }
829             if (maybeASCII > maybeEBCDIC) {
830                 ap_log_rerror(APLOG_MARK, APLOG_ERR, 0, r, APLOGNO(00718)
831                         "CGI Interface Error: Script headers apparently ASCII: (CGI = %s)",
832                         r->filename);
833                 inbytes_left = outbytes_left = cp - w;
834                 apr_xlate_conv_buffer(ap_hdrs_from_ascii,
835                                       w, &inbytes_left, w, &outbytes_left);
836             }
837         }
838 #endif /*APR_CHARSET_EBCDIC*/
839 
840         /* if we see a bogus header don't ignore it. Shout and scream */
841         if (!(l = strchr(w, ':'))) {
842             return APR_EGENERAL;
843         }
844 
845         *l++ = '\0';
846         while (apr_isspace(*l)) {
847             ++l;
848         }
849 
850         apr_table_add(table, w, l);
851     }
852 
853     return APR_SUCCESS;
854 }
855 
856 /*
857  * Reads headers from a buffer and returns an array of headers.
858  * Returns NULL on file error
859  * This routine tries to deal with too long lines and continuation lines.
860  * @@@: XXX: FIXME: currently the headers are passed thru un-merged.
861  * Is that okay, or should they be collapsed where possible?
862  */
recall_headers(cache_handle_t * h,request_rec * r)863 static apr_status_t recall_headers(cache_handle_t *h, request_rec *r)
864 {
865     disk_cache_object_t *dobj = (disk_cache_object_t *) h->cache_obj->vobj;
866 
867     /* This case should not happen... */
868     if (!dobj->hdrs.fd) {
869         ap_log_rerror(APLOG_MARK, APLOG_ERR, 0, r, APLOGNO(00719)
870                 "recalling headers; but no header fd for %s", dobj->name);
871         return APR_NOTFOUND;
872     }
873 
874     h->req_hdrs = apr_table_make(r->pool, 20);
875     h->resp_hdrs = apr_table_make(r->pool, 20);
876 
877     /* Call routine to read the header lines/status line */
878     read_table(h, r, h->resp_hdrs, dobj->hdrs.fd);
879     read_table(h, r, h->req_hdrs, dobj->hdrs.fd);
880 
881     apr_file_close(dobj->hdrs.fd);
882 
883     ap_log_rerror(APLOG_MARK, APLOG_DEBUG, 0, r, APLOGNO(00720)
884             "Recalled headers for URL %s", dobj->name);
885     return APR_SUCCESS;
886 }
887 
recall_body(cache_handle_t * h,apr_pool_t * p,apr_bucket_brigade * bb)888 static apr_status_t recall_body(cache_handle_t *h, apr_pool_t *p, apr_bucket_brigade *bb)
889 {
890     disk_cache_object_t *dobj = (disk_cache_object_t*) h->cache_obj->vobj;
891 
892     if (dobj->data.fd) {
893         apr_brigade_insert_file(bb, dobj->data.fd, 0, dobj->file_size, p);
894     }
895 
896     return APR_SUCCESS;
897 }
898 
store_table(apr_file_t * fd,apr_table_t * table)899 static apr_status_t store_table(apr_file_t *fd, apr_table_t *table)
900 {
901     int i;
902     apr_status_t rv;
903     struct iovec iov[4];
904     apr_size_t amt;
905     apr_table_entry_t *elts;
906 
907     elts = (apr_table_entry_t *) apr_table_elts(table)->elts;
908     for (i = 0; i < apr_table_elts(table)->nelts; ++i) {
909         if (elts[i].key != NULL) {
910             iov[0].iov_base = elts[i].key;
911             iov[0].iov_len = strlen(elts[i].key);
912             iov[1].iov_base = ": ";
913             iov[1].iov_len = sizeof(": ") - 1;
914             iov[2].iov_base = elts[i].val;
915             iov[2].iov_len = strlen(elts[i].val);
916             iov[3].iov_base = CRLF;
917             iov[3].iov_len = sizeof(CRLF) - 1;
918 
919             rv = apr_file_writev_full(fd, (const struct iovec *) &iov, 4, &amt);
920             if (rv != APR_SUCCESS) {
921                 return rv;
922             }
923         }
924     }
925     iov[0].iov_base = CRLF;
926     iov[0].iov_len = sizeof(CRLF) - 1;
927     rv = apr_file_writev_full(fd, (const struct iovec *) &iov, 1, &amt);
928     return rv;
929 }
930 
store_headers(cache_handle_t * h,request_rec * r,cache_info * info)931 static apr_status_t store_headers(cache_handle_t *h, request_rec *r, cache_info *info)
932 {
933     disk_cache_object_t *dobj = (disk_cache_object_t*) h->cache_obj->vobj;
934 
935     memcpy(&h->cache_obj->info, info, sizeof(cache_info));
936 
937     if (r->headers_out) {
938         dobj->headers_out = ap_cache_cacheable_headers_out(r);
939     }
940 
941     if (r->headers_in) {
942         dobj->headers_in = ap_cache_cacheable_headers_in(r);
943     }
944 
945     if (r->header_only && r->status != HTTP_NOT_MODIFIED) {
946         dobj->disk_info.header_only = 1;
947     }
948 
949     return APR_SUCCESS;
950 }
951 
write_headers(cache_handle_t * h,request_rec * r)952 static apr_status_t write_headers(cache_handle_t *h, request_rec *r)
953 {
954     disk_cache_conf *conf = ap_get_module_config(r->server->module_config,
955                                                  &cache_disk_module);
956     apr_status_t rv;
957     apr_size_t amt;
958     disk_cache_object_t *dobj = (disk_cache_object_t*) h->cache_obj->vobj;
959 
960     disk_cache_info_t disk_info;
961     struct iovec iov[2];
962 
963     memset(&disk_info, 0, sizeof(disk_cache_info_t));
964 
965     if (dobj->headers_out) {
966         const char *tmp;
967 
968         tmp = apr_table_get(dobj->headers_out, "Vary");
969 
970         if (tmp) {
971             apr_array_header_t* varray;
972             apr_uint32_t format = VARY_FORMAT_VERSION;
973 
974             /* If we were initially opened as a vary format, rollback
975              * that internal state for the moment so we can recreate the
976              * vary format hints in the appropriate directory.
977              */
978             if (dobj->prefix) {
979                 dobj->hdrs.file = dobj->prefix;
980                 dobj->prefix = NULL;
981             }
982 
983             rv = mkdir_structure(conf, dobj->hdrs.file, r->pool);
984 
985             rv = apr_file_mktemp(&dobj->vary.tempfd, dobj->vary.tempfile,
986                                  APR_CREATE | APR_WRITE | APR_BINARY | APR_EXCL,
987                                  dobj->vary.pool);
988 
989             if (rv != APR_SUCCESS) {
990                 ap_log_rerror(APLOG_MARK, APLOG_WARNING, rv, r, APLOGNO(00721)
991                         "could not create vary file %s",
992                         dobj->vary.tempfile);
993                 return rv;
994             }
995 
996             amt = sizeof(format);
997             rv = apr_file_write_full(dobj->vary.tempfd, &format, amt, NULL);
998             if (rv != APR_SUCCESS) {
999                 ap_log_rerror(APLOG_MARK, APLOG_WARNING, rv, r, APLOGNO(00722)
1000                         "could not write to vary file %s",
1001                         dobj->vary.tempfile);
1002                 apr_file_close(dobj->vary.tempfd);
1003                 apr_pool_destroy(dobj->vary.pool);
1004                 return rv;
1005             }
1006 
1007             amt = sizeof(h->cache_obj->info.expire);
1008             rv = apr_file_write_full(dobj->vary.tempfd,
1009                                      &h->cache_obj->info.expire, amt, NULL);
1010             if (rv != APR_SUCCESS) {
1011                 ap_log_rerror(APLOG_MARK, APLOG_WARNING, rv, r, APLOGNO(00723)
1012                         "could not write to vary file %s",
1013                         dobj->vary.tempfile);
1014                 apr_file_close(dobj->vary.tempfd);
1015                 apr_pool_destroy(dobj->vary.pool);
1016                 return rv;
1017             }
1018 
1019             varray = apr_array_make(r->pool, 6, sizeof(char*));
1020             tokens_to_array(r->pool, tmp, varray);
1021 
1022             store_array(dobj->vary.tempfd, varray);
1023 
1024             rv = apr_file_close(dobj->vary.tempfd);
1025             if (rv != APR_SUCCESS) {
1026                 ap_log_rerror(APLOG_MARK, APLOG_WARNING, rv, r, APLOGNO(00724)
1027                         "could not close vary file %s",
1028                         dobj->vary.tempfile);
1029                 apr_pool_destroy(dobj->vary.pool);
1030                 return rv;
1031             }
1032 
1033             tmp = regen_key(r->pool, dobj->headers_in, varray, dobj->name);
1034             dobj->prefix = dobj->hdrs.file;
1035             dobj->hashfile = NULL;
1036             dobj->data.file = data_file(r->pool, conf, dobj, tmp);
1037             dobj->hdrs.file = header_file(r->pool, conf, dobj, tmp);
1038         }
1039     }
1040 
1041 
1042     rv = apr_file_mktemp(&dobj->hdrs.tempfd, dobj->hdrs.tempfile,
1043                          APR_CREATE | APR_WRITE | APR_BINARY |
1044                          APR_BUFFERED | APR_EXCL, dobj->hdrs.pool);
1045 
1046     if (rv != APR_SUCCESS) {
1047        ap_log_rerror(APLOG_MARK, APLOG_WARNING, rv, r, APLOGNO(00725)
1048                 "could not create header file %s",
1049                 dobj->hdrs.tempfile);
1050         return rv;
1051     }
1052 
1053     disk_info.format = DISK_FORMAT_VERSION;
1054     disk_info.date = h->cache_obj->info.date;
1055     disk_info.expire = h->cache_obj->info.expire;
1056     disk_info.entity_version = dobj->disk_info.entity_version++;
1057     disk_info.request_time = h->cache_obj->info.request_time;
1058     disk_info.response_time = h->cache_obj->info.response_time;
1059     disk_info.status = h->cache_obj->info.status;
1060     disk_info.inode = dobj->disk_info.inode;
1061     disk_info.device = dobj->disk_info.device;
1062     disk_info.has_body = dobj->disk_info.has_body;
1063     disk_info.header_only = dobj->disk_info.header_only;
1064 
1065     disk_info.name_len = strlen(dobj->name);
1066 
1067     memcpy(&disk_info.control, &h->cache_obj->info.control, sizeof(cache_control_t));
1068 
1069     iov[0].iov_base = (void*)&disk_info;
1070     iov[0].iov_len = sizeof(disk_cache_info_t);
1071     iov[1].iov_base = (void*)dobj->name;
1072     iov[1].iov_len = disk_info.name_len;
1073 
1074     rv = apr_file_writev_full(dobj->hdrs.tempfd, (const struct iovec *) &iov,
1075                               2, &amt);
1076     if (rv != APR_SUCCESS) {
1077         ap_log_rerror(APLOG_MARK, APLOG_WARNING, rv, r, APLOGNO(00726)
1078                 "could not write info to header file %s",
1079                 dobj->hdrs.tempfile);
1080         apr_file_close(dobj->hdrs.tempfd);
1081         apr_pool_destroy(dobj->hdrs.pool);
1082         return rv;
1083     }
1084 
1085     if (dobj->headers_out) {
1086         rv = store_table(dobj->hdrs.tempfd, dobj->headers_out);
1087         if (rv != APR_SUCCESS) {
1088             ap_log_rerror(APLOG_MARK, APLOG_WARNING, rv, r, APLOGNO(00727)
1089                     "could not write out-headers to header file %s",
1090                     dobj->hdrs.tempfile);
1091             apr_file_close(dobj->hdrs.tempfd);
1092             apr_pool_destroy(dobj->hdrs.pool);
1093             return rv;
1094         }
1095     }
1096 
1097     /* Parse the vary header and dump those fields from the headers_in. */
1098     /* FIXME: Make call to the same thing cache_select calls to crack Vary. */
1099     if (dobj->headers_in) {
1100         rv = store_table(dobj->hdrs.tempfd, dobj->headers_in);
1101         if (rv != APR_SUCCESS) {
1102             ap_log_rerror(APLOG_MARK, APLOG_WARNING, rv, r, APLOGNO(00728)
1103                     "could not write in-headers to header file %s",
1104                     dobj->hdrs.tempfile);
1105             apr_file_close(dobj->hdrs.tempfd);
1106             apr_pool_destroy(dobj->hdrs.pool);
1107             return rv;
1108         }
1109     }
1110 
1111     rv = apr_file_close(dobj->hdrs.tempfd); /* flush and close */
1112     if (rv != APR_SUCCESS) {
1113         ap_log_rerror(APLOG_MARK, APLOG_WARNING, rv, r, APLOGNO(00729)
1114                 "could not close header file %s",
1115                 dobj->hdrs.tempfile);
1116         apr_pool_destroy(dobj->hdrs.pool);
1117         return rv;
1118     }
1119 
1120     return APR_SUCCESS;
1121 }
1122 
store_body(cache_handle_t * h,request_rec * r,apr_bucket_brigade * in,apr_bucket_brigade * out)1123 static apr_status_t store_body(cache_handle_t *h, request_rec *r,
1124                                apr_bucket_brigade *in, apr_bucket_brigade *out)
1125 {
1126     apr_bucket *e;
1127     apr_status_t rv = APR_SUCCESS;
1128     disk_cache_object_t *dobj = (disk_cache_object_t *) h->cache_obj->vobj;
1129     disk_cache_dir_conf *dconf = ap_get_module_config(r->per_dir_config, &cache_disk_module);
1130     int seen_eos = 0;
1131 
1132     if (!dobj->offset) {
1133         dobj->offset = dconf->readsize;
1134     }
1135     if (!dobj->timeout && dconf->readtime) {
1136         dobj->timeout = apr_time_now() + dconf->readtime;
1137     }
1138 
1139     if (dobj->offset) {
1140         apr_brigade_partition(in, dobj->offset, &e);
1141     }
1142 
1143     while (APR_SUCCESS == rv && !APR_BRIGADE_EMPTY(in)) {
1144         const char *str;
1145         apr_size_t length, written;
1146 
1147         e = APR_BRIGADE_FIRST(in);
1148 
1149         /* are we done completely? if so, pass any trailing buckets right through */
1150         if (dobj->done || !dobj->data.pool) {
1151             APR_BUCKET_REMOVE(e);
1152             APR_BRIGADE_INSERT_TAIL(out, e);
1153             continue;
1154         }
1155 
1156         /* have we seen eos yet? */
1157         if (APR_BUCKET_IS_EOS(e)) {
1158             seen_eos = 1;
1159             dobj->done = 1;
1160             APR_BUCKET_REMOVE(e);
1161             APR_BRIGADE_INSERT_TAIL(out, e);
1162             break;
1163         }
1164 
1165         /* honour flush buckets, we'll get called again */
1166         if (APR_BUCKET_IS_FLUSH(e)) {
1167             APR_BUCKET_REMOVE(e);
1168             APR_BRIGADE_INSERT_TAIL(out, e);
1169             break;
1170         }
1171 
1172         /* metadata buckets are preserved as is */
1173         if (APR_BUCKET_IS_METADATA(e)) {
1174             APR_BUCKET_REMOVE(e);
1175             APR_BRIGADE_INSERT_TAIL(out, e);
1176             continue;
1177         }
1178 
1179         /* read the bucket, write to the cache */
1180         rv = apr_bucket_read(e, &str, &length, APR_BLOCK_READ);
1181         APR_BUCKET_REMOVE(e);
1182         APR_BRIGADE_INSERT_TAIL(out, e);
1183         if (rv != APR_SUCCESS) {
1184             ap_log_rerror(APLOG_MARK, APLOG_ERR, 0, r, APLOGNO(00730)
1185                     "Error when reading bucket for URL %s",
1186                     h->cache_obj->key);
1187             /* Remove the intermediate cache file and return non-APR_SUCCESS */
1188             apr_pool_destroy(dobj->data.pool);
1189             return rv;
1190         }
1191 
1192         /* don't write empty buckets to the cache */
1193         if (!length) {
1194             continue;
1195         }
1196 
1197         if (!dobj->disk_info.header_only) {
1198 
1199             /* Attempt to create the data file at the last possible moment, if
1200              * the body is empty, we don't write a file at all, and save an inode.
1201              */
1202             if (!dobj->data.tempfd) {
1203                 apr_finfo_t finfo;
1204                 rv = apr_file_mktemp(&dobj->data.tempfd, dobj->data.tempfile,
1205                         APR_CREATE | APR_WRITE | APR_BINARY | APR_BUFFERED
1206                                 | APR_EXCL, dobj->data.pool);
1207                 if (rv != APR_SUCCESS) {
1208                     apr_pool_destroy(dobj->data.pool);
1209                     return rv;
1210                 }
1211                 dobj->file_size = 0;
1212                 rv = apr_file_info_get(&finfo, APR_FINFO_IDENT,
1213                         dobj->data.tempfd);
1214                 if (rv != APR_SUCCESS) {
1215                     apr_pool_destroy(dobj->data.pool);
1216                     return rv;
1217                 }
1218                 dobj->disk_info.device = finfo.device;
1219                 dobj->disk_info.inode = finfo.inode;
1220                 dobj->disk_info.has_body = 1;
1221             }
1222 
1223             /* write to the cache, leave if we fail */
1224             rv = apr_file_write_full(dobj->data.tempfd, str, length, &written);
1225             if (rv != APR_SUCCESS) {
1226                 ap_log_rerror(
1227                         APLOG_MARK, APLOG_ERR, 0, r, APLOGNO(00731) "Error when writing cache file for URL %s", h->cache_obj->key);
1228                 /* Remove the intermediate cache file and return non-APR_SUCCESS */
1229                 apr_pool_destroy(dobj->data.pool);
1230                 return rv;
1231             }
1232             dobj->file_size += written;
1233             if (dobj->file_size > dconf->maxfs) {
1234                 ap_log_rerror(
1235                         APLOG_MARK, APLOG_DEBUG, 0, r, APLOGNO(00732) "URL %s failed the size check "
1236                         "(%" APR_OFF_T_FMT ">%" APR_OFF_T_FMT ")", h->cache_obj->key, dobj->file_size, dconf->maxfs);
1237                 /* Remove the intermediate cache file and return non-APR_SUCCESS */
1238                 apr_pool_destroy(dobj->data.pool);
1239                 return APR_EGENERAL;
1240             }
1241 
1242         }
1243 
1244         /* have we reached the limit of how much we're prepared to write in one
1245          * go? If so, leave, we'll get called again. This prevents us from trying
1246          * to swallow too much data at once, or taking so long to write the data
1247          * the client times out.
1248          */
1249         dobj->offset -= length;
1250         if (dobj->offset <= 0) {
1251             dobj->offset = 0;
1252             break;
1253         }
1254         if ((dconf->readtime && apr_time_now() > dobj->timeout)) {
1255             dobj->timeout = 0;
1256             break;
1257         }
1258 
1259     }
1260 
1261     /* Was this the final bucket? If yes, close the temp file and perform
1262      * sanity checks.
1263      */
1264     if (seen_eos) {
1265         const char *cl_header = apr_table_get(r->headers_out, "Content-Length");
1266 
1267         if (!dobj->disk_info.header_only) {
1268 
1269             if (dobj->data.tempfd) {
1270                 rv = apr_file_close(dobj->data.tempfd);
1271                 if (rv != APR_SUCCESS) {
1272                     /* Buffered write failed, abandon attempt to write */
1273                     apr_pool_destroy(dobj->data.pool);
1274                     return rv;
1275                 }
1276             }
1277 
1278             if (r->connection->aborted || r->no_cache) {
1279                 ap_log_rerror(
1280                         APLOG_MARK, APLOG_INFO, 0, r, APLOGNO(00733) "Discarding body for URL %s "
1281                         "because connection has been aborted.", h->cache_obj->key);
1282                 /* Remove the intermediate cache file and return non-APR_SUCCESS */
1283                 apr_pool_destroy(dobj->data.pool);
1284                 return APR_EGENERAL;
1285             }
1286             if (dobj->file_size < dconf->minfs) {
1287                 ap_log_rerror(
1288                         APLOG_MARK, APLOG_DEBUG, 0, r, APLOGNO(00734) "URL %s failed the size check "
1289                         "(%" APR_OFF_T_FMT "<%" APR_OFF_T_FMT ")", h->cache_obj->key, dobj->file_size, dconf->minfs);
1290                 /* Remove the intermediate cache file and return non-APR_SUCCESS */
1291                 apr_pool_destroy(dobj->data.pool);
1292                 return APR_EGENERAL;
1293             }
1294             if (cl_header) {
1295                 apr_int64_t cl = apr_atoi64(cl_header);
1296                 if ((errno == 0) && (dobj->file_size != cl)) {
1297                     ap_log_rerror(
1298                             APLOG_MARK, APLOG_DEBUG, 0, r, APLOGNO(00735) "URL %s didn't receive complete response, not caching", h->cache_obj->key);
1299                     /* Remove the intermediate cache file and return non-APR_SUCCESS */
1300                     apr_pool_destroy(dobj->data.pool);
1301                     return APR_EGENERAL;
1302                 }
1303             }
1304 
1305         }
1306 
1307         /* All checks were fine, we're good to go when the commit comes */
1308     }
1309 
1310     return APR_SUCCESS;
1311 }
1312 
commit_entity(cache_handle_t * h,request_rec * r)1313 static apr_status_t commit_entity(cache_handle_t *h, request_rec *r)
1314 {
1315     disk_cache_conf *conf = ap_get_module_config(r->server->module_config,
1316                                                  &cache_disk_module);
1317     disk_cache_object_t *dobj = (disk_cache_object_t *) h->cache_obj->vobj;
1318     apr_status_t rv;
1319 
1320     /* write the headers to disk at the last possible moment */
1321     rv = write_headers(h, r);
1322 
1323     /* move header and data tempfiles to the final destination */
1324     if (APR_SUCCESS == rv) {
1325         rv = file_cache_el_final(conf, &dobj->hdrs, r);
1326     }
1327     if (APR_SUCCESS == rv) {
1328         rv = file_cache_el_final(conf, &dobj->vary, r);
1329     }
1330     if (APR_SUCCESS == rv) {
1331         if (!dobj->disk_info.header_only) {
1332             rv = file_cache_el_final(conf, &dobj->data, r);
1333         }
1334         else if (dobj->data.file){
1335             rv = apr_file_remove(dobj->data.file, dobj->data.pool);
1336         }
1337     }
1338 
1339     /* remove the cached items completely on any failure */
1340     if (APR_SUCCESS != rv) {
1341         remove_url(h, r);
1342         ap_log_rerror(APLOG_MARK, APLOG_DEBUG, 0, r, APLOGNO(00736)
1343                 "commit_entity: URL '%s' not cached due to earlier disk error.",
1344                 dobj->name);
1345     }
1346     else {
1347         ap_log_rerror(APLOG_MARK, APLOG_DEBUG, 0, r, APLOGNO(00737)
1348                 "commit_entity: Headers and body for URL %s cached.",
1349                 dobj->name);
1350     }
1351 
1352     apr_pool_destroy(dobj->data.pool);
1353 
1354     return APR_SUCCESS;
1355 }
1356 
invalidate_entity(cache_handle_t * h,request_rec * r)1357 static apr_status_t invalidate_entity(cache_handle_t *h, request_rec *r)
1358 {
1359     apr_status_t rv;
1360 
1361     rv = recall_headers(h, r);
1362     if (rv != APR_SUCCESS) {
1363         return rv;
1364     }
1365 
1366     /* mark the entity as invalidated */
1367     h->cache_obj->info.control.invalidated = 1;
1368 
1369     return commit_entity(h, r);
1370 }
1371 
/*
 * Allocate a per-directory configuration from pool p and seed the size and
 * read limits with the compiled-in defaults. The *_set flags stay zero
 * because the structure is zero-allocated.
 */
static void *create_dir_config(apr_pool_t *p, char *dummy)
{
    disk_cache_dir_conf *defaults = apr_pcalloc(p, sizeof(disk_cache_dir_conf));

    /* file size window */
    defaults->minfs = DEFAULT_MIN_FILE_SIZE;
    defaults->maxfs = DEFAULT_MAX_FILE_SIZE;

    /* per-pass read budget */
    defaults->readtime = DEFAULT_READTIME;
    defaults->readsize = DEFAULT_READSIZE;

    return defaults;
}
1383 
/*
 * Merge two per-directory configurations into a new one allocated from p.
 *
 * For each setting the value from addv wins when addv set it explicitly;
 * otherwise the value from basev is inherited. A *_set flag is true in the
 * result when it is true in either input.
 */
static void *merge_dir_config(apr_pool_t *p, void *basev, void *addv) {
    disk_cache_dir_conf *base = (disk_cache_dir_conf *) basev;
    disk_cache_dir_conf *add = (disk_cache_dir_conf *) addv;
    disk_cache_dir_conf *merged =
        (disk_cache_dir_conf *) apr_pcalloc(p, sizeof(disk_cache_dir_conf));

    /* maximum file size */
    merged->maxfs = add->maxfs_set ? add->maxfs : base->maxfs;
    merged->maxfs_set = add->maxfs_set || base->maxfs_set;

    /* minimum file size */
    merged->minfs = add->minfs_set ? add->minfs : base->minfs;
    merged->minfs_set = add->minfs_set || base->minfs_set;

    /* read size budget */
    merged->readsize = add->readsize_set ? add->readsize : base->readsize;
    merged->readsize_set = add->readsize_set || base->readsize_set;

    /* read time budget */
    merged->readtime = add->readtime_set ? add->readtime : base->readtime;
    merged->readtime_set = add->readtime_set || base->readtime_set;

    return merged;
}
1400 
/*
 * Allocate the per-server configuration from pool p. The directory layout
 * gets its compiled-in defaults; the cache root starts out unset (NULL,
 * length 0).
 */
static void *create_config(apr_pool_t *p, server_rec *s)
{
    disk_cache_conf *server_cfg = apr_pcalloc(p, sizeof(disk_cache_conf));

    /* no cache root until it is configured */
    server_cfg->cache_root = NULL;
    server_cfg->cache_root_len = 0;

    /* XXX: Set default values */
    server_cfg->dirlength = DEFAULT_DIRLENGTH;
    server_cfg->dirlevels = DEFAULT_DIRLEVELS;

    return server_cfg;
}
1414 
1415 /*
1416  * mod_cache_disk configuration directives handlers.
1417  */
1418 static const char
set_cache_root(cmd_parms * parms,void * in_struct_ptr,const char * arg)1419 *set_cache_root(cmd_parms *parms, void *in_struct_ptr, const char *arg)
1420 {
1421     disk_cache_conf *conf = ap_get_module_config(parms->server->module_config,
1422                                                  &cache_disk_module);
1423     conf->cache_root = arg;
1424     conf->cache_root_len = strlen(arg);
1425     /* TODO: canonicalize cache_root and strip off any trailing slashes */
1426 
1427     return NULL;
1428 }
1429 
1430 /*
1431  * Consider eliminating the next two directives in favor of
1432  * Ian's prime number hash...
1433  * key = hash_fn( r->uri)
1434  * filename = "/key % prime1 /key %prime2/key %prime3"
1435  */
1436 static const char
set_cache_dirlevels(cmd_parms * parms,void * in_struct_ptr,const char * arg)1437 *set_cache_dirlevels(cmd_parms *parms, void *in_struct_ptr, const char *arg)
1438 {
1439     disk_cache_conf *conf = ap_get_module_config(parms->server->module_config,
1440                                                  &cache_disk_module);
1441     int val = atoi(arg);
1442     if (val < 1)
1443         return "CacheDirLevels value must be an integer greater than 0";
1444     if (val * conf->dirlength > CACHEFILE_LEN)
1445         return "CacheDirLevels*CacheDirLength value must not be higher than 20";
1446     conf->dirlevels = val;
1447     return NULL;
1448 }
1449 static const char
set_cache_dirlength(cmd_parms * parms,void * in_struct_ptr,const char * arg)1450 *set_cache_dirlength(cmd_parms *parms, void *in_struct_ptr, const char *arg)
1451 {
1452     disk_cache_conf *conf = ap_get_module_config(parms->server->module_config,
1453                                                  &cache_disk_module);
1454     int val = atoi(arg);
1455     if (val < 1)
1456         return "CacheDirLength value must be an integer greater than 0";
1457     if (val * conf->dirlevels > CACHEFILE_LEN)
1458         return "CacheDirLevels*CacheDirLength value must not be higher than 20";
1459 
1460     conf->dirlength = val;
1461     return NULL;
1462 }
1463 
1464 static const char
set_cache_minfs(cmd_parms * parms,void * in_struct_ptr,const char * arg)1465 *set_cache_minfs(cmd_parms *parms, void *in_struct_ptr, const char *arg)
1466 {
1467     disk_cache_dir_conf *dconf = (disk_cache_dir_conf *)in_struct_ptr;
1468 
1469     if (apr_strtoff(&dconf->minfs, arg, NULL, 10) != APR_SUCCESS ||
1470             dconf->minfs < 0)
1471     {
1472         return "CacheMinFileSize argument must be a non-negative integer representing the min size of a file to cache in bytes.";
1473     }
1474     dconf->minfs_set = 1;
1475     return NULL;
1476 }
1477 
1478 static const char
set_cache_maxfs(cmd_parms * parms,void * in_struct_ptr,const char * arg)1479 *set_cache_maxfs(cmd_parms *parms, void *in_struct_ptr, const char *arg)
1480 {
1481     disk_cache_dir_conf *dconf = (disk_cache_dir_conf *)in_struct_ptr;
1482 
1483     if (apr_strtoff(&dconf->maxfs, arg, NULL, 10) != APR_SUCCESS ||
1484             dconf->maxfs < 0)
1485     {
1486         return "CacheMaxFileSize argument must be a non-negative integer representing the max size of a file to cache in bytes.";
1487     }
1488     dconf->maxfs_set = 1;
1489     return NULL;
1490 }
1491 
1492 static const char
set_cache_readsize(cmd_parms * parms,void * in_struct_ptr,const char * arg)1493 *set_cache_readsize(cmd_parms *parms, void *in_struct_ptr, const char *arg)
1494 {
1495     disk_cache_dir_conf *dconf = (disk_cache_dir_conf *)in_struct_ptr;
1496 
1497     if (apr_strtoff(&dconf->readsize, arg, NULL, 10) != APR_SUCCESS ||
1498             dconf->readsize < 0)
1499     {
1500         return "CacheReadSize argument must be a non-negative integer representing the max amount of data to cache in go.";
1501     }
1502     dconf->readsize_set = 1;
1503     return NULL;
1504 }
1505 
1506 static const char
set_cache_readtime(cmd_parms * parms,void * in_struct_ptr,const char * arg)1507 *set_cache_readtime(cmd_parms *parms, void *in_struct_ptr, const char *arg)
1508 {
1509     disk_cache_dir_conf *dconf = (disk_cache_dir_conf *)in_struct_ptr;
1510     apr_off_t milliseconds;
1511 
1512     if (apr_strtoff(&milliseconds, arg, NULL, 10) != APR_SUCCESS ||
1513             milliseconds < 0)
1514     {
1515         return "CacheReadTime argument must be a non-negative integer representing the max amount of time taken to cache in go.";
1516     }
1517     dconf->readtime = apr_time_from_msec(milliseconds);
1518     dconf->readtime_set = 1;
1519     return NULL;
1520 }
1521 
/*
 * Configuration directive table for this module.
 *
 * Each entry takes exactly one argument (AP_INIT_TAKE1) and names its
 * handler. CacheRoot, CacheDirLevels and CacheDirLength are registered with
 * RSRC_CONF only and their handlers write the per-server configuration; the
 * file size and read limit directives additionally carry ACCESS_CONF and
 * their handlers write the per-directory configuration passed to them.
 * The {NULL} entry terminates the table.
 */
static const command_rec disk_cache_cmds[] =
{
    AP_INIT_TAKE1("CacheRoot", set_cache_root, NULL, RSRC_CONF,
                 "The directory to store cache files"),
    AP_INIT_TAKE1("CacheDirLevels", set_cache_dirlevels, NULL, RSRC_CONF,
                  "The number of levels of subdirectories in the cache"),
    AP_INIT_TAKE1("CacheDirLength", set_cache_dirlength, NULL, RSRC_CONF,
                  "The number of characters in subdirectory names"),
    AP_INIT_TAKE1("CacheMinFileSize", set_cache_minfs, NULL, RSRC_CONF | ACCESS_CONF,
                  "The minimum file size to cache a document"),
    AP_INIT_TAKE1("CacheMaxFileSize", set_cache_maxfs, NULL, RSRC_CONF | ACCESS_CONF,
                  "The maximum file size to cache a document"),
    AP_INIT_TAKE1("CacheReadSize", set_cache_readsize, NULL, RSRC_CONF | ACCESS_CONF,
                  "The maximum quantity of data to attempt to read and cache in one go"),
    AP_INIT_TAKE1("CacheReadTime", set_cache_readtime, NULL, RSRC_CONF | ACCESS_CONF,
                  "The maximum time taken to attempt to read and cache in go"),
    {NULL}
};
1540 
/*
 * The disk cache provider: the set of entry points this module hands to
 * the cache framework when it registers itself as provider "disk".
 *
 * NOTE(review): the initialisers are positional, so their order presumably
 * has to match the member order of cache_provider (declared outside this
 * file) -- confirm before reordering or inserting entries.
 */
static const cache_provider cache_disk_provider =
{
    &remove_entity,
    &store_headers,
    &store_body,
    &recall_headers,
    &recall_body,
    &create_entity,
    &open_entity,
    &remove_url,
    &commit_entity,
    &invalidate_entity
};
1554 
/*
 * Hook registration callback: announce the disk cache provider to the
 * provider registry under the cache provider group, name "disk",
 * version "0".
 */
static void disk_cache_register_hook(apr_pool_t *p)
{
    /* register the disk provider table */
    ap_register_provider(p,
                         CACHE_PROVIDER_GROUP,
                         "disk",
                         "0",
                         &cache_disk_provider);
}
1561 
/*
 * Module record for cache_disk. It wires up the per-directory create/merge
 * callbacks, the per-server create callback (no per-server merge function
 * is supplied), the directive table and the hook registration function.
 */
AP_DECLARE_MODULE(cache_disk) = {
    STANDARD20_MODULE_STUFF,
    create_dir_config,          /* create per-directory config structure */
    merge_dir_config,           /* merge per-directory config structures */
    create_config,              /* create per-server config structure */
    NULL,                       /* merge per-server config structures */
    disk_cache_cmds,            /* command apr_table_t */
    disk_cache_register_hook    /* register hooks */
};
1571