1 
2 /*
3  * s3backer - FUSE-based single file backing store via Amazon S3
4  *
5  * Copyright 2008-2011 Archie L. Cobbs <archie@dellroad.org>
6  *
7  * This program is free software; you can redistribute it and/or
8  * modify it under the terms of the GNU General Public License
9  * as published by the Free Software Foundation; either version 2
10  * of the License, or (at your option) any later version.
11  *
12  * This program is distributed in the hope that it will be useful,
13  * but WITHOUT ANY WARRANTY; without even the implied warranty of
14  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15  * GNU General Public License for more details.
16  *
17  * You should have received a copy of the GNU General Public License
18  * along with this program; if not, write to the Free Software
19  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
20  * 02110-1301, USA.
21  *
22  * In addition, as a special exception, the copyright holders give
23  * permission to link the code of portions of this program with the
24  * OpenSSL library under certain conditions as described in each
25  * individual source file, and distribute linked combinations including
26  * the two.
27  *
28  * You must obey the GNU General Public License in all respects for all
29  * of the code used other than OpenSSL. If you modify file(s) with this
30  * exception, you may extend this exception to your version of the
31  * file(s), but you are not obligated to do so. If you do not wish to do
32  * so, delete this exception statement from your version. If you delete
33  * this exception statement from all source files in the program, then
34  * also delete it here.
35  */
36 
37 #include "s3backer.h"
38 #include "dcache.h"
39 
40 /*
41  * This file implements a simple on-disk storage area for cached blocks.
42  * The file contains a header, a directory, and a data area. Each directory
43  * entry indicates which block is stored in the corresponding "data slot"
44  * in the data area and that block's MD5 checksum. Note the MD5 checksum is
45  * the checksum of the stored data, which will differ from the actual block
46  * data's MD5 if the block was compressed, encrypted, etc. when stored.
47  *
48  * File format:
49  *
50  *  [ struct file_header ]
51  *  directory entry for data slot #0
52  *  directory entry for data slot #1
53  *  ...
54  *  directory entry for data slot #N-1
55  *  padding up to getpagesize()
56  *  data slot #0
57  *  data slot #1
58  *  ...
59  *  data slot #N-1
60  */
61 
62 /* Definitions */
63 #define DCACHE_SIGNATURE            0xe496f17b
64 #define ROUNDUP2(x, y)              (((x) + (y) - 1) & ~((y) - 1))
65 #define DIRECTORY_READ_CHUNK        1024
66 
67 #define HDR_SIZE(flags)             (((flags) & HDRFLG_NEW_FORMAT) == 0 ? sizeof(struct ofile_header) : sizeof(struct file_header))
68 #define DIR_ENTSIZE(flags)          (((flags) & HDRFLG_NEW_FORMAT) == 0 ? sizeof(struct odir_entry) : sizeof(struct dir_entry))
69 #define DIR_OFFSET(flags, dslot)    ((off_t)HDR_SIZE(flags) + (off_t)(dslot) * DIR_ENTSIZE(flags))
70 #define DATA_OFFSET(priv, dslot)    ((off_t)(priv)->data + (off_t)(dslot) * (priv)->block_size)
71 
72 /* Bits for file_header.flags */
73 #define HDRFLG_NEW_FORMAT           0x00000001
74 #define HDRFLG_MASK                 0x00000001
75 
76 /* Bits for dir_entry.flags */
77 #define ENTFLG_DIRTY                0x00000001
78 #define ENTFLG_MASK                 0x00000001
79 
80 /* File header (old format) */
81 struct ofile_header {
82     uint32_t                        signature;
83     uint32_t                        header_size;
84     uint32_t                        u_int_size;
85     uint32_t                        s3b_block_t_size;
86     uint32_t                        block_size;
87     uint32_t                        data_align;
88     uint32_t                        flags;
89     u_int                           max_blocks;
90 } __attribute__ ((packed));
91 
92 /* File header */
93 struct file_header {
94     uint32_t                        signature;
95     uint32_t                        header_size;
96     uint32_t                        u_int_size;
97     uint32_t                        s3b_block_t_size;
98     uint32_t                        block_size;
99     uint32_t                        data_align;
100     uint32_t                        flags;
101     u_int                           max_blocks;
102     int32_t                         mount_token;
103     uint32_t                        spare[7];           /* future expansion */
104 } __attribute__ ((packed));
105 
106 /* One directory entry (old format) */
107 struct odir_entry {
108     s3b_block_t                     block_num;
109     u_char                          md5[MD5_DIGEST_LENGTH];
110 } __attribute__ ((packed));
111 
112 /* One directory entry (new format) */
113 struct dir_entry {
114     s3b_block_t                     block_num;
115     u_char                          md5[MD5_DIGEST_LENGTH];
116     uint32_t                        flags;
117 } __attribute__ ((packed));
118 
119 /* Private structure */
120 struct s3b_dcache {
121     int                             fd;
122     log_func_t                      *log;
123     char                            *filename;
124     void                            *zero_block;
125     u_int                           block_size;
126     u_int                           max_blocks;
127     u_int                           num_alloc;
128     uint32_t                        flags;              /* copy of file_header.flags */
129     off_t                           data;
130     u_int                           free_list_len;
131     u_int                           free_list_alloc;
132     s3b_block_t                     *free_list;
133 };
134 
135 /* Internal functions */
136 static int s3b_dcache_write_entry(struct s3b_dcache *priv, u_int dslot, const struct dir_entry *entry);
137 #ifndef NDEBUG
138 static int s3b_dcache_entry_is_empty(struct s3b_dcache *priv, u_int dslot);
139 static int s3b_dcache_entry_write_ok(struct s3b_dcache *priv, u_int dslot, s3b_block_t block_num, u_int dirty);
140 static int s3b_dcache_read_entry(struct s3b_dcache *priv, u_int dslot, struct dir_entry *entryp);
141 #endif
142 static int s3b_dcache_create_file(struct s3b_dcache *priv, int *fdp, const char *filename, u_int max_blocks,
143             struct file_header *headerp);
144 static int s3b_dcache_resize_file(struct s3b_dcache *priv, const struct file_header *header);
145 static int s3b_dcache_init_free_list(struct s3b_dcache *priv, s3b_dcache_visit_t *visitor, void *arg, u_int visit_dirty);
146 static int s3b_dcache_push(struct s3b_dcache *priv, u_int dslot);
147 static void s3b_dcache_pop(struct s3b_dcache *priv, u_int *dslotp);
148 static int s3b_dcache_read(struct s3b_dcache *priv, off_t offset, void *data, size_t len);
149 static int s3b_dcache_write(struct s3b_dcache *priv, off_t offset, const void *data, size_t len);
150 static int s3b_dcache_write2(struct s3b_dcache *priv, int fd, const char *filename, off_t offset, const void *data, size_t len);
151 
152 /* Internal variables */
153 static const struct dir_entry zero_entry;
154 
155 /* Public functions */
156 
157 int
s3b_dcache_open(struct s3b_dcache ** dcachep,log_func_t * log,const char * filename,u_int block_size,u_int max_blocks,s3b_dcache_visit_t * visitor,void * arg,u_int visit_dirty)158 s3b_dcache_open(struct s3b_dcache **dcachep, log_func_t *log, const char *filename,
159   u_int block_size, u_int max_blocks, s3b_dcache_visit_t *visitor, void *arg, u_int visit_dirty)
160 {
161     struct ofile_header oheader;
162     struct file_header header;
163     struct s3b_dcache *priv;
164     struct stat sb;
165     int r;
166 
167     /* Sanity check */
168     if (max_blocks == 0)
169         return EINVAL;
170 
171     /* Initialize private structure */
172     if ((priv = malloc(sizeof(*priv))) == NULL)
173         return errno;
174     memset(priv, 0, sizeof(*priv));
175     priv->fd = -1;
176     priv->log = log;
177     priv->block_size = block_size;
178     priv->max_blocks = max_blocks;
179     if ((priv->filename = strdup(filename)) == NULL) {
180         r = errno;
181         goto fail1;
182     }
183     if ((priv->zero_block = calloc(1, block_size)) == NULL) {
184         r = errno;
185         goto fail2;
186     }
187 
188     /* Create cache file if it doesn't already exist */
189     if (stat(priv->filename, &sb) == -1 && errno == ENOENT) {
190         (*priv->log)(LOG_NOTICE, "creating new cache file `%s' with capacity %u blocks", priv->filename, priv->max_blocks);
191         if ((r = s3b_dcache_create_file(priv, &priv->fd, priv->filename, priv->max_blocks, NULL)) != 0)
192             goto fail3;
193         (void)close(priv->fd);
194         priv->fd = -1;
195     }
196 
197 retry:
198     /* Open cache file */
199     assert(priv->fd == -1);
200     if ((priv->fd = open(priv->filename, O_RDWR, 0)) == -1) {
201         r = errno;
202         (*priv->log)(LOG_ERR, "can't open cache file `%s': %s", priv->filename, strerror(r));
203         goto fail3;
204     }
205 
206     /* Get file info */
207     if (fstat(priv->fd, &sb) == -1) {
208         r = errno;
209         goto fail4;
210     }
211 
212     /* Read in header with backward compatible support for older header format */
213     if (sb.st_size < sizeof(oheader)) {
214         (*priv->log)(LOG_ERR, "invalid cache file `%s': file is truncated (size %ju < %u)",
215           priv->filename, (uintmax_t)sb.st_size, (u_int)sizeof(oheader));
216         r = EINVAL;
217         goto fail4;
218     }
219     if ((r = s3b_dcache_read(priv, (off_t)0, &oheader, sizeof(oheader))) != 0) {
220         (*priv->log)(LOG_ERR, "can't read cache file `%s' header: %s", priv->filename, strerror(r));
221         goto fail4;
222     }
223     switch (oheader.header_size) {
224     case sizeof(oheader):                               /* old format */
225         memset(&header, 0, sizeof(header));
226         memcpy(&header, &oheader, sizeof(oheader));
227         break;
228     case sizeof(header):                                /* new format */
229         if ((r = s3b_dcache_read(priv, (off_t)0, &header, sizeof(header))) != 0) {
230             (*priv->log)(LOG_ERR, "can't read cache file `%s' header: %s", priv->filename, strerror(r));
231             goto fail4;
232         }
233         break;
234     default:
235         (*priv->log)(LOG_ERR, "invalid cache file `%s': %s %d", priv->filename, "invalid header size", (int)oheader.header_size);
236         r = EINVAL;
237         goto fail4;
238     }
239 
240     /* Verify header - all but number of blocks */
241     r = EINVAL;
242     if (header.signature != DCACHE_SIGNATURE) {
243         (*priv->log)(LOG_ERR, "invalid cache file `%s': wrong signature %08x != %08x",
244           priv->filename, header.signature, DCACHE_SIGNATURE);
245         goto fail4;
246     }
247     if (header.header_size != HDR_SIZE(header.flags)) {
248         (*priv->log)(LOG_ERR, "invalid cache file `%s': %s %d != %d",
249           priv->filename, "invalid header size", (int)header.header_size, (int)HDR_SIZE(header.flags));
250         goto fail4;
251     }
252     if (header.u_int_size != sizeof(u_int)) {
253         (*priv->log)(LOG_ERR, "invalid cache file `%s': created with sizeof(u_int) %u != %u",
254           priv->filename, header.u_int_size, (u_int)sizeof(u_int));
255         goto fail4;
256     }
257     if (header.s3b_block_t_size != sizeof(s3b_block_t)) {
258         (*priv->log)(LOG_ERR, "invalid cache file `%s': created with sizeof(s3b_block_t) %u != %u",
259           priv->filename, header.s3b_block_t_size, (u_int)sizeof(s3b_block_t));
260         goto fail4;
261     }
262     if (header.block_size != priv->block_size) {
263         (*priv->log)(LOG_ERR, "invalid cache file `%s': created with block size %u != %u",
264           priv->filename, header.block_size, priv->block_size);
265         goto fail4;
266     }
267     if (header.data_align != getpagesize()) {
268         (*priv->log)(LOG_ERR, "invalid cache file `%s': created with alignment %u != %u",
269           priv->filename, header.data_align, getpagesize());
270         goto fail4;
271     }
272     if ((header.flags & ~HDRFLG_MASK) != 0) {
273         (*priv->log)(LOG_ERR, "invalid cache file `%s': %s", priv->filename, "unrecognized flags present");
274         goto fail4;
275     }
276     priv->flags = header.flags;
277 
278     /* Check number of blocks, shrinking or expanding if necessary */
279     if (header.max_blocks != priv->max_blocks) {
280         (*priv->log)(LOG_NOTICE, "cache file `%s' was created with capacity %u != %u blocks, automatically %s",
281           priv->filename, header.max_blocks, priv->max_blocks, header.max_blocks < priv->max_blocks ?
282            "expanding" : "shrinking");
283         if ((r = s3b_dcache_resize_file(priv, &header)) != 0)
284             goto fail4;
285         (*priv->log)(LOG_INFO, "successfully resized cache file `%s' from %u to %u blocks",
286           priv->filename, header.max_blocks, priv->max_blocks);
287         goto retry;
288     }
289 
290     /* Verify file's directory is not truncated */
291     if (sb.st_size < DIR_OFFSET(priv->flags, priv->max_blocks)) {
292         (*priv->log)(LOG_ERR, "invalid cache file `%s': file is truncated (size %ju < %ju)",
293           priv->filename, (uintmax_t)sb.st_size, (uintmax_t)DIR_OFFSET(priv->flags, priv->max_blocks));
294         goto fail4;
295     }
296 
297     /* Compute offset of first data block */
298     priv->data = ROUNDUP2(DIR_OFFSET(priv->flags, priv->max_blocks), header.data_align);
299 
300     /* Read the directory to build the free list and visit allocated blocks */
301     if (visitor != NULL && (r = s3b_dcache_init_free_list(priv, visitor, arg, visit_dirty)) != 0)
302         goto fail4;
303 
304     /* Done */
305     *dcachep = priv;
306     return 0;
307 
308 fail4:
309     close(priv->fd);
310 fail3:
311     free(priv->zero_block);
312 fail2:
313     free(priv->filename);
314 fail1:
315     free(priv->free_list);
316     free(priv);
317     return r;
318 }
319 
320 int
s3b_dcache_has_mount_token(struct s3b_dcache * priv)321 s3b_dcache_has_mount_token(struct s3b_dcache *priv)
322 {
323     return (priv->flags & HDRFLG_NEW_FORMAT) != 0;
324 }
325 
326 int
s3b_dcache_set_mount_token(struct s3b_dcache * priv,int32_t * old_valuep,int32_t new_value)327 s3b_dcache_set_mount_token(struct s3b_dcache *priv, int32_t *old_valuep, int32_t new_value)
328 {
329     int r;
330 
331     /* Read old value */
332     if (old_valuep != NULL) {
333         if ((r = s3b_dcache_read(priv, offsetof(struct file_header, mount_token), old_valuep, sizeof(*old_valuep))) != 0)
334             return r;
335     }
336 
337     /* Write new value */
338     if (new_value >= 0) {
339 
340         /* Update file */
341         if ((r = s3b_dcache_write(priv, offsetof(struct file_header, mount_token), &new_value, sizeof(new_value))) != 0)
342             return r;
343 
344         /* Sync to disk */
345         s3b_dcache_fsync(priv);
346     }
347 
348     /* Done */
349     return 0;
350 }
351 
352 void
s3b_dcache_close(struct s3b_dcache * priv)353 s3b_dcache_close(struct s3b_dcache *priv)
354 {
355     close(priv->fd);
356     free(priv->zero_block);
357     free(priv->filename);
358     free(priv->free_list);
359     free(priv);
360 }
361 
362 u_int
s3b_dcache_size(struct s3b_dcache * priv)363 s3b_dcache_size(struct s3b_dcache *priv)
364 {
365     return priv->num_alloc;
366 }
367 
368 /*
369  * Allocate a dslot for a block's data. We don't record this block in the directory yet;
370  * that is done by s3b_dcache_record_block().
371  */
372 int
s3b_dcache_alloc_block(struct s3b_dcache * priv,u_int * dslotp)373 s3b_dcache_alloc_block(struct s3b_dcache *priv, u_int *dslotp)
374 {
375     /* Any free dslots? */
376     if (priv->free_list_len == 0)
377         return ENOMEM;
378 
379     /* Pop off the next free dslot */
380     s3b_dcache_pop(priv, dslotp);
381 
382     /* Directory entry should be empty */
383     assert(s3b_dcache_entry_is_empty(priv, *dslotp));
384 
385     /* Done */
386     priv->num_alloc++;
387     return 0;
388 }
389 
390 /*
391  * Record a block's dslot in the directory. After this function is called, the block will
392  * be visible in the directory and picked up after a restart.
393  *
394  * If md5 != NULL, the block is CLEAN; if md5 == NULL, the block is DIRTY.
395  *
396  * This should be called AFTER the data for the block has already been written.
397  *
398  * There MUST NOT be a directory entry for the block.
399  */
400 int
s3b_dcache_record_block(struct s3b_dcache * priv,u_int dslot,s3b_block_t block_num,const u_char * md5)401 s3b_dcache_record_block(struct s3b_dcache *priv, u_int dslot, s3b_block_t block_num, const u_char *md5)
402 {
403     const u_int dirty = md5 == NULL;
404     struct dir_entry entry;
405     int r;
406 
407     /* Sanity check */
408     assert(dslot < priv->max_blocks);
409 
410     /* Directory entry should be writable */
411     assert(s3b_dcache_entry_write_ok(priv, dslot, block_num, dirty));
412 
413     /* If cache file is older format, it doesn't store dirty blocks, so just erase it instead (prior behavior) */
414     if (dirty && (priv->flags & HDRFLG_NEW_FORMAT) == 0) {
415         s3b_dcache_erase_block(priv, dslot);
416         return 0;
417     }
418 
419     /* Make sure any new data is written to disk before updating the directory */
420     if ((r = s3b_dcache_fsync(priv)) != 0)
421         return r;
422 
423     /* Update directory */
424     memset(&entry, 0, sizeof(entry));
425     entry.block_num = block_num;
426     entry.flags = dirty ? ENTFLG_DIRTY : 0;
427     if (!dirty)
428         memcpy(&entry.md5, md5, MD5_DIGEST_LENGTH);
429     if ((r = s3b_dcache_write_entry(priv, dslot, &entry)) != 0)
430         return r;
431 
432     /* Done */
433     return 0;
434 }
435 
436 /*
437  * Erase the directory entry for a dslot. After this function is called, the block will
438  * no longer be visible in the directory after a restart.
439  *
440  * This should be called BEFORE any new data for the block is written.
441  *
442  * There MUST be a directory entry for the block.
443  */
444 int
s3b_dcache_erase_block(struct s3b_dcache * priv,u_int dslot)445 s3b_dcache_erase_block(struct s3b_dcache *priv, u_int dslot)
446 {
447     int r;
448 
449     /* Sanity check */
450     assert(dslot < priv->max_blocks);
451 
452     /* Update directory */
453     if ((r = s3b_dcache_write_entry(priv, dslot, &zero_entry)) != 0)
454         return r;
455 
456     /* Make sure directory entry is written to disk before any new data is written */
457     if ((r = s3b_dcache_fsync(priv)) != 0)
458         return r;
459 
460     /* Done */
461     return 0;
462 }
463 
464 /*
465  * Free a no-longer used dslot.
466  *
467  * There MUST NOT be a directory entry for the block.
468  */
469 int
s3b_dcache_free_block(struct s3b_dcache * priv,u_int dslot)470 s3b_dcache_free_block(struct s3b_dcache *priv, u_int dslot)
471 {
472     int r;
473 
474     /* Sanity check */
475     assert(dslot < priv->max_blocks);
476 
477     /* Directory entry should be empty */
478     assert(s3b_dcache_entry_is_empty(priv, dslot));
479 
480     /* Push dslot onto free list */
481     if ((r = s3b_dcache_push(priv, dslot)) != 0)
482         return r;
483 
484     /* Done */
485     priv->num_alloc--;
486     return 0;
487 }
488 
489 /*
490  * Read data from one dslot.
491  */
492 int
s3b_dcache_read_block(struct s3b_dcache * priv,u_int dslot,void * dest,u_int off,u_int len)493 s3b_dcache_read_block(struct s3b_dcache *priv, u_int dslot, void *dest, u_int off, u_int len)
494 {
495     /* Sanity check */
496     assert(dslot < priv->max_blocks);
497     assert(off <= priv->block_size);
498     assert(len <= priv->block_size);
499     assert(off + len <= priv->block_size);
500 
501     /* Read data */
502     return s3b_dcache_read(priv, DATA_OFFSET(priv, dslot) + off, dest, len);
503 }
504 
505 /*
506  * Write data into one dslot.
507  */
508 int
s3b_dcache_write_block(struct s3b_dcache * priv,u_int dslot,const void * src,u_int off,u_int len)509 s3b_dcache_write_block(struct s3b_dcache *priv, u_int dslot, const void *src, u_int off, u_int len)
510 {
511     /* Sanity check */
512     assert(dslot < priv->max_blocks);
513     assert(off <= priv->block_size);
514     assert(len <= priv->block_size);
515     assert(off + len <= priv->block_size);
516 
517     /* Write data */
518     return s3b_dcache_write(priv, DATA_OFFSET(priv, dslot) + off, src != NULL ? src : priv->zero_block, len);
519 }
520 
521 /*
522  * Synchronize outstanding changes to persistent storage.
523  */
524 int
s3b_dcache_fsync(struct s3b_dcache * priv)525 s3b_dcache_fsync(struct s3b_dcache *priv)
526 {
527     int r;
528 
529 #if HAVE_DECL_FDATASYNC
530     r = fdatasync(priv->fd);
531 #else
532     r = fsync(priv->fd);
533 #endif
534     if (r == -1) {
535         r = errno;
536         (*priv->log)(LOG_ERR, "error fsync'ing cache file `%s': %s", priv->filename, strerror(r));
537     }
538     return 0;
539 }
540 
541 /* Internal functions */
542 
543 #ifndef NDEBUG
544 static int
s3b_dcache_entry_is_empty(struct s3b_dcache * priv,u_int dslot)545 s3b_dcache_entry_is_empty(struct s3b_dcache *priv, u_int dslot)
546 {
547     struct dir_entry entry;
548 
549     (void)s3b_dcache_read_entry(priv, dslot, &entry);
550     return memcmp(&entry, &zero_entry, sizeof(entry)) == 0;
551 }
552 
553 static int
s3b_dcache_entry_write_ok(struct s3b_dcache * priv,u_int dslot,s3b_block_t block_num,u_int dirty)554 s3b_dcache_entry_write_ok(struct s3b_dcache *priv, u_int dslot, s3b_block_t block_num, u_int dirty)
555 {
556     struct dir_entry entry;
557     u_int old_dirty;
558 
559     if (s3b_dcache_entry_is_empty(priv, dslot))
560         return 1;
561     (void)s3b_dcache_read_entry(priv, dslot, &entry);
562     old_dirty = (entry.flags & ENTFLG_DIRTY) != 0;
563     return entry.block_num == block_num && old_dirty != dirty;
564 }
565 
566 static int
s3b_dcache_read_entry(struct s3b_dcache * priv,u_int dslot,struct dir_entry * entry)567 s3b_dcache_read_entry(struct s3b_dcache *priv, u_int dslot, struct dir_entry *entry)
568 {
569     assert(dslot < priv->max_blocks);
570     memset(entry, 0, sizeof(*entry));
571     return s3b_dcache_read(priv, DIR_OFFSET(priv->flags, dslot), entry, DIR_ENTSIZE(priv->flags));
572 }
573 #endif
574 
575 /*
576  * Write a directory entry.
577  */
578 static int
s3b_dcache_write_entry(struct s3b_dcache * priv,u_int dslot,const struct dir_entry * entry)579 s3b_dcache_write_entry(struct s3b_dcache *priv, u_int dslot, const struct dir_entry *entry)
580 {
581     assert(dslot < priv->max_blocks);
582     assert((entry->flags & ~((priv->flags & HDRFLG_NEW_FORMAT) != 0 ? ENTFLG_MASK : 0)) == 0);
583     return s3b_dcache_write(priv, DIR_OFFSET(priv->flags, dslot), entry, DIR_ENTSIZE(priv->flags));
584 }
585 
586 /*
587  * Resize (and compress) an existing cache file. Upon successful return, priv->fd is closed
588  * and the cache file must be re-opened.
589  */
590 static int
s3b_dcache_resize_file(struct s3b_dcache * priv,const struct file_header * old_header)591 s3b_dcache_resize_file(struct s3b_dcache *priv, const struct file_header *old_header)
592 {
593     const u_int old_max_blocks = old_header->max_blocks;
594     const u_int new_max_blocks = priv->max_blocks;
595     struct file_header new_header;
596     off_t old_data_base;
597     off_t new_data_base;
598     u_int base_old_dslot;
599     u_int new_dslot = 0;
600     u_int num_entries;
601     u_char *block_buf = NULL;
602     char *tempfile = NULL;
603     int new_fd = -1;
604     int r;
605 
606     /* Create new temporary cache file */
607     if (asprintf(&tempfile, "%s.new", priv->filename) == -1) {
608         r = errno;
609         tempfile = NULL;
610         (*priv->log)(LOG_ERR, "can't allocate string: %s", strerror(r));
611         goto fail;
612     }
613     if ((r = s3b_dcache_create_file(priv, &new_fd, tempfile, new_max_blocks, &new_header)) != 0)
614         goto fail;
615 
616     /* Allocate block data buffer */
617     if ((block_buf = malloc(priv->block_size)) == NULL) {
618         r = errno;
619         (*priv->log)(LOG_ERR, "can't allocate buffer: %s", strerror(r));
620         goto fail;
621     }
622 
623     /* Copy non-empty cache entries from old file to new file */
624     old_data_base = ROUNDUP2(DIR_OFFSET(old_header->flags, old_max_blocks), old_header->data_align);
625     new_data_base = ROUNDUP2(DIR_OFFSET(new_header.flags, new_max_blocks), new_header.data_align);
626     for (base_old_dslot = 0; base_old_dslot < old_max_blocks; base_old_dslot += num_entries) {
627         char buffer[DIRECTORY_READ_CHUNK * DIR_ENTSIZE(old_header->flags)];
628         int i;
629 
630         /* Read in the next chunk of old directory entries */
631         num_entries = old_max_blocks - base_old_dslot;
632         if (num_entries > DIRECTORY_READ_CHUNK)
633             num_entries = DIRECTORY_READ_CHUNK;
634         if ((r = s3b_dcache_read(priv, DIR_OFFSET(old_header->flags, base_old_dslot),
635           buffer, num_entries * DIR_ENTSIZE(old_header->flags))) != 0) {
636             (*priv->log)(LOG_ERR, "error reading cache file `%s' directory: %s", priv->filename, strerror(r));
637             goto fail;
638         }
639 
640         /* For each dslot: if not free, copy it to the next slot in the new file */
641         for (i = 0; i < num_entries; i++) {
642             const u_int old_dslot = base_old_dslot + i;
643             struct dir_entry entry;
644             off_t old_data;
645             off_t new_data;
646 
647             /* Read old entry */
648             memset(&entry, 0, sizeof(entry));
649             memcpy(&entry, buffer + i * DIR_ENTSIZE(old_header->flags), DIR_ENTSIZE(old_header->flags));
650 
651             /* Is this entry non-empty? */
652             if (memcmp(&entry, &zero_entry, sizeof(entry)) == 0)
653                 continue;
654 
655             /* Any more space? */
656             if (new_dslot == new_max_blocks) {
657                 (*priv->log)(LOG_INFO, "cache file `%s' contains more than %u blocks; some will be discarded",
658                   priv->filename, new_max_blocks);
659                 goto done;
660             }
661 
662             /* Copy the directory entry */
663             assert(DIR_ENTSIZE(new_header.flags) == sizeof(entry));
664             if ((r = s3b_dcache_write2(priv, new_fd, tempfile,
665               DIR_OFFSET(new_header.flags, new_dslot), &entry, sizeof(entry))) != 0)
666                 goto fail;
667 
668             /* Copy the data block */
669             old_data = old_data_base + (off_t)old_dslot * priv->block_size;
670             new_data = new_data_base + (off_t)new_dslot * priv->block_size;
671             if ((r = s3b_dcache_read(priv, old_data, block_buf, priv->block_size)) != 0)
672                 goto fail;
673             if ((r = s3b_dcache_write2(priv, new_fd, tempfile, new_data, block_buf, priv->block_size)) != 0)
674                 goto fail;
675 
676             /* Advance to the next slot */
677             new_dslot++;
678         }
679     }
680 
681 done:
682     /* Close the new file */
683     if (close(new_fd) == -1) {
684         (*priv->log)(LOG_ERR, "error closing temporary cache file `%s': %s", tempfile, strerror(r));
685         goto fail;
686     }
687     new_fd = -1;
688 
689     /* Replace old cache file with new cache file */
690     if (rename(tempfile, priv->filename) == -1) {
691         r = errno;
692         (*priv->log)(LOG_ERR, "error renaming `%s' to `%s': %s", tempfile, priv->filename, strerror(r));
693         goto fail;
694     }
695     free(tempfile);
696     tempfile = NULL;
697 
698     /* Update flags */
699     priv->flags = new_header.flags;
700 
701     /* Close old file to release it and we're done */
702     close(priv->fd);
703     priv->fd = -1;
704     r = 0;
705 
706 fail:
707     /* Clean up */
708     if (block_buf != NULL)
709         free(block_buf);
710     if (new_fd != -1)
711         (void)close(new_fd);
712     if (tempfile != NULL) {
713         (void)unlink(tempfile);
714         free(tempfile);
715     }
716     return r;
717 }
718 
719 static int
s3b_dcache_create_file(struct s3b_dcache * priv,int * fdp,const char * filename,u_int max_blocks,struct file_header * headerp)720 s3b_dcache_create_file(struct s3b_dcache *priv, int *fdp, const char *filename, u_int max_blocks, struct file_header *headerp)
721 {
722     struct file_header header;
723     int r;
724 
725     /* Initialize header */
726     memset(&header, 0, sizeof(header));
727     header.signature = DCACHE_SIGNATURE;
728     header.flags = HDRFLG_NEW_FORMAT;
729     header.header_size = HDR_SIZE(header.flags);
730     header.u_int_size = sizeof(u_int);
731     header.s3b_block_t_size = sizeof(s3b_block_t);
732     header.block_size = priv->block_size;
733     header.max_blocks = priv->max_blocks;
734     header.data_align = getpagesize();
735 
736     /* Create file */
737     if ((*fdp = open(filename, O_RDWR|O_CREAT|O_EXCL, 0644)) == -1) {
738         r = errno;
739         (*priv->log)(LOG_ERR, "can't create file `%s': %s", filename, strerror(r));
740         return r;
741     }
742 
743     /* Write header */
744     if ((r = s3b_dcache_write2(priv, *fdp, filename, (off_t)0, &header, sizeof(header))) != 0) {
745         (*priv->log)(LOG_ERR, "error initializing cache file `%s': %s", filename, strerror(r));
746         goto fail;
747     }
748 
749     /* Extend the file to the required length; the directory will be filled with zeroes */
750     if (ftruncate(*fdp, sizeof(header)) == -1 || ftruncate(*fdp, DIR_OFFSET(header.flags, max_blocks)) == -1) {
751         r = errno;
752         (*priv->log)(LOG_ERR, "error initializing cache file `%s': %s", filename, strerror(r));
753         goto fail;
754     }
755 
756     /* Done */
757     if (headerp != NULL)
758         *headerp = header;
759     return 0;
760 
761 fail:
762     (void)unlink(filename);
763     (void)close(*fdp);
764     *fdp = -1;
765     return r;
766 }
767 
768 static int
s3b_dcache_init_free_list(struct s3b_dcache * priv,s3b_dcache_visit_t * visitor,void * arg,u_int visit_dirty)769 s3b_dcache_init_free_list(struct s3b_dcache *priv, s3b_dcache_visit_t *visitor, void *arg, u_int visit_dirty)
770 {
771     off_t required_size;
772     struct stat sb;
773     u_int num_entries;
774     u_int num_dslots_used;
775     u_int base_dslot;
776     u_int i;
777     int r;
778 
779     /* Logging */
780     (*priv->log)(LOG_INFO, "reading meta-data from cache file `%s'", priv->filename);
781     assert(visitor != NULL);
782 
783     /* Inspect all directory entries */
784     for (num_dslots_used = base_dslot = 0; base_dslot < priv->max_blocks; base_dslot += num_entries) {
785         char buffer[DIRECTORY_READ_CHUNK * DIR_ENTSIZE(priv->flags)];
786 
787         /* Read in the next chunk of directory entries */
788         num_entries = priv->max_blocks - base_dslot;
789         if (num_entries > DIRECTORY_READ_CHUNK)
790             num_entries = DIRECTORY_READ_CHUNK;
791         if ((r = s3b_dcache_read(priv, DIR_OFFSET(priv->flags, base_dslot), buffer, num_entries * DIR_ENTSIZE(priv->flags))) != 0) {
792             (*priv->log)(LOG_ERR, "error reading cache file `%s' directory: %s", priv->filename, strerror(r));
793             return r;
794         }
795 
796         /* For each dslot: if free, add to the free list, else let visitor decide what to do */
797         for (i = 0; i < num_entries; i++) {
798             const u_int dslot = base_dslot + i;
799             struct dir_entry entry;
800 
801             memset(&entry, 0, sizeof(entry));
802             memcpy(&entry, buffer + i * DIR_ENTSIZE(priv->flags), DIR_ENTSIZE(priv->flags));
803             if (memcmp(&entry, &zero_entry, sizeof(entry)) == 0) {
804                 if ((r = s3b_dcache_push(priv, dslot)) != 0)
805                     return r;
806             } else if ((entry.flags & ENTFLG_DIRTY) != 0 && !visit_dirty) {     /* visitor doesn't want dirties, so just nuke it */
807                 if ((r = s3b_dcache_write_entry(priv, dslot, &zero_entry)) != 0)
808                     return r;
809                 if ((r = s3b_dcache_push(priv, dslot)) != 0)
810                     return r;
811             } else {
812                 priv->num_alloc++;
813                 if (dslot + 1 > num_dslots_used)                    /* keep track of the number of dslots in use */
814                     num_dslots_used = dslot + 1;
815                 if ((r = (*visitor)(arg, dslot, entry.block_num, (entry.flags & ENTFLG_DIRTY) == 0 ? entry.md5 : NULL)) != 0)
816                     return r;
817             }
818         }
819     }
820 
821     /* Reverse the free list so we allocate lower numbered slots first */
822     for (i = 0; i < priv->free_list_len / 2; i++) {
823         const s3b_block_t temp = priv->free_list[i];
824 
825         priv->free_list[i] = priv->free_list[priv->free_list_len - i - 1];
826         priv->free_list[priv->free_list_len - i - 1] = temp;
827     }
828 
829     /* Verify the cache file is not truncated */
830     required_size = DIR_OFFSET(priv->flags, priv->max_blocks);
831     if (num_dslots_used > 0) {
832         if (required_size < DATA_OFFSET(priv, num_dslots_used))
833             required_size = DATA_OFFSET(priv, num_dslots_used);
834     }
835     if (fstat(priv->fd, &sb) == -1) {
836         r = errno;
837         (*priv->log)(LOG_ERR, "error reading cache file `%s' length: %s", priv->filename, strerror(r));
838         return r;
839     }
840     if (sb.st_size < required_size) {
841         (*priv->log)(LOG_ERR, "cache file `%s' is truncated (has size %ju < %ju bytes)",
842           priv->filename, (uintmax_t)sb.st_size, (uintmax_t)required_size);
843         return EINVAL;
844     }
845 
846     /* Discard any unreferenced data beyond the last entry */
847     if (sb.st_size > required_size && ftruncate(priv->fd, required_size) == -1) {
848         r = errno;
849         (*priv->log)(LOG_ERR, "error trimming cache file `%s' to %ju bytes: %s",
850           priv->filename, (uintmax_t)required_size, strerror(r));
851         return EINVAL;
852     }
853 
854     /* Report results */
855     (*priv->log)(LOG_INFO, "loaded cache file `%s' with %u free and %u used blocks (max index %u)",
856       priv->filename, priv->free_list_len, priv->max_blocks - priv->free_list_len, num_dslots_used);
857 
858     /* Done */
859     return 0;
860 }
861 
862 /*
863  * Push a dslot onto the free list.
864  */
865 static int
s3b_dcache_push(struct s3b_dcache * priv,u_int dslot)866 s3b_dcache_push(struct s3b_dcache *priv, u_int dslot)
867 {
868     /* Sanity check */
869     assert(dslot < priv->max_blocks);
870     assert(priv->free_list_len < priv->max_blocks);
871 
872     /* Grow the free list array if necessary */
873     if (priv->free_list_alloc == priv->free_list_len) {
874         s3b_block_t *new_free_list;
875         s3b_block_t new_free_list_alloc;
876         int r;
877 
878         new_free_list_alloc = priv->free_list_alloc == 0 ? 1024 : 2 * priv->free_list_alloc;
879         if ((new_free_list = realloc(priv->free_list, new_free_list_alloc * sizeof(*new_free_list))) == NULL) {
880             r = errno;
881             (*priv->log)(LOG_ERR, "realloc: %s", strerror(r));
882             return r;
883         }
884         priv->free_list = new_free_list;
885         priv->free_list_alloc = new_free_list_alloc;
886     }
887 
888     /* Add new dslot */
889     assert(priv->free_list_len < priv->free_list_alloc);
890     priv->free_list[priv->free_list_len++] = dslot;
891     return 0;
892 }
893 
894 /*
895  * Pop the next dslot off of the free list. There must be one.
896  */
897 static void
s3b_dcache_pop(struct s3b_dcache * priv,u_int * dslotp)898 s3b_dcache_pop(struct s3b_dcache *priv, u_int *dslotp)
899 {
900     /* Sanity check */
901     assert(priv->free_list_len > 0);
902 
903     /* Pop off dslot at the head of the list */
904     *dslotp = priv->free_list[--priv->free_list_len];
905     assert(*dslotp < priv->max_blocks);
906 
907     /* See if we can give back some memory */
908     if (priv->free_list_alloc > 1024 && priv->free_list_len <= priv->free_list_alloc / 4) {
909         s3b_block_t *new_free_list;
910         s3b_block_t new_free_list_alloc;
911 
912         new_free_list_alloc = priv->free_list_alloc / 4;
913         if ((new_free_list = realloc(priv->free_list, new_free_list_alloc * sizeof(*new_free_list))) == NULL)
914             (*priv->log)(LOG_ERR, "can't shrink dcache free list: realloc: %s (ignored)", strerror(errno));
915         else {
916             priv->free_list = new_free_list;
917             priv->free_list_alloc = new_free_list_alloc;
918         }
919     }
920     assert(priv->free_list_len <= priv->free_list_alloc);
921 }
922 
923 static int
s3b_dcache_read(struct s3b_dcache * priv,off_t offset,void * data,size_t len)924 s3b_dcache_read(struct s3b_dcache *priv, off_t offset, void *data, size_t len)
925 {
926     size_t sofar;
927     ssize_t r;
928 
929     for (sofar = 0; sofar < len; sofar += r) {
930         const off_t posn = offset + sofar;
931 
932         if ((r = pread(priv->fd, (char *)data + sofar, len - sofar, offset + sofar)) == -1) {
933             (*priv->log)(LOG_ERR, "error reading cache file `%s' at offset %ju: %s",
934               priv->filename, (uintmax_t)posn, strerror(r));
935             return r;
936         }
937         if (r == 0) {           /* truncated input */
938             (*priv->log)(LOG_ERR, "error reading cache file `%s' at offset %ju: file is truncated",
939               priv->filename, (uintmax_t)posn);
940             return EINVAL;
941         }
942     }
943     return 0;
944 }
945 
946 static int
s3b_dcache_write(struct s3b_dcache * priv,off_t offset,const void * data,size_t len)947 s3b_dcache_write(struct s3b_dcache *priv, off_t offset, const void *data, size_t len)
948 {
949     return s3b_dcache_write2(priv, priv->fd, priv->filename, offset, data, len);
950 }
951 
952 static int
s3b_dcache_write2(struct s3b_dcache * priv,int fd,const char * filename,off_t offset,const void * data,size_t len)953 s3b_dcache_write2(struct s3b_dcache *priv, int fd, const char *filename, off_t offset, const void *data, size_t len)
954 {
955     size_t sofar;
956     ssize_t r;
957 
958     for (sofar = 0; sofar < len; sofar += r) {
959         const off_t posn = offset + sofar;
960 
961         if ((r = pwrite(fd, (const char *)data + sofar, len - sofar, offset + sofar)) == -1) {
962             (*priv->log)(LOG_ERR, "error writing cache file `%s' at offset %ju: %s",
963               filename, (uintmax_t)posn, strerror(r));
964             return r;
965         }
966     }
967     return 0;
968 }
969 
970