1
2 /*
3 * s3backer - FUSE-based single file backing store via Amazon S3
4 *
5 * Copyright 2008-2011 Archie L. Cobbs <archie@dellroad.org>
6 *
7 * This program is free software; you can redistribute it and/or
8 * modify it under the terms of the GNU General Public License
9 * as published by the Free Software Foundation; either version 2
10 * of the License, or (at your option) any later version.
11 *
12 * This program is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 * GNU General Public License for more details.
16 *
17 * You should have received a copy of the GNU General Public License
18 * along with this program; if not, write to the Free Software
19 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
20 * 02110-1301, USA.
21 *
22 * In addition, as a special exception, the copyright holders give
23 * permission to link the code of portions of this program with the
24 * OpenSSL library under certain conditions as described in each
25 * individual source file, and distribute linked combinations including
26 * the two.
27 *
28 * You must obey the GNU General Public License in all respects for all
29 * of the code used other than OpenSSL. If you modify file(s) with this
30 * exception, you may extend this exception to your version of the
31 * file(s), but you are not obligated to do so. If you do not wish to do
32 * so, delete this exception statement from your version. If you delete
33 * this exception statement from all source files in the program, then
34 * also delete it here.
35 */
36
37 #include "s3backer.h"
38 #include "dcache.h"
39
40 /*
41 * This file implements a simple on-disk storage area for cached blocks.
42 * The file contains a header, a directory, and a data area. Each directory
43 * entry indicates which block is stored in the corresponding "data slot"
44 * in the data area and that block's MD5 checksum. Note the MD5 checksum is
45 * the checksum of the stored data, which will differ from the actual block
46 * data's MD5 if the block was compressed, encrypted, etc. when stored.
47 *
48 * File format:
49 *
50 * [ struct file_header ]
51 * directory entry for data slot #0
52 * directory entry for data slot #1
53 * ...
54 * directory entry for data slot #N-1
55 * padding up to getpagesize()
56 * data slot #0
57 * data slot #1
58 * ...
59 * data slot #N-1
60 */
61
62 /* Definitions */
63 #define DCACHE_SIGNATURE 0xe496f17b
64 #define ROUNDUP2(x, y) (((x) + (y) - 1) & ~((y) - 1))
65 #define DIRECTORY_READ_CHUNK 1024
66
67 #define HDR_SIZE(flags) (((flags) & HDRFLG_NEW_FORMAT) == 0 ? sizeof(struct ofile_header) : sizeof(struct file_header))
68 #define DIR_ENTSIZE(flags) (((flags) & HDRFLG_NEW_FORMAT) == 0 ? sizeof(struct odir_entry) : sizeof(struct dir_entry))
69 #define DIR_OFFSET(flags, dslot) ((off_t)HDR_SIZE(flags) + (off_t)(dslot) * DIR_ENTSIZE(flags))
70 #define DATA_OFFSET(priv, dslot) ((off_t)(priv)->data + (off_t)(dslot) * (priv)->block_size)
71
72 /* Bits for file_header.flags */
73 #define HDRFLG_NEW_FORMAT 0x00000001
74 #define HDRFLG_MASK 0x00000001
75
76 /* Bits for dir_entry.flags */
77 #define ENTFLG_DIRTY 0x00000001
78 #define ENTFLG_MASK 0x00000001
79
80 /* File header (old format) */
81 struct ofile_header {
82 uint32_t signature;
83 uint32_t header_size;
84 uint32_t u_int_size;
85 uint32_t s3b_block_t_size;
86 uint32_t block_size;
87 uint32_t data_align;
88 uint32_t flags;
89 u_int max_blocks;
90 } __attribute__ ((packed));
91
92 /* File header */
93 struct file_header {
94 uint32_t signature;
95 uint32_t header_size;
96 uint32_t u_int_size;
97 uint32_t s3b_block_t_size;
98 uint32_t block_size;
99 uint32_t data_align;
100 uint32_t flags;
101 u_int max_blocks;
102 int32_t mount_token;
103 uint32_t spare[7]; /* future expansion */
104 } __attribute__ ((packed));
105
106 /* One directory entry (old format) */
107 struct odir_entry {
108 s3b_block_t block_num;
109 u_char md5[MD5_DIGEST_LENGTH];
110 } __attribute__ ((packed));
111
112 /* One directory entry (new format) */
113 struct dir_entry {
114 s3b_block_t block_num;
115 u_char md5[MD5_DIGEST_LENGTH];
116 uint32_t flags;
117 } __attribute__ ((packed));
118
119 /* Private structure */
120 struct s3b_dcache {
121 int fd;
122 log_func_t *log;
123 char *filename;
124 void *zero_block;
125 u_int block_size;
126 u_int max_blocks;
127 u_int num_alloc;
128 uint32_t flags; /* copy of file_header.flags */
129 off_t data;
130 u_int free_list_len;
131 u_int free_list_alloc;
132 s3b_block_t *free_list;
133 };
134
135 /* Internal functions */
136 static int s3b_dcache_write_entry(struct s3b_dcache *priv, u_int dslot, const struct dir_entry *entry);
137 #ifndef NDEBUG
138 static int s3b_dcache_entry_is_empty(struct s3b_dcache *priv, u_int dslot);
139 static int s3b_dcache_entry_write_ok(struct s3b_dcache *priv, u_int dslot, s3b_block_t block_num, u_int dirty);
140 static int s3b_dcache_read_entry(struct s3b_dcache *priv, u_int dslot, struct dir_entry *entryp);
141 #endif
142 static int s3b_dcache_create_file(struct s3b_dcache *priv, int *fdp, const char *filename, u_int max_blocks,
143 struct file_header *headerp);
144 static int s3b_dcache_resize_file(struct s3b_dcache *priv, const struct file_header *header);
145 static int s3b_dcache_init_free_list(struct s3b_dcache *priv, s3b_dcache_visit_t *visitor, void *arg, u_int visit_dirty);
146 static int s3b_dcache_push(struct s3b_dcache *priv, u_int dslot);
147 static void s3b_dcache_pop(struct s3b_dcache *priv, u_int *dslotp);
148 static int s3b_dcache_read(struct s3b_dcache *priv, off_t offset, void *data, size_t len);
149 static int s3b_dcache_write(struct s3b_dcache *priv, off_t offset, const void *data, size_t len);
150 static int s3b_dcache_write2(struct s3b_dcache *priv, int fd, const char *filename, off_t offset, const void *data, size_t len);
151
152 /* Internal variables */
153 static const struct dir_entry zero_entry;
154
155 /* Public functions */
156
157 int
s3b_dcache_open(struct s3b_dcache ** dcachep,log_func_t * log,const char * filename,u_int block_size,u_int max_blocks,s3b_dcache_visit_t * visitor,void * arg,u_int visit_dirty)158 s3b_dcache_open(struct s3b_dcache **dcachep, log_func_t *log, const char *filename,
159 u_int block_size, u_int max_blocks, s3b_dcache_visit_t *visitor, void *arg, u_int visit_dirty)
160 {
161 struct ofile_header oheader;
162 struct file_header header;
163 struct s3b_dcache *priv;
164 struct stat sb;
165 int r;
166
167 /* Sanity check */
168 if (max_blocks == 0)
169 return EINVAL;
170
171 /* Initialize private structure */
172 if ((priv = malloc(sizeof(*priv))) == NULL)
173 return errno;
174 memset(priv, 0, sizeof(*priv));
175 priv->fd = -1;
176 priv->log = log;
177 priv->block_size = block_size;
178 priv->max_blocks = max_blocks;
179 if ((priv->filename = strdup(filename)) == NULL) {
180 r = errno;
181 goto fail1;
182 }
183 if ((priv->zero_block = calloc(1, block_size)) == NULL) {
184 r = errno;
185 goto fail2;
186 }
187
188 /* Create cache file if it doesn't already exist */
189 if (stat(priv->filename, &sb) == -1 && errno == ENOENT) {
190 (*priv->log)(LOG_NOTICE, "creating new cache file `%s' with capacity %u blocks", priv->filename, priv->max_blocks);
191 if ((r = s3b_dcache_create_file(priv, &priv->fd, priv->filename, priv->max_blocks, NULL)) != 0)
192 goto fail3;
193 (void)close(priv->fd);
194 priv->fd = -1;
195 }
196
197 retry:
198 /* Open cache file */
199 assert(priv->fd == -1);
200 if ((priv->fd = open(priv->filename, O_RDWR, 0)) == -1) {
201 r = errno;
202 (*priv->log)(LOG_ERR, "can't open cache file `%s': %s", priv->filename, strerror(r));
203 goto fail3;
204 }
205
206 /* Get file info */
207 if (fstat(priv->fd, &sb) == -1) {
208 r = errno;
209 goto fail4;
210 }
211
212 /* Read in header with backward compatible support for older header format */
213 if (sb.st_size < sizeof(oheader)) {
214 (*priv->log)(LOG_ERR, "invalid cache file `%s': file is truncated (size %ju < %u)",
215 priv->filename, (uintmax_t)sb.st_size, (u_int)sizeof(oheader));
216 r = EINVAL;
217 goto fail4;
218 }
219 if ((r = s3b_dcache_read(priv, (off_t)0, &oheader, sizeof(oheader))) != 0) {
220 (*priv->log)(LOG_ERR, "can't read cache file `%s' header: %s", priv->filename, strerror(r));
221 goto fail4;
222 }
223 switch (oheader.header_size) {
224 case sizeof(oheader): /* old format */
225 memset(&header, 0, sizeof(header));
226 memcpy(&header, &oheader, sizeof(oheader));
227 break;
228 case sizeof(header): /* new format */
229 if ((r = s3b_dcache_read(priv, (off_t)0, &header, sizeof(header))) != 0) {
230 (*priv->log)(LOG_ERR, "can't read cache file `%s' header: %s", priv->filename, strerror(r));
231 goto fail4;
232 }
233 break;
234 default:
235 (*priv->log)(LOG_ERR, "invalid cache file `%s': %s %d", priv->filename, "invalid header size", (int)oheader.header_size);
236 r = EINVAL;
237 goto fail4;
238 }
239
240 /* Verify header - all but number of blocks */
241 r = EINVAL;
242 if (header.signature != DCACHE_SIGNATURE) {
243 (*priv->log)(LOG_ERR, "invalid cache file `%s': wrong signature %08x != %08x",
244 priv->filename, header.signature, DCACHE_SIGNATURE);
245 goto fail4;
246 }
247 if (header.header_size != HDR_SIZE(header.flags)) {
248 (*priv->log)(LOG_ERR, "invalid cache file `%s': %s %d != %d",
249 priv->filename, "invalid header size", (int)header.header_size, (int)HDR_SIZE(header.flags));
250 goto fail4;
251 }
252 if (header.u_int_size != sizeof(u_int)) {
253 (*priv->log)(LOG_ERR, "invalid cache file `%s': created with sizeof(u_int) %u != %u",
254 priv->filename, header.u_int_size, (u_int)sizeof(u_int));
255 goto fail4;
256 }
257 if (header.s3b_block_t_size != sizeof(s3b_block_t)) {
258 (*priv->log)(LOG_ERR, "invalid cache file `%s': created with sizeof(s3b_block_t) %u != %u",
259 priv->filename, header.s3b_block_t_size, (u_int)sizeof(s3b_block_t));
260 goto fail4;
261 }
262 if (header.block_size != priv->block_size) {
263 (*priv->log)(LOG_ERR, "invalid cache file `%s': created with block size %u != %u",
264 priv->filename, header.block_size, priv->block_size);
265 goto fail4;
266 }
267 if (header.data_align != getpagesize()) {
268 (*priv->log)(LOG_ERR, "invalid cache file `%s': created with alignment %u != %u",
269 priv->filename, header.data_align, getpagesize());
270 goto fail4;
271 }
272 if ((header.flags & ~HDRFLG_MASK) != 0) {
273 (*priv->log)(LOG_ERR, "invalid cache file `%s': %s", priv->filename, "unrecognized flags present");
274 goto fail4;
275 }
276 priv->flags = header.flags;
277
278 /* Check number of blocks, shrinking or expanding if necessary */
279 if (header.max_blocks != priv->max_blocks) {
280 (*priv->log)(LOG_NOTICE, "cache file `%s' was created with capacity %u != %u blocks, automatically %s",
281 priv->filename, header.max_blocks, priv->max_blocks, header.max_blocks < priv->max_blocks ?
282 "expanding" : "shrinking");
283 if ((r = s3b_dcache_resize_file(priv, &header)) != 0)
284 goto fail4;
285 (*priv->log)(LOG_INFO, "successfully resized cache file `%s' from %u to %u blocks",
286 priv->filename, header.max_blocks, priv->max_blocks);
287 goto retry;
288 }
289
290 /* Verify file's directory is not truncated */
291 if (sb.st_size < DIR_OFFSET(priv->flags, priv->max_blocks)) {
292 (*priv->log)(LOG_ERR, "invalid cache file `%s': file is truncated (size %ju < %ju)",
293 priv->filename, (uintmax_t)sb.st_size, (uintmax_t)DIR_OFFSET(priv->flags, priv->max_blocks));
294 goto fail4;
295 }
296
297 /* Compute offset of first data block */
298 priv->data = ROUNDUP2(DIR_OFFSET(priv->flags, priv->max_blocks), header.data_align);
299
300 /* Read the directory to build the free list and visit allocated blocks */
301 if (visitor != NULL && (r = s3b_dcache_init_free_list(priv, visitor, arg, visit_dirty)) != 0)
302 goto fail4;
303
304 /* Done */
305 *dcachep = priv;
306 return 0;
307
308 fail4:
309 close(priv->fd);
310 fail3:
311 free(priv->zero_block);
312 fail2:
313 free(priv->filename);
314 fail1:
315 free(priv->free_list);
316 free(priv);
317 return r;
318 }
319
320 int
s3b_dcache_has_mount_token(struct s3b_dcache * priv)321 s3b_dcache_has_mount_token(struct s3b_dcache *priv)
322 {
323 return (priv->flags & HDRFLG_NEW_FORMAT) != 0;
324 }
325
326 int
s3b_dcache_set_mount_token(struct s3b_dcache * priv,int32_t * old_valuep,int32_t new_value)327 s3b_dcache_set_mount_token(struct s3b_dcache *priv, int32_t *old_valuep, int32_t new_value)
328 {
329 int r;
330
331 /* Read old value */
332 if (old_valuep != NULL) {
333 if ((r = s3b_dcache_read(priv, offsetof(struct file_header, mount_token), old_valuep, sizeof(*old_valuep))) != 0)
334 return r;
335 }
336
337 /* Write new value */
338 if (new_value >= 0) {
339
340 /* Update file */
341 if ((r = s3b_dcache_write(priv, offsetof(struct file_header, mount_token), &new_value, sizeof(new_value))) != 0)
342 return r;
343
344 /* Sync to disk */
345 s3b_dcache_fsync(priv);
346 }
347
348 /* Done */
349 return 0;
350 }
351
352 void
s3b_dcache_close(struct s3b_dcache * priv)353 s3b_dcache_close(struct s3b_dcache *priv)
354 {
355 close(priv->fd);
356 free(priv->zero_block);
357 free(priv->filename);
358 free(priv->free_list);
359 free(priv);
360 }
361
362 u_int
s3b_dcache_size(struct s3b_dcache * priv)363 s3b_dcache_size(struct s3b_dcache *priv)
364 {
365 return priv->num_alloc;
366 }
367
368 /*
369 * Allocate a dslot for a block's data. We don't record this block in the directory yet;
370 * that is done by s3b_dcache_record_block().
371 */
372 int
s3b_dcache_alloc_block(struct s3b_dcache * priv,u_int * dslotp)373 s3b_dcache_alloc_block(struct s3b_dcache *priv, u_int *dslotp)
374 {
375 /* Any free dslots? */
376 if (priv->free_list_len == 0)
377 return ENOMEM;
378
379 /* Pop off the next free dslot */
380 s3b_dcache_pop(priv, dslotp);
381
382 /* Directory entry should be empty */
383 assert(s3b_dcache_entry_is_empty(priv, *dslotp));
384
385 /* Done */
386 priv->num_alloc++;
387 return 0;
388 }
389
390 /*
391 * Record a block's dslot in the directory. After this function is called, the block will
392 * be visible in the directory and picked up after a restart.
393 *
394 * If md5 != NULL, the block is CLEAN; if md5 == NULL, the block is DIRTY.
395 *
396 * This should be called AFTER the data for the block has already been written.
397 *
398 * There MUST NOT be a directory entry for the block.
399 */
400 int
s3b_dcache_record_block(struct s3b_dcache * priv,u_int dslot,s3b_block_t block_num,const u_char * md5)401 s3b_dcache_record_block(struct s3b_dcache *priv, u_int dslot, s3b_block_t block_num, const u_char *md5)
402 {
403 const u_int dirty = md5 == NULL;
404 struct dir_entry entry;
405 int r;
406
407 /* Sanity check */
408 assert(dslot < priv->max_blocks);
409
410 /* Directory entry should be writable */
411 assert(s3b_dcache_entry_write_ok(priv, dslot, block_num, dirty));
412
413 /* If cache file is older format, it doesn't store dirty blocks, so just erase it instead (prior behavior) */
414 if (dirty && (priv->flags & HDRFLG_NEW_FORMAT) == 0) {
415 s3b_dcache_erase_block(priv, dslot);
416 return 0;
417 }
418
419 /* Make sure any new data is written to disk before updating the directory */
420 if ((r = s3b_dcache_fsync(priv)) != 0)
421 return r;
422
423 /* Update directory */
424 memset(&entry, 0, sizeof(entry));
425 entry.block_num = block_num;
426 entry.flags = dirty ? ENTFLG_DIRTY : 0;
427 if (!dirty)
428 memcpy(&entry.md5, md5, MD5_DIGEST_LENGTH);
429 if ((r = s3b_dcache_write_entry(priv, dslot, &entry)) != 0)
430 return r;
431
432 /* Done */
433 return 0;
434 }
435
436 /*
437 * Erase the directory entry for a dslot. After this function is called, the block will
438 * no longer be visible in the directory after a restart.
439 *
440 * This should be called BEFORE any new data for the block is written.
441 *
442 * There MUST be a directory entry for the block.
443 */
444 int
s3b_dcache_erase_block(struct s3b_dcache * priv,u_int dslot)445 s3b_dcache_erase_block(struct s3b_dcache *priv, u_int dslot)
446 {
447 int r;
448
449 /* Sanity check */
450 assert(dslot < priv->max_blocks);
451
452 /* Update directory */
453 if ((r = s3b_dcache_write_entry(priv, dslot, &zero_entry)) != 0)
454 return r;
455
456 /* Make sure directory entry is written to disk before any new data is written */
457 if ((r = s3b_dcache_fsync(priv)) != 0)
458 return r;
459
460 /* Done */
461 return 0;
462 }
463
464 /*
465 * Free a no-longer used dslot.
466 *
467 * There MUST NOT be a directory entry for the block.
468 */
469 int
s3b_dcache_free_block(struct s3b_dcache * priv,u_int dslot)470 s3b_dcache_free_block(struct s3b_dcache *priv, u_int dslot)
471 {
472 int r;
473
474 /* Sanity check */
475 assert(dslot < priv->max_blocks);
476
477 /* Directory entry should be empty */
478 assert(s3b_dcache_entry_is_empty(priv, dslot));
479
480 /* Push dslot onto free list */
481 if ((r = s3b_dcache_push(priv, dslot)) != 0)
482 return r;
483
484 /* Done */
485 priv->num_alloc--;
486 return 0;
487 }
488
489 /*
490 * Read data from one dslot.
491 */
492 int
s3b_dcache_read_block(struct s3b_dcache * priv,u_int dslot,void * dest,u_int off,u_int len)493 s3b_dcache_read_block(struct s3b_dcache *priv, u_int dslot, void *dest, u_int off, u_int len)
494 {
495 /* Sanity check */
496 assert(dslot < priv->max_blocks);
497 assert(off <= priv->block_size);
498 assert(len <= priv->block_size);
499 assert(off + len <= priv->block_size);
500
501 /* Read data */
502 return s3b_dcache_read(priv, DATA_OFFSET(priv, dslot) + off, dest, len);
503 }
504
505 /*
506 * Write data into one dslot.
507 */
508 int
s3b_dcache_write_block(struct s3b_dcache * priv,u_int dslot,const void * src,u_int off,u_int len)509 s3b_dcache_write_block(struct s3b_dcache *priv, u_int dslot, const void *src, u_int off, u_int len)
510 {
511 /* Sanity check */
512 assert(dslot < priv->max_blocks);
513 assert(off <= priv->block_size);
514 assert(len <= priv->block_size);
515 assert(off + len <= priv->block_size);
516
517 /* Write data */
518 return s3b_dcache_write(priv, DATA_OFFSET(priv, dslot) + off, src != NULL ? src : priv->zero_block, len);
519 }
520
521 /*
522 * Synchronize outstanding changes to persistent storage.
523 */
524 int
s3b_dcache_fsync(struct s3b_dcache * priv)525 s3b_dcache_fsync(struct s3b_dcache *priv)
526 {
527 int r;
528
529 #if HAVE_DECL_FDATASYNC
530 r = fdatasync(priv->fd);
531 #else
532 r = fsync(priv->fd);
533 #endif
534 if (r == -1) {
535 r = errno;
536 (*priv->log)(LOG_ERR, "error fsync'ing cache file `%s': %s", priv->filename, strerror(r));
537 }
538 return 0;
539 }
540
541 /* Internal functions */
542
543 #ifndef NDEBUG
544 static int
s3b_dcache_entry_is_empty(struct s3b_dcache * priv,u_int dslot)545 s3b_dcache_entry_is_empty(struct s3b_dcache *priv, u_int dslot)
546 {
547 struct dir_entry entry;
548
549 (void)s3b_dcache_read_entry(priv, dslot, &entry);
550 return memcmp(&entry, &zero_entry, sizeof(entry)) == 0;
551 }
552
553 static int
s3b_dcache_entry_write_ok(struct s3b_dcache * priv,u_int dslot,s3b_block_t block_num,u_int dirty)554 s3b_dcache_entry_write_ok(struct s3b_dcache *priv, u_int dslot, s3b_block_t block_num, u_int dirty)
555 {
556 struct dir_entry entry;
557 u_int old_dirty;
558
559 if (s3b_dcache_entry_is_empty(priv, dslot))
560 return 1;
561 (void)s3b_dcache_read_entry(priv, dslot, &entry);
562 old_dirty = (entry.flags & ENTFLG_DIRTY) != 0;
563 return entry.block_num == block_num && old_dirty != dirty;
564 }
565
566 static int
s3b_dcache_read_entry(struct s3b_dcache * priv,u_int dslot,struct dir_entry * entry)567 s3b_dcache_read_entry(struct s3b_dcache *priv, u_int dslot, struct dir_entry *entry)
568 {
569 assert(dslot < priv->max_blocks);
570 memset(entry, 0, sizeof(*entry));
571 return s3b_dcache_read(priv, DIR_OFFSET(priv->flags, dslot), entry, DIR_ENTSIZE(priv->flags));
572 }
573 #endif
574
575 /*
576 * Write a directory entry.
577 */
578 static int
s3b_dcache_write_entry(struct s3b_dcache * priv,u_int dslot,const struct dir_entry * entry)579 s3b_dcache_write_entry(struct s3b_dcache *priv, u_int dslot, const struct dir_entry *entry)
580 {
581 assert(dslot < priv->max_blocks);
582 assert((entry->flags & ~((priv->flags & HDRFLG_NEW_FORMAT) != 0 ? ENTFLG_MASK : 0)) == 0);
583 return s3b_dcache_write(priv, DIR_OFFSET(priv->flags, dslot), entry, DIR_ENTSIZE(priv->flags));
584 }
585
586 /*
587 * Resize (and compress) an existing cache file. Upon successful return, priv->fd is closed
588 * and the cache file must be re-opened.
589 */
590 static int
s3b_dcache_resize_file(struct s3b_dcache * priv,const struct file_header * old_header)591 s3b_dcache_resize_file(struct s3b_dcache *priv, const struct file_header *old_header)
592 {
593 const u_int old_max_blocks = old_header->max_blocks;
594 const u_int new_max_blocks = priv->max_blocks;
595 struct file_header new_header;
596 off_t old_data_base;
597 off_t new_data_base;
598 u_int base_old_dslot;
599 u_int new_dslot = 0;
600 u_int num_entries;
601 u_char *block_buf = NULL;
602 char *tempfile = NULL;
603 int new_fd = -1;
604 int r;
605
606 /* Create new temporary cache file */
607 if (asprintf(&tempfile, "%s.new", priv->filename) == -1) {
608 r = errno;
609 tempfile = NULL;
610 (*priv->log)(LOG_ERR, "can't allocate string: %s", strerror(r));
611 goto fail;
612 }
613 if ((r = s3b_dcache_create_file(priv, &new_fd, tempfile, new_max_blocks, &new_header)) != 0)
614 goto fail;
615
616 /* Allocate block data buffer */
617 if ((block_buf = malloc(priv->block_size)) == NULL) {
618 r = errno;
619 (*priv->log)(LOG_ERR, "can't allocate buffer: %s", strerror(r));
620 goto fail;
621 }
622
623 /* Copy non-empty cache entries from old file to new file */
624 old_data_base = ROUNDUP2(DIR_OFFSET(old_header->flags, old_max_blocks), old_header->data_align);
625 new_data_base = ROUNDUP2(DIR_OFFSET(new_header.flags, new_max_blocks), new_header.data_align);
626 for (base_old_dslot = 0; base_old_dslot < old_max_blocks; base_old_dslot += num_entries) {
627 char buffer[DIRECTORY_READ_CHUNK * DIR_ENTSIZE(old_header->flags)];
628 int i;
629
630 /* Read in the next chunk of old directory entries */
631 num_entries = old_max_blocks - base_old_dslot;
632 if (num_entries > DIRECTORY_READ_CHUNK)
633 num_entries = DIRECTORY_READ_CHUNK;
634 if ((r = s3b_dcache_read(priv, DIR_OFFSET(old_header->flags, base_old_dslot),
635 buffer, num_entries * DIR_ENTSIZE(old_header->flags))) != 0) {
636 (*priv->log)(LOG_ERR, "error reading cache file `%s' directory: %s", priv->filename, strerror(r));
637 goto fail;
638 }
639
640 /* For each dslot: if not free, copy it to the next slot in the new file */
641 for (i = 0; i < num_entries; i++) {
642 const u_int old_dslot = base_old_dslot + i;
643 struct dir_entry entry;
644 off_t old_data;
645 off_t new_data;
646
647 /* Read old entry */
648 memset(&entry, 0, sizeof(entry));
649 memcpy(&entry, buffer + i * DIR_ENTSIZE(old_header->flags), DIR_ENTSIZE(old_header->flags));
650
651 /* Is this entry non-empty? */
652 if (memcmp(&entry, &zero_entry, sizeof(entry)) == 0)
653 continue;
654
655 /* Any more space? */
656 if (new_dslot == new_max_blocks) {
657 (*priv->log)(LOG_INFO, "cache file `%s' contains more than %u blocks; some will be discarded",
658 priv->filename, new_max_blocks);
659 goto done;
660 }
661
662 /* Copy the directory entry */
663 assert(DIR_ENTSIZE(new_header.flags) == sizeof(entry));
664 if ((r = s3b_dcache_write2(priv, new_fd, tempfile,
665 DIR_OFFSET(new_header.flags, new_dslot), &entry, sizeof(entry))) != 0)
666 goto fail;
667
668 /* Copy the data block */
669 old_data = old_data_base + (off_t)old_dslot * priv->block_size;
670 new_data = new_data_base + (off_t)new_dslot * priv->block_size;
671 if ((r = s3b_dcache_read(priv, old_data, block_buf, priv->block_size)) != 0)
672 goto fail;
673 if ((r = s3b_dcache_write2(priv, new_fd, tempfile, new_data, block_buf, priv->block_size)) != 0)
674 goto fail;
675
676 /* Advance to the next slot */
677 new_dslot++;
678 }
679 }
680
681 done:
682 /* Close the new file */
683 if (close(new_fd) == -1) {
684 (*priv->log)(LOG_ERR, "error closing temporary cache file `%s': %s", tempfile, strerror(r));
685 goto fail;
686 }
687 new_fd = -1;
688
689 /* Replace old cache file with new cache file */
690 if (rename(tempfile, priv->filename) == -1) {
691 r = errno;
692 (*priv->log)(LOG_ERR, "error renaming `%s' to `%s': %s", tempfile, priv->filename, strerror(r));
693 goto fail;
694 }
695 free(tempfile);
696 tempfile = NULL;
697
698 /* Update flags */
699 priv->flags = new_header.flags;
700
701 /* Close old file to release it and we're done */
702 close(priv->fd);
703 priv->fd = -1;
704 r = 0;
705
706 fail:
707 /* Clean up */
708 if (block_buf != NULL)
709 free(block_buf);
710 if (new_fd != -1)
711 (void)close(new_fd);
712 if (tempfile != NULL) {
713 (void)unlink(tempfile);
714 free(tempfile);
715 }
716 return r;
717 }
718
719 static int
s3b_dcache_create_file(struct s3b_dcache * priv,int * fdp,const char * filename,u_int max_blocks,struct file_header * headerp)720 s3b_dcache_create_file(struct s3b_dcache *priv, int *fdp, const char *filename, u_int max_blocks, struct file_header *headerp)
721 {
722 struct file_header header;
723 int r;
724
725 /* Initialize header */
726 memset(&header, 0, sizeof(header));
727 header.signature = DCACHE_SIGNATURE;
728 header.flags = HDRFLG_NEW_FORMAT;
729 header.header_size = HDR_SIZE(header.flags);
730 header.u_int_size = sizeof(u_int);
731 header.s3b_block_t_size = sizeof(s3b_block_t);
732 header.block_size = priv->block_size;
733 header.max_blocks = priv->max_blocks;
734 header.data_align = getpagesize();
735
736 /* Create file */
737 if ((*fdp = open(filename, O_RDWR|O_CREAT|O_EXCL, 0644)) == -1) {
738 r = errno;
739 (*priv->log)(LOG_ERR, "can't create file `%s': %s", filename, strerror(r));
740 return r;
741 }
742
743 /* Write header */
744 if ((r = s3b_dcache_write2(priv, *fdp, filename, (off_t)0, &header, sizeof(header))) != 0) {
745 (*priv->log)(LOG_ERR, "error initializing cache file `%s': %s", filename, strerror(r));
746 goto fail;
747 }
748
749 /* Extend the file to the required length; the directory will be filled with zeroes */
750 if (ftruncate(*fdp, sizeof(header)) == -1 || ftruncate(*fdp, DIR_OFFSET(header.flags, max_blocks)) == -1) {
751 r = errno;
752 (*priv->log)(LOG_ERR, "error initializing cache file `%s': %s", filename, strerror(r));
753 goto fail;
754 }
755
756 /* Done */
757 if (headerp != NULL)
758 *headerp = header;
759 return 0;
760
761 fail:
762 (void)unlink(filename);
763 (void)close(*fdp);
764 *fdp = -1;
765 return r;
766 }
767
768 static int
s3b_dcache_init_free_list(struct s3b_dcache * priv,s3b_dcache_visit_t * visitor,void * arg,u_int visit_dirty)769 s3b_dcache_init_free_list(struct s3b_dcache *priv, s3b_dcache_visit_t *visitor, void *arg, u_int visit_dirty)
770 {
771 off_t required_size;
772 struct stat sb;
773 u_int num_entries;
774 u_int num_dslots_used;
775 u_int base_dslot;
776 u_int i;
777 int r;
778
779 /* Logging */
780 (*priv->log)(LOG_INFO, "reading meta-data from cache file `%s'", priv->filename);
781 assert(visitor != NULL);
782
783 /* Inspect all directory entries */
784 for (num_dslots_used = base_dslot = 0; base_dslot < priv->max_blocks; base_dslot += num_entries) {
785 char buffer[DIRECTORY_READ_CHUNK * DIR_ENTSIZE(priv->flags)];
786
787 /* Read in the next chunk of directory entries */
788 num_entries = priv->max_blocks - base_dslot;
789 if (num_entries > DIRECTORY_READ_CHUNK)
790 num_entries = DIRECTORY_READ_CHUNK;
791 if ((r = s3b_dcache_read(priv, DIR_OFFSET(priv->flags, base_dslot), buffer, num_entries * DIR_ENTSIZE(priv->flags))) != 0) {
792 (*priv->log)(LOG_ERR, "error reading cache file `%s' directory: %s", priv->filename, strerror(r));
793 return r;
794 }
795
796 /* For each dslot: if free, add to the free list, else let visitor decide what to do */
797 for (i = 0; i < num_entries; i++) {
798 const u_int dslot = base_dslot + i;
799 struct dir_entry entry;
800
801 memset(&entry, 0, sizeof(entry));
802 memcpy(&entry, buffer + i * DIR_ENTSIZE(priv->flags), DIR_ENTSIZE(priv->flags));
803 if (memcmp(&entry, &zero_entry, sizeof(entry)) == 0) {
804 if ((r = s3b_dcache_push(priv, dslot)) != 0)
805 return r;
806 } else if ((entry.flags & ENTFLG_DIRTY) != 0 && !visit_dirty) { /* visitor doesn't want dirties, so just nuke it */
807 if ((r = s3b_dcache_write_entry(priv, dslot, &zero_entry)) != 0)
808 return r;
809 if ((r = s3b_dcache_push(priv, dslot)) != 0)
810 return r;
811 } else {
812 priv->num_alloc++;
813 if (dslot + 1 > num_dslots_used) /* keep track of the number of dslots in use */
814 num_dslots_used = dslot + 1;
815 if ((r = (*visitor)(arg, dslot, entry.block_num, (entry.flags & ENTFLG_DIRTY) == 0 ? entry.md5 : NULL)) != 0)
816 return r;
817 }
818 }
819 }
820
821 /* Reverse the free list so we allocate lower numbered slots first */
822 for (i = 0; i < priv->free_list_len / 2; i++) {
823 const s3b_block_t temp = priv->free_list[i];
824
825 priv->free_list[i] = priv->free_list[priv->free_list_len - i - 1];
826 priv->free_list[priv->free_list_len - i - 1] = temp;
827 }
828
829 /* Verify the cache file is not truncated */
830 required_size = DIR_OFFSET(priv->flags, priv->max_blocks);
831 if (num_dslots_used > 0) {
832 if (required_size < DATA_OFFSET(priv, num_dslots_used))
833 required_size = DATA_OFFSET(priv, num_dslots_used);
834 }
835 if (fstat(priv->fd, &sb) == -1) {
836 r = errno;
837 (*priv->log)(LOG_ERR, "error reading cache file `%s' length: %s", priv->filename, strerror(r));
838 return r;
839 }
840 if (sb.st_size < required_size) {
841 (*priv->log)(LOG_ERR, "cache file `%s' is truncated (has size %ju < %ju bytes)",
842 priv->filename, (uintmax_t)sb.st_size, (uintmax_t)required_size);
843 return EINVAL;
844 }
845
846 /* Discard any unreferenced data beyond the last entry */
847 if (sb.st_size > required_size && ftruncate(priv->fd, required_size) == -1) {
848 r = errno;
849 (*priv->log)(LOG_ERR, "error trimming cache file `%s' to %ju bytes: %s",
850 priv->filename, (uintmax_t)required_size, strerror(r));
851 return EINVAL;
852 }
853
854 /* Report results */
855 (*priv->log)(LOG_INFO, "loaded cache file `%s' with %u free and %u used blocks (max index %u)",
856 priv->filename, priv->free_list_len, priv->max_blocks - priv->free_list_len, num_dslots_used);
857
858 /* Done */
859 return 0;
860 }
861
862 /*
863 * Push a dslot onto the free list.
864 */
865 static int
s3b_dcache_push(struct s3b_dcache * priv,u_int dslot)866 s3b_dcache_push(struct s3b_dcache *priv, u_int dslot)
867 {
868 /* Sanity check */
869 assert(dslot < priv->max_blocks);
870 assert(priv->free_list_len < priv->max_blocks);
871
872 /* Grow the free list array if necessary */
873 if (priv->free_list_alloc == priv->free_list_len) {
874 s3b_block_t *new_free_list;
875 s3b_block_t new_free_list_alloc;
876 int r;
877
878 new_free_list_alloc = priv->free_list_alloc == 0 ? 1024 : 2 * priv->free_list_alloc;
879 if ((new_free_list = realloc(priv->free_list, new_free_list_alloc * sizeof(*new_free_list))) == NULL) {
880 r = errno;
881 (*priv->log)(LOG_ERR, "realloc: %s", strerror(r));
882 return r;
883 }
884 priv->free_list = new_free_list;
885 priv->free_list_alloc = new_free_list_alloc;
886 }
887
888 /* Add new dslot */
889 assert(priv->free_list_len < priv->free_list_alloc);
890 priv->free_list[priv->free_list_len++] = dslot;
891 return 0;
892 }
893
894 /*
895 * Pop the next dslot off of the free list. There must be one.
896 */
897 static void
s3b_dcache_pop(struct s3b_dcache * priv,u_int * dslotp)898 s3b_dcache_pop(struct s3b_dcache *priv, u_int *dslotp)
899 {
900 /* Sanity check */
901 assert(priv->free_list_len > 0);
902
903 /* Pop off dslot at the head of the list */
904 *dslotp = priv->free_list[--priv->free_list_len];
905 assert(*dslotp < priv->max_blocks);
906
907 /* See if we can give back some memory */
908 if (priv->free_list_alloc > 1024 && priv->free_list_len <= priv->free_list_alloc / 4) {
909 s3b_block_t *new_free_list;
910 s3b_block_t new_free_list_alloc;
911
912 new_free_list_alloc = priv->free_list_alloc / 4;
913 if ((new_free_list = realloc(priv->free_list, new_free_list_alloc * sizeof(*new_free_list))) == NULL)
914 (*priv->log)(LOG_ERR, "can't shrink dcache free list: realloc: %s (ignored)", strerror(errno));
915 else {
916 priv->free_list = new_free_list;
917 priv->free_list_alloc = new_free_list_alloc;
918 }
919 }
920 assert(priv->free_list_len <= priv->free_list_alloc);
921 }
922
923 static int
s3b_dcache_read(struct s3b_dcache * priv,off_t offset,void * data,size_t len)924 s3b_dcache_read(struct s3b_dcache *priv, off_t offset, void *data, size_t len)
925 {
926 size_t sofar;
927 ssize_t r;
928
929 for (sofar = 0; sofar < len; sofar += r) {
930 const off_t posn = offset + sofar;
931
932 if ((r = pread(priv->fd, (char *)data + sofar, len - sofar, offset + sofar)) == -1) {
933 (*priv->log)(LOG_ERR, "error reading cache file `%s' at offset %ju: %s",
934 priv->filename, (uintmax_t)posn, strerror(r));
935 return r;
936 }
937 if (r == 0) { /* truncated input */
938 (*priv->log)(LOG_ERR, "error reading cache file `%s' at offset %ju: file is truncated",
939 priv->filename, (uintmax_t)posn);
940 return EINVAL;
941 }
942 }
943 return 0;
944 }
945
946 static int
s3b_dcache_write(struct s3b_dcache * priv,off_t offset,const void * data,size_t len)947 s3b_dcache_write(struct s3b_dcache *priv, off_t offset, const void *data, size_t len)
948 {
949 return s3b_dcache_write2(priv, priv->fd, priv->filename, offset, data, len);
950 }
951
952 static int
s3b_dcache_write2(struct s3b_dcache * priv,int fd,const char * filename,off_t offset,const void * data,size_t len)953 s3b_dcache_write2(struct s3b_dcache *priv, int fd, const char *filename, off_t offset, const void *data, size_t len)
954 {
955 size_t sofar;
956 ssize_t r;
957
958 for (sofar = 0; sofar < len; sofar += r) {
959 const off_t posn = offset + sofar;
960
961 if ((r = pwrite(fd, (const char *)data + sofar, len - sofar, offset + sofar)) == -1) {
962 (*priv->log)(LOG_ERR, "error writing cache file `%s' at offset %ju: %s",
963 filename, (uintmax_t)posn, strerror(r));
964 return r;
965 }
966 }
967 return 0;
968 }
969
970