1 #include "bsdtar_platform.h"
2
3 #include <sys/stat.h>
4 #include <sys/types.h>
5
6 #include <assert.h>
7 #include <errno.h>
8 #include <inttypes.h>
9 #include <limits.h>
10 #include <stddef.h>
11 #include <stdint.h>
12 #include <stdio.h>
13 #include <stdlib.h>
14 #include <string.h>
15 #include <unistd.h>
16
17 #include "ctassert.h"
18 #include "dirutil.h"
19 #include "rwhashtab.h"
20 #include "sysendian.h"
21 #include "warnp.h"
22
23 #include "chunks_internal.h"
24
/*
 * On-disk extra data statistics structure; integers are little-endian.
 * Every field is a raw byte array so that the in-memory layout is exactly
 * the on-disk layout regardless of host endianness or alignment rules; the
 * values are converted with le64enc / le64dec.  One of these records is
 * stored at the very start of the directory file, ahead of the per-chunk
 * records.
 */
struct chunkstats_external {
	uint8_t nchunks[8];	/* Number of files. */
	uint8_t s_len[8];	/* Sum of file lengths. */
	uint8_t s_zlen[8];	/* Sum of compressed lengths. */
};
/* The structure is read and written whole, so it must be exactly 24 bytes. */
CTASSERT(sizeof(struct chunkstats_external) == 24);
32
/*
 * On-disk chunk metadata structure; integers are little-endian.
 * Every field is a raw byte array so that the in-memory layout is exactly
 * the on-disk layout; the 32-bit values are converted with le32enc /
 * le32dec.  The directory file holds a sequence of these records following
 * the statistics header.
 */
struct chunkdata_external {
	uint8_t hash[32];	/* HMAC of chunk. */
	uint8_t len[4];		/* Length of chunk. */
	uint8_t zlen[4];	/* Compressed length of chunk. */
	uint8_t nrefs[4];	/* Number of existing tapes using this. */
	uint8_t ncopies[4];	/* Number of copies of this chunk. */
};
/* The structure is read and written whole, so it must be exactly 48 bytes. */
CTASSERT(sizeof(struct chunkdata_external) == 48);
42
/*
 * Callbacks passed to rwhashtab_foreach; each is invoked with a record from
 * the hash table and a caller-supplied cookie.
 */
static int callback_write(void * rec, void * cookie);
static int callback_free(void * rec, void * cookie);
45
46 /**
47 * callback_write(rec, cookie):
48 * Convert chunkdata record ${rec} into a struct chunkdata_external and
49 * write it to the FILE * ${cookie}; but don't write entries with nrefs == 0.
50 */
51 static int
callback_write(void * rec,void * cookie)52 callback_write(void * rec, void * cookie)
53 {
54 struct chunkdata_external che;
55 struct chunkdata * ch = rec;
56 FILE * f = cookie;
57
58 /* If nrefs == 0, return without writing anything. */
59 if (ch->nrefs == 0)
60 return (0);
61
62 /* Convert to on-disk format. */
63 memcpy(che.hash, ch->hash, 32);
64 le32enc(che.len, ch->len);
65 le32enc(che.zlen, ch->zlen_flags & CHDATA_ZLEN);
66 le32enc(che.nrefs, ch->nrefs);
67 le32enc(che.ncopies, ch->ncopies);
68
69 /* Write. */
70 if (fwrite(&che, sizeof(che), 1, f) != 1) {
71 warnp("Error writing to chunk directory");
72 return (-1);
73 }
74
75 /* Success! */
76 return (0);
77 }
78
79 /**
80 * callback_free(rec, cookie):
81 * If the chunkdata record ${rec} was allocated via malloc(3), free it.
82 */
83 static int
callback_free(void * rec,void * cookie)84 callback_free(void * rec, void * cookie)
85 {
86 struct chunkdata * ch = rec;
87
88 (void)cookie; /* UNUSED */
89
90 if (ch->zlen_flags & CHDATA_MALLOC)
91 free(rec);
92
93 return (0);
94 }
95
96 /**
97 * chunks_directory_read(cachepath, dir, stats_unique, stats_all, stats_extra,
98 * mustexist, statstape):
99 * Read stats_extra statistics (statistics on non-chunks which are stored)
100 * and the chunk directory (if present) from "${cachepath}/directory" into
101 * memory allocated and assigned to ${*dir}; and return a hash table
102 * populated with struct chunkdata records. Populate stats_all with
103 * statistics for all the chunks listed in the directory (counting
104 * multiplicity) and populate stats_unique with statistics reflecting the
105 * unique chunks. If ${mustexist}, error out if the directory does not exist.
106 * If ${statstape}, allocate struct chunkdata_statstape records instead.
107 */
108 RWHASHTAB *
chunks_directory_read(const char * cachepath,void ** dir,struct chunkstats * stats_unique,struct chunkstats * stats_all,struct chunkstats * stats_extra,int mustexist,int statstape)109 chunks_directory_read(const char * cachepath, void ** dir,
110 struct chunkstats * stats_unique, struct chunkstats * stats_all,
111 struct chunkstats * stats_extra, int mustexist, int statstape)
112 {
113 struct chunkdata_external che;
114 struct chunkstats_external cse;
115 struct stat sb;
116 RWHASHTAB * HT;
117 char * s;
118 struct chunkdata * p = NULL;
119 struct chunkdata_statstape * ps = NULL;
120 FILE * f;
121 size_t numchunks;
122
123 /* Zero statistics. */
124 chunks_stats_zero(stats_unique);
125 chunks_stats_zero(stats_all);
126 chunks_stats_zero(stats_extra);
127
128 /* Create a hash table to hold the chunkdata structures. */
129 HT = rwhashtab_init(offsetof(struct chunkdata, hash), 32);
130 if (HT == NULL)
131 goto err0;
132
133 /* Bail if we're not using a cache directory. */
134 if (cachepath == NULL) {
135 *dir = NULL;
136 return (HT);
137 }
138
139 /* Construct the string "${cachepath}/directory". */
140 if (asprintf(&s, "%s/directory", cachepath) == -1) {
141 warnp("asprintf");
142 goto err1;
143 }
144 if (stat(s, &sb)) {
145 /* Could not stat ${cachepath}/directory. Error? */
146 if (errno != ENOENT) {
147 warnp("stat(%s)", s);
148 goto err2;
149 }
150
151 /* The directory doesn't exist; complain if mustexist != 0. */
152 if (mustexist) {
153 warn0("Error reading cache directory from %s",
154 cachepath);
155 goto err2;
156 }
157
158 /*
159 * ${cachepath}/directory does not exist; set ${*dir} to NULL
160 * and return the empty hash table.
161 */
162 free(s);
163 *dir = NULL;
164 return (HT);
165 }
166
167 /*
168 * Make sure the directory file isn't too large or too small, in
169 * order to avoid any possibility of integer overflows.
170 */
171 if ((sb.st_size < 0) ||
172 ((sizeof(off_t) > sizeof(size_t)) && (sb.st_size > SIZE_MAX))) {
173 warn0("on-disk directory has insane size (%jd bytes): %s",
174 (intmax_t)(sb.st_size), s);
175 goto err2;
176 }
177
178 /* Make sure the number of chunks is an integer. */
179 if (((size_t)sb.st_size - sizeof(struct chunkstats_external)) %
180 (sizeof(struct chunkdata_external))) {
181 warn0("on-disk directory is corrupt: %s", s);
182 goto err2;
183 }
184
185 /* Compute the number of on-disk chunks. */
186 numchunks =
187 ((size_t)sb.st_size - sizeof(struct chunkstats_external)) /
188 sizeof(struct chunkdata_external);
189
190 /* Make sure we don't get an integer overflow. */
191 if (numchunks >= SIZE_MAX / sizeof(struct chunkdata_statstape)) {
192 warn0("on-disk directory is too large: %s", s);
193 goto err2;
194 }
195
196 /*
197 * Allocate memory to ${*dir} large enough to store a struct
198 * chunkdata or struct chunkdata_statstape for each struct
199 * chunkdata_external in ${cachepath}/directory.
200 */
201 if (statstape) {
202 ps = malloc(numchunks * sizeof(struct chunkdata_statstape));
203 *dir = ps;
204 } else {
205 p = malloc(numchunks * sizeof(struct chunkdata));
206 *dir = p;
207 }
208 if (*dir == NULL)
209 goto err2;
210
211 /* Open the directory file. */
212 if ((f = fopen(s, "r")) == NULL) {
213 warnp("fopen(%s)", s);
214 goto err3;
215 }
216
217 /* Read the extra files statistics. */
218 if (fread(&cse, sizeof(cse), 1, f) != 1) {
219 warnp("fread(%s)", s);
220 goto err4;
221 }
222 stats_extra->nchunks = le64dec(cse.nchunks);
223 stats_extra->s_len = le64dec(cse.s_len);
224 stats_extra->s_zlen = le64dec(cse.s_zlen);
225
226 /* Read the chunk structures. */
227 for (; numchunks != 0; numchunks--) {
228 /* Set p to point at the struct chunkdata. */
229 if (statstape)
230 p = &ps->d;
231
232 /* Read the file one record at a time... */
233 if (fread(&che, sizeof(che), 1, f) != 1) {
234 warnp("fread(%s)", s);
235 goto err4;
236 }
237
238 /* ... creating struct chunkdata records... */
239 memcpy(p->hash, che.hash, 32);
240 p->len = le32dec(che.len);
241 p->zlen_flags = le32dec(che.zlen);
242 p->nrefs = le32dec(che.nrefs);
243 p->ncopies = le32dec(che.ncopies);
244
245 /* ... inserting them into the hash table... */
246 if (rwhashtab_insert(HT, p))
247 goto err4;
248
249 #if UINT32_MAX > SSIZE_MAX
250 /* ... paranoid check for number of copies... */
251 if (p->ncopies > SSIZE_MAX)
252 warn0("More than %zd copies of a chunk; "
253 "data is ok but stats may be inaccurate",
254 SSIZE_MAX);
255 #endif
256
257 /* ... and updating the statistics. */
258 chunks_stats_add(stats_unique, p->len, p->zlen_flags, 1);
259 chunks_stats_add(stats_all, p->len, p->zlen_flags,
260 (ssize_t)p->ncopies);
261
262 /* Sanity check. */
263 if ((p->len == 0) || (p->zlen_flags == 0) || (p->nrefs == 0)) {
264 warn0("on-disk directory is corrupt: %s", s);
265 goto err4;
266 }
267
268 /* Move to next record. */
269 if (statstape)
270 ps++;
271 else
272 p++;
273 }
274 if (fclose(f)) {
275 warnp("fclose(%s)", s);
276 goto err3;
277 }
278
279 /* Free string allocated by asprintf. */
280 free(s);
281
282 /* Success! */
283 return (HT);
284
285 err4:
286 fclose(f);
287 err3:
288 free(*dir);
289 err2:
290 free(s);
291 err1:
292 rwhashtab_free(HT);
293 err0:
294 /* Failure! */
295 return (NULL);
296 }
297
298 /**
299 * chunks_directory_write(cachepath, HT, stats_extra, suff):
300 * Write stats_extra statistics and the contents of the hash table ${HT} of
301 * struct chunkdata records to a new chunk directory in
302 * "${cachepath}/directory${suff}".
303 */
304 int
chunks_directory_write(const char * cachepath,RWHASHTAB * HT,struct chunkstats * stats_extra,const char * suff)305 chunks_directory_write(const char * cachepath, RWHASHTAB * HT,
306 struct chunkstats * stats_extra, const char * suff)
307 {
308 struct chunkstats_external cse;
309 FILE * f;
310 char * s;
311 int fd;
312
313 /* The caller must pass the cachepath, and a suffix to use. */
314 assert(cachepath != NULL);
315 assert(suff != NULL);
316
317 /* Construct the path to the new chunk directory. */
318 if (asprintf(&s, "%s/directory%s", cachepath, suff) == -1) {
319 warnp("asprintf");
320 goto err0;
321 }
322
323 /* Create the new chunk directory. */
324 if ((f = fopen(s, "w")) == NULL) {
325 warnp("fopen(%s)", s);
326 goto err1;
327 }
328
329 /* Write the extra files statistics. */
330 le64enc(cse.nchunks, stats_extra->nchunks);
331 le64enc(cse.s_len, stats_extra->s_len);
332 le64enc(cse.s_zlen, stats_extra->s_zlen);
333 if (fwrite(&cse, sizeof(cse), 1, f) != 1) {
334 warnp("Error writing to chunk directory");
335 goto err2;
336 }
337
338 /* Write the hash table entries to the new chunk directory. */
339 if (rwhashtab_foreach(HT, callback_write, f))
340 goto err2;
341
342 /* Call fsync on the new chunk directory and close it. */
343 if (fflush(f)) {
344 warnp("fflush(%s)", s);
345 goto err2;
346 }
347 if ((fd = fileno(f)) == -1) {
348 warnp("fileno(%s)", s);
349 goto err2;
350 }
351 if (fsync(fd)) {
352 warnp("fsync(%s)", s);
353 goto err2;
354 }
355 if (fclose(f)) {
356 warnp("fclose(%s)", s);
357 goto err1;
358 }
359
360 /* Free string allocated by asprintf. */
361 free(s);
362
363 /* Success! */
364 return (0);
365
366 err2:
367 fclose(f);
368 err1:
369 free(s);
370 err0:
371 /* Failure! */
372 return (-1);
373 }
374
/**
 * chunks_directory_exists(cachepath):
 * Return 1 if the /directory file exists within ${cachepath}, 0 if it does
 * not, or -1 if there is an error.  A warning is printed in the error case.
 */
int
chunks_directory_exists(const char * cachepath)
{
	char * directory_filename;
	struct stat sb;
	int rc;

	/*
	 * Prepare filename.  Warn on failure, as the other functions in this
	 * file do, so that a -1 return is never silent.
	 */
	if (asprintf(&directory_filename, "%s/directory", cachepath) == -1) {
		warnp("asprintf");
		rc = -1;
		goto done;
	}

	/* Check if file exists. */
	if (stat(directory_filename, &sb) == 0) {
		/* File exists. */
		rc = 1;
	} else if (errno == ENOENT) {
		/* File does not exist. */
		rc = 0;
	} else {
		/* Other error. */
		warnp("stat(%s)", directory_filename);
		rc = -1;
	}

	/* Clean up memory. */
	free(directory_filename);

done:
	/* Return result code. */
	return (rc);
}
415
416 /**
417 * chunks_directory_free(htab, dir):
418 * Free the hash table ${htab} of struct chunkdata records, all of its
419 * elements, and ${dir}.
420 */
421 void
chunks_directory_free(RWHASHTAB * HT,void * dir)422 chunks_directory_free(RWHASHTAB * HT, void * dir)
423 {
424
425 /* Free records in the hash table. */
426 rwhashtab_foreach(HT, callback_free, NULL);
427
428 /* Free the hash table itself. */
429 rwhashtab_free(HT);
430
431 /* Free the records which were allocated en masse. */
432 free(dir);
433 }
434
/**
 * chunks_directory_commit(cachepath, osuff, nsuff):
 * If ${cachepath}/directory${osuff} exists, move it to
 * ${cachepath}/directory${nsuff} (replacing anything already there).
 * Return 0 on success (including when there was nothing to move) or -1 on
 * failure.
 */
int
chunks_directory_commit(const char * cachepath, const char * osuff,
    const char * nsuff)
{
	struct stat sb_new;
	struct stat sb_old;
	char * newpath;
	char * oldpath;
	int need_link = 1;
	int renamed = 0;

	/* The caller must pass the cachepath, and suffices to use. */
	assert(cachepath != NULL);
	assert(nsuff != NULL);
	assert(osuff != NULL);

	/* Construct file names: destination first, then source. */
	if (asprintf(&newpath, "%s/directory%s", cachepath, nsuff) == -1) {
		warnp("asprintf");
		goto err0;
	}
	if (asprintf(&oldpath, "%s/directory%s", cachepath, osuff) == -1) {
		warnp("asprintf");
		goto err1;
	}

	/*
	 * If the source file does not exist, the transaction was already
	 * committed from the perspective of the chunk layer; so we can free
	 * memory and return.
	 */
	if (lstat(oldpath, &sb_old) != 0) {
		if (errno == ENOENT)
			goto done;

		warnp("lstat(%s)", oldpath);
		goto err2;
	}

	/*
	 * Look at the destination.  If it is absent there is nothing to
	 * clear out of the way.  If it is already the same file as the
	 * source, we're replaying and have already linked the two paths, so
	 * we must not call link(2) again (it would fail with EEXIST).
	 * Otherwise it is a stale file which has to be removed first.
	 */
	if (lstat(newpath, &sb_new) != 0) {
		if (errno != ENOENT) {
			warnp("lstat(%s)", newpath);
			goto err2;
		}
	} else if (sb_new.st_ino == sb_old.st_ino) {
		need_link = 0;
	} else if (unlink(newpath) != 0) {
		warnp("unlink(%s)", newpath);
		goto err2;
	}

	/*
	 * We want to move the source to the destination in a crash-proof
	 * way.  Unfortunately the POSIX rename(2) syscall merely guarantees
	 * that if the destination already exists then it will always exist
	 * -- not that the file being renamed will always exist.  Depending
	 * on how crash-proof the filesystem is, that second requirement
	 * might not be satisfied.
	 *
	 * Ideally we would like to solve this problem by creating a hard
	 * link, syncing the directory, then unlinking the old file; but we
	 * might be running on a filesystem/OS which doesn't support hard
	 * links (e.g., FAT32).
	 *
	 * If the link(2) call fails with ENOSYS (sensible failure code for
	 * not supporting hard links) or EPERM (Linux's idea of a joke?), we
	 * fall back to using rename(2) instead of link/sync/unlink.
	 */
	if (need_link && (link(oldpath, newpath) != 0)) {
		if ((errno != ENOSYS) && (errno != EPERM)) {
			warnp("link(%s, %s)", oldpath, newpath);
			goto err2;
		}

		/* Use rename(2) instead. */
		if (rename(oldpath, newpath) != 0) {
			warnp("rename(%s, %s)", oldpath, newpath);
			goto err2;
		}
		renamed = 1;
	}

	/*
	 * Unless rename(2) already disposed of the source name, both names
	 * now refer to the file: flush the directory so that the new name is
	 * durable, and only then remove the old name.
	 */
	if (!renamed) {
		/* Make sure ${cachepath} is flushed to disk. */
		if (dirutil_fsyncdir(cachepath))
			goto err2;

		/* Remove the source name. */
		if (unlink(oldpath) != 0) {
			warnp("unlink(%s)", oldpath);
			goto err2;
		}
	}

	/* Finally, sync the directory one last time. */
	if (dirutil_fsyncdir(cachepath))
		goto err2;

done:
	free(oldpath);
	free(newpath);

	/* Success! */
	return (0);

err2:
	free(oldpath);
err1:
	free(newpath);
err0:
	/* Failure! */
	return (-1);
}
565