1 #include "bsdtar_platform.h"
2 
3 #include <sys/stat.h>
4 #include <sys/types.h>
5 
6 #include <assert.h>
7 #include <errno.h>
8 #include <inttypes.h>
9 #include <limits.h>
10 #include <stddef.h>
11 #include <stdint.h>
12 #include <stdio.h>
13 #include <stdlib.h>
14 #include <string.h>
15 #include <unistd.h>
16 
17 #include "ctassert.h"
18 #include "dirutil.h"
19 #include "rwhashtab.h"
20 #include "sysendian.h"
21 #include "warnp.h"
22 
23 #include "chunks_internal.h"
24 
/*
 * On-disk extra data statistics structure; integers are little-endian.
 * One such record is stored at the very start of the directory file, ahead
 * of the chunk metadata records.
 */
struct chunkstats_external {
	uint8_t nchunks[8];	/* Number of files. */
	uint8_t s_len[8];	/* Sum of file lengths. */
	uint8_t s_zlen[8];	/* Sum of compressed lengths. */
};
/* The on-disk format relies on this structure being exactly 24 bytes. */
CTASSERT(sizeof(struct chunkstats_external) == 24);
32 
/*
 * On-disk chunk metadata structure; integers are little-endian.  The
 * directory file holds zero or more of these records, back to back, after
 * the statistics header.
 */
struct chunkdata_external {
	uint8_t hash[32];	/* HMAC of chunk. */
	uint8_t len[4];		/* Length of chunk. */
	uint8_t zlen[4];	/* Compressed length of chunk. */
	uint8_t nrefs[4];	/* Number of existing tapes using this. */
	uint8_t ncopies[4];	/* Number of copies of this chunk. */
};
/* The on-disk format relies on this structure being exactly 48 bytes. */
CTASSERT(sizeof(struct chunkdata_external) == 48);
42 
/* Callbacks handed to rwhashtab_foreach(); defined below. */
static int callback_write(void * rec, void * cookie);
static int callback_free(void * rec, void * cookie);
45 
46 /**
47  * callback_write(rec, cookie):
48  * Convert chunkdata record ${rec} into a struct chunkdata_external and
49  * write it to the FILE * ${cookie}; but don't write entries with nrefs == 0.
50  */
51 static int
callback_write(void * rec,void * cookie)52 callback_write(void * rec, void * cookie)
53 {
54 	struct chunkdata_external che;
55 	struct chunkdata * ch = rec;
56 	FILE * f = cookie;
57 
58 	/* If nrefs == 0, return without writing anything. */
59 	if (ch->nrefs == 0)
60 		return (0);
61 
62 	/* Convert to on-disk format. */
63 	memcpy(che.hash, ch->hash, 32);
64 	le32enc(che.len, ch->len);
65 	le32enc(che.zlen, ch->zlen_flags & CHDATA_ZLEN);
66 	le32enc(che.nrefs, ch->nrefs);
67 	le32enc(che.ncopies, ch->ncopies);
68 
69 	/* Write. */
70 	if (fwrite(&che, sizeof(che), 1, f) != 1) {
71 		warnp("Error writing to chunk directory");
72 		return (-1);
73 	}
74 
75 	/* Success! */
76 	return (0);
77 }
78 
79 /**
80  * callback_free(rec, cookie):
81  * If the chunkdata record ${rec} was allocated via malloc(3), free it.
82  */
83 static int
callback_free(void * rec,void * cookie)84 callback_free(void * rec, void * cookie)
85 {
86 	struct chunkdata * ch = rec;
87 
88 	(void)cookie;	/* UNUSED */
89 
90 	if (ch->zlen_flags & CHDATA_MALLOC)
91 		free(rec);
92 
93 	return (0);
94 }
95 
96 /**
97  * chunks_directory_read(cachepath, dir, stats_unique, stats_all, stats_extra,
98  *     mustexist, statstape):
99  * Read stats_extra statistics (statistics on non-chunks which are stored)
100  * and the chunk directory (if present) from "${cachepath}/directory" into
101  * memory allocated and assigned to ${*dir}; and return a hash table
102  * populated with struct chunkdata records.  Populate stats_all with
103  * statistics for all the chunks listed in the directory (counting
104  * multiplicity) and populate stats_unique with statistics reflecting the
105  * unique chunks.  If ${mustexist}, error out if the directory does not exist.
106  * If ${statstape}, allocate struct chunkdata_statstape records instead.
107  */
108 RWHASHTAB *
chunks_directory_read(const char * cachepath,void ** dir,struct chunkstats * stats_unique,struct chunkstats * stats_all,struct chunkstats * stats_extra,int mustexist,int statstape)109 chunks_directory_read(const char * cachepath, void ** dir,
110     struct chunkstats * stats_unique, struct chunkstats * stats_all,
111     struct chunkstats * stats_extra, int mustexist, int statstape)
112 {
113 	struct chunkdata_external che;
114 	struct chunkstats_external cse;
115 	struct stat sb;
116 	RWHASHTAB * HT;
117 	char * s;
118 	struct chunkdata * p = NULL;
119 	struct chunkdata_statstape * ps = NULL;
120 	FILE * f;
121 	size_t numchunks;
122 
123 	/* Zero statistics. */
124 	chunks_stats_zero(stats_unique);
125 	chunks_stats_zero(stats_all);
126 	chunks_stats_zero(stats_extra);
127 
128 	/* Create a hash table to hold the chunkdata structures. */
129 	HT = rwhashtab_init(offsetof(struct chunkdata, hash), 32);
130 	if (HT == NULL)
131 		goto err0;
132 
133 	/* Bail if we're not using a cache directory. */
134 	if (cachepath == NULL) {
135 		*dir = NULL;
136 		return (HT);
137 	}
138 
139 	/* Construct the string "${cachepath}/directory". */
140 	if (asprintf(&s, "%s/directory", cachepath) == -1) {
141 		warnp("asprintf");
142 		goto err1;
143 	}
144 	if (stat(s, &sb)) {
145 		/* Could not stat ${cachepath}/directory.  Error? */
146 		if (errno != ENOENT) {
147 			warnp("stat(%s)", s);
148 			goto err2;
149 		}
150 
151 		/* The directory doesn't exist; complain if mustexist != 0. */
152 		if (mustexist) {
153 			warn0("Error reading cache directory from %s",
154 			    cachepath);
155 			goto err2;
156 		}
157 
158 		/*
159 		 * ${cachepath}/directory does not exist; set ${*dir} to NULL
160 		 * and return the empty hash table.
161 		 */
162 		free(s);
163 		*dir = NULL;
164 		return (HT);
165 	}
166 
167 	/*
168 	 * Make sure the directory file isn't too large or too small, in
169 	 * order to avoid any possibility of integer overflows.
170 	 */
171 	if ((sb.st_size < 0) ||
172 	    ((sizeof(off_t) > sizeof(size_t)) && (sb.st_size > SIZE_MAX))) {
173 		warn0("on-disk directory has insane size (%jd bytes): %s",
174 		    (intmax_t)(sb.st_size), s);
175 		goto err2;
176 	}
177 
178 	/* Make sure the number of chunks is an integer. */
179 	if (((size_t)sb.st_size - sizeof(struct chunkstats_external)) %
180 	    (sizeof(struct chunkdata_external))) {
181 		warn0("on-disk directory is corrupt: %s", s);
182 		goto err2;
183 	}
184 
185 	/* Compute the number of on-disk chunks. */
186 	numchunks =
187 	    ((size_t)sb.st_size - sizeof(struct chunkstats_external)) /
188 	    sizeof(struct chunkdata_external);
189 
190 	/* Make sure we don't get an integer overflow. */
191 	if (numchunks >= SIZE_MAX / sizeof(struct chunkdata_statstape)) {
192 		warn0("on-disk directory is too large: %s", s);
193 		goto err2;
194 	}
195 
196 	/*
197 	 * Allocate memory to ${*dir} large enough to store a struct
198 	 * chunkdata or struct chunkdata_statstape for each struct
199 	 * chunkdata_external in ${cachepath}/directory.
200 	 */
201 	if (statstape) {
202 		ps = malloc(numchunks * sizeof(struct chunkdata_statstape));
203 		*dir = ps;
204 	} else {
205 		p = malloc(numchunks * sizeof(struct chunkdata));
206 		*dir = p;
207 	}
208 	if (*dir == NULL)
209 		goto err2;
210 
211 	/* Open the directory file. */
212 	if ((f = fopen(s, "r")) == NULL) {
213 		warnp("fopen(%s)", s);
214 		goto err3;
215 	}
216 
217 	/* Read the extra files statistics. */
218 	if (fread(&cse, sizeof(cse), 1, f) != 1) {
219 		warnp("fread(%s)", s);
220 		goto err4;
221 	}
222 	stats_extra->nchunks = le64dec(cse.nchunks);
223 	stats_extra->s_len = le64dec(cse.s_len);
224 	stats_extra->s_zlen = le64dec(cse.s_zlen);
225 
226 	/* Read the chunk structures. */
227 	for (; numchunks != 0; numchunks--) {
228 		/* Set p to point at the struct chunkdata. */
229 		if (statstape)
230 			p = &ps->d;
231 
232 		/* Read the file one record at a time... */
233 		if (fread(&che, sizeof(che), 1, f) != 1) {
234 			warnp("fread(%s)", s);
235 			goto err4;
236 		}
237 
238 		/* ... creating struct chunkdata records... */
239 		memcpy(p->hash, che.hash, 32);
240 		p->len = le32dec(che.len);
241 		p->zlen_flags = le32dec(che.zlen);
242 		p->nrefs = le32dec(che.nrefs);
243 		p->ncopies = le32dec(che.ncopies);
244 
245 		/* ... inserting them into the hash table... */
246 		if (rwhashtab_insert(HT, p))
247 			goto err4;
248 
249 #if UINT32_MAX > SSIZE_MAX
250 		/* ... paranoid check for number of copies... */
251 		if (p->ncopies > SSIZE_MAX)
252 			warn0("More than %zd copies of a chunk; "
253 			    "data is ok but stats may be inaccurate",
254 			    SSIZE_MAX);
255 #endif
256 
257 		/* ... and updating the statistics. */
258 		chunks_stats_add(stats_unique, p->len, p->zlen_flags, 1);
259 		chunks_stats_add(stats_all, p->len, p->zlen_flags,
260 		    (ssize_t)p->ncopies);
261 
262 		/* Sanity check. */
263 		if ((p->len == 0) || (p->zlen_flags == 0) || (p->nrefs == 0)) {
264 			warn0("on-disk directory is corrupt: %s", s);
265 			goto err4;
266 		}
267 
268 		/* Move to next record. */
269 		if (statstape)
270 			ps++;
271 		else
272 			p++;
273 	}
274 	if (fclose(f)) {
275 		warnp("fclose(%s)", s);
276 		goto err3;
277 	}
278 
279 	/* Free string allocated by asprintf. */
280 	free(s);
281 
282 	/* Success! */
283 	return (HT);
284 
285 err4:
286 	fclose(f);
287 err3:
288 	free(*dir);
289 err2:
290 	free(s);
291 err1:
292 	rwhashtab_free(HT);
293 err0:
294 	/* Failure! */
295 	return (NULL);
296 }
297 
298 /**
299  * chunks_directory_write(cachepath, HT, stats_extra, suff):
300  * Write stats_extra statistics and the contents of the hash table ${HT} of
301  * struct chunkdata records to a new chunk directory in
302  * "${cachepath}/directory${suff}".
303  */
304 int
chunks_directory_write(const char * cachepath,RWHASHTAB * HT,struct chunkstats * stats_extra,const char * suff)305 chunks_directory_write(const char * cachepath, RWHASHTAB * HT,
306     struct chunkstats * stats_extra, const char * suff)
307 {
308 	struct chunkstats_external cse;
309 	FILE * f;
310 	char * s;
311 	int fd;
312 
313 	/* The caller must pass the cachepath, and a suffix to use. */
314 	assert(cachepath != NULL);
315 	assert(suff != NULL);
316 
317 	/* Construct the path to the new chunk directory. */
318 	if (asprintf(&s, "%s/directory%s", cachepath, suff) == -1) {
319 		warnp("asprintf");
320 		goto err0;
321 	}
322 
323 	/* Create the new chunk directory. */
324 	if ((f = fopen(s, "w")) == NULL) {
325 		warnp("fopen(%s)", s);
326 		goto err1;
327 	}
328 
329 	/* Write the extra files statistics. */
330 	le64enc(cse.nchunks, stats_extra->nchunks);
331 	le64enc(cse.s_len, stats_extra->s_len);
332 	le64enc(cse.s_zlen, stats_extra->s_zlen);
333 	if (fwrite(&cse, sizeof(cse), 1, f) != 1) {
334 		warnp("Error writing to chunk directory");
335 		goto err2;
336 	}
337 
338 	/* Write the hash table entries to the new chunk directory. */
339 	if (rwhashtab_foreach(HT, callback_write, f))
340 		goto err2;
341 
342 	/* Call fsync on the new chunk directory and close it. */
343 	if (fflush(f)) {
344 		warnp("fflush(%s)", s);
345 		goto err2;
346 	}
347 	if ((fd = fileno(f)) == -1) {
348 		warnp("fileno(%s)", s);
349 		goto err2;
350 	}
351 	if (fsync(fd)) {
352 		warnp("fsync(%s)", s);
353 		goto err2;
354 	}
355 	if (fclose(f)) {
356 		warnp("fclose(%s)", s);
357 		goto err1;
358 	}
359 
360 	/* Free string allocated by asprintf. */
361 	free(s);
362 
363 	/* Success! */
364 	return (0);
365 
366 err2:
367 	fclose(f);
368 err1:
369 	free(s);
370 err0:
371 	/* Failure! */
372 	return (-1);
373 }
374 
/**
 * chunks_directory_exists(cachepath):
 * Return 1 if the /directory file exists within ${cachepath}, 0 if it does
 * not, or -1 if there is an error.  A warning is printed in the error case.
 */
int
chunks_directory_exists(const char * cachepath)
{
	char * directory_filename;
	struct stat sb;
	int rc;

	/*
	 * Prepare filename.  Report a failure here in the same way as the
	 * other functions in this file do, rather than failing silently.
	 */
	if (asprintf(&directory_filename, "%s/directory", cachepath) == -1) {
		warnp("asprintf");
		return (-1);
	}

	/* Check if file exists. */
	if (stat(directory_filename, &sb) == 0) {
		/* File exists. */
		rc = 1;
	} else if (errno == ENOENT) {
		/* File does not exist. */
		rc = 0;
	} else {
		/* Other error. */
		warnp("stat(%s)", directory_filename);
		rc = -1;
	}

	/* Clean up memory. */
	free(directory_filename);

	/* Return result code. */
	return (rc);
}
415 
416 /**
417  * chunks_directory_free(htab, dir):
418  * Free the hash table ${htab} of struct chunkdata records, all of its
419  * elements, and ${dir}.
420  */
421 void
chunks_directory_free(RWHASHTAB * HT,void * dir)422 chunks_directory_free(RWHASHTAB * HT, void * dir)
423 {
424 
425 	/* Free records in the hash table. */
426 	rwhashtab_foreach(HT, callback_free, NULL);
427 
428 	/* Free the hash table itself. */
429 	rwhashtab_free(HT);
430 
431 	/* Free the records which were allocated en masse. */
432 	free(dir);
433 }
434 
/**
 * chunks_directory_commit(cachepath, osuff, nsuff):
 * If ${cachepath}/directory${osuff} exists, move it to
 * ${cachepath}/directory${nsuff} (replacing anything already there).
 * Return 0 on success (including when there was nothing to move) or -1 on
 * failure.
 */
int
chunks_directory_commit(const char * cachepath, const char * osuff,
    const char * nsuff)
{
	struct stat st_new;
	struct stat st_old;
	char * newpath;
	char * oldpath;
	int need_link = 1;	/* Must we still link old -> new? */
	int need_unlink = 1;	/* Must we still remove the old name? */
	int rc = -1;

	/* The caller must pass the cachepath, and suffixes to use. */
	assert(cachepath != NULL);
	assert(nsuff != NULL);
	assert(osuff != NULL);

	/* Construct the destination and source file names. */
	if (asprintf(&newpath, "%s/directory%s", cachepath, nsuff) == -1) {
		warnp("asprintf");
		return (-1);
	}
	if (asprintf(&oldpath, "%s/directory%s", cachepath, osuff) == -1) {
		warnp("asprintf");
		free(newpath);
		return (-1);
	}

	/*
	 * If the source file does not exist, the transaction was already
	 * committed from the perspective of the chunk layer; there is
	 * nothing left to do except free memory and report success.
	 */
	if (lstat(oldpath, &st_old) != 0) {
		if (errno == ENOENT)
			rc = 0;
		else
			warnp("lstat(%s)", oldpath);
		goto out;
	}

	/*
	 * Look at the destination.  If it exists and has a different inode
	 * number from the source, remove it so that it can be replaced.  If
	 * it has the same inode number, we're replaying and the two paths
	 * are already linked; skip the link step, as otherwise link(2)
	 * would fail with EEXIST.
	 */
	if (lstat(newpath, &st_new) != 0) {
		if (errno != ENOENT) {
			warnp("lstat(%s)", newpath);
			goto out;
		}
	} else if (st_new.st_ino == st_old.st_ino) {
		need_link = 0;
	} else if (unlink(newpath) != 0) {
		warnp("unlink(%s)", newpath);
		goto out;
	}

	/*
	 * We want to move the source to the destination in a crash-proof
	 * way.  Unfortunately the POSIX rename(2) syscall merely guarantees
	 * that if the destination already exists then it will always exist
	 * -- not that the file being renamed will always exist.  Depending
	 * on how crash-proof the filesystem is, that second requirement
	 * might not be satisfied.
	 *
	 * Ideally we would like to solve this problem by creating a hard
	 * link, syncing the directory, then unlinking the old file; but we
	 * might be running on a filesystem/OS which doesn't support hard
	 * links (e.g., FAT32).
	 *
	 * If the link(2) call fails with ENOSYS (sensible failure code for
	 * not supporting hard links) or EPERM (Linux's idea of a joke?), we
	 * fall back to using rename(2) instead of link/sync/unlink.
	 */
	if (need_link && (link(oldpath, newpath) != 0)) {
		if ((errno != ENOSYS) && (errno != EPERM)) {
			warnp("link(%s, %s)", oldpath, newpath);
			goto out;
		}

		/* Use rename(2) instead; this also removes the old name. */
		if (rename(oldpath, newpath) != 0) {
			warnp("rename(%s, %s)", oldpath, newpath);
			goto out;
		}
		need_unlink = 0;
	}

	/* With both names linked, sync and then drop the old name. */
	if (need_unlink) {
		/* Make sure ${cachepath} is flushed to disk. */
		if (dirutil_fsyncdir(cachepath))
			goto out;

		/* Remove the source file name. */
		if (unlink(oldpath) != 0) {
			warnp("unlink(%s)", oldpath);
			goto out;
		}
	}

	/* Finally, sync the directory one last time. */
	if (dirutil_fsyncdir(cachepath))
		goto out;

	/* Success! */
	rc = 0;

out:
	free(oldpath);
	free(newpath);

	/* Report success or failure. */
	return (rc);
}
565