1 // Copyright (C) 2009-2019 Joel Rosdahl
2 //
3 // This program is free software; you can redistribute it and/or modify it
4 // under the terms of the GNU General Public License as published by the Free
5 // Software Foundation; either version 3 of the License, or (at your option)
6 // any later version.
7 //
8 // This program is distributed in the hope that it will be useful, but WITHOUT
9 // ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
10 // FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
11 // more details.
12 //
13 // You should have received a copy of the GNU General Public License along with
14 // this program; if not, write to the Free Software Foundation, Inc., 51
15 // Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
16 
17 #include "ccache.h"
18 #include "hashtable_itr.h"
19 #include "hashutil.h"
20 #include "manifest.h"
21 #include "murmurhashneutral2.h"
22 
23 #include <zlib.h>
24 
25 // Sketchy specification of the manifest disk format:
26 //
27 // <magic>         magic number                        (4 bytes)
28 // <version>       file format version                 (1 byte unsigned int)
29 // <hash_size>     size of the hash fields (in bytes)  (1 byte unsigned int)
30 // <reserved>      reserved for future use             (2 bytes)
31 // ----------------------------------------------------------------------------
32 // <n>             number of include file paths        (4 bytes unsigned int)
33 // <path_0>        path to include file                (NUL-terminated string,
34 // ...                                                  at most 1024 bytes)
35 // <path_n-1>
36 // ----------------------------------------------------------------------------
37 // <n>             number of include file hash entries (4 bytes unsigned int)
38 // <index[0]>      index of include file path          (4 bytes unsigned int)
39 // <hash[0]>       hash of include file                (<hash_size> bytes)
40 // <size[0]>       size of include file                (4 bytes unsigned int)
41 // <mtime[0]>      mtime of include file               (8 bytes signed int)
42 // <ctime[0]>      ctime of include file               (8 bytes signed int)
43 // ...
44 // <index[n-1]>
45 // <hash[n-1]>
46 // <size[n-1]>
47 // <mtime[n-1]>
48 // <ctime[n-1]>
49 // ----------------------------------------------------------------------------
50 // <n>             number of object name entries       (4 bytes unsigned int)
51 // <m[0]>          number of include file hash indexes (4 bytes unsigned int)
52 // <index[0][0]>   include file hash index             (4 bytes unsigned int)
53 // ...
54 // <index[0][m[0]-1]>
55 // <hash[0]>       hash part of object name            (<hash_size> bytes)
56 // <size[0]>       size part of object name            (4 bytes unsigned int)
57 // ...
58 // <m[n-1]>        number of include file hash indexes
59 // <index[n-1][0]> include file hash index
60 // ...
// <index[n-1][m[n-1]-1]>
62 // <hash[n-1]>
63 // <size[n-1]>
64 
// Magic number identifying a manifest file. It is written most significant
// byte first, which makes the first four bytes the ASCII characters "cCmF".
static const uint32_t MAGIC = 0x63436d46U;
// manifest_put discards all existing entries when a manifest read from disk
// holds more than this many object entries.
static const uint32_t MAX_MANIFEST_ENTRIES = 100;
// manifest_put discards all existing entries when a manifest read from disk
// holds more than this many file_info entries.
static const uint32_t MAX_MANIFEST_FILE_INFO_ENTRIES = 10000;
68 
// Compile-time assertion usable where a statement is allowed: when e is false
// (0), the enumerator's initializer becomes a division by zero, which is not a
// valid constant expression, so compilation fails.
#define ccache_static_assert(e) \
	do { enum { ccache_static_assert__ = 1/(e) }; } while (false)
71 
// Recorded state of one include file. hash_from_file_info hashes the raw
// bytes of this struct and asserts that its size is exactly 40 bytes, so the
// field layout must stay free of padding.
struct file_info {
	// Index into the manifest's files array (the path of the referenced file).
	uint32_t index;
	// Hash of referenced file.
	uint8_t hash[16];
	// Size of referenced file.
	uint32_t size;
	// mtime of referenced file (-1 when no timestamp was recorded).
	int64_t mtime;
	// ctime of referenced file (-1 when no timestamp was recorded).
	int64_t ctime;
};
84 
// One object entry of a manifest: the hash of an object together with the
// include file states (as indexes into the manifest's file_infos array) that
// were recorded for it.
struct object {
	// Number of entries in file_info_indexes.
	uint32_t n_file_info_indexes;
	// Indexes to file_infos. Heap-allocated; released by free_manifest.
	uint32_t *file_info_indexes;
	// Hash of the object itself.
	struct file_hash hash;
};
93 
// In-memory representation of a manifest file. All arrays are heap-allocated
// and released by free_manifest.
struct manifest {
	// Version of decoded file.
	uint8_t version;

	// Reserved for future use.
	uint16_t reserved;

	// Size of hash fields (in bytes).
	uint8_t hash_size;

	// Referenced include files: n_files heap-allocated path strings.
	uint32_t n_files;
	char **files;

	// Information about referenced include files.
	uint32_t n_file_infos;
	struct file_info *file_infos;

	// Object names plus references to include file hashes.
	uint32_t n_objects;
	struct object *objects;
};
116 
// Subset of a file's stat() result. verify_object caches these per path in its
// stated_files table so that a successfully stat'ed path is not stat'ed again.
struct file_stats {
	// File size (st_size).
	uint32_t size;
	// Modification time (st_mtime).
	int64_t mtime;
	// Status change time (st_ctime).
	int64_t ctime;
};
122 
123 static unsigned int
hash_from_file_info(void * key)124 hash_from_file_info(void *key)
125 {
126 	ccache_static_assert(sizeof(struct file_info) == 40); // No padding.
127 	return murmurhashneutral2(key, sizeof(struct file_info), 0);
128 }
129 
130 static int
file_infos_equal(void * key1,void * key2)131 file_infos_equal(void *key1, void *key2)
132 {
133 	struct file_info *fi1 = (struct file_info *)key1;
134 	struct file_info *fi2 = (struct file_info *)key2;
135 	return fi1->index == fi2->index
136 	       && memcmp(fi1->hash, fi2->hash, 16) == 0
137 	       && fi1->size == fi2->size
138 	       && fi1->mtime == fi2->mtime
139 	       && fi1->ctime == fi2->ctime;
140 }
141 
142 static void
free_manifest(struct manifest * mf)143 free_manifest(struct manifest *mf)
144 {
145 	for (uint32_t i = 0; i < mf->n_files; i++) {
146 		free(mf->files[i]);
147 	}
148 	free(mf->files);
149 	free(mf->file_infos);
150 	for (uint32_t i = 0; i < mf->n_objects; i++) {
151 		free(mf->objects[i].file_info_indexes);
152 	}
153 	free(mf->objects);
154 	free(mf);
155 }
156 
// Read one byte from the gzFile named f in the enclosing scope and store it in
// var. Jumps to the enclosing function's "error" label, leaving var untouched,
// if gzgetc reports EOF.
#define READ_BYTE(var) \
	do { \
		int ch_ = gzgetc(f); \
		if (ch_ == EOF) { \
			goto error; \
		} \
		(var) = ch_ & 0xFF; \
	} while (false)
165 
// Read a size-byte unsigned integer, most significant byte first, from the
// gzFile named f in the enclosing scope and assign it to var. The value is
// accumulated in a uint64_t, and var is assigned only after all bytes have been
// read; on EOF the macro jumps to the enclosing "error" label instead.
#define READ_INT(size, var) \
	do { \
		uint64_t u_ = 0; \
		for (size_t i_ = 0; i_ < (size); i_++) { \
			int ch_ = gzgetc(f); \
			if (ch_ == EOF) { \
				goto error; \
			} \
			u_ <<= 8; \
			u_ |= ch_ & 0xFF; \
		} \
		(var) = u_; \
	} while (false)
179 
// Read a NUL-terminated string from the gzFile named f in the enclosing scope
// and assign a heap-allocated copy (x_strdup) to var. Jumps to the enclosing
// "error" label on EOF, or when no NUL terminator is found within the
// 1024-byte buffer (so a string can be at most 1023 bytes plus the NUL).
#define READ_STR(var) \
	do { \
		char buf_[1024]; \
		size_t i_; \
		for (i_ = 0; i_ < sizeof(buf_); i_++) { \
			int ch_ = gzgetc(f); \
			if (ch_ == EOF) { \
				goto error; \
			} \
			buf_[i_] = ch_; \
			if (ch_ == '\0') { \
				break; \
			} \
		} \
		if (i_ == sizeof(buf_)) { \
			goto error; \
		} \
		(var) = x_strdup(buf_); \
	} while (false)
199 
// Read n raw bytes from the gzFile named f in the enclosing scope into
// var[0..n-1]. Jumps to the enclosing "error" label on EOF; bytes read before
// that point have already been stored.
#define READ_BYTES(n, var) \
	do { \
		for (size_t i_ = 0; i_ < (n); i_++) { \
			int ch_ = gzgetc(f); \
			if (ch_ == EOF) { \
				goto error; \
			} \
			(var)[i_] = ch_; \
		} \
	} while (false)
210 
211 static struct manifest *
create_empty_manifest(void)212 create_empty_manifest(void)
213 {
214 	struct manifest *mf = x_malloc(sizeof(*mf));
215 	mf->hash_size = 16;
216 	mf->n_files = 0;
217 	mf->files = NULL;
218 	mf->n_file_infos = 0;
219 	mf->file_infos = NULL;
220 	mf->n_objects = 0;
221 	mf->objects = NULL;
222 
223 	return mf;
224 }
225 
226 static struct manifest *
read_manifest(gzFile f,char ** errmsg)227 read_manifest(gzFile f, char **errmsg)
228 {
229 	*errmsg = NULL;
230 	struct manifest *mf = create_empty_manifest();
231 
232 	uint32_t magic;
233 	READ_INT(4, magic);
234 	if (magic != MAGIC) {
235 		*errmsg = format("Manifest file has bad magic number %u", magic);
236 		goto error;
237 	}
238 
239 	READ_BYTE(mf->version);
240 	if (mf->version != MANIFEST_VERSION) {
241 		*errmsg = format(
242 			"Unknown manifest version (actual %u, expected %u)",
243 			mf->version,
244 			MANIFEST_VERSION);
245 		goto error;
246 	}
247 
248 	READ_BYTE(mf->hash_size);
249 	if (mf->hash_size != 16) {
250 		// Temporary measure until we support different hash algorithms.
251 		*errmsg =
252 			format("Manifest file has unsupported hash size %u", mf->hash_size);
253 		goto error;
254 	}
255 
256 	READ_INT(2, mf->reserved);
257 
258 	READ_INT(4, mf->n_files);
259 	mf->files = x_calloc(mf->n_files, sizeof(*mf->files));
260 	for (uint32_t i = 0; i < mf->n_files; i++) {
261 		READ_STR(mf->files[i]);
262 	}
263 
264 	READ_INT(4, mf->n_file_infos);
265 	mf->file_infos = x_calloc(mf->n_file_infos, sizeof(*mf->file_infos));
266 	for (uint32_t i = 0; i < mf->n_file_infos; i++) {
267 		READ_INT(4, mf->file_infos[i].index);
268 		READ_BYTES(mf->hash_size, mf->file_infos[i].hash);
269 		READ_INT(4, mf->file_infos[i].size);
270 		READ_INT(8, mf->file_infos[i].mtime);
271 		READ_INT(8, mf->file_infos[i].ctime);
272 	}
273 
274 	READ_INT(4, mf->n_objects);
275 	mf->objects = x_calloc(mf->n_objects, sizeof(*mf->objects));
276 	for (uint32_t i = 0; i < mf->n_objects; i++) {
277 		READ_INT(4, mf->objects[i].n_file_info_indexes);
278 		mf->objects[i].file_info_indexes =
279 			x_calloc(mf->objects[i].n_file_info_indexes,
280 			         sizeof(*mf->objects[i].file_info_indexes));
281 		for (uint32_t j = 0; j < mf->objects[i].n_file_info_indexes; j++) {
282 			READ_INT(4, mf->objects[i].file_info_indexes[j]);
283 		}
284 		READ_BYTES(mf->hash_size, mf->objects[i].hash.hash);
285 		READ_INT(4, mf->objects[i].hash.size);
286 	}
287 
288 	return mf;
289 
290 error:
291 	if (!*errmsg) {
292 		*errmsg = x_strdup("Corrupt manifest file");
293 	}
294 	free_manifest(mf);
295 	return NULL;
296 }
297 
// Write the low size bytes of var, most significant byte first, to the gzFile
// named f in the enclosing scope. Jumps to the enclosing function's "error"
// label if gzputc fails.
#define WRITE_INT(size, var) \
	do { \
		uint64_t u_ = (var); \
		uint8_t ch_; \
		size_t i_; \
		for (i_ = 0; i_ < (size); i_++) { \
			ch_ = (u_ >> (8 * ((size) - i_ - 1))); \
			if (gzputc(f, ch_) == EOF) { \
				goto error; \
			} \
		} \
	} while (false)
310 
// Write the string var followed by a NUL terminator to the gzFile named f in
// the enclosing scope. Jumps to the enclosing "error" label if either write
// fails.
#define WRITE_STR(var) \
	do { \
		if (gzputs(f, var) == EOF || gzputc(f, '\0') == EOF) { \
			goto error; \
		} \
	} while (false)
317 
// Write the n bytes var[0..n-1] to the gzFile named f in the enclosing scope.
// Jumps to the enclosing "error" label if gzputc fails.
#define WRITE_BYTES(n, var) \
	do { \
		size_t i_; \
		for (i_ = 0; i_ < (n); i_++) { \
			if (gzputc(f, (var)[i_]) == EOF) { \
				goto error; \
			} \
		} \
	} while (false)
327 
328 static int
write_manifest(gzFile f,const struct manifest * mf)329 write_manifest(gzFile f, const struct manifest *mf)
330 {
331 	WRITE_INT(4, MAGIC);
332 	WRITE_INT(1, MANIFEST_VERSION);
333 	WRITE_INT(1, 16);
334 	WRITE_INT(2, 0);
335 
336 	WRITE_INT(4, mf->n_files);
337 	for (uint32_t i = 0; i < mf->n_files; i++) {
338 		WRITE_STR(mf->files[i]);
339 	}
340 
341 	WRITE_INT(4, mf->n_file_infos);
342 	for (uint32_t i = 0; i < mf->n_file_infos; i++) {
343 		WRITE_INT(4, mf->file_infos[i].index);
344 		WRITE_BYTES(mf->hash_size, mf->file_infos[i].hash);
345 		WRITE_INT(4, mf->file_infos[i].size);
346 		WRITE_INT(8, mf->file_infos[i].mtime);
347 		WRITE_INT(8, mf->file_infos[i].ctime);
348 	}
349 
350 	WRITE_INT(4, mf->n_objects);
351 	for (uint32_t i = 0; i < mf->n_objects; i++) {
352 		WRITE_INT(4, mf->objects[i].n_file_info_indexes);
353 		for (uint32_t j = 0; j < mf->objects[i].n_file_info_indexes; j++) {
354 			WRITE_INT(4, mf->objects[i].file_info_indexes[j]);
355 		}
356 		WRITE_BYTES(mf->hash_size, mf->objects[i].hash.hash);
357 		WRITE_INT(4, mf->objects[i].hash.size);
358 	}
359 
360 	return 1;
361 
362 error:
363 	cc_log("Error writing to manifest file");
364 	return 0;
365 }
366 
367 static int
verify_object(struct conf * conf,struct manifest * mf,struct object * obj,struct hashtable * stated_files,struct hashtable * hashed_files)368 verify_object(struct conf *conf, struct manifest *mf, struct object *obj,
369               struct hashtable *stated_files, struct hashtable *hashed_files)
370 {
371 	for (uint32_t i = 0; i < obj->n_file_info_indexes; i++) {
372 		struct file_info *fi = &mf->file_infos[obj->file_info_indexes[i]];
373 		char *path = mf->files[fi->index];
374 		struct file_stats *st = hashtable_search(stated_files, path);
375 		if (!st) {
376 			struct stat file_stat;
377 			if (x_stat(path, &file_stat) != 0) {
378 				return 0;
379 			}
380 			st = x_malloc(sizeof(*st));
381 			st->size = file_stat.st_size;
382 			st->mtime = file_stat.st_mtime;
383 			st->ctime = file_stat.st_ctime;
384 			hashtable_insert(stated_files, x_strdup(path), st);
385 		}
386 
387 		if (fi->size != st->size) {
388 			return 0;
389 		}
390 
391 		// Clang stores the mtime of the included files in the precompiled header,
392 		// and will error out if that header is later used without rebuilding.
393 		if ((guessed_compiler == GUESSED_CLANG
394 		     || guessed_compiler == GUESSED_UNKNOWN)
395 		    && output_is_precompiled_header
396 		    && fi->mtime != st->mtime) {
397 			cc_log("Precompiled header includes %s, which has a new mtime", path);
398 			return 0;
399 		}
400 
401 		if (conf->sloppiness & SLOPPY_FILE_STAT_MATCHES) {
402 			if (!(conf->sloppiness & SLOPPY_FILE_STAT_MATCHES_CTIME)) {
403 				if (fi->mtime == st->mtime && fi->ctime == st->ctime) {
404 					cc_log("mtime/ctime hit for %s", path);
405 					continue;
406 				} else {
407 					cc_log("mtime/ctime miss for %s", path);
408 				}
409 			} else {
410 				if (fi->mtime == st->mtime) {
411 					cc_log("mtime hit for %s", path);
412 					continue;
413 				} else {
414 					cc_log("mtime miss for %s", path);
415 				}
416 			}
417 		}
418 
419 		struct file_hash *actual = hashtable_search(hashed_files, path);
420 		if (!actual) {
421 			struct hash *hash = hash_init();
422 			int result = hash_source_code_file(conf, hash, path);
423 			if (result & HASH_SOURCE_CODE_ERROR) {
424 				cc_log("Failed hashing %s", path);
425 				hash_free(hash);
426 				return 0;
427 			}
428 			if (result & HASH_SOURCE_CODE_FOUND_TIME) {
429 				hash_free(hash);
430 				return 0;
431 			}
432 			actual = x_malloc(sizeof(*actual));
433 			hash_result_as_bytes(hash, actual->hash);
434 			actual->size = hash_input_size(hash);
435 			hashtable_insert(hashed_files, x_strdup(path), actual);
436 			hash_free(hash);
437 		}
438 		if (memcmp(fi->hash, actual->hash, mf->hash_size) != 0
439 		    || fi->size != actual->size) {
440 			return 0;
441 		}
442 	}
443 
444 	return 1;
445 }
446 
447 static struct hashtable *
create_string_index_map(char ** strings,uint32_t len)448 create_string_index_map(char **strings, uint32_t len)
449 {
450 	struct hashtable *h =
451 		create_hashtable(1000, hash_from_string, strings_equal);
452 	for (uint32_t i = 0; i < len; i++) {
453 		uint32_t *index = x_malloc(sizeof(*index));
454 		*index = i;
455 		hashtable_insert(h, x_strdup(strings[i]), index);
456 	}
457 	return h;
458 }
459 
460 static struct hashtable *
create_file_info_index_map(struct file_info * infos,uint32_t len)461 create_file_info_index_map(struct file_info *infos, uint32_t len)
462 {
463 	struct hashtable *h =
464 		create_hashtable(1000, hash_from_file_info, file_infos_equal);
465 	for (uint32_t i = 0; i < len; i++) {
466 		struct file_info *fi = x_malloc(sizeof(*fi));
467 		*fi = infos[i];
468 		uint32_t *index = x_malloc(sizeof(*index));
469 		*index = i;
470 		hashtable_insert(h, fi, index);
471 	}
472 	return h;
473 }
474 
475 static uint32_t
get_include_file_index(struct manifest * mf,char * path,struct hashtable * mf_files)476 get_include_file_index(struct manifest *mf, char *path,
477                        struct hashtable *mf_files)
478 {
479 	uint32_t *index = hashtable_search(mf_files, path);
480 	if (index) {
481 		return *index;
482 	}
483 
484 	uint32_t n = mf->n_files;
485 	mf->files = x_realloc(mf->files, (n + 1) * sizeof(*mf->files));
486 	mf->n_files++;
487 	mf->files[n] = x_strdup(path);
488 	return n;
489 }
490 
491 static uint32_t
get_file_hash_index(struct manifest * mf,char * path,struct file_hash * file_hash,struct hashtable * mf_files,struct hashtable * mf_file_infos,bool save_timestamp)492 get_file_hash_index(struct manifest *mf,
493                     char *path,
494                     struct file_hash *file_hash,
495                     struct hashtable *mf_files,
496                     struct hashtable *mf_file_infos,
497                     bool save_timestamp)
498 {
499 	struct file_info fi;
500 	fi.index = get_include_file_index(mf, path, mf_files);
501 	memcpy(fi.hash, file_hash->hash, sizeof(fi.hash));
502 	fi.size = file_hash->size;
503 
504 	// file_stat.st_{m,c}time has a resolution of 1 second, so we can cache the
505 	// file's mtime and ctime only if they're at least one second older than
506 	// time_of_compilation.
507 	//
508 	// st->ctime may be 0, so we have to check time_of_compilation against
509 	// MAX(mtime, ctime).
510 	//
511 	// ccache only reads mtime/ctime if file_stat_match sloppiness is enabled, so
512 	// mtimes/ctimes are stored as a dummy value (-1) if not enabled. This reduces
513 	// the number of file_info entries for the common case.
514 
515 	struct stat file_stat;
516 	if (save_timestamp && stat(path, &file_stat) != -1
517 	    && time_of_compilation > MAX(file_stat.st_mtime, file_stat.st_ctime)) {
518 		fi.mtime = file_stat.st_mtime;
519 		fi.ctime = file_stat.st_ctime;
520 	} else {
521 		fi.mtime = -1;
522 		fi.ctime = -1;
523 	}
524 
525 	uint32_t *fi_index = hashtable_search(mf_file_infos, &fi);
526 	if (fi_index) {
527 		return *fi_index;
528 	}
529 
530 	uint32_t n = mf->n_file_infos;
531 	mf->file_infos = x_realloc(mf->file_infos, (n + 1) * sizeof(*mf->file_infos));
532 	mf->n_file_infos++;
533 	mf->file_infos[n] = fi;
534 	return n;
535 }
536 
537 static void
add_file_info_indexes(uint32_t * indexes,uint32_t size,struct manifest * mf,struct hashtable * included_files,bool save_timestamp)538 add_file_info_indexes(uint32_t *indexes, uint32_t size,
539                       struct manifest *mf, struct hashtable *included_files,
540                       bool save_timestamp)
541 {
542 	if (size == 0) {
543 		return;
544 	}
545 
546 	// path --> index
547 	struct hashtable *mf_files =
548 		create_string_index_map(mf->files, mf->n_files);
549 	// struct file_info --> index
550 	struct hashtable *mf_file_infos =
551 		create_file_info_index_map(mf->file_infos, mf->n_file_infos);
552 	struct hashtable_itr *iter = hashtable_iterator(included_files);
553 	uint32_t i = 0;
554 	do {
555 		char *path = hashtable_iterator_key(iter);
556 		struct file_hash *file_hash = hashtable_iterator_value(iter);
557 		indexes[i] = get_file_hash_index(mf, path, file_hash, mf_files,
558 		                                 mf_file_infos, save_timestamp);
559 		i++;
560 	} while (hashtable_iterator_advance(iter));
561 	assert(i == size);
562 
563 	hashtable_destroy(mf_file_infos, 1);
564 	hashtable_destroy(mf_files, 1);
565 }
566 
567 static void
add_object_entry(struct manifest * mf,struct file_hash * object_hash,struct hashtable * included_files,bool save_timestamp)568 add_object_entry(struct manifest *mf,
569                  struct file_hash *object_hash,
570                  struct hashtable *included_files,
571                  bool save_timestamp)
572 {
573 	uint32_t n_objs = mf->n_objects;
574 	mf->objects = x_realloc(mf->objects, (n_objs + 1) * sizeof(*mf->objects));
575 	mf->n_objects++;
576 	struct object *obj = &mf->objects[n_objs];
577 
578 	uint32_t n_fii = hashtable_count(included_files);
579 	obj->n_file_info_indexes = n_fii;
580 	obj->file_info_indexes = x_malloc(n_fii * sizeof(*obj->file_info_indexes));
581 	add_file_info_indexes(obj->file_info_indexes, n_fii, mf, included_files,
582 	                      save_timestamp);
583 	memcpy(obj->hash.hash, object_hash->hash, mf->hash_size);
584 	obj->hash.size = object_hash->size;
585 }
586 
587 // Try to get the object hash from a manifest file. Caller frees. Returns NULL
588 // on failure.
589 struct file_hash *
manifest_get(struct conf * conf,const char * manifest_path)590 manifest_get(struct conf *conf, const char *manifest_path)
591 {
592 	gzFile f = NULL;
593 	struct manifest *mf = NULL;
594 	struct hashtable *hashed_files = NULL; // path --> struct file_hash
595 	struct hashtable *stated_files = NULL; // path --> struct file_stats
596 	struct file_hash *fh = NULL;
597 
598 	int fd = open(manifest_path, O_RDONLY | O_BINARY);
599 	if (fd == -1) {
600 		// Cache miss.
601 		cc_log("No such manifest file");
602 		goto out;
603 	}
604 	f = gzdopen(fd, "rb");
605 	if (!f) {
606 		close(fd);
607 		cc_log("Failed to gzdopen manifest file");
608 		goto out;
609 	}
610 
611 	char *errmsg;
612 	mf = read_manifest(f, &errmsg);
613 	if (!mf) {
614 		cc_log("%s", errmsg);
615 		goto out;
616 	}
617 
618 	hashed_files = create_hashtable(1000, hash_from_string, strings_equal);
619 	stated_files = create_hashtable(1000, hash_from_string, strings_equal);
620 
621 	// Check newest object first since it's a bit more likely to match.
622 	for (uint32_t i = mf->n_objects; i > 0; i--) {
623 		if (verify_object(conf, mf, &mf->objects[i - 1],
624 		                  stated_files, hashed_files)) {
625 			fh = x_malloc(sizeof(*fh));
626 			*fh = mf->objects[i - 1].hash;
627 			goto out;
628 		}
629 	}
630 
631 out:
632 	if (hashed_files) {
633 		hashtable_destroy(hashed_files, 1);
634 	}
635 	if (stated_files) {
636 		hashtable_destroy(stated_files, 1);
637 	}
638 	if (f) {
639 		gzclose(f);
640 	}
641 	if (mf) {
642 		free_manifest(mf);
643 	}
644 	return fh;
645 }
646 
647 // Put the object name into a manifest file given a set of included files.
648 // Returns true on success, otherwise false.
649 bool
manifest_put(const char * manifest_path,struct file_hash * object_hash,struct hashtable * included_files,bool save_timestamp)650 manifest_put(const char *manifest_path, struct file_hash *object_hash,
651              struct hashtable *included_files, bool save_timestamp)
652 {
653 	int ret = 0;
654 	gzFile f2 = NULL;
655 	struct manifest *mf = NULL;
656 	char *tmp_file = NULL;
657 
658 	// We don't bother to acquire a lock when writing the manifest to disk. A
659 	// race between two processes will only result in one lost entry, which is
660 	// not a big deal, and it's also very unlikely.
661 
662 	int fd1 = open(manifest_path, O_RDONLY | O_BINARY);
663 	if (fd1 == -1) {
664 		// New file.
665 		mf = create_empty_manifest();
666 	} else {
667 		gzFile f1 = gzdopen(fd1, "rb");
668 		if (!f1) {
669 			cc_log("Failed to gzdopen manifest file");
670 			close(fd1);
671 			goto out;
672 		}
673 		char *errmsg;
674 		mf = read_manifest(f1, &errmsg);
675 		gzclose(f1);
676 		if (!mf) {
677 			cc_log("%s", errmsg);
678 			free(errmsg);
679 			cc_log("Failed to read manifest file; deleting it");
680 			x_unlink(manifest_path);
681 			mf = create_empty_manifest();
682 		}
683 	}
684 
685 	if (mf->n_objects > MAX_MANIFEST_ENTRIES) {
686 		// Normally, there shouldn't be many object entries in the manifest since
687 		// new entries are added only if an include file has changed but not the
688 		// source file, and you typically change source files more often than
689 		// header files. However, it's certainly possible to imagine cases where
690 		// the manifest will grow large (for instance, a generated header file that
691 		// changes for every build), and this must be taken care of since
692 		// processing an ever growing manifest eventually will take too much time.
693 		// A good way of solving this would be to maintain the object entries in
694 		// LRU order and discarding the old ones. An easy way is to throw away all
695 		// entries when there are too many. Let's do that for now.
696 		cc_log("More than %u entries in manifest file; discarding",
697 		       MAX_MANIFEST_ENTRIES);
698 		free_manifest(mf);
699 		mf = create_empty_manifest();
700 	} else if (mf->n_file_infos > MAX_MANIFEST_FILE_INFO_ENTRIES) {
701 		// Rarely, file_info entries can grow large in pathological cases where
702 		// many included files change, but the main file does not. This also puts
703 		// an upper bound on the number of file_info entries.
704 		cc_log("More than %u file_info entries in manifest file; discarding",
705 		       MAX_MANIFEST_FILE_INFO_ENTRIES);
706 		free_manifest(mf);
707 		mf = create_empty_manifest();
708 	}
709 
710 	tmp_file = format("%s.tmp", manifest_path);
711 	int fd2 = create_tmp_fd(&tmp_file);
712 	f2 = gzdopen(fd2, "wb");
713 	if (!f2) {
714 		cc_log("Failed to gzdopen %s", tmp_file);
715 		goto out;
716 	}
717 
718 	add_object_entry(mf, object_hash, included_files, save_timestamp);
719 	if (write_manifest(f2, mf)) {
720 		gzclose(f2);
721 		f2 = NULL;
722 		if (x_rename(tmp_file, manifest_path) == 0) {
723 			ret = 1;
724 		} else {
725 			cc_log("Failed to rename %s to %s", tmp_file, manifest_path);
726 			goto out;
727 		}
728 	} else {
729 		cc_log("Failed to write manifest file");
730 		goto out;
731 	}
732 
733 out:
734 	if (mf) {
735 		free_manifest(mf);
736 	}
737 	if (tmp_file) {
738 		free(tmp_file);
739 	}
740 	if (f2) {
741 		gzclose(f2);
742 	}
743 	return ret;
744 }
745 
746 bool
manifest_dump(const char * manifest_path,FILE * stream)747 manifest_dump(const char *manifest_path, FILE *stream)
748 {
749 	struct manifest *mf = NULL;
750 	gzFile f = NULL;
751 	bool ret = false;
752 
753 	int fd = open(manifest_path, O_RDONLY | O_BINARY);
754 	if (fd == -1) {
755 		fprintf(stderr, "No such manifest file: %s\n", manifest_path);
756 		goto out;
757 	}
758 	f = gzdopen(fd, "rb");
759 	if (!f) {
760 		fprintf(stderr, "Failed to dzopen manifest file\n");
761 		close(fd);
762 		goto out;
763 	}
764 	char *errmsg;
765 	mf = read_manifest(f, &errmsg);
766 	if (!mf) {
767 		fprintf(stderr, "%s\n", errmsg);
768 		free(errmsg);
769 		goto out;
770 	}
771 
772 	fprintf(stream, "Magic: %c%c%c%c\n",
773 	        (MAGIC >> 24) & 0xFF,
774 	        (MAGIC >> 16) & 0xFF,
775 	        (MAGIC >> 8) & 0xFF,
776 	        MAGIC & 0xFF);
777 	fprintf(stream, "Version: %u\n", mf->version);
778 	fprintf(stream, "Hash size: %u\n", (unsigned)mf->hash_size);
779 	fprintf(stream, "Reserved field: %u\n", (unsigned)mf->reserved);
780 	fprintf(stream, "File paths (%u):\n", (unsigned)mf->n_files);
781 	for (unsigned i = 0; i < mf->n_files; ++i) {
782 		fprintf(stream, "  %u: %s\n", i, mf->files[i]);
783 	}
784 	fprintf(stream, "File infos (%u):\n", (unsigned)mf->n_file_infos);
785 	for (unsigned i = 0; i < mf->n_file_infos; ++i) {
786 		char *hash;
787 		fprintf(stream, "  %u:\n", i);
788 		fprintf(stream, "    Path index: %u\n", mf->file_infos[i].index);
789 		hash = format_hash_as_string(mf->file_infos[i].hash, -1);
790 		fprintf(stream, "    Hash: %s\n", hash);
791 		free(hash);
792 		fprintf(stream, "    Size: %u\n", mf->file_infos[i].size);
793 		fprintf(stream, "    Mtime: %lld\n", (long long)mf->file_infos[i].mtime);
794 		fprintf(stream, "    Ctime: %lld\n", (long long)mf->file_infos[i].ctime);
795 	}
796 	fprintf(stream, "Results (%u):\n", (unsigned)mf->n_objects);
797 	for (unsigned i = 0; i < mf->n_objects; ++i) {
798 		char *hash;
799 		fprintf(stream, "  %u:\n", i);
800 		fprintf(stream, "    File info indexes:");
801 		for (unsigned j = 0; j < mf->objects[i].n_file_info_indexes; ++j) {
802 			fprintf(stream, " %u", mf->objects[i].file_info_indexes[j]);
803 		}
804 		fprintf(stream, "\n");
805 		hash = format_hash_as_string(mf->objects[i].hash.hash, -1);
806 		fprintf(stream, "    Hash: %s\n", hash);
807 		free(hash);
808 		fprintf(stream, "    Size: %u\n", (unsigned)mf->objects[i].hash.size);
809 	}
810 
811 	ret = true;
812 
813 out:
814 	if (mf) {
815 		free_manifest(mf);
816 	}
817 	if (f) {
818 		gzclose(f);
819 	}
820 	return ret;
821 }
822