1 /*
2  * blob_table.c
3  *
4  * A blob table maps SHA-1 message digests to "blobs", which are nonempty
5  * sequences of binary data.  Within a WIM file, blobs are single-instanced.
6  *
7  * This file also contains code to read and write the corresponding on-disk
8  * representation of this table in the WIM file format.
9  */
10 
11 /*
12  * Copyright (C) 2012-2016 Eric Biggers
13  *
14  * This file is free software; you can redistribute it and/or modify it under
15  * the terms of the GNU Lesser General Public License as published by the Free
16  * Software Foundation; either version 3 of the License, or (at your option) any
17  * later version.
18  *
19  * This file is distributed in the hope that it will be useful, but WITHOUT
20  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
21  * FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more
22  * details.
23  *
24  * You should have received a copy of the GNU Lesser General Public License
25  * along with this file; if not, see http://www.gnu.org/licenses/.
26  */
27 
28 #ifdef HAVE_CONFIG_H
29 #  include "config.h"
30 #endif
31 
#include <stdint.h> /* for SIZE_MAX  */
#include <stdlib.h>
#include <string.h>
#include <unistd.h> /* for unlink()  */
35 
36 #include "wimlib/assert.h"
37 #include "wimlib/bitops.h"
38 #include "wimlib/blob_table.h"
39 #include "wimlib/encoding.h"
40 #include "wimlib/endianness.h"
41 #include "wimlib/error.h"
42 #include "wimlib/metadata.h"
43 #include "wimlib/ntfs_3g.h"
44 #include "wimlib/resource.h"
45 #include "wimlib/unaligned.h"
46 #include "wimlib/util.h"
47 #include "wimlib/win32.h"
48 #include "wimlib/write.h"
49 
50 /* A hash table mapping SHA-1 message digests to blob descriptors  */
51 struct blob_table {
52 	struct hlist_head *array;
53 	size_t num_blobs;
54 	size_t mask; /* capacity - 1; capacity is a power of 2  */
55 };
56 
57 struct blob_table *
new_blob_table(size_t capacity)58 new_blob_table(size_t capacity)
59 {
60 	struct blob_table *table;
61 	struct hlist_head *array;
62 
63 	capacity = roundup_pow_of_2(capacity);
64 
65 	table = MALLOC(sizeof(struct blob_table));
66 	if (table == NULL)
67 		goto oom;
68 
69 	array = CALLOC(capacity, sizeof(array[0]));
70 	if (array == NULL) {
71 		FREE(table);
72 		goto oom;
73 	}
74 
75 	table->num_blobs = 0;
76 	table->mask = capacity - 1;
77 	table->array = array;
78 	return table;
79 
80 oom:
81 	ERROR("Failed to allocate memory for blob table "
82 	      "with capacity %zu", capacity);
83 	return NULL;
84 }
85 
86 static int
do_free_blob_descriptor(struct blob_descriptor * blob,void * _ignore)87 do_free_blob_descriptor(struct blob_descriptor *blob, void *_ignore)
88 {
89 	free_blob_descriptor(blob);
90 	return 0;
91 }
92 
93 void
free_blob_table(struct blob_table * table)94 free_blob_table(struct blob_table *table)
95 {
96 	if (table) {
97 		for_blob_in_table(table, do_free_blob_descriptor, NULL);
98 		FREE(table->array);
99 		FREE(table);
100 	}
101 }
102 
103 struct blob_descriptor *
new_blob_descriptor(void)104 new_blob_descriptor(void)
105 {
106 	STATIC_ASSERT(BLOB_NONEXISTENT == 0);
107 	return CALLOC(1, sizeof(struct blob_descriptor));
108 }
109 
110 struct blob_descriptor *
clone_blob_descriptor(const struct blob_descriptor * old)111 clone_blob_descriptor(const struct blob_descriptor *old)
112 {
113 	struct blob_descriptor *new;
114 
115 	new = memdup(old, sizeof(struct blob_descriptor));
116 	if (new == NULL)
117 		return NULL;
118 
119 	switch (new->blob_location) {
120 	case BLOB_IN_WIM:
121 		list_add(&new->rdesc_node, &new->rdesc->blob_list);
122 		break;
123 
124 	case BLOB_IN_FILE_ON_DISK:
125 #ifdef WITH_FUSE
126 	case BLOB_IN_STAGING_FILE:
127 		STATIC_ASSERT((void*)&old->file_on_disk ==
128 			      (void*)&old->staging_file_name);
129 #endif
130 		new->file_on_disk = TSTRDUP(old->file_on_disk);
131 		if (new->file_on_disk == NULL)
132 			goto out_free;
133 		break;
134 #ifdef __WIN32__
135 	case BLOB_IN_WINDOWS_FILE:
136 		new->windows_file = clone_windows_file(old->windows_file);
137 		break;
138 #endif
139 	case BLOB_IN_ATTACHED_BUFFER:
140 		new->attached_buffer = memdup(old->attached_buffer, old->size);
141 		if (new->attached_buffer == NULL)
142 			goto out_free;
143 		break;
144 #ifdef WITH_NTFS_3G
145 	case BLOB_IN_NTFS_VOLUME:
146 		new->ntfs_loc = clone_ntfs_location(old->ntfs_loc);
147 		if (!new->ntfs_loc)
148 			goto out_free;
149 		break;
150 #endif
151 	}
152 	return new;
153 
154 out_free:
155 	free_blob_descriptor(new);
156 	return NULL;
157 }
158 
159 /* Release a blob descriptor from its location, if any, and set its new location
160  * to BLOB_NONEXISTENT.  */
161 void
blob_release_location(struct blob_descriptor * blob)162 blob_release_location(struct blob_descriptor *blob)
163 {
164 	switch (blob->blob_location) {
165 	case BLOB_IN_WIM: {
166 		struct wim_resource_descriptor *rdesc = blob->rdesc;
167 
168 		list_del(&blob->rdesc_node);
169 		if (list_empty(&rdesc->blob_list)) {
170 			wim_decrement_refcnt(rdesc->wim);
171 			FREE(rdesc);
172 		}
173 		break;
174 	}
175 	case BLOB_IN_FILE_ON_DISK:
176 #ifdef WITH_FUSE
177 	case BLOB_IN_STAGING_FILE:
178 		STATIC_ASSERT((void*)&blob->file_on_disk ==
179 			      (void*)&blob->staging_file_name);
180 #endif
181 	case BLOB_IN_ATTACHED_BUFFER:
182 		STATIC_ASSERT((void*)&blob->file_on_disk ==
183 			      (void*)&blob->attached_buffer);
184 		FREE(blob->file_on_disk);
185 		break;
186 #ifdef __WIN32__
187 	case BLOB_IN_WINDOWS_FILE:
188 		free_windows_file(blob->windows_file);
189 		break;
190 #endif
191 #ifdef WITH_NTFS_3G
192 	case BLOB_IN_NTFS_VOLUME:
193 		free_ntfs_location(blob->ntfs_loc);
194 		break;
195 #endif
196 	}
197 	blob->blob_location = BLOB_NONEXISTENT;
198 }
199 
200 void
free_blob_descriptor(struct blob_descriptor * blob)201 free_blob_descriptor(struct blob_descriptor *blob)
202 {
203 	if (blob) {
204 		blob_release_location(blob);
205 		FREE(blob);
206 	}
207 }
208 
209 /* Should this blob be retained even if it has no references?  */
210 static bool
should_retain_blob(const struct blob_descriptor * blob)211 should_retain_blob(const struct blob_descriptor *blob)
212 {
213 	return blob->blob_location == BLOB_IN_WIM;
214 }
215 
216 static void
finalize_blob(struct blob_descriptor * blob)217 finalize_blob(struct blob_descriptor *blob)
218 {
219 	if (!should_retain_blob(blob))
220 		free_blob_descriptor(blob);
221 }
222 
223 /*
224  * Decrements the reference count of the specified blob, which must be either
225  * (a) unhashed, or (b) inserted in the specified blob table.
226  *
227  * If the blob's reference count reaches 0, we may unlink it from @table and
228  * free it.  However, we retain blobs with 0 reference count that originated
229  * from WIM files (BLOB_IN_WIM).  We do this for two reasons:
230  *
231  * 1. This prevents information about valid blobs in a WIM file --- blobs which
232  *    will continue to be present after appending to the WIM file --- from being
233  *    lost merely because we dropped all references to them.
234  *
235  * 2. Blob reference counts we read from WIM files can't be trusted.  It's
236  *    possible that a WIM has reference counts that are too low; WIMGAPI
237  *    sometimes creates WIMs where this is the case.  It's also possible that
238  *    blobs have been referenced from an external WIM; those blobs can
239  *    potentially have any reference count at all, either lower or higher than
240  *    would be expected for this WIM ("this WIM" meaning the owner of @table) if
241  *    it were a standalone WIM.
242  *
243  * So we can't take the reference counts too seriously.  But at least, we do
244  * recalculate by default when writing a new WIM file.
245  */
246 void
blob_decrement_refcnt(struct blob_descriptor * blob,struct blob_table * table)247 blob_decrement_refcnt(struct blob_descriptor *blob, struct blob_table *table)
248 {
249 	blob_subtract_refcnt(blob, table, 1);
250 }
251 
252 void
blob_subtract_refcnt(struct blob_descriptor * blob,struct blob_table * table,u32 count)253 blob_subtract_refcnt(struct blob_descriptor *blob, struct blob_table *table,
254 		     u32 count)
255 {
256 	if (unlikely(blob->refcnt < count)) {
257 		blob->refcnt = 0; /* See comment above  */
258 		return;
259 	}
260 
261 	blob->refcnt -= count;
262 
263 	if (blob->refcnt != 0)
264 		return;
265 
266 	if (blob->unhashed) {
267 		list_del(&blob->unhashed_list);
268 	#ifdef WITH_FUSE
269 		/* If the blob has been extracted to a staging file for a FUSE
270 		 * mount, unlink the staging file.  (Note that there still may
271 		 * be open file descriptors to it.)  */
272 		if (blob->blob_location == BLOB_IN_STAGING_FILE)
273 			unlinkat(blob->staging_dir_fd,
274 				 blob->staging_file_name, 0);
275 	#endif
276 	} else {
277 		if (!should_retain_blob(blob))
278 			blob_table_unlink(table, blob);
279 	}
280 
281 	/* If FUSE mounts are enabled, then don't actually free the blob
282 	 * descriptor until the last file descriptor to it has been closed.  */
283 #ifdef WITH_FUSE
284 	if (blob->num_opened_fds == 0)
285 #endif
286 		finalize_blob(blob);
287 }
288 
289 #ifdef WITH_FUSE
290 void
blob_decrement_num_opened_fds(struct blob_descriptor * blob)291 blob_decrement_num_opened_fds(struct blob_descriptor *blob)
292 {
293 	wimlib_assert(blob->num_opened_fds != 0);
294 
295 	if (--blob->num_opened_fds == 0 && blob->refcnt == 0)
296 		finalize_blob(blob);
297 }
298 #endif
299 
300 static void
blob_table_insert_raw(struct blob_table * table,struct blob_descriptor * blob)301 blob_table_insert_raw(struct blob_table *table, struct blob_descriptor *blob)
302 {
303 	size_t i = blob->hash_short & table->mask;
304 
305 	hlist_add_head(&blob->hash_list, &table->array[i]);
306 }
307 
308 static void
enlarge_blob_table(struct blob_table * table)309 enlarge_blob_table(struct blob_table *table)
310 {
311 	size_t old_capacity, new_capacity;
312 	struct hlist_head *old_array, *new_array;
313 	struct blob_descriptor *blob;
314 	struct hlist_node *tmp;
315 	size_t i;
316 
317 	old_capacity = table->mask + 1;
318 	new_capacity = old_capacity * 2;
319 	new_array = CALLOC(new_capacity, sizeof(struct hlist_head));
320 	if (new_array == NULL)
321 		return;
322 	old_array = table->array;
323 	table->array = new_array;
324 	table->mask = new_capacity - 1;
325 
326 	for (i = 0; i < old_capacity; i++)
327 		hlist_for_each_entry_safe(blob, tmp, &old_array[i], hash_list)
328 			blob_table_insert_raw(table, blob);
329 	FREE(old_array);
330 }
331 
332 /* Insert a blob descriptor into the blob table.  */
333 void
blob_table_insert(struct blob_table * table,struct blob_descriptor * blob)334 blob_table_insert(struct blob_table *table, struct blob_descriptor *blob)
335 {
336 	blob_table_insert_raw(table, blob);
337 	if (table->num_blobs++ > table->mask)
338 		enlarge_blob_table(table);
339 }
340 
341 /* Unlinks a blob descriptor from the blob table; does not free it.  */
342 void
blob_table_unlink(struct blob_table * table,struct blob_descriptor * blob)343 blob_table_unlink(struct blob_table *table, struct blob_descriptor *blob)
344 {
345 	wimlib_assert(!blob->unhashed);
346 	wimlib_assert(table->num_blobs != 0);
347 
348 	hlist_del(&blob->hash_list);
349 	table->num_blobs--;
350 }
351 
352 /* Given a SHA-1 message digest, return the corresponding blob descriptor from
353  * the specified blob table, or NULL if there is none.  */
354 struct blob_descriptor *
lookup_blob(const struct blob_table * table,const u8 * hash)355 lookup_blob(const struct blob_table *table, const u8 *hash)
356 {
357 	size_t i;
358 	struct blob_descriptor *blob;
359 
360 	i = load_size_t_unaligned(hash) & table->mask;
361 	hlist_for_each_entry(blob, &table->array[i], hash_list)
362 		if (hashes_equal(hash, blob->hash))
363 			return blob;
364 	return NULL;
365 }
366 
367 /* Call a function on all blob descriptors in the specified blob table.  Stop
368  * early and return nonzero if any call to the function returns nonzero.  */
369 int
for_blob_in_table(struct blob_table * table,int (* visitor)(struct blob_descriptor *,void *),void * arg)370 for_blob_in_table(struct blob_table *table,
371 		  int (*visitor)(struct blob_descriptor *, void *), void *arg)
372 {
373 	struct blob_descriptor *blob;
374 	struct hlist_node *tmp;
375 	int ret;
376 
377 	for (size_t i = 0; i <= table->mask; i++) {
378 		hlist_for_each_entry_safe(blob, tmp, &table->array[i],
379 					  hash_list)
380 		{
381 			ret = visitor(blob, arg);
382 			if (ret)
383 				return ret;
384 		}
385 	}
386 	return 0;
387 }
388 
389 /*
390  * This is a qsort() callback that sorts blobs into an order optimized for
391  * reading.  Sorting is done primarily by blob location, then secondarily by a
392  * location-dependent order.  For example, blobs in WIM resources are sorted
393  * such that the underlying WIM files will be read sequentially.  This is
394  * especially important for WIM files containing solid resources.
395  */
396 int
cmp_blobs_by_sequential_order(const void * p1,const void * p2)397 cmp_blobs_by_sequential_order(const void *p1, const void *p2)
398 {
399 	const struct blob_descriptor *blob1, *blob2;
400 	int v;
401 	WIMStruct *wim1, *wim2;
402 
403 	blob1 = *(const struct blob_descriptor**)p1;
404 	blob2 = *(const struct blob_descriptor**)p2;
405 
406 	v = (int)blob1->blob_location - (int)blob2->blob_location;
407 
408 	/* Different locations?  Note: "unsafe compaction mode" requires that
409 	 * blobs in WIMs sort before all others.  For the logic here to ensure
410 	 * this, BLOB_IN_WIM must have the lowest value among all defined
411 	 * blob_locations.  Statically verify that the enum values haven't
412 	 * changed.  */
413 	STATIC_ASSERT(BLOB_NONEXISTENT == 0 && BLOB_IN_WIM == 1);
414 	if (v)
415 		return v;
416 
417 	switch (blob1->blob_location) {
418 	case BLOB_IN_WIM:
419 		wim1 = blob1->rdesc->wim;
420 		wim2 = blob2->rdesc->wim;
421 
422 		/* Different WIM files?  */
423 		if (wim1 != wim2) {
424 
425 			/* Resources from the WIM file currently being compacted
426 			 * (if any) must always sort first.  */
427 			v = (int)wim2->being_compacted - (int)wim1->being_compacted;
428 			if (v)
429 				return v;
430 
431 			/* Different split WIMs?  */
432 			v = cmp_guids(wim1->hdr.guid, wim2->hdr.guid);
433 			if (v)
434 				return v;
435 
436 			/* Different part numbers in the same split WIM?  */
437 			v = (int)wim1->hdr.part_number - (int)wim2->hdr.part_number;
438 			if (v)
439 				return v;
440 
441 			/* Probably two WIMStructs for the same on-disk file.
442 			 * Just sort by pointer.  */
443 			return wim1 < wim2 ? -1 : 1;
444 		}
445 
446 		/* Same WIM file  */
447 
448 		/* Sort by increasing resource offset  */
449 		if (blob1->rdesc->offset_in_wim != blob2->rdesc->offset_in_wim)
450 			return cmp_u64(blob1->rdesc->offset_in_wim,
451 				       blob2->rdesc->offset_in_wim);
452 
453 		/* The blobs are in the same solid resource.  Sort by increasing
454 		 * offset in the resource.  */
455 		return cmp_u64(blob1->offset_in_res, blob2->offset_in_res);
456 
457 	case BLOB_IN_FILE_ON_DISK:
458 #ifdef WITH_FUSE
459 	case BLOB_IN_STAGING_FILE:
460 #endif
461 		/* Compare files by path: just a heuristic that will place files
462 		 * in the same directory next to each other.  */
463 		return tstrcmp(blob1->file_on_disk, blob2->file_on_disk);
464 #ifdef __WIN32__
465 	case BLOB_IN_WINDOWS_FILE:
466 		return cmp_windows_files(blob1->windows_file, blob2->windows_file);
467 #endif
468 #ifdef WITH_NTFS_3G
469 	case BLOB_IN_NTFS_VOLUME:
470 		return cmp_ntfs_locations(blob1->ntfs_loc, blob2->ntfs_loc);
471 #endif
472 	default:
473 		/* No additional sorting order defined for this resource
474 		 * location (e.g. BLOB_IN_ATTACHED_BUFFER); simply compare
475 		 * everything equal to each other.  */
476 		return 0;
477 	}
478 }
479 
480 int
sort_blob_list(struct list_head * blob_list,size_t list_head_offset,int (* compar)(const void *,const void *))481 sort_blob_list(struct list_head *blob_list, size_t list_head_offset,
482 	       int (*compar)(const void *, const void*))
483 {
484 	struct list_head *cur;
485 	struct blob_descriptor **array;
486 	size_t i;
487 	size_t array_size;
488 	size_t num_blobs = 0;
489 
490 	list_for_each(cur, blob_list)
491 		num_blobs++;
492 
493 	if (num_blobs <= 1)
494 		return 0;
495 
496 	array_size = num_blobs * sizeof(array[0]);
497 	array = MALLOC(array_size);
498 	if (array == NULL)
499 		return WIMLIB_ERR_NOMEM;
500 
501 	cur = blob_list->next;
502 	for (i = 0; i < num_blobs; i++) {
503 		array[i] = (struct blob_descriptor*)((u8*)cur - list_head_offset);
504 		cur = cur->next;
505 	}
506 
507 	qsort(array, num_blobs, sizeof(array[0]), compar);
508 
509 	INIT_LIST_HEAD(blob_list);
510 	for (i = 0; i < num_blobs; i++) {
511 		list_add_tail((struct list_head*)
512 			       ((u8*)array[i] + list_head_offset), blob_list);
513 	}
514 	FREE(array);
515 	return 0;
516 }
517 
518 /* Sort the specified list of blobs in an order optimized for sequential
519  * reading.  */
520 int
sort_blob_list_by_sequential_order(struct list_head * blob_list,size_t list_head_offset)521 sort_blob_list_by_sequential_order(struct list_head *blob_list,
522 				   size_t list_head_offset)
523 {
524 	return sort_blob_list(blob_list, list_head_offset,
525 			      cmp_blobs_by_sequential_order);
526 }
527 
528 static int
add_blob_to_array(struct blob_descriptor * blob,void * _pp)529 add_blob_to_array(struct blob_descriptor *blob, void *_pp)
530 {
531 	struct blob_descriptor ***pp = _pp;
532 	*(*pp)++ = blob;
533 	return 0;
534 }
535 
536 /* Iterate through the blob descriptors in the specified blob table in an order
537  * optimized for sequential reading.  */
538 int
for_blob_in_table_sorted_by_sequential_order(struct blob_table * table,int (* visitor)(struct blob_descriptor *,void *),void * arg)539 for_blob_in_table_sorted_by_sequential_order(struct blob_table *table,
540 					     int (*visitor)(struct blob_descriptor *, void *),
541 					     void *arg)
542 {
543 	struct blob_descriptor **blob_array, **p;
544 	size_t num_blobs = table->num_blobs;
545 	int ret;
546 
547 	blob_array = MALLOC(num_blobs * sizeof(blob_array[0]));
548 	if (!blob_array)
549 		return WIMLIB_ERR_NOMEM;
550 	p = blob_array;
551 	for_blob_in_table(table, add_blob_to_array, &p);
552 
553 	wimlib_assert(p == blob_array + num_blobs);
554 
555 	qsort(blob_array, num_blobs, sizeof(blob_array[0]),
556 	      cmp_blobs_by_sequential_order);
557 	ret = 0;
558 	for (size_t i = 0; i < num_blobs; i++) {
559 		ret = visitor(blob_array[i], arg);
560 		if (ret)
561 			break;
562 	}
563 	FREE(blob_array);
564 	return ret;
565 }
566 
567 /* On-disk format of a blob descriptor in a WIM file.
568  *
569  * Note: if the WIM file contains solid resource(s), then this structure is
570  * sometimes overloaded to describe a "resource" rather than a "blob".  See the
571  * code for details.  */
572 struct blob_descriptor_disk {
573 
574 	/* Size, offset, and flags of the blob.  */
575 	struct wim_reshdr_disk reshdr;
576 
577 	/* Which part of the split WIM this blob is in; indexed from 1. */
578 	le16 part_number;
579 
580 	/* Reference count of this blob over all WIM images.  (But see comment
581 	 * above blob_decrement_refcnt().)  */
582 	le32 refcnt;
583 
584 	/* SHA-1 message digest of the uncompressed data of this blob, or all
585 	 * zeroes if this blob is of zero length.  */
586 	u8 hash[SHA1_HASH_SIZE];
587 } _packed_attribute;
588 
589 /* Given a nonempty run of consecutive blob descriptors with the SOLID flag set,
590  * count how many specify resources (as opposed to blobs within those
591  * resources).
592  *
593  * Returns the resulting count.  */
594 static size_t
count_solid_resources(const struct blob_descriptor_disk * entries,size_t max)595 count_solid_resources(const struct blob_descriptor_disk *entries, size_t max)
596 {
597 	size_t count = 0;
598 	do {
599 		struct wim_reshdr reshdr;
600 
601 		get_wim_reshdr(&(entries++)->reshdr, &reshdr);
602 
603 		if (!(reshdr.flags & WIM_RESHDR_FLAG_SOLID)) {
604 			/* Run was terminated by a stand-alone blob entry.  */
605 			break;
606 		}
607 
608 		if (reshdr.uncompressed_size == SOLID_RESOURCE_MAGIC_NUMBER) {
609 			/* This is a resource entry.  */
610 			count++;
611 		}
612 	} while (--max);
613 	return count;
614 }
615 
616 /*
617  * Given a run of consecutive blob descriptors with the SOLID flag set and
618  * having @num_rdescs resource entries, load resource information from them into
619  * the resource descriptors in the @rdescs array.
620  *
621  * Returns 0 on success, or a nonzero error code on failure.
622  */
623 static int
do_load_solid_info(WIMStruct * wim,struct wim_resource_descriptor ** rdescs,size_t num_rdescs,const struct blob_descriptor_disk * entries)624 do_load_solid_info(WIMStruct *wim, struct wim_resource_descriptor **rdescs,
625 		   size_t num_rdescs,
626 		   const struct blob_descriptor_disk *entries)
627 {
628 	for (size_t i = 0; i < num_rdescs; i++) {
629 		struct wim_reshdr reshdr;
630 		struct alt_chunk_table_header_disk hdr;
631 		struct wim_resource_descriptor *rdesc;
632 		int ret;
633 
634 		/* Advance to next resource entry.  */
635 
636 		do {
637 			get_wim_reshdr(&(entries++)->reshdr, &reshdr);
638 		} while (reshdr.uncompressed_size != SOLID_RESOURCE_MAGIC_NUMBER);
639 
640 		rdesc = rdescs[i];
641 
642 		wim_reshdr_to_desc(&reshdr, wim, rdesc);
643 
644 		/* For solid resources, the uncompressed size, compression type,
645 		 * and chunk size are stored in the resource itself, not in the
646 		 * blob table.  */
647 
648 		ret = full_pread(&wim->in_fd, &hdr,
649 				 sizeof(hdr), reshdr.offset_in_wim);
650 		if (ret) {
651 			ERROR("Failed to read header of solid resource "
652 			      "(offset_in_wim=%"PRIu64")",
653 			      reshdr.offset_in_wim);
654 			return ret;
655 		}
656 
657 		rdesc->uncompressed_size = le64_to_cpu(hdr.res_usize);
658 
659 		/* Compression format numbers must be the same as in
660 		 * WIMGAPI to be compatible here.  */
661 		STATIC_ASSERT(WIMLIB_COMPRESSION_TYPE_NONE == 0);
662 		STATIC_ASSERT(WIMLIB_COMPRESSION_TYPE_XPRESS == 1);
663 		STATIC_ASSERT(WIMLIB_COMPRESSION_TYPE_LZX == 2);
664 		STATIC_ASSERT(WIMLIB_COMPRESSION_TYPE_LZMS == 3);
665 		rdesc->compression_type = le32_to_cpu(hdr.compression_format);
666 		rdesc->chunk_size = le32_to_cpu(hdr.chunk_size);
667 	}
668 	return 0;
669 }
670 
671 /*
672  * Given a nonempty run of consecutive blob descriptors with the SOLID flag set,
673  * allocate a 'struct wim_resource_descriptor' for each resource within that
674  * run.
675  *
676  * Returns 0 on success, or a nonzero error code on failure.
677  * Returns the pointers and count in *rdescs_ret and *num_rdescs_ret.
678  */
679 static int
load_solid_info(WIMStruct * wim,const struct blob_descriptor_disk * entries,size_t num_remaining_entries,struct wim_resource_descriptor *** rdescs_ret,size_t * num_rdescs_ret)680 load_solid_info(WIMStruct *wim,
681 		const struct blob_descriptor_disk *entries,
682 		size_t num_remaining_entries,
683 		struct wim_resource_descriptor ***rdescs_ret,
684 		size_t *num_rdescs_ret)
685 {
686 	size_t num_rdescs;
687 	struct wim_resource_descriptor **rdescs;
688 	size_t i;
689 	int ret;
690 
691 	num_rdescs = count_solid_resources(entries, num_remaining_entries);
692 	rdescs = CALLOC(num_rdescs, sizeof(rdescs[0]));
693 	if (!rdescs)
694 		return WIMLIB_ERR_NOMEM;
695 
696 	for (i = 0; i < num_rdescs; i++) {
697 		rdescs[i] = MALLOC(sizeof(struct wim_resource_descriptor));
698 		if (!rdescs[i]) {
699 			ret = WIMLIB_ERR_NOMEM;
700 			goto out_free_rdescs;
701 		}
702 	}
703 
704 	ret = do_load_solid_info(wim, rdescs, num_rdescs, entries);
705 	if (ret)
706 		goto out_free_rdescs;
707 
708 	wim->refcnt += num_rdescs;
709 
710 	*rdescs_ret = rdescs;
711 	*num_rdescs_ret = num_rdescs;
712 	return 0;
713 
714 out_free_rdescs:
715 	for (i = 0; i < num_rdescs; i++)
716 		FREE(rdescs[i]);
717 	FREE(rdescs);
718 	return ret;
719 }
720 
721 /* Given a 'struct blob_descriptor' allocated for an on-disk blob descriptor
722  * with the SOLID flag set, try to assign it to resource in the current solid
723  * run.  */
724 static int
assign_blob_to_solid_resource(const struct wim_reshdr * reshdr,struct blob_descriptor * blob,struct wim_resource_descriptor ** rdescs,size_t num_rdescs)725 assign_blob_to_solid_resource(const struct wim_reshdr *reshdr,
726 			      struct blob_descriptor *blob,
727 			      struct wim_resource_descriptor **rdescs,
728 			      size_t num_rdescs)
729 {
730 	u64 offset = reshdr->offset_in_wim;
731 
732 	/* XXX: This linear search will be slow in the degenerate case where the
733 	 * number of solid resources in the run is huge.  */
734 	blob->size = reshdr->size_in_wim;
735 	for (size_t i = 0; i < num_rdescs; i++) {
736 		if (offset + blob->size <= rdescs[i]->uncompressed_size) {
737 			blob_set_is_located_in_wim_resource(blob, rdescs[i], offset);
738 			return 0;
739 		}
740 		offset -= rdescs[i]->uncompressed_size;
741 	}
742 	ERROR("blob could not be assigned to a solid resource");
743 	return WIMLIB_ERR_INVALID_LOOKUP_TABLE_ENTRY;
744 }
745 
746 static void
free_solid_rdescs(struct wim_resource_descriptor ** rdescs,size_t num_rdescs)747 free_solid_rdescs(struct wim_resource_descriptor **rdescs, size_t num_rdescs)
748 {
749 	if (rdescs) {
750 		for (size_t i = 0; i < num_rdescs; i++) {
751 			if (list_empty(&rdescs[i]->blob_list)) {
752 				rdescs[i]->wim->refcnt--;
753 				FREE(rdescs[i]);
754 			}
755 		}
756 		FREE(rdescs);
757 	}
758 }
759 
760 static int
cmp_blobs_by_offset_in_res(const void * p1,const void * p2)761 cmp_blobs_by_offset_in_res(const void *p1, const void *p2)
762 {
763 	const struct blob_descriptor *blob1, *blob2;
764 
765 	blob1 = *(const struct blob_descriptor**)p1;
766 	blob2 = *(const struct blob_descriptor**)p2;
767 
768 	return cmp_u64(blob1->offset_in_res, blob2->offset_in_res);
769 }
770 
771 /* Validate the size and location of a WIM resource.  */
772 static int
validate_resource(struct wim_resource_descriptor * rdesc)773 validate_resource(struct wim_resource_descriptor *rdesc)
774 {
775 	struct blob_descriptor *blob;
776 	bool out_of_order;
777 	u64 expected_next_offset;
778 	int ret;
779 
780 	/* Verify that the resource itself has a valid offset and size.  */
781 	if (rdesc->offset_in_wim + rdesc->size_in_wim < rdesc->size_in_wim)
782 		goto invalid_due_to_overflow;
783 
784 	/* Verify that each blob in the resource has a valid offset and size.
785 	 */
786 	expected_next_offset = 0;
787 	out_of_order = false;
788 	list_for_each_entry(blob, &rdesc->blob_list, rdesc_node) {
789 		if (blob->offset_in_res + blob->size < blob->size ||
790 		    blob->offset_in_res + blob->size > rdesc->uncompressed_size)
791 			goto invalid_due_to_overflow;
792 
793 		if (blob->offset_in_res >= expected_next_offset)
794 			expected_next_offset = blob->offset_in_res + blob->size;
795 		else
796 			out_of_order = true;
797 	}
798 
799 	/* If the blobs were not located at strictly increasing positions (not
800 	 * allowing for overlap), sort them.  Then make sure that none overlap.
801 	 */
802 	if (out_of_order) {
803 		ret = sort_blob_list(&rdesc->blob_list,
804 				     offsetof(struct blob_descriptor,
805 					      rdesc_node),
806 				     cmp_blobs_by_offset_in_res);
807 		if (ret)
808 			return ret;
809 
810 		expected_next_offset = 0;
811 		list_for_each_entry(blob, &rdesc->blob_list, rdesc_node) {
812 			if (blob->offset_in_res >= expected_next_offset)
813 				expected_next_offset = blob->offset_in_res + blob->size;
814 			else
815 				goto invalid_due_to_overlap;
816 		}
817 	}
818 
819 	return 0;
820 
821 invalid_due_to_overflow:
822 	ERROR("Invalid blob table (offset overflow)");
823 	return WIMLIB_ERR_INVALID_LOOKUP_TABLE_ENTRY;
824 
825 invalid_due_to_overlap:
826 	ERROR("Invalid blob table (blobs in solid resource overlap)");
827 	return WIMLIB_ERR_INVALID_LOOKUP_TABLE_ENTRY;
828 }
829 
830 static int
finish_solid_rdescs(struct wim_resource_descriptor ** rdescs,size_t num_rdescs)831 finish_solid_rdescs(struct wim_resource_descriptor **rdescs, size_t num_rdescs)
832 {
833 	int ret = 0;
834 	for (size_t i = 0; i < num_rdescs; i++) {
835 		ret = validate_resource(rdescs[i]);
836 		if (ret)
837 			break;
838 	}
839 	free_solid_rdescs(rdescs, num_rdescs);
840 	return ret;
841 }
842 
843 /*
844  * read_blob_table() -
845  *
846  * Read the blob table from a WIM file.  Usually, each entry in this table
847  * describes a "blob", or equivalently a "resource", that the WIM file contains,
848  * along with its location and SHA-1 message digest.  Descriptors for
849  * non-metadata blobs will be saved in the in-memory blob table
850  * (wim->blob_table), whereas descriptors for metadata blobs will be saved in a
851  * special location per-image (the wim->image_metadata array).
852  *
853  * However, in WIM_VERSION_SOLID (3584) WIMs, a resource may contain multiple
854  * blobs that are compressed together.  Such a resource is called a "solid
855  * resource".  Solid resources are still described in the on-disk "blob table",
856  * although the format is not the most logical.  A consecutive sequence of
857  * entries that all have flag WIM_RESHDR_FLAG_SOLID (0x10) set is a "solid run".
858  * A solid run describes a set of solid resources, each of which contains a set
859  * of blobs.  In a solid run, a 'struct wim_reshdr_disk' with 'uncompressed_size
860  * = SOLID_RESOURCE_MAGIC_NUMBER (0x100000000)' specifies a solid resource,
861  * whereas any other 'struct wim_reshdr_disk' specifies a blob within a solid
862  * resource.  There are some oddities in how we need to determine which solid
863  * resource a blob is actually in; see the code for details.
864  *
865  * Possible return values:
866  *	WIMLIB_ERR_SUCCESS (0)
867  *	WIMLIB_ERR_INVALID_LOOKUP_TABLE_ENTRY
868  *	WIMLIB_ERR_NOMEM
869  *
870  *	Or an error code caused by failure to read the blob table from the WIM
871  *	file.
872  */
int
read_blob_table(WIMStruct *wim)
{
	int ret;
	size_t num_entries;
	void *buf = NULL;
	struct blob_table *table = NULL;
	struct blob_descriptor *cur_blob = NULL;
	/* Counters for malformed entries that are skipped (warned about at the
	 * end rather than treated as fatal errors).  */
	size_t num_duplicate_blobs = 0;
	size_t num_empty_blobs = 0;
	size_t num_wrong_part_blobs = 0;
	u32 image_index = 0;
	/* Non-NULL while we are inside a run of consecutive SOLID entries;
	 * holds the resource descriptors for the current solid run.  */
	struct wim_resource_descriptor **cur_solid_rdescs = NULL;
	size_t cur_num_solid_rdescs = 0;

	/* Calculate the number of entries in the blob table.  */
	num_entries = wim->hdr.blob_table_reshdr.uncompressed_size /
		      sizeof(struct blob_descriptor_disk);

	/* Read the blob table into a buffer.  */
	ret = wim_reshdr_to_data(&wim->hdr.blob_table_reshdr, wim, &buf);
	if (ret)
		goto out;

	/* Allocate a hash table to map SHA-1 message digests into blob
	 * descriptors.  This is the in-memory "blob table".  (num_entries is
	 * only a capacity hint; the table grows as needed.)  */
	table = new_blob_table(num_entries);
	if (!table)
		goto oom;

	/* Allocate and initalize blob descriptors from the raw blob table
	 * buffer.  */
	for (size_t i = 0; i < num_entries; i++) {
		const struct blob_descriptor_disk *disk_entry =
			&((const struct blob_descriptor_disk*)buf)[i];
		struct wim_reshdr reshdr;
		u16 part_number;

		/* Get the resource header  */
		get_wim_reshdr(&disk_entry->reshdr, &reshdr);

		/* Ignore SOLID flag if it isn't supposed to be used in this WIM
		 * version.  */
		if (wim->hdr.wim_version == WIM_VERSION_DEFAULT)
			reshdr.flags &= ~WIM_RESHDR_FLAG_SOLID;

		/* Allocate a new 'struct blob_descriptor'.  */
		cur_blob = new_blob_descriptor();
		if (!cur_blob)
			goto oom;

		/* Get the part number, reference count, and hash.  */
		part_number = le16_to_cpu(disk_entry->part_number);
		cur_blob->refcnt = le32_to_cpu(disk_entry->refcnt);
		copy_hash(cur_blob->hash, disk_entry->hash);

		if (reshdr.flags & WIM_RESHDR_FLAG_SOLID) {

			/* SOLID entry  */

			if (!cur_solid_rdescs) {
				/* Starting new run  */
				ret = load_solid_info(wim, disk_entry,
						      num_entries - i,
						      &cur_solid_rdescs,
						      &cur_num_solid_rdescs);
				if (ret)
					goto out;
			}

			if (reshdr.uncompressed_size == SOLID_RESOURCE_MAGIC_NUMBER) {
				/* Resource entry, not blob entry  */
				goto free_cur_blob_and_continue;
			}

			/* Blob entry  */

			ret = assign_blob_to_solid_resource(&reshdr,
							    cur_blob,
							    cur_solid_rdescs,
							    cur_num_solid_rdescs);
			if (ret)
				goto out;

		} else {
			/* Normal blob/resource entry; SOLID not set.  */

			struct wim_resource_descriptor *rdesc;

			if (unlikely(cur_solid_rdescs)) {
				/* This entry terminated a solid run.  */
				ret = finish_solid_rdescs(cur_solid_rdescs,
							  cur_num_solid_rdescs);
				cur_solid_rdescs = NULL;
				if (ret)
					goto out;
			}

			/* A non-compressed resource must be stored verbatim, so
			 * its on-disk and uncompressed sizes must agree.  */
			if (unlikely(!(reshdr.flags & WIM_RESHDR_FLAG_COMPRESSED) &&
				     (reshdr.size_in_wim != reshdr.uncompressed_size)))
			{
				ERROR("Uncompressed resource has "
				      "size_in_wim != uncompressed_size");
				ret = WIMLIB_ERR_INVALID_LOOKUP_TABLE_ENTRY;
				goto out;
			}

			/* Set up a resource descriptor for this blob.  */

			rdesc = MALLOC(sizeof(struct wim_resource_descriptor));
			if (!rdesc)
				goto oom;

			wim_reshdr_to_desc_and_blob(&reshdr, wim, rdesc, cur_blob);
			/* The resource descriptor references @wim, so take a
			 * reference to keep it alive.  */
			wim->refcnt++;
		}

		/* cur_blob is now a blob bound to a resource.  */

		/* Ignore entries with all zeroes in the hash field.  */
		if (unlikely(is_zero_hash(cur_blob->hash)))
			goto free_cur_blob_and_continue;

		/* Verify that the blob has nonzero size.  */
		if (unlikely(cur_blob->size == 0)) {
			num_empty_blobs++;
			goto free_cur_blob_and_continue;
		}

		/* Verify that the part number matches that of the underlying
		 * WIM file.  */
		if (unlikely(part_number != wim->hdr.part_number)) {
			num_wrong_part_blobs++;
			goto free_cur_blob_and_continue;
		}

		if (reshdr.flags & WIM_RESHDR_FLAG_METADATA) {
			/* Blob table entry for a metadata resource.  */

			/* Metadata entries with no references must be ignored.
			 * See, for example, the WinPE WIMs from the WAIK v2.1.
			 */
			if (cur_blob->refcnt == 0)
				goto free_cur_blob_and_continue;

			if (cur_blob->refcnt != 1) {
				/* We don't currently support this case due to
				 * the complications of multiple images sharing
				 * the same metadata resource or a metadata
				 * resource also being referenced by files.  */
				ERROR("Found metadata resource with refcnt != 1");
				ret = WIMLIB_ERR_INVALID_LOOKUP_TABLE_ENTRY;
				goto out;
			}

			if (reshdr.flags & WIM_RESHDR_FLAG_SOLID) {
				ERROR("Image metadata in solid resources "
				      "is unsupported.");
				ret = WIMLIB_ERR_INVALID_LOOKUP_TABLE_ENTRY;
				goto out;
			}

			if (wim->hdr.part_number != 1) {
				WARNING("Ignoring metadata resource found in a "
					"non-first part of the split WIM");
				goto free_cur_blob_and_continue;
			}

			/* The number of entries in the blob table with
			 * WIM_RESHDR_FLAG_METADATA set should be the same as
			 * the image_count field in the WIM header.  */
			if (image_index == wim->hdr.image_count) {
				WARNING("Found more metadata resources than images");
				goto free_cur_blob_and_continue;
			}

			/* Notice very carefully:  We are assigning the metadata
			 * resources to images in the same order in which their
			 * blob table entries occur on disk.  (This is also the
			 * behavior of Microsoft's software.)  In particular,
			 * this overrides the actual locations of the metadata
			 * resources themselves in the WIM file as well as any
			 * information written in the XML data.  */
			wim->image_metadata[image_index] = new_unloaded_image_metadata(cur_blob);
			if (!wim->image_metadata[image_index])
				goto oom;
			image_index++;
		} else {
			/* Blob table entry for a non-metadata blob.  */

			/* Ignore this blob if it's a duplicate.  */
			if (lookup_blob(table, cur_blob->hash)) {
				num_duplicate_blobs++;
				goto free_cur_blob_and_continue;
			}

			/* Insert the blob into the in-memory blob table, keyed
			 * by its SHA-1 message digest.  */
			blob_table_insert(table, cur_blob);
		}

		continue;

	free_cur_blob_and_continue:
		/* Shared skip path: if the blob was bound to a resource in the
		 * current solid run, detach it first so freeing the descriptor
		 * does not disturb the run's shared resource descriptors.  */
		if (cur_solid_rdescs &&
		    cur_blob->blob_location == BLOB_IN_WIM)
			blob_unset_is_located_in_wim_resource(cur_blob);
		free_blob_descriptor(cur_blob);
	}
	/* All entries consumed; clear so the error path won't double-free.  */
	cur_blob = NULL;

	if (cur_solid_rdescs) {
		/* End of blob table terminated a solid run.  */
		ret = finish_solid_rdescs(cur_solid_rdescs, cur_num_solid_rdescs);
		cur_solid_rdescs = NULL;
		if (ret)
			goto out;
	}

	if (wim->hdr.part_number == 1 && image_index != wim->hdr.image_count) {
		WARNING("Could not find metadata resources for all images");
		wim->hdr.image_count = image_index;
	}

	if (num_duplicate_blobs > 0)
		WARNING("Ignoring %zu duplicate blobs", num_duplicate_blobs);

	if (num_empty_blobs > 0)
		WARNING("Ignoring %zu empty blobs", num_empty_blobs);

	if (num_wrong_part_blobs > 0) {
		WARNING("Ignoring %zu blobs with wrong part number",
			num_wrong_part_blobs);
	}

	wim->blob_table = table;
	ret = 0;
	goto out_free_buf;

oom:
	ERROR("Not enough memory to read blob table!");
	ret = WIMLIB_ERR_NOMEM;
out:
	/* Error path: release any partially-built state.  */
	free_solid_rdescs(cur_solid_rdescs, cur_num_solid_rdescs);
	free_blob_descriptor(cur_blob);
	free_blob_table(table);
out_free_buf:
	FREE(buf);
	return ret;
}
1123 
1124 static void
write_blob_descriptor(struct blob_descriptor_disk * disk_entry,const struct wim_reshdr * out_reshdr,u16 part_number,u32 refcnt,const u8 * hash)1125 write_blob_descriptor(struct blob_descriptor_disk *disk_entry,
1126 		      const struct wim_reshdr *out_reshdr,
1127 		      u16 part_number, u32 refcnt, const u8 *hash)
1128 {
1129 	put_wim_reshdr(out_reshdr, &disk_entry->reshdr);
1130 	disk_entry->part_number = cpu_to_le16(part_number);
1131 	disk_entry->refcnt = cpu_to_le32(refcnt);
1132 	copy_hash(disk_entry->hash, hash);
1133 }
1134 
1135 /* Note: the list of blob descriptors must be sorted so that all entries for the
1136  * same solid resource are consecutive.  In addition, blob descriptors for
1137  * metadata resources must be in the same order as the indices of the underlying
1138  * images.  */
int
write_blob_table_from_blob_list(struct list_head *blob_list,
				struct filedes *out_fd,
				u16 part_number,
				struct wim_reshdr *out_reshdr,
				int write_resource_flags)
{
	size_t table_size;
	struct blob_descriptor *blob;
	struct blob_descriptor_disk *table_buf;
	struct blob_descriptor_disk *table_buf_ptr;
	int ret;
	u64 prev_res_offset_in_wim = ~0ULL;
	u64 prev_uncompressed_size;
	u64 logical_offset;

	/* Pass 1: compute the serialized table size.  Each blob contributes
	 * one entry, plus one extra "resource entry" for the first blob of
	 * each distinct solid resource (detected by a change in
	 * out_res_offset_in_wim, which is why the list must keep entries of
	 * the same solid resource consecutive).  */
	table_size = 0;
	list_for_each_entry(blob, blob_list, blob_table_list) {
		table_size += sizeof(struct blob_descriptor_disk);

		if (blob->out_reshdr.flags & WIM_RESHDR_FLAG_SOLID &&
		    blob->out_res_offset_in_wim != prev_res_offset_in_wim)
		{
			table_size += sizeof(struct blob_descriptor_disk);
			prev_res_offset_in_wim = blob->out_res_offset_in_wim;
		}
	}

	table_buf = MALLOC(table_size);
	if (table_buf == NULL) {
		ERROR("Failed to allocate %zu bytes for temporary blob table",
		      table_size);
		return WIMLIB_ERR_NOMEM;
	}
	table_buf_ptr = table_buf;

	/* Pass 2: serialize the entries into table_buf.  */
	prev_res_offset_in_wim = ~0ULL;
	prev_uncompressed_size = 0;
	logical_offset = 0;
	list_for_each_entry(blob, blob_list, blob_table_list) {
		if (blob->out_reshdr.flags & WIM_RESHDR_FLAG_SOLID) {
			struct wim_reshdr tmp_reshdr;

			/* Eww.  When WIMGAPI sees multiple solid resources, it
			 * expects the offsets to be adjusted as if there were
			 * really only one solid resource.  */

			if (blob->out_res_offset_in_wim != prev_res_offset_in_wim) {
				/* Put the resource entry for solid resource.
				 * The magic uncompressed_size marks it as a
				 * resource entry rather than a blob entry.  */
				tmp_reshdr.offset_in_wim = blob->out_res_offset_in_wim;
				tmp_reshdr.size_in_wim = blob->out_res_size_in_wim;
				tmp_reshdr.uncompressed_size = SOLID_RESOURCE_MAGIC_NUMBER;
				tmp_reshdr.flags = WIM_RESHDR_FLAG_SOLID;

				write_blob_descriptor(table_buf_ptr++, &tmp_reshdr,
						      part_number, 1, zero_hash);

				/* Advance the running logical offset by the
				 * uncompressed size of the *previous* solid
				 * resource, so blob offsets below appear as if
				 * all solid resources were concatenated.  */
				logical_offset += prev_uncompressed_size;

				prev_res_offset_in_wim = blob->out_res_offset_in_wim;
				prev_uncompressed_size = blob->out_res_uncompressed_size;
			}
			tmp_reshdr = blob->out_reshdr;
			tmp_reshdr.offset_in_wim += logical_offset;
			write_blob_descriptor(table_buf_ptr++, &tmp_reshdr,
					      part_number, blob->out_refcnt, blob->hash);
		} else {
			/* Non-solid blob: write its resource header as-is.  */
			write_blob_descriptor(table_buf_ptr++, &blob->out_reshdr,
					      part_number, blob->out_refcnt, blob->hash);
		}

	}
	/* Both passes must have agreed on the entry count.  */
	wimlib_assert((u8*)table_buf_ptr - (u8*)table_buf == table_size);

	/* Write the blob table uncompressed.  Although wimlib can handle a
	 * compressed blob table, MS software cannot.  */
	ret = write_wim_resource_from_buffer(table_buf,
					     table_size,
					     true,
					     out_fd,
					     WIMLIB_COMPRESSION_TYPE_NONE,
					     0,
					     out_reshdr,
					     NULL,
					     write_resource_flags);
	FREE(table_buf);
	return ret;
}
1227 
1228 /* Allocate a blob descriptor for the contents of the buffer, or re-use an
1229  * existing descriptor in @blob_table for an identical blob.  */
1230 struct blob_descriptor *
new_blob_from_data_buffer(const void * buffer,size_t size,struct blob_table * blob_table)1231 new_blob_from_data_buffer(const void *buffer, size_t size,
1232 			  struct blob_table *blob_table)
1233 {
1234 	u8 hash[SHA1_HASH_SIZE];
1235 	struct blob_descriptor *blob;
1236 	void *buffer_copy;
1237 
1238 	sha1_buffer(buffer, size, hash);
1239 
1240 	blob = lookup_blob(blob_table, hash);
1241 	if (blob)
1242 		return blob;
1243 
1244 	blob = new_blob_descriptor();
1245 	if (!blob)
1246 		return NULL;
1247 
1248 	buffer_copy = memdup(buffer, size);
1249 	if (!buffer_copy) {
1250 		free_blob_descriptor(blob);
1251 		return NULL;
1252 	}
1253 	blob_set_is_located_in_attached_buffer(blob, buffer_copy, size);
1254 	copy_hash(blob->hash, hash);
1255 	blob_table_insert(blob_table, blob);
1256 	return blob;
1257 }
1258 
1259 struct blob_descriptor *
after_blob_hashed(struct blob_descriptor * blob,struct blob_descriptor ** back_ptr,struct blob_table * blob_table)1260 after_blob_hashed(struct blob_descriptor *blob,
1261 		  struct blob_descriptor **back_ptr,
1262 		  struct blob_table *blob_table)
1263 {
1264 	struct blob_descriptor *duplicate_blob;
1265 
1266 	list_del(&blob->unhashed_list);
1267 	blob->unhashed = 0;
1268 
1269 	/* Look for a duplicate blob  */
1270 	duplicate_blob = lookup_blob(blob_table, blob->hash);
1271 	if (duplicate_blob) {
1272 		/* We have a duplicate blob.  Transfer the reference counts from
1273 		 * this blob to the duplicate and update the reference to this
1274 		 * blob (from a stream) to point to the duplicate.  The caller
1275 		 * is responsible for freeing @blob if needed.  */
1276 		wimlib_assert(duplicate_blob->size == blob->size);
1277 		duplicate_blob->refcnt += blob->refcnt;
1278 		blob->refcnt = 0;
1279 		*back_ptr = duplicate_blob;
1280 		return duplicate_blob;
1281 	} else {
1282 		/* No duplicate blob, so we need to insert this blob into the
1283 		 * blob table and treat it as a hashed blob.  */
1284 		blob_table_insert(blob_table, blob);
1285 		return blob;
1286 	}
1287 }
1288 
1289 /*
1290  * Calculate the SHA-1 message digest of a blob and move its descriptor from the
1291  * list of unhashed blobs to the blob table, possibly joining it with an
1292  * identical blob.
1293  *
1294  * @blob:
1295  *	The blob to hash
1296  * @blob_table:
1297  *	The blob table in which the blob needs to be indexed
1298  * @blob_ret:
1299  *	On success, a pointer to the resulting blob descriptor is written to
1300  *	this location.  This will be the same as @blob if it was inserted into
1301  *	the blob table, or different if a duplicate blob was found.
1302  *
1303  * Returns 0 on success; nonzero if there is an error reading the blob data.
1304  */
1305 int
hash_unhashed_blob(struct blob_descriptor * blob,struct blob_table * blob_table,struct blob_descriptor ** blob_ret)1306 hash_unhashed_blob(struct blob_descriptor *blob, struct blob_table *blob_table,
1307 		   struct blob_descriptor **blob_ret)
1308 {
1309 	struct blob_descriptor **back_ptr;
1310 	int ret;
1311 
1312 	back_ptr = retrieve_pointer_to_unhashed_blob(blob);
1313 
1314 	ret = sha1_blob(blob);
1315 	if (ret)
1316 		return ret;
1317 
1318 	*blob_ret = after_blob_hashed(blob, back_ptr, blob_table);
1319 	return 0;
1320 }
1321 
1322 void
blob_to_wimlib_resource_entry(const struct blob_descriptor * blob,struct wimlib_resource_entry * wentry)1323 blob_to_wimlib_resource_entry(const struct blob_descriptor *blob,
1324 			      struct wimlib_resource_entry *wentry)
1325 {
1326 	memset(wentry, 0, sizeof(*wentry));
1327 
1328 	wentry->uncompressed_size = blob->size;
1329 	if (blob->blob_location == BLOB_IN_WIM) {
1330 		unsigned res_flags = blob->rdesc->flags;
1331 
1332 		wentry->part_number = blob->rdesc->wim->hdr.part_number;
1333 		if (res_flags & WIM_RESHDR_FLAG_SOLID) {
1334 			wentry->offset = blob->offset_in_res;
1335 		} else {
1336 			wentry->compressed_size = blob->rdesc->size_in_wim;
1337 			wentry->offset = blob->rdesc->offset_in_wim;
1338 		}
1339 		wentry->raw_resource_offset_in_wim = blob->rdesc->offset_in_wim;
1340 		wentry->raw_resource_compressed_size = blob->rdesc->size_in_wim;
1341 		wentry->raw_resource_uncompressed_size = blob->rdesc->uncompressed_size;
1342 
1343 		wentry->is_compressed = (res_flags & WIM_RESHDR_FLAG_COMPRESSED) != 0;
1344 		wentry->is_free = (res_flags & WIM_RESHDR_FLAG_FREE) != 0;
1345 		wentry->is_spanned = (res_flags & WIM_RESHDR_FLAG_SPANNED) != 0;
1346 		wentry->packed = (res_flags & WIM_RESHDR_FLAG_SOLID) != 0;
1347 	}
1348 	if (!blob->unhashed)
1349 		copy_hash(wentry->sha1_hash, blob->hash);
1350 	wentry->reference_count = blob->refcnt;
1351 	wentry->is_metadata = blob->is_metadata;
1352 }
1353 
/* Context passed through for_blob_in_table()'s single void-pointer argument
 * by wimlib_iterate_lookup_table(): bundles the user's callback with its
 * opaque argument.  */
struct iterate_blob_context {
	/* User-provided callback, invoked once per resource entry.  */
	wimlib_iterate_lookup_table_callback_t cb;
	/* Opaque pointer forwarded unchanged to @cb.  */
	void *user_ctx;
};
1358 
1359 static int
do_iterate_blob(struct blob_descriptor * blob,void * _ctx)1360 do_iterate_blob(struct blob_descriptor *blob, void *_ctx)
1361 {
1362 	struct iterate_blob_context *ctx = _ctx;
1363 	struct wimlib_resource_entry entry;
1364 
1365 	blob_to_wimlib_resource_entry(blob, &entry);
1366 	return (*ctx->cb)(&entry, ctx->user_ctx);
1367 }
1368 
1369 /* API function documented in wimlib.h  */
1370 WIMLIBAPI int
wimlib_iterate_lookup_table(WIMStruct * wim,int flags,wimlib_iterate_lookup_table_callback_t cb,void * user_ctx)1371 wimlib_iterate_lookup_table(WIMStruct *wim, int flags,
1372 			    wimlib_iterate_lookup_table_callback_t cb,
1373 			    void *user_ctx)
1374 {
1375 	if (flags != 0)
1376 		return WIMLIB_ERR_INVALID_PARAM;
1377 
1378 	struct iterate_blob_context ctx = {
1379 		.cb = cb,
1380 		.user_ctx = user_ctx,
1381 	};
1382 	if (wim_has_metadata(wim)) {
1383 		int ret;
1384 		for (int i = 0; i < wim->hdr.image_count; i++) {
1385 			struct blob_descriptor *blob;
1386 			struct wim_image_metadata *imd = wim->image_metadata[i];
1387 
1388 			ret = do_iterate_blob(imd->metadata_blob, &ctx);
1389 			if (ret)
1390 				return ret;
1391 			image_for_each_unhashed_blob(blob, imd) {
1392 				ret = do_iterate_blob(blob, &ctx);
1393 				if (ret)
1394 					return ret;
1395 			}
1396 		}
1397 	}
1398 	return for_blob_in_table(wim->blob_table, do_iterate_blob, &ctx);
1399 }
1400