1 #ifndef _WIMLIB_BLOB_TABLE_H
2 #define _WIMLIB_BLOB_TABLE_H
3 
4 #include "wimlib/list.h"
5 #include "wimlib/resource.h"
6 #include "wimlib/sha1.h"
7 #include "wimlib/types.h"
8 
/*
 * An enumerated type that identifies where a blob's data is located.
 *
 * The value is stored in the 4-bit `blob_location' bitfield of
 * `struct blob_descriptor', so every enumerator must stay below 16.  Which
 * enumerators exist (and therefore the numeric value of the later ones)
 * depends on the WITH_FUSE, WITH_NTFS_3G and __WIN32__ build options.
 */
enum blob_location {

	/* The blob's data does not exist.  This is a temporary state only.  */
	BLOB_NONEXISTENT = 0,

	/* The blob's data is available in the WIM resource identified by the
	 * `struct wim_resource_descriptor' pointed to by @rdesc.
	 * @offset_in_res identifies the offset at which this particular blob
	 * begins in the uncompressed data of the resource.  */
	BLOB_IN_WIM,

	/* The blob's data is available as the contents of the file named by
	 * @file_on_disk.  */
	BLOB_IN_FILE_ON_DISK,

	/* The blob's data is available as the contents of the in-memory buffer
	 * pointed to by @attached_buffer.  */
	BLOB_IN_ATTACHED_BUFFER,

#ifdef WITH_FUSE
	/* The blob's data is available as the contents of the file with name
	 * @staging_file_name relative to the open directory file descriptor
	 * @staging_dir_fd.  */
	BLOB_IN_STAGING_FILE,
#endif

#ifdef WITH_NTFS_3G
	/* The blob's data is available as the contents of an NTFS attribute
	 * accessible through libntfs-3g.  @ntfs_loc points to a structure which
	 * identifies the attribute.  */
	BLOB_IN_NTFS_VOLUME,
#endif

#ifdef __WIN32__
	/* Windows only: the blob's data is available in the file (or named data
	 * stream) specified by @windows_file.  The data might be only properly
	 * accessible through the Windows API.  */
	BLOB_IN_WINDOWS_FILE,
#endif
};
50 
/*
 * A "blob extraction target" is a stream, and the inode to which that stream
 * belongs, to which a blob needs to be extracted as part of an extraction
 * operation.  Since blobs are single-instanced, a blob may have multiple
 * extraction targets.
 */
struct blob_extraction_target {
	/* Inode that owns @stream.  */
	struct wim_inode *inode;

	/* Stream of @inode to which the blob is to be extracted.  */
	struct wim_inode_stream *stream;
};
59 
60 /*
61  * Descriptor for a "blob", which is a known length sequence of binary data.
62  *
63  * Within a WIM file, blobs are single instanced and are identified by SHA-1
64  * message digest.
65  */
66 struct blob_descriptor {
67 
68 	/* List node for a hash bucket of the blob table  */
69 	struct hlist_node hash_list;
70 
71 	/*
72 	 * Uncompressed size of this blob.
73 	 *
74 	 * In most cases we are now enforcing that this is nonzero; i.e. an
75 	 * empty stream will have "no blob" rather than "an empty blob".  The
76 	 * exceptions are:
77 	 *
78 	 *	- blob descriptors with 'blob_location == BLOB_NONEXISTENT',
79 	 *	  e.g. placeholder entries for new metadata resources or for
80 	 *	  blobs required for pipable WIM extraction.  In these cases the
81 	 *	  size is not meaningful information anyway.
82 	 *	- blob descriptors with 'blob_location == BLOB_IN_STAGING_FILE'
83 	 *	  can vary their size over time, including to 0.
84 	 */
85 	u64 size;
86 
87 	union {
88 		/*
89 		 * For unhashed == 0: 'hash' is the SHA-1 message digest of the
90 		 * blob's data.  'hash_short' allows accessing just a prefix of
91 		 * the SHA-1 message digest, which is useful for getting a "hash
92 		 * code" for hash table lookup/insertion.
93 		 */
94 		u8 hash[SHA1_HASH_SIZE];
95 		size_t hash_short;
96 
97 		/* For unhashed == 1: these variables make it possible to find
98 		 * the stream that references this blob.  There can be at most
99 		 * one such reference, since duplicate blobs can only be joined
100 		 * after they have been hashed.  */
101 		struct {
102 			struct wim_inode *back_inode;
103 			u32 back_stream_id;
104 		};
105 	} _packed_attribute; /* union is SHA1_HASH_SIZE bytes */
106 
107 	/* Number of times this blob is referenced by file streams in WIM
108 	 * images.  See blob_decrement_refcnt() for information about the
109 	 * limitations of this field.  */
110 	u32 refcnt;
111 
112 	/*
113 	 * When a WIM file is written, this is set to the number of references
114 	 * (from file streams) to this blob in the output WIM file.
115 	 *
116 	 * During extraction, this is set to the number of targets to which this
117 	 * blob is being extracted.
118 	 *
119 	 * During image export, this is set to the number of references of this
120 	 * blob that originated from the source WIM.
121 	 *
122 	 * When mounting a WIM image read-write, this is set to the number of
123 	 * extra references to this blob preemptively taken to allow later
124 	 * saving the modified image as a new image and leaving the original
125 	 * image alone.
126 	 */
127 	u32 out_refcnt;
128 
129 #ifdef WITH_FUSE
130 	/* Number of open file descriptors to this blob during a FUSE mount of
131 	 * a WIM image.  */
132 	u16 num_opened_fds;
133 #endif
134 
135 	/* One of the `enum blob_location' values documented above.  */
136 	u16 blob_location : 4;
137 
138 	/* 1 iff this blob contains "metadata" as opposed to data.  */
139 	u16 is_metadata : 1;
140 
141 	/* 1 iff the SHA-1 message digest of this blob is unknown.  */
142 	u16 unhashed : 1;
143 
144 	/* Temporary fields used when writing blobs; set as documented for
145 	 * prepare_blob_list_for_write().  */
146 	u16 unique_size : 1;
147 	u16 will_be_in_output_wim : 1;
148 
149 	u16 may_send_done_with_file : 1;
150 
151 	/* Only used by wimlib_export_image() */
152 	u16 was_exported : 1;
153 
154 	/* Specification of where this blob's data is located.  Which member of
155 	 * this union is valid is determined by the @blob_location field.  */
156 	union {
157 		/* BLOB_IN_WIM  */
158 		struct {
159 			struct wim_resource_descriptor *rdesc;
160 			u64 offset_in_res;
161 
162 			/* Links together blobs that share the same underlying
163 			 * WIM resource.  The head is rdesc->blob_list.  */
164 			struct list_head rdesc_node;
165 		};
166 
167 		struct {
168 
169 			union {
170 
171 				/* BLOB_IN_FILE_ON_DISK
172 				 * BLOB_IN_WINDOWS_FILE  */
173 				struct {
174 					union {
175 						tchar *file_on_disk;
176 						struct windows_file *windows_file;
177 					};
178 					struct wim_inode *file_inode;
179 				};
180 
181 				/* BLOB_IN_ATTACHED_BUFFER */
182 				void *attached_buffer;
183 
184 			#ifdef WITH_FUSE
185 				/* BLOB_IN_STAGING_FILE  */
186 				struct {
187 					char *staging_file_name;
188 					int staging_dir_fd;
189 				};
190 			#endif
191 
192 			#ifdef WITH_NTFS_3G
193 				/* BLOB_IN_NTFS_VOLUME  */
194 				struct ntfs_location *ntfs_loc;
195 			#endif
196 			};
197 
198 			/* List link for per-WIM-image list of unhashed blobs */
199 			struct list_head unhashed_list;
200 		};
201 	};
202 
203 	/* Temporary fields  */
204 	union {
205 		/* Fields used temporarily during WIM file writing.  */
206 		struct {
207 			union {
208 				/* List node used for blob size table.  */
209 				struct hlist_node hash_list_2;
210 
211 				/* Metadata for the underlying solid resource in
212 				 * the WIM being written (only valid if
213 				 * WIM_RESHDR_FLAG_SOLID set in
214 				 * out_reshdr.flags).  */
215 				struct {
216 					u64 out_res_offset_in_wim;
217 					u64 out_res_size_in_wim;
218 					u64 out_res_uncompressed_size;
219 				};
220 			};
221 
222 			/* Links blobs being written to the WIM.  */
223 			struct list_head write_blobs_list;
224 
225 			union {
226 				/* Metadata for this blob in the WIM being
227 				 * written.  */
228 				struct wim_reshdr out_reshdr;
229 
230 				struct {
231 					/* Name under which this blob is being
232 					 * sorted; used only when sorting blobs
233 					 * for solid compression.  */
234 					utf16lechar *solid_sort_name;
235 					size_t solid_sort_name_nbytes;
236 				};
237 			};
238 		};
239 
240 		/* Used temporarily during extraction.  This is an array of
241 		 * references to the streams being extracted that use this blob.
242 		 * out_refcnt tracks the number of slots filled.  */
243 		union {
244 			struct blob_extraction_target inline_blob_extraction_targets[3];
245 			struct {
246 				struct blob_extraction_target *blob_extraction_targets;
247 				u32 alloc_blob_extraction_targets;
248 			};
249 		};
250 	};
251 
252 	/* Temporary list fields.  */
253 	union {
254 		/* Links blobs for writing blob table.  */
255 		struct list_head blob_table_list;
256 
257 		/* Links blobs being extracted.  */
258 		struct list_head extraction_list;
259 
260 		/* Links blobs being exported.  */
261 		struct list_head export_blob_list;
262 	};
263 };
264 
265 extern struct blob_table *
266 new_blob_table(size_t capacity) _malloc_attribute;
267 
268 extern void
269 free_blob_table(struct blob_table *table);
270 
271 extern int
272 read_blob_table(WIMStruct *wim);
273 
274 extern int
275 write_blob_table_from_blob_list(struct list_head *blob_list,
276 				struct filedes *out_fd,
277 				u16 part_number,
278 				struct wim_reshdr *out_reshdr,
279 				int write_resource_flags);
280 
281 extern struct blob_descriptor *
282 new_blob_descriptor(void) _malloc_attribute;
283 
284 extern struct blob_descriptor *
285 clone_blob_descriptor(const struct blob_descriptor *blob) _malloc_attribute;
286 
287 extern void
288 blob_decrement_refcnt(struct blob_descriptor *blob, struct blob_table *table);
289 
290 extern void
291 blob_subtract_refcnt(struct blob_descriptor *blob, struct blob_table *table,
292 		     u32 count);
293 
294 #ifdef WITH_FUSE
295 extern void
296 blob_decrement_num_opened_fds(struct blob_descriptor *blob);
297 #endif
298 
299 extern void
300 blob_release_location(struct blob_descriptor *blob);
301 
302 extern void
303 free_blob_descriptor(struct blob_descriptor *blob);
304 
305 extern void
306 blob_table_insert(struct blob_table *table, struct blob_descriptor *blob);
307 
308 extern void
309 blob_table_unlink(struct blob_table *table, struct blob_descriptor *blob);
310 
311 extern struct blob_descriptor *
312 lookup_blob(const struct blob_table *table, const u8 *hash);
313 
314 extern int
315 for_blob_in_table(struct blob_table *table,
316 		  int (*visitor)(struct blob_descriptor *, void *), void *arg);
317 
318 extern int
319 for_blob_in_table_sorted_by_sequential_order(struct blob_table *table,
320 					     int (*visitor)(struct blob_descriptor *, void *),
321 					     void *arg);
322 
323 struct wimlib_resource_entry;
324 
325 extern void
326 blob_to_wimlib_resource_entry(const struct blob_descriptor *blob,
327 			      struct wimlib_resource_entry *wentry);
328 
329 extern int
330 sort_blob_list(struct list_head *blob_list, size_t list_head_offset,
331 	       int (*compar)(const void *, const void*));
332 
333 extern int
334 sort_blob_list_by_sequential_order(struct list_head *blob_list,
335 				   size_t list_head_offset);
336 
337 extern int
338 cmp_blobs_by_sequential_order(const void *p1, const void *p2);
339 
340 static inline const struct blob_extraction_target *
blob_extraction_targets(const struct blob_descriptor * blob)341 blob_extraction_targets(const struct blob_descriptor *blob)
342 {
343 	if (blob->out_refcnt <= ARRAY_LEN(blob->inline_blob_extraction_targets))
344 		return blob->inline_blob_extraction_targets;
345 	else
346 		return blob->blob_extraction_targets;
347 }
348 
349 /*
350  * Declare that the specified blob is located in the specified WIM resource at
351  * the specified offset.  The caller is expected to set blob->size if required.
352  */
353 static inline void
blob_set_is_located_in_wim_resource(struct blob_descriptor * blob,struct wim_resource_descriptor * rdesc,u64 offset_in_res)354 blob_set_is_located_in_wim_resource(struct blob_descriptor *blob,
355 				    struct wim_resource_descriptor *rdesc,
356 				    u64 offset_in_res)
357 {
358 	blob->blob_location = BLOB_IN_WIM;
359 	blob->rdesc = rdesc;
360 	list_add_tail(&blob->rdesc_node, &rdesc->blob_list);
361 	blob->offset_in_res = offset_in_res;
362 }
363 
364 static inline void
blob_unset_is_located_in_wim_resource(struct blob_descriptor * blob)365 blob_unset_is_located_in_wim_resource(struct blob_descriptor *blob)
366 {
367 	list_del(&blob->rdesc_node);
368 	blob->blob_location = BLOB_NONEXISTENT;
369 }
370 
371 static inline void
blob_set_is_located_in_attached_buffer(struct blob_descriptor * blob,void * buffer,size_t size)372 blob_set_is_located_in_attached_buffer(struct blob_descriptor *blob,
373 				       void *buffer, size_t size)
374 {
375 	blob->blob_location = BLOB_IN_ATTACHED_BUFFER;
376 	blob->attached_buffer = buffer;
377 	blob->size = size;
378 }
379 
380 static inline bool
blob_is_in_file(const struct blob_descriptor * blob)381 blob_is_in_file(const struct blob_descriptor *blob)
382 {
383 	return blob->blob_location == BLOB_IN_FILE_ON_DISK
384 #ifdef __WIN32__
385 	    || blob->blob_location == BLOB_IN_WINDOWS_FILE
386 #endif
387 	   ;
388 }
389 
#ifdef __WIN32__
/* Windows only; defined outside this header.  Used by blob_file_path() to
 * obtain the path for a BLOB_IN_WINDOWS_FILE blob.  */
extern const wchar_t *
get_windows_file_path(const struct windows_file *file);
#endif
394 
395 static inline const tchar *
blob_file_path(const struct blob_descriptor * blob)396 blob_file_path(const struct blob_descriptor *blob)
397 {
398 #ifdef __WIN32__
399 	if (blob->blob_location == BLOB_IN_WINDOWS_FILE)
400 		return get_windows_file_path(blob->windows_file);
401 #endif
402 	return blob->file_on_disk;
403 }
404 
/*
 * Out-of-line routines for creating and hashing blobs.  Only the prototypes
 * are visible in this header; consult the definitions for the exact contracts.
 */
extern struct blob_descriptor *
new_blob_from_data_buffer(const void *buffer, size_t size,
			  struct blob_table *blob_table);

extern struct blob_descriptor *
after_blob_hashed(struct blob_descriptor *blob,
		  struct blob_descriptor **back_ptr,
		  struct blob_table *blob_table);

/* Returns an int status; a blob descriptor is passed back via @blob_ret.  */
extern int
hash_unhashed_blob(struct blob_descriptor *blob, struct blob_table *blob_table,
		   struct blob_descriptor **blob_ret);

/* NOTE(review): presumably resolves the back_inode / back_stream_id
 * back-reference of an unhashed blob -- confirm against the definition.  */
extern struct blob_descriptor **
retrieve_pointer_to_unhashed_blob(struct blob_descriptor *blob);
420 
421 static inline void
prepare_unhashed_blob(struct blob_descriptor * blob,struct wim_inode * back_inode,u32 stream_id,struct list_head * unhashed_blobs)422 prepare_unhashed_blob(struct blob_descriptor *blob,
423 		      struct wim_inode *back_inode, u32 stream_id,
424 		      struct list_head *unhashed_blobs)
425 {
426 	if (!blob)
427 		return;
428 	blob->unhashed = 1;
429 	blob->back_inode = back_inode;
430 	blob->back_stream_id = stream_id;
431 	list_add_tail(&blob->unhashed_list, unhashed_blobs);
432 }
433 
434 #endif /* _WIMLIB_BLOB_TABLE_H */
435