1 /*
2 * dentry.c - see description below
3 */
4
5 /*
6 * Copyright (C) 2012-2016 Eric Biggers
7 *
8 * This file is free software; you can redistribute it and/or modify it under
9 * the terms of the GNU Lesser General Public License as published by the Free
10 * Software Foundation; either version 3 of the License, or (at your option) any
11 * later version.
12 *
13 * This file is distributed in the hope that it will be useful, but WITHOUT
14 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
15 * FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more
16 * details.
17 *
18 * You should have received a copy of the GNU Lesser General Public License
19 * along with this file; if not, see http://www.gnu.org/licenses/.
20 */
21
22 /*
23 * This file contains logic to deal with WIM directory entries, or "dentries":
24 *
25 * - Reading a dentry tree from a metadata resource in a WIM file
26 * - Writing a dentry tree to a metadata resource in a WIM file
27 * - Iterating through a tree of WIM dentries
28 * - Path lookup: translating a path into a WIM dentry or inode
29 * - Creating, modifying, and deleting WIM dentries
30 *
31 * Notes:
32 *
33 * - A WIM file can contain multiple images, each of which has an independent
34 * tree of dentries. "On disk", the dentry tree for an image is stored in
35 * the "metadata resource" for that image.
36 *
37 * - Multiple dentries in an image may correspond to the same inode, or "file".
38 * When this occurs, it means that the file has multiple names, or "hard
39 * links". A dentry is not a file, but rather the name of a file!
40 *
41 * - Inodes are not represented explicitly in the WIM file format. Instead,
42 * the metadata resource provides a "hard link group ID" for each dentry.
43 * wimlib handles pulling out actual inodes from this information, but this
44 * occurs in inode_fixup.c and not in this file.
45 *
46 * - wimlib does not allow *directory* hard links, so a WIM image really does
47 * have a *tree* of dentries (and not an arbitrary graph of dentries).
48 *
49 * - wimlib supports both case-sensitive and case-insensitive path lookups.
50 * The implementation uses a single in-memory index per directory, using a
51 * collation order like that used by NTFS; see collate_dentry_names().
52 *
53 * - Multiple dentries in a directory might have the same case-insensitive
54 * name. But wimlib enforces that at most one dentry in a directory can have
55 * a given case-sensitive name.
56 */
57
58 #ifdef HAVE_CONFIG_H
59 # include "config.h"
60 #endif
61
62 #include <errno.h>
63
64 #include "wimlib/assert.h"
65 #include "wimlib/dentry.h"
66 #include "wimlib/inode.h"
67 #include "wimlib/encoding.h"
68 #include "wimlib/endianness.h"
69 #include "wimlib/metadata.h"
70 #include "wimlib/paths.h"
71
/* On-disk format of a WIM dentry (directory entry), located in the metadata
 * resource for a WIM image.
 *
 * The structure is declared with _packed_attribute, and its multi-byte integer
 * fields use the le16/le32/le64 types.  Only the fixed-length portion is
 * declared as real members; the variable-length fields that may follow are
 * described in comments at the end of the structure. */
struct wim_dentry_on_disk {

	/* Length of this directory entry in bytes, not including any extra
	 * stream entries.  Should be a multiple of 8 so that the following
	 * dentry or extra stream entry is aligned on an 8-byte boundary.  (If
	 * not, wimlib will round it up.)  It must be at least as long as the
	 * fixed-length fields of the dentry (WIM_DENTRY_DISK_SIZE), plus the
	 * lengths of the file name and/or short name if present, plus the size
	 * of any "extra" data.
	 *
	 * It is also possible for this field to be 0.  This case indicates the
	 * end of a list of sibling entries in a directory.  It also means the
	 * real length is 8, because the dentry included only the length field,
	 * but that takes up 8 bytes.  */
	le64 length;

	/* File attributes for the file or directory.  This is a bitwise OR of
	 * the FILE_ATTRIBUTE_* constants and should correspond to the value
	 * retrieved by GetFileAttributes() on Windows.  */
	le32 attributes;

	/* A value that specifies the security descriptor for this file or
	 * directory.  If 0xFFFFFFFF, the file or directory has no security
	 * descriptor.  Otherwise, it is a 0-based index into the WIM image's
	 * table of security descriptors (see: `struct wim_security_data') */
	le32 security_id;

	/* Offset, in bytes, from the start of the uncompressed metadata
	 * resource of this directory's child directory entries, or 0 if this
	 * directory entry does not correspond to a directory or otherwise does
	 * not have any children. */
	le64 subdir_offset;

	/* Reserved fields */
	le64 unused_1;
	le64 unused_2;

	/* Creation time, last access time, and last write time, in
	 * 100-nanosecond intervals since 12:00 a.m. UTC January 1, 1601.  They
	 * should correspond to the times gotten by calling GetFileTime() on
	 * Windows. */
	le64 creation_time;
	le64 last_access_time;
	le64 last_write_time;

	/*
	 * Usually this is the SHA-1 message digest of the file's "contents"
	 * (the unnamed data stream).
	 *
	 * If the file has FILE_ATTRIBUTE_REPARSE_POINT set, then this is
	 * instead usually the SHA-1 message digest of the uncompressed reparse
	 * point data.
	 *
	 * However, there are some special rules that need to be applied to
	 * interpret this field correctly when extra stream entries are present.
	 * See the code for details.
	 */
	u8 default_hash[SHA1_HASH_SIZE];

	/* Unknown field (maybe accidental padding) */
	le32 unknown_0x54;

	/*
	 * The following 8-byte union contains either information about the
	 * reparse point (for files with FILE_ATTRIBUTE_REPARSE_POINT set), or
	 * the "hard link group ID" (for other files).
	 *
	 * The reparse point information contains ReparseTag and ReparseReserved
	 * from the header of the reparse point buffer.  It also contains a flag
	 * that indicates whether a reparse point fixup (for the target of an
	 * absolute symbolic link or junction) was done or not.
	 *
	 * The "hard link group ID" is like an inode number; all dentries for
	 * the same inode share the same value.  See inode_fixup.c for more
	 * information.
	 *
	 * Note that this union creates the limitation that reparse point files
	 * cannot have multiple names (hard links).
	 */
	union {
		/* Interpretation for reparse point files */
		struct {
			le32 reparse_tag;
			le16 rp_reserved;
			le16 rp_flags;
		} _packed_attribute reparse;
		/* Interpretation for all other files */
		struct {
			le64 hard_link_group_id;
		} _packed_attribute nonreparse;
	};

	/* Number of extra stream entries that directly follow this dentry
	 * on-disk. */
	le16 num_extra_streams;

	/* If nonzero, this is the length, in bytes, of this dentry's UTF-16LE
	 * encoded short name (8.3 DOS-compatible name), excluding the null
	 * terminator.  If zero, then the long name of this dentry does not have
	 * a corresponding short name (but this does not exclude the possibility
	 * that another dentry for the same file has a short name).  */
	le16 short_name_nbytes;

	/* If nonzero, this is the length, in bytes, of this dentry's UTF-16LE
	 * encoded "long" name, excluding the null terminator.  If zero, then
	 * this file has no long name.  The root dentry should not have a long
	 * name, but all other dentries in the image should have long names.  */
	le16 name_nbytes;

	/* Beginning of optional, variable-length fields.  These are not
	 * declared as members, so sizeof(struct wim_dentry_on_disk) covers only
	 * the fixed-length fields above. */

	/* If name_nbytes != 0, the next field will be the UTF-16LE encoded long
	 * name.  This will be null-terminated, so the size of this field will
	 * really be name_nbytes + 2.  */
	/*utf16lechar name[];*/

	/* If short_name_nbytes != 0, the next field will be the UTF-16LE
	 * encoded short name.  This will be null-terminated, so the size of
	 * this field will really be short_name_nbytes + 2.  */
	/*utf16lechar short_name[];*/

	/* If there is still space in the dentry (according to the 'length'
	 * field) after 8-byte alignment, then the remaining space will be a
	 * variable-length list of tagged metadata items.  See tagged_items.c
	 * for more information.  */
	/* u8 tagged_items[] _aligned_attribute(8); */

} _packed_attribute;
200 /* If num_extra_streams != 0, then there are that many extra stream
201 * entries following the dentry, starting on the next 8-byte aligned
202 * boundary. They are not counted in the 'length' field of the dentry.
203 */
204
/* On-disk format of an extra stream entry.  This represents an extra NTFS-style
 * "stream" associated with the file, such as a named data stream.
 *
 * The structure is declared with _packed_attribute and ends in a flexible
 * array member, so sizeof() of this structure covers only the fixed-length
 * fields and excludes the stream name. */
struct wim_extra_stream_entry_on_disk {

	/* Length of this extra stream entry, in bytes.  This includes all
	 * fixed-length fields, plus the name and null terminator if present,
	 * and any needed padding such that the length is a multiple of 8.  */
	le64 length;

	/* Reserved field */
	le64 reserved;

	/* SHA-1 message digest of this stream's uncompressed data, or all
	 * zeroes if this stream's data is of zero length.  */
	u8 hash[SHA1_HASH_SIZE];

	/* Length of this stream's name, in bytes and excluding the null
	 * terminator; or 0 if this stream is unnamed.  */
	le16 name_nbytes;

	/* Stream name in UTF-16LE.  It is @name_nbytes bytes long, excluding
	 * the null terminator.  There is a null terminator character if
	 * @name_nbytes != 0; i.e., if this stream is named.  */
	utf16lechar name[];
} _packed_attribute;
230
231 static void
do_dentry_set_name(struct wim_dentry * dentry,utf16lechar * name,size_t name_nbytes)232 do_dentry_set_name(struct wim_dentry *dentry, utf16lechar *name,
233 size_t name_nbytes)
234 {
235 FREE(dentry->d_name);
236 dentry->d_name = name;
237 dentry->d_name_nbytes = name_nbytes;
238
239 if (dentry_has_short_name(dentry)) {
240 FREE(dentry->d_short_name);
241 dentry->d_short_name = NULL;
242 dentry->d_short_name_nbytes = 0;
243 }
244 }
245
246 /*
247 * Set the name of a WIM dentry from a UTF-16LE string.
248 *
249 * This sets the long name of the dentry. The short name will automatically be
250 * removed, since it may not be appropriate for the new long name.
251 *
252 * The @name string need not be null-terminated, since its length is specified
253 * in @name_nbytes.
254 *
255 * If @name_nbytes is 0, both the long and short names of the dentry will be
256 * removed.
257 *
258 * Only use this function on unlinked dentries, since it doesn't update the name
259 * indices. For dentries that are currently linked into the tree, use
260 * rename_wim_path().
261 *
262 * Returns 0 or WIMLIB_ERR_NOMEM.
263 */
264 int
dentry_set_name_utf16le(struct wim_dentry * dentry,const utf16lechar * name,size_t name_nbytes)265 dentry_set_name_utf16le(struct wim_dentry *dentry, const utf16lechar *name,
266 size_t name_nbytes)
267 {
268 utf16lechar *dup = NULL;
269
270 if (name_nbytes) {
271 dup = utf16le_dupz(name, name_nbytes);
272 if (!dup)
273 return WIMLIB_ERR_NOMEM;
274 }
275 do_dentry_set_name(dentry, dup, name_nbytes);
276 return 0;
277 }
278
279
280 /*
281 * Set the name of a WIM dentry from a 'tchar' string.
282 *
283 * This sets the long name of the dentry. The short name will automatically be
284 * removed, since it may not be appropriate for the new long name.
285 *
286 * If @name is NULL or empty, both the long and short names of the dentry will
287 * be removed.
288 *
289 * Only use this function on unlinked dentries, since it doesn't update the name
290 * indices. For dentries that are currently linked into the tree, use
291 * rename_wim_path().
292 *
293 * Returns 0 or an error code resulting from a failed string conversion.
294 */
295 int
dentry_set_name(struct wim_dentry * dentry,const tchar * name)296 dentry_set_name(struct wim_dentry *dentry, const tchar *name)
297 {
298 utf16lechar *name_utf16le = NULL;
299 size_t name_utf16le_nbytes = 0;
300 int ret;
301
302 if (name && *name) {
303 ret = tstr_to_utf16le(name, tstrlen(name) * sizeof(tchar),
304 &name_utf16le, &name_utf16le_nbytes);
305 if (ret)
306 return ret;
307 }
308
309 do_dentry_set_name(dentry, name_utf16le, name_utf16le_nbytes);
310 return 0;
311 }
312
313 /* Calculate the minimum unaligned length, in bytes, of an on-disk WIM dentry
314 * that has names of the specified lengths. (Zero length means the
315 * corresponding name actually does not exist.) The returned value excludes
316 * tagged metadata items as well as any extra stream entries that may need to
317 * follow the dentry. */
318 static size_t
dentry_min_len_with_names(u16 name_nbytes,u16 short_name_nbytes)319 dentry_min_len_with_names(u16 name_nbytes, u16 short_name_nbytes)
320 {
321 size_t length = sizeof(struct wim_dentry_on_disk);
322 if (name_nbytes)
323 length += (u32)name_nbytes + 2;
324 if (short_name_nbytes)
325 length += (u32)short_name_nbytes + 2;
326 return length;
327 }
328
329
330 /* Return the length, in bytes, required for the specified stream on-disk, when
331 * represented as an extra stream entry. */
332 static size_t
stream_out_total_length(const struct wim_inode_stream * strm)333 stream_out_total_length(const struct wim_inode_stream *strm)
334 {
335 /* Account for the fixed length portion */
336 size_t len = sizeof(struct wim_extra_stream_entry_on_disk);
337
338 /* For named streams, account for the variable-length name. */
339 if (stream_is_named(strm))
340 len += utf16le_len_bytes(strm->stream_name) + 2;
341
342 /* Account for any necessary padding to the next 8-byte boundary. */
343 return ALIGN(len, 8);
344 }
345
346 /*
347 * Calculate the total number of bytes that will be consumed when a dentry is
348 * written. This includes the fixed-length portion of the dentry, the name
349 * fields, any tagged metadata items, and any extra stream entries. This also
350 * includes all alignment bytes.
351 */
352 size_t
dentry_out_total_length(const struct wim_dentry * dentry)353 dentry_out_total_length(const struct wim_dentry *dentry)
354 {
355 const struct wim_inode *inode = dentry->d_inode;
356 size_t len;
357
358 len = dentry_min_len_with_names(dentry->d_name_nbytes,
359 dentry->d_short_name_nbytes);
360 len = ALIGN(len, 8);
361
362 if (inode->i_extra)
363 len += ALIGN(inode->i_extra->size, 8);
364
365 if (!(inode->i_attributes & FILE_ATTRIBUTE_ENCRYPTED)) {
366 /*
367 * Extra stream entries:
368 *
369 * - Use one extra stream entry for each named data stream
370 * - Use one extra stream entry for the unnamed data stream when there is either:
371 * - a reparse point stream
372 * - at least one named data stream (for Windows PE bug workaround)
373 * - Use one extra stream entry for the reparse point stream if there is one
374 */
375 bool have_named_data_stream = false;
376 bool have_reparse_point_stream = false;
377 for (unsigned i = 0; i < inode->i_num_streams; i++) {
378 const struct wim_inode_stream *strm = &inode->i_streams[i];
379 if (stream_is_named_data_stream(strm)) {
380 len += stream_out_total_length(strm);
381 have_named_data_stream = true;
382 } else if (strm->stream_type == STREAM_TYPE_REPARSE_POINT) {
383 wimlib_assert(inode->i_attributes & FILE_ATTRIBUTE_REPARSE_POINT);
384 have_reparse_point_stream = true;
385 }
386 }
387
388 if (have_named_data_stream || have_reparse_point_stream) {
389 if (have_reparse_point_stream)
390 len += ALIGN(sizeof(struct wim_extra_stream_entry_on_disk), 8);
391 len += ALIGN(sizeof(struct wim_extra_stream_entry_on_disk), 8);
392 }
393 }
394
395 return len;
396 }
397
398 /* Internal version of for_dentry_in_tree() that omits the NULL check */
399 static int
do_for_dentry_in_tree(struct wim_dentry * dentry,int (* visitor)(struct wim_dentry *,void *),void * arg)400 do_for_dentry_in_tree(struct wim_dentry *dentry,
401 int (*visitor)(struct wim_dentry *, void *), void *arg)
402 {
403 int ret;
404 struct wim_dentry *child;
405
406 ret = (*visitor)(dentry, arg);
407 if (unlikely(ret))
408 return ret;
409
410 for_dentry_child(child, dentry) {
411 ret = do_for_dentry_in_tree(child, visitor, arg);
412 if (unlikely(ret))
413 return ret;
414 }
415 return 0;
416 }
417
418 /* Internal version of for_dentry_in_tree_depth() that omits the NULL check */
419 static int
do_for_dentry_in_tree_depth(struct wim_dentry * dentry,int (* visitor)(struct wim_dentry *,void *),void * arg)420 do_for_dentry_in_tree_depth(struct wim_dentry *dentry,
421 int (*visitor)(struct wim_dentry *, void *), void *arg)
422 {
423 int ret;
424 struct wim_dentry *child;
425
426 for_dentry_child_postorder(child, dentry) {
427 ret = do_for_dentry_in_tree_depth(child, visitor, arg);
428 if (unlikely(ret))
429 return ret;
430 }
431 return unlikely((*visitor)(dentry, arg));
432 }
433
/*
 * Invoke @visitor on every dentry in the tree rooted at @root, passing @arg as
 * the second argument of each call.
 *
 * The traversal is pre-order: a parent is visited before its children, and
 * siblings are visited in their collation order.
 *
 * @root may be NULL, which denotes an empty dentry tree; nothing is visited in
 * that case.
 *
 * @visitor must not change the structure of the dentry tree while the
 * traversal is in progress.
 *
 * Returns 0 if every call to @visitor returned 0; otherwise returns the first
 * nonzero value returned by @visitor.
 */
int
for_dentry_in_tree(struct wim_dentry *root,
		   int (*visitor)(struct wim_dentry *, void *), void *arg)
{
	/* An empty tree trivially succeeds. */
	return unlikely(!root) ? 0 : do_for_dentry_in_tree(root, visitor, arg);
}
460
/* Variant of for_dentry_in_tree() that performs a post-order traversal: the
 * visitor function is called on all of a dentry's children before it is called
 * on the dentry itself.  This makes it safe to free a dentry when visiting it.
 * A NULL @root denotes an empty tree, for which nothing is visited and 0 is
 * returned.  */
static int
for_dentry_in_tree_depth(struct wim_dentry *root,
			 int (*visitor)(struct wim_dentry *, void *), void *arg)
{
	return unlikely(!root) ? 0 : do_for_dentry_in_tree_depth(root, visitor, arg);
}
472
473 /*
474 * Calculate the full path to @dentry within the WIM image, if not already done.
475 *
476 * The full name will be saved in the cached value 'dentry->d_full_path'.
477 *
478 * Whenever possible, use dentry_full_path() instead of calling this and
479 * accessing d_full_path directly.
480 *
481 * Returns 0 or an error code resulting from a failed string conversion.
482 */
483 int
calculate_dentry_full_path(struct wim_dentry * dentry)484 calculate_dentry_full_path(struct wim_dentry *dentry)
485 {
486 size_t ulen;
487 const struct wim_dentry *d;
488
489 if (dentry->d_full_path)
490 return 0;
491
492 ulen = 0;
493 d = dentry;
494 do {
495 ulen += d->d_name_nbytes / sizeof(utf16lechar);
496 ulen++;
497 d = d->d_parent; /* assumes d == d->d_parent for root */
498 } while (!dentry_is_root(d));
499
500 utf16lechar ubuf[ulen];
501 utf16lechar *p = &ubuf[ulen];
502
503 d = dentry;
504 do {
505 p -= d->d_name_nbytes / sizeof(utf16lechar);
506 if (d->d_name_nbytes)
507 memcpy(p, d->d_name, d->d_name_nbytes);
508 *--p = cpu_to_le16(WIM_PATH_SEPARATOR);
509 d = d->d_parent; /* assumes d == d->d_parent for root */
510 } while (!dentry_is_root(d));
511
512 wimlib_assert(p == ubuf);
513
514 return utf16le_to_tstr(ubuf, ulen * sizeof(utf16lechar),
515 &dentry->d_full_path, NULL);
516 }
517
518 /*
519 * Return the full path to the @dentry within the WIM image, or NULL if the full
520 * path could not be determined due to a string conversion error.
521 *
522 * The returned memory will be cached in the dentry, so the caller is not
523 * responsible for freeing it.
524 */
525 tchar *
dentry_full_path(struct wim_dentry * dentry)526 dentry_full_path(struct wim_dentry *dentry)
527 {
528 calculate_dentry_full_path(dentry);
529 return dentry->d_full_path;
530 }
531
532 static int
dentry_calculate_subdir_offset(struct wim_dentry * dentry,void * _subdir_offset_p)533 dentry_calculate_subdir_offset(struct wim_dentry *dentry, void *_subdir_offset_p)
534 {
535 if (dentry_is_directory(dentry)) {
536 u64 *subdir_offset_p = _subdir_offset_p;
537 struct wim_dentry *child;
538
539 /* Set offset of directory's child dentries */
540 dentry->d_subdir_offset = *subdir_offset_p;
541
542 /* Account for child dentries */
543 for_dentry_child(child, dentry)
544 *subdir_offset_p += dentry_out_total_length(child);
545
546 /* Account for end-of-directory entry */
547 *subdir_offset_p += 8;
548 } else {
549 /* Not a directory; set the subdir offset to 0 */
550 dentry->d_subdir_offset = 0;
551 }
552 return 0;
553 }
554
555 /*
556 * Calculate the subdir offsets for a dentry tree, in preparation of writing
557 * that dentry tree to a metadata resource.
558 *
559 * The subdir offset of each dentry is the offset in the uncompressed metadata
560 * resource at which its child dentries begin, or 0 if that dentry has no
561 * children.
562 *
563 * The caller must initialize *subdir_offset_p to the first subdir offset that
564 * is available to use after the root dentry is written.
565 *
566 * When this function returns, *subdir_offset_p will have been advanced past the
567 * size needed for the dentry tree within the uncompressed metadata resource.
568 */
569 void
calculate_subdir_offsets(struct wim_dentry * root,u64 * subdir_offset_p)570 calculate_subdir_offsets(struct wim_dentry *root, u64 *subdir_offset_p)
571 {
572 for_dentry_in_tree(root, dentry_calculate_subdir_offset, subdir_offset_p);
573 }
574
575 static int
dentry_compare_names(const struct wim_dentry * d1,const struct wim_dentry * d2,bool ignore_case)576 dentry_compare_names(const struct wim_dentry *d1, const struct wim_dentry *d2,
577 bool ignore_case)
578 {
579 return cmp_utf16le_strings(d1->d_name, d1->d_name_nbytes / 2,
580 d2->d_name, d2->d_name_nbytes / 2,
581 ignore_case);
582 }
583
584 /*
585 * Collate (compare) the long filenames of two dentries. This first compares
586 * the names ignoring case, then falls back to a case-sensitive comparison if
587 * the names are the same ignoring case.
588 */
589 static int
collate_dentry_names(const struct avl_tree_node * n1,const struct avl_tree_node * n2)590 collate_dentry_names(const struct avl_tree_node *n1,
591 const struct avl_tree_node *n2)
592 {
593 const struct wim_dentry *d1, *d2;
594 int res;
595
596 d1 = avl_tree_entry(n1, struct wim_dentry, d_index_node);
597 d2 = avl_tree_entry(n2, struct wim_dentry, d_index_node);
598
599 res = dentry_compare_names(d1, d2, true);
600 if (res)
601 return res;
602 return dentry_compare_names(d1, d2, false);
603 }
604
/* Whether searches that specify WIMLIB_CASE_PLATFORM_DEFAULT ignore case.  The
 * initial value is true when __WIN32__ is defined and false otherwise.  It can
 * be changed by passing WIMLIB_INIT_FLAG_DEFAULT_CASE_SENSITIVE or
 * WIMLIB_INIT_FLAG_DEFAULT_CASE_INSENSITIVE to wimlib_global_init().  */
#ifdef __WIN32__
bool default_ignore_case = true;
#else
bool default_ignore_case = false;
#endif
616
617 /*
618 * Find the dentry within the given directory that has the given UTF-16LE
619 * filename. Return it if found, otherwise return NULL. This has configurable
620 * case sensitivity, and @name need not be null-terminated.
621 */
622 struct wim_dentry *
get_dentry_child_with_utf16le_name(const struct wim_dentry * dir,const utf16lechar * name,size_t name_nbytes,CASE_SENSITIVITY_TYPE case_type)623 get_dentry_child_with_utf16le_name(const struct wim_dentry *dir,
624 const utf16lechar *name,
625 size_t name_nbytes,
626 CASE_SENSITIVITY_TYPE case_type)
627 {
628 struct wim_dentry wanted;
629 struct avl_tree_node *cur = dir->d_inode->i_children;
630 struct wim_dentry *ci_match = NULL;
631
632 wanted.d_name = (utf16lechar *)name;
633 wanted.d_name_nbytes = name_nbytes;
634
635 if (unlikely(wanted.d_name_nbytes != name_nbytes))
636 return NULL; /* overflow */
637
638 /* Note: we can't use avl_tree_lookup_node() here because we need to
639 * save case-insensitive matches. */
640 while (cur) {
641 struct wim_dentry *child;
642 int res;
643
644 child = avl_tree_entry(cur, struct wim_dentry, d_index_node);
645
646 res = dentry_compare_names(&wanted, child, true);
647 if (!res) {
648 /* case-insensitive match found */
649 ci_match = child;
650
651 res = dentry_compare_names(&wanted, child, false);
652 if (!res)
653 return child; /* case-sensitive match found */
654 }
655
656 if (res < 0)
657 cur = cur->left;
658 else
659 cur = cur->right;
660 }
661
662 /* No case-sensitive match; use a case-insensitive match if possible. */
663
664 if (!will_ignore_case(case_type))
665 return NULL;
666
667 if (ci_match) {
668 size_t num_other_ci_matches = 0;
669 struct wim_dentry *other_ci_match, *d;
670
671 dentry_for_each_ci_match(d, ci_match) {
672 num_other_ci_matches++;
673 other_ci_match = d;
674 }
675
676 if (num_other_ci_matches != 0) {
677 WARNING("Result of case-insensitive lookup is ambiguous\n"
678 " (returning \"%"TS"\" of %zu "
679 "possible files, including \"%"TS"\")",
680 dentry_full_path(ci_match), num_other_ci_matches,
681 dentry_full_path(other_ci_match));
682 }
683 }
684
685 return ci_match;
686 }
687
688 /*
689 * Find the dentry within the given directory that has the given 'tstr'
690 * filename. If the filename was successfully converted to UTF-16LE and the
691 * dentry was found, return it; otherwise return NULL. This has configurable
692 * case sensitivity.
693 */
694 struct wim_dentry *
get_dentry_child_with_name(const struct wim_dentry * dir,const tchar * name,CASE_SENSITIVITY_TYPE case_type)695 get_dentry_child_with_name(const struct wim_dentry *dir, const tchar *name,
696 CASE_SENSITIVITY_TYPE case_type)
697 {
698 int ret;
699 const utf16lechar *name_utf16le;
700 size_t name_utf16le_nbytes;
701 struct wim_dentry *child;
702
703 ret = tstr_get_utf16le_and_len(name, &name_utf16le,
704 &name_utf16le_nbytes);
705 if (ret)
706 return NULL;
707
708 child = get_dentry_child_with_utf16le_name(dir,
709 name_utf16le,
710 name_utf16le_nbytes,
711 case_type);
712 tstr_put_utf16le(name_utf16le);
713 return child;
714 }
715
716 /* This is the UTF-16LE version of get_dentry(), currently private to this file
717 * because no one needs it besides get_dentry(). */
718 static struct wim_dentry *
get_dentry_utf16le(WIMStruct * wim,const utf16lechar * path,CASE_SENSITIVITY_TYPE case_type)719 get_dentry_utf16le(WIMStruct *wim, const utf16lechar *path,
720 CASE_SENSITIVITY_TYPE case_type)
721 {
722 struct wim_dentry *cur_dentry;
723 const utf16lechar *name_start, *name_end;
724
725 /* Start with the root directory of the image. Note: this will be NULL
726 * if an image has been added directly with wimlib_add_empty_image() but
727 * no files have been added yet; in that case we fail with ENOENT. */
728 cur_dentry = wim_get_current_root_dentry(wim);
729
730 name_start = path;
731 for (;;) {
732 if (cur_dentry == NULL) {
733 errno = ENOENT;
734 return NULL;
735 }
736
737 if (*name_start && !dentry_is_directory(cur_dentry)) {
738 errno = ENOTDIR;
739 return NULL;
740 }
741
742 while (*name_start == cpu_to_le16(WIM_PATH_SEPARATOR))
743 name_start++;
744
745 if (!*name_start)
746 return cur_dentry;
747
748 name_end = name_start;
749 do {
750 ++name_end;
751 } while (*name_end != cpu_to_le16(WIM_PATH_SEPARATOR) && *name_end);
752
753 cur_dentry = get_dentry_child_with_utf16le_name(cur_dentry,
754 name_start,
755 (u8*)name_end - (u8*)name_start,
756 case_type);
757 name_start = name_end;
758 }
759 }
760
761 /*
762 * WIM path lookup: translate a path in the currently selected WIM image to the
763 * corresponding dentry, if it exists.
764 *
765 * @wim
766 * The WIMStruct for the WIM. The search takes place in the currently
767 * selected image.
768 *
769 * @path
770 * The path to look up, given relative to the root of the WIM image.
771 * Characters with value WIM_PATH_SEPARATOR are taken to be path
772 * separators. Leading path separators are ignored, whereas one or more
773 * trailing path separators cause the path to only match a directory.
774 *
775 * @case_type
776 * The case-sensitivity behavior of this function, as one of the following
777 * constants:
778 *
779 * - WIMLIB_CASE_SENSITIVE: Perform the search case sensitively. This means
780 * that names must match exactly.
781 *
782 * - WIMLIB_CASE_INSENSITIVE: Perform the search case insensitively. This
783 * means that names are considered to match if they are equal when
784 * transformed to upper case. If a path component matches multiple names
785 * case-insensitively, the name that matches the path component
786 * case-sensitively is chosen, if existent; otherwise one
787 * case-insensitively matching name is chosen arbitrarily.
788 *
789 * - WIMLIB_CASE_PLATFORM_DEFAULT: Perform either case-sensitive or
790 * case-insensitive search, depending on the value of the global variable
791 * default_ignore_case.
792 *
793 * In any case, no Unicode normalization is done before comparing strings.
794 *
795 * Returns a pointer to the dentry that is the result of the lookup, or NULL if
796 * no such dentry exists. If NULL is returned, errno is set to one of the
797 * following values:
798 *
799 * ENOTDIR if one of the path components used as a directory existed but
800 * was not, in fact, a directory.
801 *
802 * ENOENT otherwise.
803 *
804 * Additional notes:
805 *
806 * - This function does not consider a reparse point to be a directory, even
807 * if it has FILE_ATTRIBUTE_DIRECTORY set.
808 *
809 * - This function does not dereference symbolic links or junction points
810 * when performing the search.
811 *
812 * - Since this function ignores leading slashes, the empty path is valid and
813 * names the root directory of the WIM image.
814 *
815 * - An image added with wimlib_add_empty_image() does not have a root
816 * directory yet, and this function will fail with ENOENT for any path on
817 * such an image.
818 */
819 struct wim_dentry *
get_dentry(WIMStruct * wim,const tchar * path,CASE_SENSITIVITY_TYPE case_type)820 get_dentry(WIMStruct *wim, const tchar *path, CASE_SENSITIVITY_TYPE case_type)
821 {
822 int ret;
823 const utf16lechar *path_utf16le;
824 struct wim_dentry *dentry;
825
826 ret = tstr_get_utf16le(path, &path_utf16le);
827 if (ret)
828 return NULL;
829 dentry = get_dentry_utf16le(wim, path_utf16le, case_type);
830 tstr_put_utf16le(path_utf16le);
831 return dentry;
832 }
833
834 /* Modify @path, which is a null-terminated string @len 'tchars' in length,
835 * in-place to produce the path to its parent directory. */
836 static void
to_parent_name(tchar * path,size_t len)837 to_parent_name(tchar *path, size_t len)
838 {
839 ssize_t i = (ssize_t)len - 1;
840 while (i >= 0 && path[i] == WIM_PATH_SEPARATOR)
841 i--;
842 while (i >= 0 && path[i] != WIM_PATH_SEPARATOR)
843 i--;
844 while (i >= 0 && path[i] == WIM_PATH_SEPARATOR)
845 i--;
846 path[i + 1] = T('\0');
847 }
848
849 /* Similar to get_dentry(), but returns the dentry named by @path with the last
850 * component stripped off.
851 *
852 * Note: The returned dentry is NOT guaranteed to be a directory. */
853 struct wim_dentry *
get_parent_dentry(WIMStruct * wim,const tchar * path,CASE_SENSITIVITY_TYPE case_type)854 get_parent_dentry(WIMStruct *wim, const tchar *path,
855 CASE_SENSITIVITY_TYPE case_type)
856 {
857 size_t path_len = tstrlen(path);
858 tchar buf[path_len + 1];
859
860 tmemcpy(buf, path, path_len + 1);
861 to_parent_name(buf, path_len);
862 return get_dentry(wim, buf, case_type);
863 }
864
865 /*
866 * Create an unlinked dentry.
867 *
868 * @name specifies the long name to give the new dentry. If NULL or empty, the
869 * new dentry will be given no long name.
870 *
871 * The new dentry will have no short name and no associated inode.
872 *
873 * On success, returns 0 and a pointer to the new, allocated dentry is stored in
874 * *dentry_ret. On failure, returns WIMLIB_ERR_NOMEM or an error code resulting
875 * from a failed string conversion.
876 */
877 static int
new_dentry(const tchar * name,struct wim_dentry ** dentry_ret)878 new_dentry(const tchar *name, struct wim_dentry **dentry_ret)
879 {
880 struct wim_dentry *dentry;
881 int ret;
882
883 dentry = CALLOC(1, sizeof(struct wim_dentry));
884 if (!dentry)
885 return WIMLIB_ERR_NOMEM;
886
887 if (name && *name) {
888 ret = dentry_set_name(dentry, name);
889 if (ret) {
890 FREE(dentry);
891 return ret;
892 }
893 }
894 dentry->d_parent = dentry;
895 *dentry_ret = dentry;
896 return 0;
897 }
898
899 /* Like new_dentry(), but also allocate an inode and associate it with the
900 * dentry. If set_timestamps=true, the timestamps for the inode will be set to
901 * the current time; otherwise, they will be left 0. */
902 int
new_dentry_with_new_inode(const tchar * name,bool set_timestamps,struct wim_dentry ** dentry_ret)903 new_dentry_with_new_inode(const tchar *name, bool set_timestamps,
904 struct wim_dentry **dentry_ret)
905 {
906 struct wim_dentry *dentry;
907 struct wim_inode *inode;
908 int ret;
909
910 ret = new_dentry(name, &dentry);
911 if (ret)
912 return ret;
913
914 inode = new_inode(dentry, set_timestamps);
915 if (!inode) {
916 free_dentry(dentry);
917 return WIMLIB_ERR_NOMEM;
918 }
919
920 *dentry_ret = dentry;
921 return 0;
922 }
923
924 /* Like new_dentry(), but also associate the new dentry with the specified inode
925 * and acquire a reference to each of the inode's blobs. */
926 int
new_dentry_with_existing_inode(const tchar * name,struct wim_inode * inode,struct wim_dentry ** dentry_ret)927 new_dentry_with_existing_inode(const tchar *name, struct wim_inode *inode,
928 struct wim_dentry **dentry_ret)
929 {
930 int ret = new_dentry(name, dentry_ret);
931 if (ret)
932 return ret;
933 d_associate(*dentry_ret, inode);
934 inode_ref_blobs(inode);
935 return 0;
936 }
937
938 /* Create an unnamed dentry with a new inode for a directory with the default
939 * metadata. */
940 int
new_filler_directory(struct wim_dentry ** dentry_ret)941 new_filler_directory(struct wim_dentry **dentry_ret)
942 {
943 int ret;
944 struct wim_dentry *dentry;
945
946 ret = new_dentry_with_new_inode(NULL, true, &dentry);
947 if (ret)
948 return ret;
949 /* Leave the inode number as 0; this is allowed for non
950 * hard-linked files. */
951 dentry->d_inode->i_attributes = FILE_ATTRIBUTE_DIRECTORY;
952 *dentry_ret = dentry;
953 return 0;
954 }
955
956 /*
957 * Free a WIM dentry.
958 *
959 * In addition to freeing the dentry itself, this disassociates the dentry from
960 * its inode. If the inode is no longer in use, it will be freed as well.
961 */
962 void
free_dentry(struct wim_dentry * dentry)963 free_dentry(struct wim_dentry *dentry)
964 {
965 if (dentry) {
966 d_disassociate(dentry);
967 FREE(dentry->d_name);
968 FREE(dentry->d_short_name);
969 FREE(dentry->d_full_path);
970 FREE(dentry);
971 }
972 }
973
/* Tree-walk callback for free_dentry_tree(): frees @dentry without touching
 * any blob reference counts.  The context argument is unused.  Always returns
 * 0. */
static int
do_free_dentry(struct wim_dentry *dentry, void *_ignore)
{
	(void)_ignore;

	free_dentry(dentry);
	return 0;
}
980
981 static int
do_free_dentry_and_unref_blobs(struct wim_dentry * dentry,void * blob_table)982 do_free_dentry_and_unref_blobs(struct wim_dentry *dentry, void *blob_table)
983 {
984 inode_unref_blobs(dentry->d_inode, blob_table);
985 free_dentry(dentry);
986 return 0;
987 }
988
989 /*
990 * Free all dentries in a tree.
991 *
992 * @root:
993 * The root of the dentry tree to free. If NULL, this function has no
994 * effect.
995 *
996 * @blob_table:
997 * A pointer to the blob table for the WIM, or NULL if not specified. If
998 * specified, this function will decrement the reference counts of the
999 * blobs referenced by the dentries.
1000 *
1001 * This function also releases references to the corresponding inodes.
1002 *
1003 * This function does *not* unlink @root from its parent directory, if it has
1004 * one. If @root has a parent, the caller must unlink @root before calling this
1005 * function.
1006 */
1007 void
free_dentry_tree(struct wim_dentry * root,struct blob_table * blob_table)1008 free_dentry_tree(struct wim_dentry *root, struct blob_table *blob_table)
1009 {
1010 int (*f)(struct wim_dentry *, void *);
1011
1012 if (blob_table)
1013 f = do_free_dentry_and_unref_blobs;
1014 else
1015 f = do_free_dentry;
1016
1017 for_dentry_in_tree_depth(root, f, blob_table);
1018 }
1019
1020 /*
1021 * Return the first dentry in the list of dentries which have the same
1022 * case-insensitive name as the one given.
1023 */
1024 struct wim_dentry *
dentry_get_first_ci_match(struct wim_dentry * dentry)1025 dentry_get_first_ci_match(struct wim_dentry *dentry)
1026 {
1027 struct wim_dentry *ci_match = dentry;
1028
1029 for (;;) {
1030 struct avl_tree_node *node;
1031 struct wim_dentry *prev;
1032
1033 node = avl_tree_prev_in_order(&ci_match->d_index_node);
1034 if (!node)
1035 break;
1036 prev = avl_tree_entry(node, struct wim_dentry, d_index_node);
1037 if (dentry_compare_names(prev, dentry, true))
1038 break;
1039 ci_match = prev;
1040 }
1041
1042 if (ci_match == dentry)
1043 return dentry_get_next_ci_match(dentry, dentry);
1044
1045 return ci_match;
1046 }
1047
1048 /*
1049 * Return the next dentry in the list of dentries which have the same
1050 * case-insensitive name as the one given.
1051 */
1052 struct wim_dentry *
dentry_get_next_ci_match(struct wim_dentry * dentry,struct wim_dentry * ci_match)1053 dentry_get_next_ci_match(struct wim_dentry *dentry, struct wim_dentry *ci_match)
1054 {
1055 do {
1056 struct avl_tree_node *node;
1057
1058 node = avl_tree_next_in_order(&ci_match->d_index_node);
1059 if (!node)
1060 return NULL;
1061 ci_match = avl_tree_entry(node, struct wim_dentry, d_index_node);
1062 } while (ci_match == dentry);
1063
1064 if (dentry_compare_names(ci_match, dentry, true))
1065 return NULL;
1066
1067 return ci_match;
1068 }
1069
1070 /*
1071 * Link a dentry into a directory.
1072 *
1073 * @parent:
1074 * The directory into which to link the dentry.
1075 *
1076 * @child:
1077 * The dentry to link into the directory. It must be currently unlinked.
1078 *
1079 * Returns NULL if successful; or, if @parent already contains a dentry with the
1080 * same case-sensitive name as @child, then a pointer to this duplicate dentry
1081 * is returned.
1082 */
1083 struct wim_dentry *
dentry_add_child(struct wim_dentry * parent,struct wim_dentry * child)1084 dentry_add_child(struct wim_dentry *parent, struct wim_dentry *child)
1085 {
1086 struct wim_inode *dir = parent->d_inode;
1087 struct avl_tree_node *duplicate;
1088
1089 wimlib_assert(parent != child);
1090 wimlib_assert(inode_is_directory(dir));
1091
1092 duplicate = avl_tree_insert(&dir->i_children, &child->d_index_node,
1093 collate_dentry_names);
1094 if (duplicate)
1095 return avl_tree_entry(duplicate, struct wim_dentry, d_index_node);
1096
1097 child->d_parent = parent;
1098 return NULL;
1099 }
1100
1101 /* Unlink a dentry from its parent directory. */
1102 void
unlink_dentry(struct wim_dentry * dentry)1103 unlink_dentry(struct wim_dentry *dentry)
1104 {
1105 /* Do nothing if the dentry is root or it's already unlinked. Not
1106 * actually necessary based on the current callers, but we do the check
1107 * here to be safe. */
1108 if (unlikely(dentry->d_parent == dentry))
1109 return;
1110
1111 avl_tree_remove(&dentry->d_parent->d_inode->i_children,
1112 &dentry->d_index_node);
1113
1114 /* Not actually necessary, but to be safe don't retain the now-obsolete
1115 * parent pointer. */
1116 dentry->d_parent = dentry;
1117 }
1118
1119 static int
read_extra_data(const u8 * p,const u8 * end,struct wim_inode * inode)1120 read_extra_data(const u8 *p, const u8 *end, struct wim_inode *inode)
1121 {
1122 while (((uintptr_t)p & 7) && p < end)
1123 p++;
1124
1125 if (unlikely(p < end)) {
1126 inode->i_extra = MALLOC(sizeof(struct wim_inode_extra) +
1127 end - p);
1128 if (!inode->i_extra)
1129 return WIMLIB_ERR_NOMEM;
1130 inode->i_extra->size = end - p;
1131 memcpy(inode->i_extra->data, p, end - p);
1132 }
1133 return 0;
1134 }
1135
1136 /*
1137 * Set the type of each stream for an encrypted file.
1138 *
1139 * All data streams of the encrypted file should have been packed into a single
1140 * stream in the format provided by ReadEncryptedFileRaw() on Windows. We
1141 * assign this stream type STREAM_TYPE_EFSRPC_RAW_DATA.
1142 *
1143 * Encrypted files can't have a reparse point stream. In the on-disk NTFS
1144 * format they can, but as far as I know the reparse point stream of an
1145 * encrypted file can't be stored in the WIM format in a way that's compatible
1146 * with WIMGAPI, nor is there even any way for it to be read or written on
1147 * Windows when the process does not have access to the file encryption key.
1148 */
1149 static void
assign_stream_types_encrypted(struct wim_inode * inode)1150 assign_stream_types_encrypted(struct wim_inode *inode)
1151 {
1152 for (unsigned i = 0; i < inode->i_num_streams; i++) {
1153 struct wim_inode_stream *strm = &inode->i_streams[i];
1154 if (!stream_is_named(strm) && !is_zero_hash(strm->_stream_hash))
1155 {
1156 strm->stream_type = STREAM_TYPE_EFSRPC_RAW_DATA;
1157 return;
1158 }
1159 }
1160 }
1161
1162 /*
1163 * Set the type of each stream for an unencrypted file.
1164 *
1165 * There will be an unnamed data stream, a reparse point stream, or both an
1166 * unnamed data stream and a reparse point stream. In addition, there may be
1167 * named data streams.
1168 *
1169 * NOTE: if the file has a reparse point stream or at least one named data
1170 * stream, then WIMGAPI puts *all* streams in the extra stream entries and
1171 * leaves the default stream hash zeroed. wimlib now does the same. However,
1172 * for input we still support the default hash field being used, since wimlib
1173 * used to use it and MS software is somewhat accepting of it as well.
1174 */
1175 static void
assign_stream_types_unencrypted(struct wim_inode * inode)1176 assign_stream_types_unencrypted(struct wim_inode *inode)
1177 {
1178 bool found_reparse_point_stream = false;
1179 bool found_unnamed_data_stream = false;
1180 struct wim_inode_stream *unnamed_stream_with_zero_hash = NULL;
1181
1182 for (unsigned i = 0; i < inode->i_num_streams; i++) {
1183 struct wim_inode_stream *strm = &inode->i_streams[i];
1184
1185 if (stream_is_named(strm)) {
1186 /* Named data stream */
1187 strm->stream_type = STREAM_TYPE_DATA;
1188 } else if (i != 0 || !is_zero_hash(strm->_stream_hash)) {
1189 /* Unnamed stream in the extra stream entries, OR the
1190 * default stream in the dentry provided that it has a
1191 * nonzero hash. */
1192 if ((inode->i_attributes & FILE_ATTRIBUTE_REPARSE_POINT) &&
1193 !found_reparse_point_stream) {
1194 found_reparse_point_stream = true;
1195 strm->stream_type = STREAM_TYPE_REPARSE_POINT;
1196 } else if (!found_unnamed_data_stream) {
1197 found_unnamed_data_stream = true;
1198 strm->stream_type = STREAM_TYPE_DATA;
1199 }
1200 } else if (!unnamed_stream_with_zero_hash) {
1201 unnamed_stream_with_zero_hash = strm;
1202 }
1203 }
1204
1205 if (unnamed_stream_with_zero_hash) {
1206 int type = STREAM_TYPE_UNKNOWN;
1207 if ((inode->i_attributes & FILE_ATTRIBUTE_REPARSE_POINT) &&
1208 !found_reparse_point_stream) {
1209 type = STREAM_TYPE_REPARSE_POINT;
1210 } else if (!found_unnamed_data_stream) {
1211 type = STREAM_TYPE_DATA;
1212 }
1213 unnamed_stream_with_zero_hash->stream_type = type;
1214 }
1215 }
1216
1217 /*
1218 * Read and interpret the collection of streams for the specified inode.
1219 */
1220 static int
setup_inode_streams(const u8 * p,const u8 * end,struct wim_inode * inode,unsigned num_extra_streams,const u8 * default_hash,u64 * offset_p)1221 setup_inode_streams(const u8 *p, const u8 *end, struct wim_inode *inode,
1222 unsigned num_extra_streams, const u8 *default_hash,
1223 u64 *offset_p)
1224 {
1225 const u8 *orig_p = p;
1226
1227 inode->i_num_streams = 1 + num_extra_streams;
1228
1229 if (unlikely(inode->i_num_streams > ARRAY_LEN(inode->i_embedded_streams))) {
1230 inode->i_streams = CALLOC(inode->i_num_streams,
1231 sizeof(inode->i_streams[0]));
1232 if (!inode->i_streams)
1233 return WIMLIB_ERR_NOMEM;
1234 }
1235
1236 /* Use the default hash field for the first stream */
1237 inode->i_streams[0].stream_name = (utf16lechar *)NO_STREAM_NAME;
1238 copy_hash(inode->i_streams[0]._stream_hash, default_hash);
1239 inode->i_streams[0].stream_type = STREAM_TYPE_UNKNOWN;
1240 inode->i_streams[0].stream_id = 0;
1241
1242 /* Read the extra stream entries */
1243 for (unsigned i = 1; i < inode->i_num_streams; i++) {
1244 struct wim_inode_stream *strm;
1245 const struct wim_extra_stream_entry_on_disk *disk_strm;
1246 u64 length;
1247 u16 name_nbytes;
1248
1249 strm = &inode->i_streams[i];
1250
1251 strm->stream_id = i;
1252
1253 /* Do we have at least the size of the fixed-length data we know
1254 * need? */
1255 if ((end - p) < sizeof(struct wim_extra_stream_entry_on_disk))
1256 return WIMLIB_ERR_INVALID_METADATA_RESOURCE;
1257
1258 disk_strm = (const struct wim_extra_stream_entry_on_disk *)p;
1259
1260 /* Read the length field */
1261 length = ALIGN(le64_to_cpu(disk_strm->length), 8);
1262
1263 /* Make sure the length field is neither so small it doesn't
1264 * include all the fixed-length data nor so large it overflows
1265 * the metadata resource buffer. */
1266 if (length < sizeof(struct wim_extra_stream_entry_on_disk) ||
1267 length > (end - p))
1268 return WIMLIB_ERR_INVALID_METADATA_RESOURCE;
1269
1270 /* Read the rest of the fixed-length data. */
1271
1272 copy_hash(strm->_stream_hash, disk_strm->hash);
1273 name_nbytes = le16_to_cpu(disk_strm->name_nbytes);
1274
1275 /* If stream_name_nbytes != 0, the stream is named. */
1276 if (name_nbytes != 0) {
1277 /* The name is encoded in UTF16-LE, which uses 2-byte
1278 * coding units, so the length of the name had better be
1279 * an even number of bytes. */
1280 if (name_nbytes & 1)
1281 return WIMLIB_ERR_INVALID_METADATA_RESOURCE;
1282
1283 /* Add the length of the stream name to get the length
1284 * we actually need to read. Make sure this isn't more
1285 * than the specified length of the entry. */
1286 if (sizeof(struct wim_extra_stream_entry_on_disk) +
1287 name_nbytes > length)
1288 return WIMLIB_ERR_INVALID_METADATA_RESOURCE;
1289
1290 strm->stream_name = utf16le_dupz(disk_strm->name,
1291 name_nbytes);
1292 if (!strm->stream_name)
1293 return WIMLIB_ERR_NOMEM;
1294 } else {
1295 strm->stream_name = (utf16lechar *)NO_STREAM_NAME;
1296 }
1297
1298 strm->stream_type = STREAM_TYPE_UNKNOWN;
1299
1300 p += length;
1301 }
1302
1303 inode->i_next_stream_id = inode->i_num_streams;
1304
1305 /* Now, assign a type to each stream. Unfortunately this requires
1306 * various hacks because stream types aren't explicitly provided in the
1307 * WIM on-disk format. */
1308
1309 if (unlikely(inode->i_attributes & FILE_ATTRIBUTE_ENCRYPTED))
1310 assign_stream_types_encrypted(inode);
1311 else
1312 assign_stream_types_unencrypted(inode);
1313
1314 *offset_p += p - orig_p;
1315 return 0;
1316 }
1317
1318 /* Read a dentry, including all extra stream entries that follow it, from an
1319 * uncompressed metadata resource buffer. */
1320 static int
read_dentry(const u8 * restrict buf,size_t buf_len,u64 * offset_p,struct wim_dentry ** dentry_ret)1321 read_dentry(const u8 * restrict buf, size_t buf_len,
1322 u64 *offset_p, struct wim_dentry **dentry_ret)
1323 {
1324 u64 offset = *offset_p;
1325 u64 length;
1326 const u8 *p;
1327 const struct wim_dentry_on_disk *disk_dentry;
1328 struct wim_dentry *dentry;
1329 struct wim_inode *inode;
1330 u16 short_name_nbytes;
1331 u16 name_nbytes;
1332 u64 calculated_size;
1333 int ret;
1334
1335 STATIC_ASSERT(sizeof(struct wim_dentry_on_disk) == WIM_DENTRY_DISK_SIZE);
1336
1337 /* Before reading the whole dentry, we need to read just the length.
1338 * This is because a dentry of length 8 (that is, just the length field)
1339 * terminates the list of sibling directory entries. */
1340
1341 /* Check for buffer overrun. */
1342 if (unlikely(offset + sizeof(u64) > buf_len ||
1343 offset + sizeof(u64) < offset))
1344 return WIMLIB_ERR_INVALID_METADATA_RESOURCE;
1345
1346 /* Get pointer to the dentry data. */
1347 p = &buf[offset];
1348 disk_dentry = (const struct wim_dentry_on_disk*)p;
1349
1350 /* Get dentry length. */
1351 length = ALIGN(le64_to_cpu(disk_dentry->length), 8);
1352
1353 /* Check for end-of-directory. */
1354 if (length <= 8) {
1355 *dentry_ret = NULL;
1356 return 0;
1357 }
1358
1359 /* Validate dentry length. */
1360 if (unlikely(length < sizeof(struct wim_dentry_on_disk)))
1361 return WIMLIB_ERR_INVALID_METADATA_RESOURCE;
1362
1363 /* Check for buffer overrun. */
1364 if (unlikely(offset + length > buf_len ||
1365 offset + length < offset))
1366 return WIMLIB_ERR_INVALID_METADATA_RESOURCE;
1367
1368 /* Allocate new dentry structure, along with a preliminary inode. */
1369 ret = new_dentry_with_new_inode(NULL, false, &dentry);
1370 if (ret)
1371 return ret;
1372
1373 inode = dentry->d_inode;
1374
1375 /* Read more fields: some into the dentry, and some into the inode. */
1376 inode->i_attributes = le32_to_cpu(disk_dentry->attributes);
1377 inode->i_security_id = le32_to_cpu(disk_dentry->security_id);
1378 dentry->d_subdir_offset = le64_to_cpu(disk_dentry->subdir_offset);
1379 inode->i_creation_time = le64_to_cpu(disk_dentry->creation_time);
1380 inode->i_last_access_time = le64_to_cpu(disk_dentry->last_access_time);
1381 inode->i_last_write_time = le64_to_cpu(disk_dentry->last_write_time);
1382 inode->i_unknown_0x54 = le32_to_cpu(disk_dentry->unknown_0x54);
1383
1384 if (inode->i_attributes & FILE_ATTRIBUTE_REPARSE_POINT) {
1385 inode->i_reparse_tag = le32_to_cpu(disk_dentry->reparse.reparse_tag);
1386 inode->i_rp_reserved = le16_to_cpu(disk_dentry->reparse.rp_reserved);
1387 inode->i_rp_flags = le16_to_cpu(disk_dentry->reparse.rp_flags);
1388 /* Leave inode->i_ino at 0. Note: this means that WIM cannot
1389 * represent multiple hard links to a reparse point file. */
1390 } else {
1391 inode->i_ino = le64_to_cpu(disk_dentry->nonreparse.hard_link_group_id);
1392 }
1393
1394 /* Now onto reading the names. There are two of them: the (long) file
1395 * name, and the short name. */
1396
1397 short_name_nbytes = le16_to_cpu(disk_dentry->short_name_nbytes);
1398 name_nbytes = le16_to_cpu(disk_dentry->name_nbytes);
1399
1400 if (unlikely((short_name_nbytes & 1) | (name_nbytes & 1))) {
1401 ret = WIMLIB_ERR_INVALID_METADATA_RESOURCE;
1402 goto err_free_dentry;
1403 }
1404
1405 /* We now know the length of the file name and short name. Make sure
1406 * the length of the dentry is large enough to actually hold them. */
1407 calculated_size = dentry_min_len_with_names(name_nbytes,
1408 short_name_nbytes);
1409
1410 if (unlikely(length < calculated_size)) {
1411 ret = WIMLIB_ERR_INVALID_METADATA_RESOURCE;
1412 goto err_free_dentry;
1413 }
1414
1415 /* Advance p to point past the base dentry, to the first name. */
1416 p += sizeof(struct wim_dentry_on_disk);
1417
1418 /* Read the filename if present. Note: if the filename is empty, there
1419 * is no null terminator following it. */
1420 if (name_nbytes) {
1421 dentry->d_name = utf16le_dupz(p, name_nbytes);
1422 if (unlikely(!dentry->d_name)) {
1423 ret = WIMLIB_ERR_NOMEM;
1424 goto err_free_dentry;
1425 }
1426 dentry->d_name_nbytes = name_nbytes;
1427 p += (u32)name_nbytes + 2;
1428 }
1429
1430 /* Read the short filename if present. Note: if there is no short
1431 * filename, there is no null terminator following it. */
1432 if (short_name_nbytes) {
1433 dentry->d_short_name = utf16le_dupz(p, short_name_nbytes);
1434 if (unlikely(!dentry->d_short_name)) {
1435 ret = WIMLIB_ERR_NOMEM;
1436 goto err_free_dentry;
1437 }
1438 dentry->d_short_name_nbytes = short_name_nbytes;
1439 p += (u32)short_name_nbytes + 2;
1440 }
1441
1442 /* Read extra data at end of dentry (but before extra stream entries).
1443 * This may contain tagged metadata items. */
1444 ret = read_extra_data(p, &buf[offset + length], inode);
1445 if (ret)
1446 goto err_free_dentry;
1447
1448 offset += length;
1449
1450 /* Set up the inode's collection of streams. */
1451 ret = setup_inode_streams(&buf[offset],
1452 &buf[buf_len],
1453 inode,
1454 le16_to_cpu(disk_dentry->num_extra_streams),
1455 disk_dentry->default_hash,
1456 &offset);
1457 if (ret)
1458 goto err_free_dentry;
1459
1460 *offset_p = offset; /* Sets offset of next dentry in directory */
1461 *dentry_ret = dentry;
1462 return 0;
1463
1464 err_free_dentry:
1465 free_dentry(dentry);
1466 return ret;
1467 }
1468
1469 static bool
dentry_is_dot_or_dotdot(const struct wim_dentry * dentry)1470 dentry_is_dot_or_dotdot(const struct wim_dentry *dentry)
1471 {
1472 if (dentry->d_name_nbytes <= 4) {
1473 if (dentry->d_name_nbytes == 4) {
1474 if (dentry->d_name[0] == cpu_to_le16('.') &&
1475 dentry->d_name[1] == cpu_to_le16('.'))
1476 return true;
1477 } else if (dentry->d_name_nbytes == 2) {
1478 if (dentry->d_name[0] == cpu_to_le16('.'))
1479 return true;
1480 }
1481 }
1482 return false;
1483 }
1484
1485 static bool
dentry_contains_embedded_null(const struct wim_dentry * dentry)1486 dentry_contains_embedded_null(const struct wim_dentry *dentry)
1487 {
1488 for (unsigned i = 0; i < dentry->d_name_nbytes / 2; i++)
1489 if (dentry->d_name[i] == cpu_to_le16('\0'))
1490 return true;
1491 return false;
1492 }
1493
1494 static bool
should_ignore_dentry(struct wim_dentry * dir,const struct wim_dentry * dentry)1495 should_ignore_dentry(struct wim_dentry *dir, const struct wim_dentry *dentry)
1496 {
1497 /* All dentries except the root must be named. */
1498 if (!dentry_has_long_name(dentry)) {
1499 WARNING("Ignoring unnamed file in directory \"%"TS"\"",
1500 dentry_full_path(dir));
1501 return true;
1502 }
1503
1504 /* Don't allow files named "." or "..". Such filenames could be used in
1505 * path traversal attacks. */
1506 if (dentry_is_dot_or_dotdot(dentry)) {
1507 WARNING("Ignoring file named \".\" or \"..\" in directory "
1508 "\"%"TS"\"", dentry_full_path(dir));
1509 return true;
1510 }
1511
1512 /* Don't allow filenames containing embedded null characters. Although
1513 * the null character is already considered an unsupported character for
1514 * extraction by all targets, it is probably a good idea to just forbid
1515 * such names entirely. */
1516 if (dentry_contains_embedded_null(dentry)) {
1517 WARNING("Ignoring filename with embedded null character in "
1518 "directory \"%"TS"\"", dentry_full_path(dir));
1519 return true;
1520 }
1521
1522 return false;
1523 }
1524
1525 static int
read_dentry_tree_recursive(const u8 * restrict buf,size_t buf_len,struct wim_dentry * restrict dir,unsigned depth)1526 read_dentry_tree_recursive(const u8 * restrict buf, size_t buf_len,
1527 struct wim_dentry * restrict dir, unsigned depth)
1528 {
1529 u64 cur_offset = dir->d_subdir_offset;
1530
1531 /* Disallow extremely deep or cyclic directory structures */
1532 if (unlikely(depth >= 16384)) {
1533 ERROR("Directory structure too deep!");
1534 return WIMLIB_ERR_INVALID_METADATA_RESOURCE;
1535 }
1536
1537 for (;;) {
1538 struct wim_dentry *child;
1539 struct wim_dentry *duplicate;
1540 int ret;
1541
1542 /* Read next child of @dir. */
1543 ret = read_dentry(buf, buf_len, &cur_offset, &child);
1544 if (ret)
1545 return ret;
1546
1547 /* Check for end of directory. */
1548 if (child == NULL)
1549 return 0;
1550
1551 /* Ignore dentries with bad names. */
1552 if (unlikely(should_ignore_dentry(dir, child))) {
1553 free_dentry(child);
1554 continue;
1555 }
1556
1557 /* Link the child into the directory. */
1558 duplicate = dentry_add_child(dir, child);
1559 if (unlikely(duplicate)) {
1560 /* We already found a dentry with this same
1561 * case-sensitive long name. Only keep the first one.
1562 */
1563 WARNING("Ignoring duplicate file \"%"TS"\" "
1564 "(the WIM image already contains a file "
1565 "at that path with the exact same name)",
1566 dentry_full_path(duplicate));
1567 free_dentry(child);
1568 continue;
1569 }
1570
1571 /* If this child is a directory that itself has children, call
1572 * this procedure recursively. */
1573 if (child->d_subdir_offset != 0) {
1574 if (likely(dentry_is_directory(child))) {
1575 ret = read_dentry_tree_recursive(buf,
1576 buf_len,
1577 child,
1578 depth + 1);
1579 if (ret)
1580 return ret;
1581 } else {
1582 WARNING("Ignoring children of "
1583 "non-directory file \"%"TS"\"",
1584 dentry_full_path(child));
1585 }
1586 }
1587 }
1588 }
1589
1590 /*
1591 * Read a tree of dentries from a WIM metadata resource.
1592 *
1593 * @buf:
1594 * Buffer containing an uncompressed WIM metadata resource.
1595 *
1596 * @buf_len:
1597 * Length of the uncompressed metadata resource, in bytes.
1598 *
1599 * @root_offset
1600 * Offset in the metadata resource of the root of the dentry tree.
1601 *
1602 * @root_ret:
1603 * On success, either NULL or a pointer to the root dentry is written to
1604 * this location. The former case only occurs in the unexpected case that
1605 * the tree began with an end-of-directory entry.
1606 *
1607 * Return values:
1608 * WIMLIB_ERR_SUCCESS (0)
1609 * WIMLIB_ERR_INVALID_METADATA_RESOURCE
1610 * WIMLIB_ERR_NOMEM
1611 */
1612 int
read_dentry_tree(const u8 * buf,size_t buf_len,u64 root_offset,struct wim_dentry ** root_ret)1613 read_dentry_tree(const u8 *buf, size_t buf_len,
1614 u64 root_offset, struct wim_dentry **root_ret)
1615 {
1616 int ret;
1617 struct wim_dentry *root;
1618
1619 ret = read_dentry(buf, buf_len, &root_offset, &root);
1620 if (ret)
1621 return ret;
1622
1623 if (likely(root != NULL)) {
1624 if (unlikely(dentry_has_long_name(root) ||
1625 dentry_has_short_name(root)))
1626 {
1627 WARNING("The root directory has a nonempty name; "
1628 "removing it.");
1629 dentry_set_name(root, NULL);
1630 }
1631
1632 if (unlikely(!dentry_is_directory(root))) {
1633 ERROR("The root of the WIM image is not a directory!");
1634 ret = WIMLIB_ERR_INVALID_METADATA_RESOURCE;
1635 goto err_free_dentry_tree;
1636 }
1637
1638 if (likely(root->d_subdir_offset != 0)) {
1639 ret = read_dentry_tree_recursive(buf, buf_len, root, 0);
1640 if (ret)
1641 goto err_free_dentry_tree;
1642 }
1643 } else {
1644 WARNING("The metadata resource has no directory entries; "
1645 "treating as an empty image.");
1646 }
1647 *root_ret = root;
1648 return 0;
1649
1650 err_free_dentry_tree:
1651 free_dentry_tree(root, NULL);
1652 return ret;
1653 }
1654
1655 static u8 *
write_extra_stream_entry(u8 * restrict p,const utf16lechar * restrict name,const u8 * restrict hash)1656 write_extra_stream_entry(u8 * restrict p, const utf16lechar * restrict name,
1657 const u8 * restrict hash)
1658 {
1659 struct wim_extra_stream_entry_on_disk *disk_strm =
1660 (struct wim_extra_stream_entry_on_disk *)p;
1661 u8 *orig_p = p;
1662 size_t name_nbytes;
1663
1664 if (name == NO_STREAM_NAME)
1665 name_nbytes = 0;
1666 else
1667 name_nbytes = utf16le_len_bytes(name);
1668
1669 disk_strm->reserved = 0;
1670 copy_hash(disk_strm->hash, hash);
1671 disk_strm->name_nbytes = cpu_to_le16(name_nbytes);
1672 p += sizeof(struct wim_extra_stream_entry_on_disk);
1673 if (name_nbytes != 0)
1674 p = mempcpy(p, name, name_nbytes + 2);
1675 /* Align to 8-byte boundary */
1676 while ((uintptr_t)p & 7)
1677 *p++ = 0;
1678 disk_strm->length = cpu_to_le64(p - orig_p);
1679 return p;
1680 }
1681
1682 /*
1683 * Write a WIM dentry to an output buffer.
1684 *
1685 * This includes any extra stream entries that may follow the dentry itself.
1686 *
1687 * @dentry:
1688 * The dentry to write.
1689 *
1690 * @p:
1691 * The memory location to which to write the data.
1692 *
1693 * Returns a pointer to the byte following the last written.
1694 */
1695 static u8 *
write_dentry(const struct wim_dentry * restrict dentry,u8 * restrict p)1696 write_dentry(const struct wim_dentry * restrict dentry, u8 * restrict p)
1697 {
1698 const struct wim_inode *inode;
1699 struct wim_dentry_on_disk *disk_dentry;
1700 const u8 *orig_p;
1701
1702 wimlib_assert(((uintptr_t)p & 7) == 0); /* 8 byte aligned */
1703 orig_p = p;
1704
1705 inode = dentry->d_inode;
1706 disk_dentry = (struct wim_dentry_on_disk*)p;
1707
1708 disk_dentry->attributes = cpu_to_le32(inode->i_attributes);
1709 disk_dentry->security_id = cpu_to_le32(inode->i_security_id);
1710 disk_dentry->subdir_offset = cpu_to_le64(dentry->d_subdir_offset);
1711
1712 disk_dentry->unused_1 = cpu_to_le64(0);
1713 disk_dentry->unused_2 = cpu_to_le64(0);
1714
1715 disk_dentry->creation_time = cpu_to_le64(inode->i_creation_time);
1716 disk_dentry->last_access_time = cpu_to_le64(inode->i_last_access_time);
1717 disk_dentry->last_write_time = cpu_to_le64(inode->i_last_write_time);
1718 disk_dentry->unknown_0x54 = cpu_to_le32(inode->i_unknown_0x54);
1719 if (inode->i_attributes & FILE_ATTRIBUTE_REPARSE_POINT) {
1720 disk_dentry->reparse.reparse_tag = cpu_to_le32(inode->i_reparse_tag);
1721 disk_dentry->reparse.rp_reserved = cpu_to_le16(inode->i_rp_reserved);
1722 disk_dentry->reparse.rp_flags = cpu_to_le16(inode->i_rp_flags);
1723 } else {
1724 disk_dentry->nonreparse.hard_link_group_id =
1725 cpu_to_le64((inode->i_nlink == 1) ? 0 : inode->i_ino);
1726 }
1727
1728 disk_dentry->short_name_nbytes = cpu_to_le16(dentry->d_short_name_nbytes);
1729 disk_dentry->name_nbytes = cpu_to_le16(dentry->d_name_nbytes);
1730 p += sizeof(struct wim_dentry_on_disk);
1731
1732 wimlib_assert(dentry_is_root(dentry) != dentry_has_long_name(dentry));
1733
1734 if (dentry_has_long_name(dentry))
1735 p = mempcpy(p, dentry->d_name, (u32)dentry->d_name_nbytes + 2);
1736
1737 if (dentry_has_short_name(dentry))
1738 p = mempcpy(p, dentry->d_short_name, (u32)dentry->d_short_name_nbytes + 2);
1739
1740 /* Align to 8-byte boundary */
1741 while ((uintptr_t)p & 7)
1742 *p++ = 0;
1743
1744 if (inode->i_extra) {
1745 /* Extra tagged items --- not usually present. */
1746 p = mempcpy(p, inode->i_extra->data, inode->i_extra->size);
1747
1748 /* Align to 8-byte boundary */
1749 while ((uintptr_t)p & 7)
1750 *p++ = 0;
1751 }
1752
1753 disk_dentry->length = cpu_to_le64(p - orig_p);
1754
1755 /* Streams */
1756
1757 if (unlikely(inode->i_attributes & FILE_ATTRIBUTE_ENCRYPTED)) {
1758 const struct wim_inode_stream *efs_strm;
1759 const u8 *efs_hash;
1760
1761 efs_strm = inode_get_unnamed_stream(inode, STREAM_TYPE_EFSRPC_RAW_DATA);
1762 efs_hash = efs_strm ? stream_hash(efs_strm) : zero_hash;
1763 copy_hash(disk_dentry->default_hash, efs_hash);
1764 disk_dentry->num_extra_streams = cpu_to_le16(0);
1765 } else {
1766 /*
1767 * Extra stream entries:
1768 *
1769 * - Use one extra stream entry for each named data stream
1770 * - Use one extra stream entry for the unnamed data stream when there is either:
1771 * - a reparse point stream
1772 * - at least one named data stream (for Windows PE bug workaround)
1773 * - Use one extra stream entry for the reparse point stream if there is one
1774 */
1775 bool have_named_data_stream = false;
1776 bool have_reparse_point_stream = false;
1777 const u8 *unnamed_data_stream_hash = zero_hash;
1778 const u8 *reparse_point_hash;
1779 for (unsigned i = 0; i < inode->i_num_streams; i++) {
1780 const struct wim_inode_stream *strm = &inode->i_streams[i];
1781 if (strm->stream_type == STREAM_TYPE_DATA) {
1782 if (stream_is_named(strm))
1783 have_named_data_stream = true;
1784 else
1785 unnamed_data_stream_hash = stream_hash(strm);
1786 } else if (strm->stream_type == STREAM_TYPE_REPARSE_POINT) {
1787 have_reparse_point_stream = true;
1788 reparse_point_hash = stream_hash(strm);
1789 }
1790 }
1791
1792 if (unlikely(have_reparse_point_stream || have_named_data_stream)) {
1793
1794 unsigned num_extra_streams = 0;
1795
1796 copy_hash(disk_dentry->default_hash, zero_hash);
1797
1798 if (have_reparse_point_stream) {
1799 p = write_extra_stream_entry(p, NO_STREAM_NAME,
1800 reparse_point_hash);
1801 num_extra_streams++;
1802 }
1803
1804 p = write_extra_stream_entry(p, NO_STREAM_NAME,
1805 unnamed_data_stream_hash);
1806 num_extra_streams++;
1807
1808 for (unsigned i = 0; i < inode->i_num_streams; i++) {
1809 const struct wim_inode_stream *strm = &inode->i_streams[i];
1810 if (stream_is_named_data_stream(strm)) {
1811 p = write_extra_stream_entry(p, strm->stream_name,
1812 stream_hash(strm));
1813 num_extra_streams++;
1814 }
1815 }
1816 wimlib_assert(num_extra_streams <= 0xFFFF);
1817
1818 disk_dentry->num_extra_streams = cpu_to_le16(num_extra_streams);
1819 } else {
1820 copy_hash(disk_dentry->default_hash, unnamed_data_stream_hash);
1821 disk_dentry->num_extra_streams = cpu_to_le16(0);
1822 }
1823 }
1824
1825 return p;
1826 }
1827
1828 static int
write_dir_dentries(struct wim_dentry * dir,void * _pp)1829 write_dir_dentries(struct wim_dentry *dir, void *_pp)
1830 {
1831 if (dir->d_subdir_offset != 0) {
1832 u8 **pp = _pp;
1833 u8 *p = *pp;
1834 struct wim_dentry *child;
1835
1836 /* write child dentries */
1837 for_dentry_child(child, dir)
1838 p = write_dentry(child, p);
1839
1840 /* write end of directory entry */
1841 *(u64*)p = 0;
1842 p += 8;
1843 *pp = p;
1844 }
1845 return 0;
1846 }
1847
1848 /*
1849 * Write a directory tree to the metadata resource.
1850 *
1851 * @root:
1852 * The root of a dentry tree on which calculate_subdir_offsets() has been
1853 * called. This cannot be NULL; if the dentry tree is empty, the caller is
1854 * expected to first generate a dummy root directory.
1855 *
1856 * @p:
1857 * Pointer to a buffer with enough space for the dentry tree. This size
1858 * must have been obtained by calculate_subdir_offsets().
1859 *
1860 * Returns a pointer to the byte following the last written.
1861 */
1862 u8 *
write_dentry_tree(struct wim_dentry * root,u8 * p)1863 write_dentry_tree(struct wim_dentry *root, u8 *p)
1864 {
1865 /* write root dentry and end-of-directory entry following it */
1866 p = write_dentry(root, p);
1867 *(u64*)p = 0;
1868 p += 8;
1869
1870 /* write the rest of the dentry tree */
1871 for_dentry_in_tree(root, write_dir_dentries, &p);
1872
1873 return p;
1874 }
1875