1 /*
2  * reparse.c - Reparse point handling
3  */
4 
5 /*
6  * Copyright (C) 2012, 2013, 2015 Eric Biggers
7  *
8  * This file is free software; you can redistribute it and/or modify it under
9  * the terms of the GNU Lesser General Public License as published by the Free
10  * Software Foundation; either version 3 of the License, or (at your option) any
11  * later version.
12  *
13  * This file is distributed in the hope that it will be useful, but WITHOUT
14  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
15  * FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more
16  * details.
17  *
18  * You should have received a copy of the GNU Lesser General Public License
19  * along with this file; if not, see http://www.gnu.org/licenses/.
20  */
21 
22 #ifdef HAVE_CONFIG_H
23 #  include "config.h"
24 #endif
25 
26 #include <errno.h>
27 
28 #include "wimlib/alloca.h"
29 #include "wimlib/blob_table.h"
30 #include "wimlib/endianness.h"
31 #include "wimlib/encoding.h"
32 #include "wimlib/error.h"
33 #include "wimlib/guid.h"
34 #include "wimlib/inode.h"
35 #include "wimlib/reparse.h"
36 #include "wimlib/resource.h"
37 
38 /*
39  * Reconstruct the header of a reparse point buffer.  This is necessary because
40  * only reparse data is stored in WIM files.  The reparse tag is instead stored
41  * in the on-disk WIM dentry, and the reparse data length is equal to the size
42  * of the blob in which the reparse data was stored, minus the size of a GUID
43  * (16 bytes) if the reparse tag does not have the "Microsoft" bit set.
44  */
45 void
complete_reparse_point(struct reparse_buffer_disk * rpbuf,const struct wim_inode * inode,u16 blob_size)46 complete_reparse_point(struct reparse_buffer_disk *rpbuf,
47 		       const struct wim_inode *inode, u16 blob_size)
48 {
49 	rpbuf->rptag = cpu_to_le32(inode->i_reparse_tag);
50 	if (blob_size >= GUID_SIZE && !(inode->i_reparse_tag & 0x80000000))
51 		blob_size -= GUID_SIZE;
52 	rpbuf->rpdatalen = cpu_to_le16(blob_size);
53 	rpbuf->rpreserved = cpu_to_le16(inode->i_rp_reserved);
54 }
55 
56 /* Parse the buffer for a symbolic link or junction reparse point and fill in a
57  * 'struct link_reparse_point'.  */
58 int
parse_link_reparse_point(const struct reparse_buffer_disk * rpbuf,u16 rpbuflen,struct link_reparse_point * link)59 parse_link_reparse_point(const struct reparse_buffer_disk *rpbuf, u16 rpbuflen,
60 			 struct link_reparse_point *link)
61 {
62 	u16 substitute_name_offset;
63 	u16 print_name_offset;
64 	const u8 *data;
65 
66 	link->rptag = le32_to_cpu(rpbuf->rptag);
67 
68 	/* Not a symbolic link or junction?  */
69 	if (link->rptag != WIM_IO_REPARSE_TAG_SYMLINK &&
70 	    link->rptag != WIM_IO_REPARSE_TAG_MOUNT_POINT)
71 		return WIMLIB_ERR_INVALID_REPARSE_DATA;
72 
73 	/* Is the buffer too small to be a symlink or a junction?  */
74 	if (rpbuflen < offsetof(struct reparse_buffer_disk, link.junction.data))
75 		return WIMLIB_ERR_INVALID_REPARSE_DATA;
76 
77 	link->rpreserved = le16_to_cpu(rpbuf->rpreserved);
78 	link->substitute_name_nbytes = le16_to_cpu(rpbuf->link.substitute_name_nbytes);
79 	substitute_name_offset = le16_to_cpu(rpbuf->link.substitute_name_offset);
80 	link->print_name_nbytes = le16_to_cpu(rpbuf->link.print_name_nbytes);
81 	print_name_offset = le16_to_cpu(rpbuf->link.print_name_offset);
82 
83 	/* The names must be properly sized and aligned.  */
84 	if ((substitute_name_offset | print_name_offset |
85 	     link->substitute_name_nbytes | link->print_name_nbytes) & 1)
86 		return WIMLIB_ERR_INVALID_REPARSE_DATA;
87 
88 	if (link->rptag == WIM_IO_REPARSE_TAG_SYMLINK) {
89 		if (rpbuflen < offsetof(struct reparse_buffer_disk, link.symlink.data))
90 			return WIMLIB_ERR_INVALID_REPARSE_DATA;
91 		link->symlink_flags = le32_to_cpu(rpbuf->link.symlink.flags);
92 		data = rpbuf->link.symlink.data;
93 	} else {
94 		data = rpbuf->link.junction.data;
95 	}
96 
97 	/* Verify that the names don't overflow the buffer.  */
98 	if ((data - (const u8 *)rpbuf) + substitute_name_offset +
99 	    link->substitute_name_nbytes > rpbuflen)
100 		return WIMLIB_ERR_INVALID_REPARSE_DATA;
101 
102 	if ((data - (const u8 *)rpbuf) + print_name_offset +
103 	    link->print_name_nbytes > rpbuflen)
104 		return WIMLIB_ERR_INVALID_REPARSE_DATA;
105 
106 	/* Save the name pointers.  */
107 	link->substitute_name = (utf16lechar *)&data[substitute_name_offset];
108 	link->print_name = (utf16lechar *)&data[print_name_offset];
109 	return 0;
110 }
111 
112 /* Translate a 'struct link_reparse_point' into a reparse point buffer.  */
113 int
make_link_reparse_point(const struct link_reparse_point * link,struct reparse_buffer_disk * rpbuf,u16 * rpbuflen_ret)114 make_link_reparse_point(const struct link_reparse_point *link,
115 			struct reparse_buffer_disk *rpbuf, u16 *rpbuflen_ret)
116 {
117 	u8 *data;
118 
119 	if (link->rptag == WIM_IO_REPARSE_TAG_SYMLINK)
120 		data = rpbuf->link.symlink.data;
121 	else if (link->rptag == WIM_IO_REPARSE_TAG_MOUNT_POINT)
122 		data = rpbuf->link.junction.data;
123 	else /* Callers should forbid this case, but check anyway.  */
124 		return WIMLIB_ERR_INVALID_REPARSE_DATA;
125 
126 	/* Check if the names are too long to fit in a reparse point.  */
127 	if ((data - (u8 *)rpbuf) + link->substitute_name_nbytes +
128 	    link->print_name_nbytes +
129 	    2 * sizeof(utf16lechar) > REPARSE_POINT_MAX_SIZE)
130 		return WIMLIB_ERR_INVALID_REPARSE_DATA;
131 
132 	rpbuf->rptag = cpu_to_le32(link->rptag);
133 	rpbuf->rpreserved = cpu_to_le16(link->rpreserved);
134 	rpbuf->link.substitute_name_offset = cpu_to_le16(0);
135 	rpbuf->link.substitute_name_nbytes = cpu_to_le16(link->substitute_name_nbytes);
136 	rpbuf->link.print_name_offset = cpu_to_le16(link->substitute_name_nbytes +
137 						    sizeof(utf16lechar));
138 	rpbuf->link.print_name_nbytes = cpu_to_le16(link->print_name_nbytes);
139 
140 	if (link->rptag == WIM_IO_REPARSE_TAG_SYMLINK)
141 		rpbuf->link.symlink.flags = cpu_to_le32(link->symlink_flags);
142 
143 	/* We null-terminate the substitute and print names, although this isn't
144 	 * strictly necessary.  Note that the nbytes fields do not include the
145 	 * null terminators.  */
146 	data = mempcpy(data, link->substitute_name, link->substitute_name_nbytes);
147 	*(utf16lechar *)data = cpu_to_le16(0);
148 	data += sizeof(utf16lechar);
149 	data = mempcpy(data, link->print_name, link->print_name_nbytes);
150 	*(utf16lechar *)data = cpu_to_le16(0);
151 	data += sizeof(utf16lechar);
152 	rpbuf->rpdatalen = cpu_to_le16(data - rpbuf->rpdata);
153 
154 	*rpbuflen_ret = data - (u8 *)rpbuf;
155 	return 0;
156 }
157 
158 /* UNIX symlink <=> Windows reparse point translation  */
159 #ifndef __WIN32__
160 
161 /* Retrieve the inode's reparse point buffer into @rpbuf and @rpbuflen_ret.
162  * This gets the reparse data from @blob if specified, otherwise from the
163  * inode's reparse point stream.  The inode's streams must be resolved.  */
164 static int
wim_inode_get_reparse_point(const struct wim_inode * inode,struct reparse_buffer_disk * rpbuf,u16 * rpbuflen_ret,const struct blob_descriptor * blob)165 wim_inode_get_reparse_point(const struct wim_inode *inode,
166 			    struct reparse_buffer_disk *rpbuf,
167 			    u16 *rpbuflen_ret,
168 			    const struct blob_descriptor *blob)
169 {
170 	int ret;
171 	u16 blob_size = 0;
172 
173 	if (!blob) {
174 		const struct wim_inode_stream *strm;
175 
176 		strm = inode_get_unnamed_stream(inode, STREAM_TYPE_REPARSE_POINT);
177 		if (strm)
178 			blob = stream_blob_resolved(strm);
179 	}
180 
181 	if (blob) {
182 		if (blob->size > REPARSE_DATA_MAX_SIZE)
183 			return WIMLIB_ERR_INVALID_REPARSE_DATA;
184 		blob_size = blob->size;
185 		ret = read_blob_into_buf(blob, rpbuf->rpdata);
186 		if (ret)
187 			return ret;
188 	}
189 
190 	complete_reparse_point(rpbuf, inode, blob_size);
191 
192 	*rpbuflen_ret = REPARSE_DATA_OFFSET + blob_size;
193 	return 0;
194 }
195 
/*
 * Append as much of @src as fits to a bounded output buffer.
 *
 * @buf_p
 *	In/out: current write position.  Advanced by the number of bytes
 *	copied.
 * @bufsize_p
 *	In/out: bytes of space remaining at *@buf_p.  Reduced by the number of
 *	bytes copied.
 * @src
 *	Bytes to append.
 * @src_size
 *	Number of bytes available at @src.
 *
 * If @src does not fit, it is silently truncated to the remaining space.  No
 * null terminator is written.
 */
static void
copy(char **buf_p, size_t *bufsize_p, const char *src, size_t src_size)
{
	size_t n = src_size < *bufsize_p ? src_size : *bufsize_p;

	/* With nothing to copy, leave the pointers untouched.  Calling
	 * memcpy() or doing pointer arithmetic on a null pointer is undefined
	 * even for a length of zero, and a caller may legitimately pass a null
	 * buffer together with a size of zero.  */
	if (n == 0)
		return;

	memcpy(*buf_p, src, n);
	*buf_p += n;
	*bufsize_p -= n;
}
204 
205 /*
206  * Get a UNIX-style symlink target from the WIM inode for a reparse point.
207  *
208  * @inode
209  *	The inode from which to read the symlink.  If not a symbolic link or
210  *	junction reparse point, then -EINVAL will be returned.
211  * @buf
212  *	Buffer into which to place the link target.
213  * @bufsize
214  *	Available space in @buf, in bytes.
215  * @blob
216  *	If not NULL, the blob from which to read the reparse data.  Otherwise,
217  *	the reparse data will be read from the reparse point stream of @inode.
218  * @altroot
219  *	If @altroot_len != 0 and the link is an absolute link that was stored as
220  *	"fixed", then prepend this path to the link target.
221  * @altroot_len
222  *	Length of the @altroot string or 0.
223  *
224  * Similar to POSIX readlink(), this function writes as much of the symlink
225  * target as possible (up to @bufsize bytes) to @buf with no null terminator and
226  * returns the number of bytes written or a negative errno value on error.  Note
227  * that the target is truncated and @bufsize is returned in the overflow case.
228  */
229 int
wim_inode_readlink(const struct wim_inode * inode,char * buf,size_t bufsize,const struct blob_descriptor * blob,const char * altroot,size_t altroot_len)230 wim_inode_readlink(const struct wim_inode *inode, char *buf, size_t bufsize,
231 		   const struct blob_descriptor *blob,
232 		   const char *altroot, size_t altroot_len)
233 {
234 	struct reparse_buffer_disk rpbuf;
235 	u16 rpbuflen;
236 	struct link_reparse_point link;
237 	char *target_buffer;
238 	char *target;
239 	size_t target_len;
240 	char *buf_ptr;
241 	bool rpfix_ok = false;
242 
243 	/* Not a symbolic link or junction?  */
244 	if (!inode_is_symlink(inode))
245 		return -EINVAL;
246 
247 	/* Retrieve the native Windows "substitute name".  */
248 
249 	if (wim_inode_get_reparse_point(inode, &rpbuf, &rpbuflen, blob))
250 		return -EIO;
251 
252 	if (parse_link_reparse_point(&rpbuf, rpbuflen, &link))
253 		return -EINVAL;
254 
255 	/* Translate the substitute name to a multibyte string.  */
256 	if (utf16le_to_tstr(link.substitute_name, link.substitute_name_nbytes,
257 			    &target_buffer, &target_len))
258 		return -errno;
259 	target = target_buffer;
260 
261 	/*
262 	 * The substitute name is a native Windows NT path. There are two cases:
263 	 *
264 	 * 1. The reparse point is a symlink (rptag=WIM_IO_REPARSE_TAG_SYMLINK)
265 	 *    and SYMBOLIC_LINK_RELATIVE is set.  Windows resolves the path
266 	 *    relative to the directory containing the reparse point file.  In
267 	 *    this case, we just translate the path separators.
268 	 * 2. Otherwise, Windows resolves the path from the root of the Windows
269 	 *    NT kernel object namespace.  In this case, we attempt to strip the
270 	 *    device name, in addition to translating the path separators; e.g.
271 	 *    "\??\C:\Users\Public" is translated to "/Users/Public".
272 	 *
273 	 * Also in case (2) the link target may have been stored as "fixed",
274 	 * meaning that with the device portion stripped off it is effectively
275 	 * "relative to the root of the WIM image".  If this is the case, and if
276 	 * the caller provided an alternate root directory, then rewrite the
277 	 * link to be relative to that directory.
278 	 */
279 	if (!link_is_relative_symlink(&link)) {
280 		static const char *const nt_root_dirs[] = {
281 			"\\??\\", "\\DosDevices\\", "\\Device\\",
282 		};
283 		for (size_t i = 0; i < ARRAY_LEN(nt_root_dirs); i++) {
284 			size_t len = strlen(nt_root_dirs[i]);
285 			if (!strncmp(target, nt_root_dirs[i], len)) {
286 				char *p = target + len;
287 				while (*p == '\\')
288 					p++;
289 				while (*p && *p != '\\')
290 					p++;
291 				target_len -= (p - target);
292 				target = p;
293 				break;
294 			}
295 		}
296 
297 		if (!(inode->i_rp_flags & WIM_RP_FLAG_NOT_FIXED))
298 			rpfix_ok = true;
299 	}
300 
301 	/* Translate backslashes (Windows NT path separator) to forward slashes
302 	 * (UNIX path separator).  In addition, translate forwards slashes to
303 	 * backslashes; this enables lossless handling of UNIX symbolic link
304 	 * targets that contain the backslash character.  */
305 	for (char *p = target; *p; p++) {
306 		if (*p == '\\')
307 			*p = '/';
308 		else if (*p == '/')
309 			*p = '\\';
310 	}
311 
312 	/* Copy as much of the link target as possible to the output buffer and
313 	 * return the number of bytes copied.  */
314 	buf_ptr = buf;
315 	if (rpfix_ok && altroot_len != 0) {
316 		copy(&buf_ptr, &bufsize, altroot, altroot_len);
317 	} else if (target_len == 0) {
318 		/* An absolute link target that was made relative to the same
319 		 * directory pointed to will end up empty if the original target
320 		 * did not have a trailing slash.  Here, we are reading this
321 		 * adjusted link target without prefixing it.  This usually
322 		 * doesn't happen, but if it does then we need to change it to
323 		 * "/" so that it is a valid target.  */
324 		target = "/";
325 		target_len = 1;
326 	}
327 	copy(&buf_ptr, &bufsize, target, target_len);
328 	FREE(target_buffer);
329 	return buf_ptr - buf;
330 }
331 
332 /* Given a UNIX-style symbolic link target, create a Windows-style reparse point
333  * buffer and assign it to the specified inode.  */
334 int
wim_inode_set_symlink(struct wim_inode * inode,const char * _target,struct blob_table * blob_table)335 wim_inode_set_symlink(struct wim_inode *inode, const char *_target,
336 		      struct blob_table *blob_table)
337 
338 {
339 	int ret;
340 	utf16lechar *target;
341 	size_t target_nbytes;
342 	struct link_reparse_point link;
343 	struct reparse_buffer_disk rpbuf;
344 	u16 rpbuflen;
345 
346 	/* Translate the link target to UTF-16LE.  */
347 	ret = tstr_to_utf16le(_target, strlen(_target), &target, &target_nbytes);
348 	if (ret)
349 		return ret;
350 
351 	/* Translate forward slashes (UNIX path separator) to backslashes
352 	 * (Windows NT path separator).  In addition, translate backslashes to
353 	 * forward slashes; this enables lossless handling of UNIX symbolic link
354 	 * targets that contain the backslash character.  */
355 	for (utf16lechar *p = target; *p; p++) {
356 		if (*p == cpu_to_le16('/'))
357 			*p = cpu_to_le16('\\');
358 		else if (*p == cpu_to_le16('\\'))
359 			*p = cpu_to_le16('/');
360 	}
361 
362 	link.rptag = WIM_IO_REPARSE_TAG_SYMLINK;
363 	link.rpreserved = 0;
364 
365 	/* Note: an absolute link that was rewritten to be relative to another
366 	 * directory is assumed to either be empty or to have a leading slash.
367 	 * See unix_relativize_link_target().  */
368 	if (*target == cpu_to_le16('\\') || !*target) {
369 		/*
370 		 * UNIX link target was absolute.  In this case we represent the
371 		 * link as a symlink reparse point with SYMBOLIC_LINK_RELATIVE
372 		 * cleared.  For this to work we need to assign it a path that
373 		 * can be resolved from the root of the Windows NT kernel object
374 		 * namespace.  We do this by using "\??\C:" as a dummy prefix.
375 		 *
376 		 * Note that we could instead represent UNIX absolute links by
377 		 * setting SYMBOLIC_LINK_RELATIVE and then leaving the path
378 		 * backslash-prefixed like "\Users\Public".  On Windows this is
379 		 * valid and denotes a path relative to the root of the
380 		 * filesystem on which the reparse point resides.  The problem
381 		 * with this is that neither WIMGAPI nor wimlib (on Windows)
382 		 * will do "reparse point fixups" when extracting such links
383 		 * (modifying the link target to point into the actual
384 		 * extraction directory).  So for the greatest cross-platform
385 		 * consistency, we have to use the fake C: drive approach.
386 		 */
387 		static const utf16lechar prefix[6] = {
388 			cpu_to_le16('\\'),
389 			cpu_to_le16('?'),
390 			cpu_to_le16('?'),
391 			cpu_to_le16('\\'),
392 			cpu_to_le16('C'),
393 			cpu_to_le16(':'),
394 		};
395 
396 		/* Do not show \??\ in print name  */
397 		const size_t num_unprintable_chars = 4;
398 
399 		link.symlink_flags = 0;
400 		link.substitute_name_nbytes = sizeof(prefix) + target_nbytes;
401 		link.substitute_name = alloca(link.substitute_name_nbytes);
402 		memcpy(link.substitute_name, prefix, sizeof(prefix));
403 		memcpy(link.substitute_name + ARRAY_LEN(prefix), target, target_nbytes);
404 		link.print_name_nbytes = link.substitute_name_nbytes -
405 					 (num_unprintable_chars * sizeof(utf16lechar));
406 		link.print_name = link.substitute_name + num_unprintable_chars;
407 	} else {
408 		/* UNIX link target was relative.  In this case we represent the
409 		 * link as a symlink reparse point with SYMBOLIC_LINK_RELATIVE
410 		 * set.  This causes Windows to interpret the link relative to
411 		 * the directory containing the reparse point file.  */
412 		link.symlink_flags = SYMBOLIC_LINK_RELATIVE;
413 		link.substitute_name_nbytes = target_nbytes;
414 		link.substitute_name = target;
415 		link.print_name_nbytes = target_nbytes;
416 		link.print_name = target;
417 	}
418 
419 	/* Generate the reparse buffer.  */
420 	ret = make_link_reparse_point(&link, &rpbuf, &rpbuflen);
421 	if (ret)
422 		goto out_free_target;
423 
424 	/* Save the reparse data with the inode.  */
425 	ret = WIMLIB_ERR_NOMEM;
426 	if (!inode_add_stream_with_data(inode,
427 					STREAM_TYPE_REPARSE_POINT,
428 					NO_STREAM_NAME,
429 					rpbuf.rpdata,
430 					rpbuflen - REPARSE_DATA_OFFSET,
431 					blob_table))
432 		goto out_free_target;
433 
434 	/* The inode is now a reparse point.  */
435 	inode->i_reparse_tag = link.rptag;
436 	inode->i_attributes &= ~FILE_ATTRIBUTE_NORMAL;
437 	inode->i_attributes |= FILE_ATTRIBUTE_REPARSE_POINT;
438 
439 	ret = 0;
440 out_free_target:
441 	FREE(target);
442 	return ret;
443 }
444 
445 #endif /* !__WIN32__ */
446