1 /*
2  * xml.c
3  *
4  * Deals with the XML information in WIM files.  Uses the C library libxml2.
5  */
6 
7 /*
8  * Copyright (C) 2012-2016 Eric Biggers
9  *
10  * This file is free software; you can redistribute it and/or modify it under
11  * the terms of the GNU Lesser General Public License as published by the Free
12  * Software Foundation; either version 3 of the License, or (at your option) any
13  * later version.
14  *
15  * This file is distributed in the hope that it will be useful, but WITHOUT
16  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
17  * FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more
18  * details.
19  *
20  * You should have received a copy of the GNU Lesser General Public License
21  * along with this file; if not, see http://www.gnu.org/licenses/.
22  */
23 
24 #ifdef HAVE_CONFIG_H
25 #  include "config.h"
26 #endif
27 
28 #include <libxml/parser.h>
29 #include <libxml/tree.h>
30 #include <libxml/xmlsave.h>
31 #include <string.h>
32 
33 #include "wimlib/blob_table.h"
34 #include "wimlib/dentry.h"
35 #include "wimlib/encoding.h"
36 #include "wimlib/error.h"
37 #include "wimlib/file_io.h"
38 #include "wimlib/metadata.h"
39 #include "wimlib/resource.h"
40 #include "wimlib/timestamp.h"
41 #include "wimlib/xml.h"
42 #include "wimlib/write.h"
43 
44 /*
45  * A wrapper around a WIM file's XML document.  The XML document contains
46  * metadata about each image in the WIM file as well as metadata about the WIM
47  * file itself.
48  */
49 struct wim_xml_info {
50 
51 	/* The parsed XML document as a libxml2 document tree  */
52 	xmlDocPtr doc;
53 
54 	/* The root element of the document.  This is a cached value, equal to
55 	 * xmlDocGetRootElement(doc).  */
56 	xmlNode *root;
57 
58 	/* A malloc()ed array containing a pointer to the IMAGE element for each
59 	 * WIM image.  The image with 1-based index 'i' is at index 'i - 1' in
60 	 * this array.  Note: these pointers are cached values, since they could
61 	 * also be found by searching the document.  */
62 	xmlNode **images;
63 
64 	/* The number of WIM images (the length of 'images')  */
65 	int image_count;
66 
67 #if TCHAR_IS_UTF16LE
68 	/* Temporary memory for UTF-8 => 'tchar' string translations.  When an
69 	 * API function needs to return a 'tchar' string, it uses one of these
70 	 * array slots to hold the string and returns a pointer to it.  */
71 	tchar *strings[128];
72 	size_t next_string_idx;
73 	size_t num_strings;
74 #endif
75 };
76 
77 /*----------------------------------------------------------------------------*
78  *                            Internal functions                              *
79  *----------------------------------------------------------------------------*/
80 
/* Visit every direct child of 'parent' in sibling order, assigning each one to
 * 'child' in turn.  'child' is NULL once the loop has run to completion.  */
#define node_for_each_child(parent, child)		\
	for ((child) = (parent)->children;		\
	     (child) != NULL;				\
	     (child) = (child)->next)
84 
85 /* Is the specified node an element of the specified name?  */
86 static bool
node_is_element(const xmlNode * node,const xmlChar * name)87 node_is_element(const xmlNode *node, const xmlChar *name)
88 {
89 	return node->type == XML_ELEMENT_NODE && xmlStrEqual(node->name, name);
90 }
91 
92 /* Retrieve a pointer to the UTF-8 text contents of the specified node, or NULL
93  * if the node has no text contents.  This assumes the simple case where the
94  * node has a single TEXT child node.  */
95 static const xmlChar *
node_get_text(const xmlNode * node)96 node_get_text(const xmlNode *node)
97 {
98 	const xmlNode *child;
99 
100 	if (!node)
101 		return NULL;
102 	node_for_each_child(node, child)
103 		if (child->type == XML_TEXT_NODE && child->content)
104 			return child->content;
105 	return NULL;
106 }
107 
108 /* Retrieve an unsigned integer from the contents of the specified node,
109  * decoding it using the specified base.  If the node has no contents or does
110  * not contain a valid number, returns 0.  */
111 static u64
node_get_number(const xmlNode * node,int base)112 node_get_number(const xmlNode *node, int base)
113 {
114 	const xmlChar *str = node_get_text(node);
115 	char *end;
116 	unsigned long long v;
117 
118 	if (!str)
119 		return 0;
120 	v = strtoull(str, &end, base);
121 	if ((xmlChar *)end == str || *end || v >= UINT64_MAX)
122 		return 0;
123 	return v;
124 }
125 
126 /* Retrieve the timestamp from a time node.  This node should have child
127  * elements HIGHPART and LOWPART; these elements will be used to construct a
128  * Windows-style timestamp.  */
129 static u64
node_get_timestamp(const xmlNode * node)130 node_get_timestamp(const xmlNode *node)
131 {
132 	u64 timestamp = 0;
133 	xmlNode *child;
134 
135 	if (!node)
136 		return 0;
137 	node_for_each_child(node, child) {
138 		if (node_is_element(child, "HIGHPART"))
139 			timestamp |= node_get_number(child, 16) << 32;
140 		else if (node_is_element(child, "LOWPART"))
141 			timestamp |= node_get_number(child, 16);
142 	}
143 	return timestamp;
144 }
145 
146 static int
tstr_get_utf8(const tchar * tstr,const xmlChar ** utf8_ret)147 tstr_get_utf8(const tchar *tstr, const xmlChar **utf8_ret)
148 {
149 #if TCHAR_IS_UTF16LE
150 	return utf16le_to_utf8(tstr, tstrlen(tstr) * sizeof(tchar),
151 			       (char **)utf8_ret, NULL);
152 #else
153 	*utf8_ret = (const xmlChar *)tstr;
154 	return 0;
155 #endif
156 }
157 
158 static void
tstr_put_utf8(const xmlChar * utf8)159 tstr_put_utf8(const xmlChar *utf8)
160 {
161 #if TCHAR_IS_UTF16LE
162 	FREE((char *)utf8);
163 #endif
164 }
165 
166 /* Retrieve the text contents of an XML element as a 'tchar' string.  If not
167  * found or if the text could not be translated, returns NULL.  */
168 static const tchar *
node_get_ttext(struct wim_xml_info * info,xmlNode * node)169 node_get_ttext(struct wim_xml_info *info, xmlNode *node)
170 {
171 	const xmlChar *text = node_get_text(node);
172 
173 #if TCHAR_IS_UTF16LE
174 	tchar **ttext_p;
175 
176 	if (!text)
177 		return NULL;
178 
179 	ttext_p = &info->strings[info->next_string_idx];
180 	if (info->num_strings >= ARRAY_LEN(info->strings)) {
181 		FREE(*ttext_p);
182 		*ttext_p = NULL;
183 	}
184 	if (utf8_to_tstr(text, strlen(text), ttext_p, NULL))
185 		return NULL;
186 	if (info->num_strings < ARRAY_LEN(info->strings))
187 		info->num_strings++;
188 	info->next_string_idx++;
189 	info->next_string_idx %= ARRAY_LEN(info->strings);
190 	return *ttext_p;
191 #else
192 	return text;
193 #endif
194 }
195 
196 /* Unlink the specified node from its parent, then free it (recursively).  */
197 static void
unlink_and_free_tree(xmlNode * node)198 unlink_and_free_tree(xmlNode *node)
199 {
200 	xmlUnlinkNode(node);
201 	xmlFreeNode(node);
202 }
203 
204 /* Unlink and free (recursively) all children of the specified node.  */
205 static void
unlink_and_free_children(xmlNode * node)206 unlink_and_free_children(xmlNode *node)
207 {
208 	xmlNode *child;
209 
210 	while ((child = node->last) != NULL)
211 		unlink_and_free_tree(child);
212 }
213 
214 /* Add the new child element 'replacement' to 'parent', replacing any same-named
215  * element that may already exist.  */
216 static void
node_replace_child_element(xmlNode * parent,xmlNode * replacement)217 node_replace_child_element(xmlNode *parent, xmlNode *replacement)
218 {
219 	xmlNode *child;
220 
221 	node_for_each_child(parent, child) {
222 		if (node_is_element(child, replacement->name)) {
223 			xmlReplaceNode(child, replacement);
224 			xmlFreeNode(child);
225 			return;
226 		}
227 	}
228 
229 	xmlAddChild(parent, replacement);
230 }
231 
232 /* Set the text contents of the specified element to the specified string,
233  * replacing the existing contents (if any).  The string is "raw" and is
234  * permitted to contain characters that have special meaning in XML.  */
235 static int
node_set_text(xmlNode * node,const xmlChar * text)236 node_set_text(xmlNode *node, const xmlChar *text)
237 {
238 	xmlNode *text_node = xmlNewText(text);
239 	if (!text_node)
240 		return WIMLIB_ERR_NOMEM;
241 	unlink_and_free_children(node);
242 	xmlAddChild(node, text_node);
243 	return 0;
244 }
245 
246 /* Like 'node_set_text()', but takes in a 'tchar' string.  */
247 static int
node_set_ttext(xmlNode * node,const tchar * ttext)248 node_set_ttext(xmlNode *node, const tchar *ttext)
249 {
250 	const xmlChar *text;
251 	int ret;
252 
253 	ret = tstr_get_utf8(ttext, &text);
254 	if (ret)
255 		return ret;
256 	ret = node_set_text(node, text);
257 	tstr_put_utf8(text);
258 	return ret;
259 }
260 
261 /* Create a new element containing text and optionally link it into a tree.  */
262 static xmlNode *
new_element_with_text(xmlNode * parent,const xmlChar * name,const xmlChar * text)263 new_element_with_text(xmlNode *parent, const xmlChar *name, const xmlChar *text)
264 {
265 	xmlNode *node;
266 
267 	node = xmlNewNode(NULL, name);
268 	if (!node)
269 		return NULL;
270 
271 	if (node_set_text(node, text)) {
272 		xmlFreeNode(node);
273 		return NULL;
274 	}
275 
276 	if (parent)
277 		xmlAddChild(parent, node);
278 	return node;
279 }
280 
281 /* Create a new element containing text and optionally link it into a tree.  */
282 static int
new_element_with_ttext(xmlNode * parent,const xmlChar * name,const tchar * ttext,xmlNode ** node_ret)283 new_element_with_ttext(xmlNode *parent, const xmlChar *name, const tchar *ttext,
284 		       xmlNode **node_ret)
285 {
286 	const xmlChar *text;
287 	int ret;
288 	xmlNode *node;
289 
290 	ret = tstr_get_utf8(ttext, &text);
291 	if (ret)
292 		return ret;
293 	node = new_element_with_text(parent, name, text);
294 	tstr_put_utf8(text);
295 	if (!node)
296 		return WIMLIB_ERR_NOMEM;
297 	if (node_ret)
298 		*node_ret = node;
299 	return 0;
300 }
301 
302 /* Create a new timestamp element and optionally link it into a tree.  */
303 static xmlNode *
new_element_with_timestamp(xmlNode * parent,const xmlChar * name,u64 timestamp)304 new_element_with_timestamp(xmlNode *parent, const xmlChar *name, u64 timestamp)
305 {
306 	xmlNode *node;
307 	char buf[32];
308 
309 	node = xmlNewNode(NULL, name);
310 	if (!node)
311 		goto err;
312 
313 	sprintf(buf, "0x%08"PRIX32, (u32)(timestamp >> 32));
314 	if (!new_element_with_text(node, "HIGHPART", buf))
315 		goto err;
316 
317 	sprintf(buf, "0x%08"PRIX32, (u32)timestamp);
318 	if (!new_element_with_text(node, "LOWPART", buf))
319 		goto err;
320 
321 	if (parent)
322 		xmlAddChild(parent, node);
323 	return node;
324 
325 err:
326 	xmlFreeNode(node);
327 	return NULL;
328 }
329 
330 /* Create a new number element and optionally link it into a tree.  */
331 static xmlNode *
new_element_with_u64(xmlNode * parent,const xmlChar * name,u64 value)332 new_element_with_u64(xmlNode *parent, const xmlChar *name, u64 value)
333 {
334 	char buf[32];
335 
336 	sprintf(buf, "%"PRIu64, value);
337 	return new_element_with_text(parent, name, buf);
338 }
339 
340 /* Allocate a 'struct wim_xml_info'.  The caller is responsible for initializing
341  * the document and the images array.  */
342 static struct wim_xml_info *
alloc_wim_xml_info(void)343 alloc_wim_xml_info(void)
344 {
345 	struct wim_xml_info *info = MALLOC(sizeof(*info));
346 #if TCHAR_IS_UTF16LE
347 	if (info) {
348 		info->next_string_idx = 0;
349 		info->num_strings = 0;
350 	}
351 #endif
352 	return info;
353 }
354 
355 static bool
parse_index(xmlChar ** pp,u32 * index_ret)356 parse_index(xmlChar **pp, u32 *index_ret)
357 {
358 	xmlChar *p = *pp;
359 	u32 index = 0;
360 
361 	*p++ = '\0'; /* overwrite '[' */
362 	while (*p >= '0' && *p <= '9') {
363 		u32 n = (index * 10) + (*p++ - '0');
364 		if (n < index)
365 			return false;
366 		index = n;
367 	}
368 	if (index == 0)
369 		return false;
370 	if (*p != ']')
371 		return false;
372 	p++;
373 	if (*p != '/' && *p != '\0')
374 		return false;
375 
376 	*pp = p;
377 	*index_ret = index;
378 	return true;
379 }
380 
381 static int
do_xml_path_walk(xmlNode * node,const xmlChar * path,bool create,xmlNode ** result_ret)382 do_xml_path_walk(xmlNode *node, const xmlChar *path, bool create,
383 		 xmlNode **result_ret)
384 {
385 	size_t n = strlen(path) + 1;
386 	xmlChar buf[n];
387 	xmlChar *p;
388 	xmlChar c;
389 
390 	*result_ret = NULL;
391 
392 	if (!node)
393 		return 0;
394 
395 	/* Copy the path to a temporary buffer.  */
396 	memcpy(buf, path, n);
397 	p = buf;
398 
399 	if (*p == '/')
400 		goto bad_syntax;
401 	c = *p;
402 
403 	while (c != '\0') {
404 		const xmlChar *name;
405 		xmlNode *child;
406 		u32 index = 1;
407 
408 		/* We have another path component.  */
409 
410 		/* Parse the element name.  */
411 		name = p;
412 		while (*p != '/' && *p != '\0' && *p != '[')
413 			p++;
414 		if (p == name) /* empty name?  */
415 			goto bad_syntax;
416 
417 		/* Handle a bracketed index, if one was specified.  */
418 		if (*p == '[' && !parse_index(&p, &index))
419 			goto bad_syntax;
420 
421 		c = *p;
422 		*p = '\0';
423 
424 		/* Look for a matching child.  */
425 		node_for_each_child(node, child)
426 			if (node_is_element(child, name) && !--index)
427 				goto next_step;
428 
429 		/* No child matched the path.  If create=false, the lookup
430 		 * failed.  If create=true, create the needed element.  */
431 		if (!create)
432 			return 0;
433 
434 		/* We can't create an element at index 'n' if indices 1...n-1
435 		 * didn't already exist.  */
436 		if (index != 1)
437 			return WIMLIB_ERR_INVALID_PARAM;
438 
439 		child = xmlNewChild(node, NULL, name, NULL);
440 		if (!child)
441 			return WIMLIB_ERR_NOMEM;
442 	next_step:
443 		/* Continue to the next path component, if there is one.  */
444 		node = child;
445 		p++;
446 	}
447 
448 	*result_ret = node;
449 	return 0;
450 
451 bad_syntax:
452 	ERROR("The XML path \"%s\" has invalid syntax.", path);
453 	return WIMLIB_ERR_INVALID_PARAM;
454 }
455 
456 /* Retrieve the XML element, if any, at the specified 'path'.  This supports a
457  * simple filesystem-like syntax.  If the element was found, returns a pointer
458  * to it; otherwise returns NULL.  */
459 static xmlNode *
xml_get_node_by_path(xmlNode * root,const xmlChar * path)460 xml_get_node_by_path(xmlNode *root, const xmlChar *path)
461 {
462 	xmlNode *node;
463 	do_xml_path_walk(root, path, false, &node);
464 	return node;
465 }
466 
467 /* Similar to xml_get_node_by_path(), but creates the element and any requisite
468  * ancestor elements as needed.   If successful, 0 is returned and *node_ret is
469  * set to a pointer to the resulting element.  If unsuccessful, an error code is
470  * returned and *node_ret is set to NULL.  */
471 static int
xml_ensure_node_by_path(xmlNode * root,const xmlChar * path,xmlNode ** node_ret)472 xml_ensure_node_by_path(xmlNode *root, const xmlChar *path, xmlNode **node_ret)
473 {
474 	return do_xml_path_walk(root, path, true, node_ret);
475 }
476 
477 static u64
xml_get_number_by_path(xmlNode * root,const xmlChar * path)478 xml_get_number_by_path(xmlNode *root, const xmlChar *path)
479 {
480 	return node_get_number(xml_get_node_by_path(root, path), 10);
481 }
482 
483 static u64
xml_get_timestamp_by_path(xmlNode * root,const xmlChar * path)484 xml_get_timestamp_by_path(xmlNode *root, const xmlChar *path)
485 {
486 	return node_get_timestamp(xml_get_node_by_path(root, path));
487 }
488 
489 static const xmlChar *
xml_get_text_by_path(xmlNode * root,const xmlChar * path)490 xml_get_text_by_path(xmlNode *root, const xmlChar *path)
491 {
492 	return node_get_text(xml_get_node_by_path(root, path));
493 }
494 
495 static const tchar *
xml_get_ttext_by_path(struct wim_xml_info * info,xmlNode * root,const xmlChar * path)496 xml_get_ttext_by_path(struct wim_xml_info *info, xmlNode *root,
497 		      const xmlChar *path)
498 {
499 	return node_get_ttext(info, xml_get_node_by_path(root, path));
500 }
501 
502 /* Creates/replaces (if ttext is not NULL and not empty) or removes (if ttext is
503  * NULL or empty) an element containing text.  */
504 static int
xml_set_ttext_by_path(xmlNode * root,const xmlChar * path,const tchar * ttext)505 xml_set_ttext_by_path(xmlNode *root, const xmlChar *path, const tchar *ttext)
506 {
507 	int ret;
508 	xmlNode *node;
509 
510 	if (ttext && *ttext) {
511 		/* Create or replace  */
512 		ret = xml_ensure_node_by_path(root, path, &node);
513 		if (ret)
514 			return ret;
515 		return node_set_ttext(node, ttext);
516 	} else {
517 		/* Remove  */
518 		node = xml_get_node_by_path(root, path);
519 		if (node)
520 			unlink_and_free_tree(node);
521 		return 0;
522 	}
523 }
524 
525 /* Unlink and return the node which represents the INDEX attribute of the
526  * specified IMAGE element.  */
527 static xmlAttr *
unlink_index_attribute(xmlNode * image_node)528 unlink_index_attribute(xmlNode *image_node)
529 {
530 	xmlAttr *attr = xmlHasProp(image_node, "INDEX");
531 	xmlUnlinkNode((xmlNode *)attr);
532 	return attr;
533 }
534 
535 /* Compute the total uncompressed size of the streams of the specified inode. */
536 static u64
inode_sum_stream_sizes(const struct wim_inode * inode,const struct blob_table * blob_table)537 inode_sum_stream_sizes(const struct wim_inode *inode,
538 		       const struct blob_table *blob_table)
539 {
540 	u64 total_size = 0;
541 
542 	for (unsigned i = 0; i < inode->i_num_streams; i++) {
543 		const struct blob_descriptor *blob;
544 
545 		blob = stream_blob(&inode->i_streams[i], blob_table);
546 		if (blob)
547 			total_size += blob->size;
548 	}
549 	return total_size;
550 }
551 
552 static int
append_image_node(struct wim_xml_info * info,xmlNode * image_node)553 append_image_node(struct wim_xml_info *info, xmlNode *image_node)
554 {
555 	char buf[32];
556 	xmlNode **images;
557 
558 	/* Limit exceeded?  */
559 	if (unlikely(info->image_count >= MAX_IMAGES))
560 		return WIMLIB_ERR_IMAGE_COUNT;
561 
562 	/* Add the INDEX attribute.  */
563 	sprintf(buf, "%d", info->image_count + 1);
564 	if (!xmlNewProp(image_node, "INDEX", buf))
565 		return WIMLIB_ERR_NOMEM;
566 
567 	/* Append the IMAGE element to the 'images' array.  */
568 	images = REALLOC(info->images,
569 			 (info->image_count + 1) * sizeof(info->images[0]));
570 	if (unlikely(!images))
571 		return WIMLIB_ERR_NOMEM;
572 	info->images = images;
573 	images[info->image_count++] = image_node;
574 
575 	/* Add the IMAGE element to the document.  */
576 	xmlAddChild(info->root, image_node);
577 	return 0;
578 }
579 
580 /*----------------------------------------------------------------------------*
581  *                     Functions for internal library use                     *
582  *----------------------------------------------------------------------------*/
583 
584 /* Allocate an empty 'struct wim_xml_info', containing no images.  */
585 struct wim_xml_info *
xml_new_info_struct(void)586 xml_new_info_struct(void)
587 {
588 	struct wim_xml_info *info;
589 
590 	info = alloc_wim_xml_info();
591 	if (!info)
592 		goto err;
593 
594 	info->doc = xmlNewDoc("1.0");
595 	if (!info->doc)
596 		goto err_free_info;
597 
598 	info->root = xmlNewNode(NULL, "WIM");
599 	if (!info->root)
600 		goto err_free_doc;
601 	xmlDocSetRootElement(info->doc, info->root);
602 
603 	info->images = NULL;
604 	info->image_count = 0;
605 	return info;
606 
607 err_free_doc:
608 	xmlFreeDoc(info->doc);
609 err_free_info:
610 	FREE(info);
611 err:
612 	return NULL;
613 }
614 
615 /* Free a 'struct wim_xml_info'.  */
616 void
xml_free_info_struct(struct wim_xml_info * info)617 xml_free_info_struct(struct wim_xml_info *info)
618 {
619 	if (info) {
620 		xmlFreeDoc(info->doc);
621 		FREE(info->images);
622 	#if TCHAR_IS_UTF16LE
623 		for (size_t i = 0; i < info->num_strings; i++)
624 			FREE(info->strings[i]);
625 	#endif
626 		FREE(info);
627 	}
628 }
629 
630 /* Retrieve the number of images for which there exist IMAGE elements in the XML
631  * document.  */
632 int
xml_get_image_count(const struct wim_xml_info * info)633 xml_get_image_count(const struct wim_xml_info *info)
634 {
635 	return info->image_count;
636 }
637 
638 /* Retrieve the TOTALBYTES value for the WIM file, or 0 if this value is
639  * unavailable.  */
640 u64
xml_get_total_bytes(const struct wim_xml_info * info)641 xml_get_total_bytes(const struct wim_xml_info *info)
642 {
643 	return xml_get_number_by_path(info->root, "TOTALBYTES");
644 }
645 
646 /* Retrieve the TOTALBYTES value for the specified image, or 0 if this value is
647  * unavailable.  */
648 u64
xml_get_image_total_bytes(const struct wim_xml_info * info,int image)649 xml_get_image_total_bytes(const struct wim_xml_info *info, int image)
650 {
651 	return xml_get_number_by_path(info->images[image - 1], "TOTALBYTES");
652 }
653 
654 /* Retrieve the HARDLINKBYTES value for the specified image, or 0 if this value
655  * is unavailable.  */
656 u64
xml_get_image_hard_link_bytes(const struct wim_xml_info * info,int image)657 xml_get_image_hard_link_bytes(const struct wim_xml_info *info, int image)
658 {
659 	return xml_get_number_by_path(info->images[image - 1], "HARDLINKBYTES");
660 }
661 
662 /* Retrieve the WIMBOOT value for the specified image, or false if this value is
663  * unavailable.  */
664 bool
xml_get_wimboot(const struct wim_xml_info * info,int image)665 xml_get_wimboot(const struct wim_xml_info *info, int image)
666 {
667 	return xml_get_number_by_path(info->images[image - 1], "WIMBOOT");
668 }
669 
670 /* Retrieve the Windows build number for the specified image, or 0 if this
671  * information is not available.  */
672 u64
xml_get_windows_build_number(const struct wim_xml_info * info,int image)673 xml_get_windows_build_number(const struct wim_xml_info *info, int image)
674 {
675 	return xml_get_number_by_path(info->images[image - 1],
676 				      "WINDOWS/VERSION/BUILD");
677 }
678 
679 /* Set the WIMBOOT value for the specified image.  */
680 int
xml_set_wimboot(struct wim_xml_info * info,int image)681 xml_set_wimboot(struct wim_xml_info *info, int image)
682 {
683 	return xml_set_ttext_by_path(info->images[image - 1], "WIMBOOT", T("1"));
684 }
685 
686 /*
687  * Update the DIRCOUNT, FILECOUNT, TOTALBYTES, HARDLINKBYTES, and
688  * LASTMODIFICATIONTIME elements for the specified WIM image.
689  *
690  * Note: since these stats are likely to be used for display purposes only, we
691  * no longer attempt to duplicate WIMGAPI's weird bugs when calculating them.
692  */
693 int
xml_update_image_info(WIMStruct * wim,int image)694 xml_update_image_info(WIMStruct *wim, int image)
695 {
696 	const struct wim_image_metadata *imd = wim->image_metadata[image - 1];
697 	xmlNode *image_node = wim->xml_info->images[image - 1];
698 	const struct wim_inode *inode;
699 	u64 dir_count = 0;
700 	u64 file_count = 0;
701 	u64 total_bytes = 0;
702 	u64 hard_link_bytes = 0;
703 	u64 size;
704 	xmlNode *dircount_node;
705 	xmlNode *filecount_node;
706 	xmlNode *totalbytes_node;
707 	xmlNode *hardlinkbytes_node;
708 	xmlNode *lastmodificationtime_node;
709 
710 	image_for_each_inode(inode, imd) {
711 		if (inode_is_directory(inode))
712 			dir_count += inode->i_nlink;
713 		else
714 			file_count += inode->i_nlink;
715 		size = inode_sum_stream_sizes(inode, wim->blob_table);
716 		total_bytes += size * inode->i_nlink;
717 		hard_link_bytes += size * (inode->i_nlink - 1);
718 	}
719 
720 	dircount_node = new_element_with_u64(NULL, "DIRCOUNT", dir_count);
721 	filecount_node = new_element_with_u64(NULL, "FILECOUNT", file_count);
722 	totalbytes_node = new_element_with_u64(NULL, "TOTALBYTES", total_bytes);
723 	hardlinkbytes_node = new_element_with_u64(NULL, "HARDLINKBYTES",
724 						  hard_link_bytes);
725 	lastmodificationtime_node =
726 		new_element_with_timestamp(NULL, "LASTMODIFICATIONTIME",
727 					   now_as_wim_timestamp());
728 
729 	if (unlikely(!dircount_node || !filecount_node || !totalbytes_node ||
730 		     !hardlinkbytes_node || !lastmodificationtime_node)) {
731 		xmlFreeNode(dircount_node);
732 		xmlFreeNode(filecount_node);
733 		xmlFreeNode(totalbytes_node);
734 		xmlFreeNode(hardlinkbytes_node);
735 		xmlFreeNode(lastmodificationtime_node);
736 		return WIMLIB_ERR_NOMEM;
737 	}
738 
739 	node_replace_child_element(image_node, dircount_node);
740 	node_replace_child_element(image_node, filecount_node);
741 	node_replace_child_element(image_node, totalbytes_node);
742 	node_replace_child_element(image_node, hardlinkbytes_node);
743 	node_replace_child_element(image_node, lastmodificationtime_node);
744 	return 0;
745 }
746 
747 /* Add an image to the XML information. */
748 int
xml_add_image(struct wim_xml_info * info,const tchar * name)749 xml_add_image(struct wim_xml_info *info, const tchar *name)
750 {
751 	const u64 now = now_as_wim_timestamp();
752 	xmlNode *image_node;
753 	int ret;
754 
755 	ret = WIMLIB_ERR_NOMEM;
756 	image_node = xmlNewNode(NULL, "IMAGE");
757 	if (!image_node)
758 		goto err;
759 
760 	if (name && *name) {
761 		ret = new_element_with_ttext(image_node, "NAME", name, NULL);
762 		if (ret)
763 			goto err;
764 	}
765 	ret = WIMLIB_ERR_NOMEM;
766 	if (!new_element_with_u64(image_node, "DIRCOUNT", 0))
767 		goto err;
768 	if (!new_element_with_u64(image_node, "FILECOUNT", 0))
769 		goto err;
770 	if (!new_element_with_u64(image_node, "TOTALBYTES", 0))
771 		goto err;
772 	if (!new_element_with_u64(image_node, "HARDLINKBYTES", 0))
773 		goto err;
774 	if (!new_element_with_timestamp(image_node, "CREATIONTIME", now))
775 		goto err;
776 	if (!new_element_with_timestamp(image_node, "LASTMODIFICATIONTIME", now))
777 		goto err;
778 	ret = append_image_node(info, image_node);
779 	if (ret)
780 		goto err;
781 	return 0;
782 
783 err:
784 	xmlFreeNode(image_node);
785 	return ret;
786 }
787 
788 /*
789  * Make a copy of the XML information for the image with index @src_image in the
790  * @src_info XML document and append it to the @dest_info XML document.
791  *
792  * In the process, change the image's name and description to the values
793  * specified by @dest_image_name and @dest_image_description.  Either or both
794  * may be NULL, which indicates that the corresponding element will not be
795  * included in the destination image.
796  */
797 int
xml_export_image(const struct wim_xml_info * src_info,int src_image,struct wim_xml_info * dest_info,const tchar * dest_image_name,const tchar * dest_image_description,bool wimboot)798 xml_export_image(const struct wim_xml_info *src_info, int src_image,
799 		 struct wim_xml_info *dest_info, const tchar *dest_image_name,
800 		 const tchar *dest_image_description, bool wimboot)
801 {
802 	xmlNode *dest_node;
803 	int ret;
804 
805 	ret = WIMLIB_ERR_NOMEM;
806 	dest_node = xmlDocCopyNode(src_info->images[src_image - 1],
807 				   dest_info->doc, 1);
808 	if (!dest_node)
809 		goto err;
810 
811 	ret = xml_set_ttext_by_path(dest_node, "NAME", dest_image_name);
812 	if (ret)
813 		goto err;
814 
815 	ret = xml_set_ttext_by_path(dest_node, "DESCRIPTION",
816 				    dest_image_description);
817 	if (ret)
818 		goto err;
819 
820 	if (wimboot) {
821 		ret = xml_set_ttext_by_path(dest_node, "WIMBOOT", T("1"));
822 		if (ret)
823 			goto err;
824 	}
825 
826 	xmlFreeProp(unlink_index_attribute(dest_node));
827 
828 	ret = append_image_node(dest_info, dest_node);
829 	if (ret)
830 		goto err;
831 	return 0;
832 
833 err:
834 	xmlFreeNode(dest_node);
835 	return ret;
836 }
837 
838 /* Remove the specified image from the XML document.  */
839 void
xml_delete_image(struct wim_xml_info * info,int image)840 xml_delete_image(struct wim_xml_info *info, int image)
841 {
842 	xmlNode *next_image;
843 	xmlAttr *index_attr, *next_index_attr;
844 
845 	/* Free the IMAGE element for the deleted image.  Then, shift all
846 	 * higher-indexed IMAGE elements down by 1, in the process re-assigning
847 	 * their INDEX attributes.  */
848 
849 	next_image = info->images[image - 1];
850 	next_index_attr = unlink_index_attribute(next_image);
851 	unlink_and_free_tree(next_image);
852 
853 	while (image < info->image_count) {
854 		index_attr = next_index_attr;
855 		next_image = info->images[image];
856 		next_index_attr = unlink_index_attribute(next_image);
857 		xmlAddChild(next_image, (xmlNode *)index_attr);
858 		info->images[image - 1] = next_image;
859 		image++;
860 	}
861 
862 	xmlFreeProp(next_index_attr);
863 	info->image_count--;
864 }
865 
/* Processor architecture codes, as defined in w64 mingw winnt.h.  These are
 * the values matched against the WINDOWS/ARCH number of an image.  Note that
 * the value 11 is not defined here.  */
#define PROCESSOR_ARCHITECTURE_INTEL          0
#define PROCESSOR_ARCHITECTURE_MIPS           1
#define PROCESSOR_ARCHITECTURE_ALPHA          2
#define PROCESSOR_ARCHITECTURE_PPC            3
#define PROCESSOR_ARCHITECTURE_SHX            4
#define PROCESSOR_ARCHITECTURE_ARM            5
#define PROCESSOR_ARCHITECTURE_IA64           6
#define PROCESSOR_ARCHITECTURE_ALPHA64        7
#define PROCESSOR_ARCHITECTURE_MSIL           8
#define PROCESSOR_ARCHITECTURE_AMD64          9
#define PROCESSOR_ARCHITECTURE_IA32_ON_WIN64  10
#define PROCESSOR_ARCHITECTURE_ARM64          12
879 
880 static const tchar *
describe_arch(u64 arch)881 describe_arch(u64 arch)
882 {
883 	static const tchar * const descriptions[] = {
884 		[PROCESSOR_ARCHITECTURE_INTEL] = T("x86"),
885 		[PROCESSOR_ARCHITECTURE_MIPS]  = T("MIPS"),
886 		[PROCESSOR_ARCHITECTURE_ARM]   = T("ARM"),
887 		[PROCESSOR_ARCHITECTURE_IA64]  = T("ia64"),
888 		[PROCESSOR_ARCHITECTURE_AMD64] = T("x86_64"),
889 		[PROCESSOR_ARCHITECTURE_ARM64] = T("ARM64"),
890 	};
891 
892 	if (arch < ARRAY_LEN(descriptions) && descriptions[arch] != NULL)
893 		return descriptions[arch];
894 
895 	return T("unknown");
896 }
897 
898 /* Print information from the WINDOWS element, if present.  */
899 static void
print_windows_info(struct wim_xml_info * info,xmlNode * image_node)900 print_windows_info(struct wim_xml_info *info, xmlNode *image_node)
901 {
902 	xmlNode *windows_node;
903 	xmlNode *langs_node;
904 	xmlNode *version_node;
905 	const tchar *text;
906 
907 	windows_node = xml_get_node_by_path(image_node, "WINDOWS");
908 	if (!windows_node)
909 		return;
910 
911 	tprintf(T("Architecture:           %"TS"\n"),
912 		describe_arch(xml_get_number_by_path(windows_node, "ARCH")));
913 
914 	text = xml_get_ttext_by_path(info, windows_node, "PRODUCTNAME");
915 	if (text)
916 		tprintf(T("Product Name:           %"TS"\n"), text);
917 
918 	text = xml_get_ttext_by_path(info, windows_node, "EDITIONID");
919 	if (text)
920 		tprintf(T("Edition ID:             %"TS"\n"), text);
921 
922 	text = xml_get_ttext_by_path(info, windows_node, "INSTALLATIONTYPE");
923 	if (text)
924 		tprintf(T("Installation Type:      %"TS"\n"), text);
925 
926 	text = xml_get_ttext_by_path(info, windows_node, "HAL");
927 	if (text)
928 		tprintf(T("HAL:                    %"TS"\n"), text);
929 
930 	text = xml_get_ttext_by_path(info, windows_node, "PRODUCTTYPE");
931 	if (text)
932 		tprintf(T("Product Type:           %"TS"\n"), text);
933 
934 	text = xml_get_ttext_by_path(info, windows_node, "PRODUCTSUITE");
935 	if (text)
936 		tprintf(T("Product Suite:          %"TS"\n"), text);
937 
938 	langs_node = xml_get_node_by_path(windows_node, "LANGUAGES");
939 	if (langs_node) {
940 		xmlNode *lang_node;
941 
942 		tprintf(T("Languages:              "));
943 		node_for_each_child(langs_node, lang_node) {
944 			if (!node_is_element(lang_node, "LANGUAGE"))
945 				continue;
946 			text = node_get_ttext(info, lang_node);
947 			if (!text)
948 				continue;
949 			tprintf(T("%"TS" "), text);
950 		}
951 		tputchar(T('\n'));
952 
953 		text = xml_get_ttext_by_path(info, langs_node, "DEFAULT");
954 		if (text)
955 			tprintf(T("Default Language:       %"TS"\n"), text);
956 	}
957 
958 	text = xml_get_ttext_by_path(info, windows_node, "SYSTEMROOT");
959 	if (text)
960 		tprintf(T("System Root:            %"TS"\n"), text);
961 
962 	version_node = xml_get_node_by_path(windows_node, "VERSION");
963 	if (version_node) {
964 		tprintf(T("Major Version:          %"PRIu64"\n"),
965 			xml_get_number_by_path(version_node, "MAJOR"));
966 		tprintf(T("Minor Version:          %"PRIu64"\n"),
967 			xml_get_number_by_path(version_node, "MINOR"));
968 		tprintf(T("Build:                  %"PRIu64"\n"),
969 			xml_get_number_by_path(version_node, "BUILD"));
970 		tprintf(T("Service Pack Build:     %"PRIu64"\n"),
971 			xml_get_number_by_path(version_node, "SPBUILD"));
972 		tprintf(T("Service Pack Level:     %"PRIu64"\n"),
973 			xml_get_number_by_path(version_node, "SPLEVEL"));
974 	}
975 }
976 
977 /* Prints information about the specified image.  */
978 void
xml_print_image_info(struct wim_xml_info * info,int image)979 xml_print_image_info(struct wim_xml_info *info, int image)
980 {
981 	xmlNode * const image_node = info->images[image - 1];
982 	const tchar *text;
983 	tchar timebuf[64];
984 
985 	tprintf(T("Index:                  %d\n"), image);
986 
987 	/* Always print the Name and Description, even if the corresponding XML
988 	 * elements are not present.  */
989 	text = xml_get_ttext_by_path(info, image_node, "NAME");
990 	tprintf(T("Name:                   %"TS"\n"), text ? text : T(""));
991 	text = xml_get_ttext_by_path(info, image_node, "DESCRIPTION");
992 	tprintf(T("Description:            %"TS"\n"), text ? text : T(""));
993 
994 	text = xml_get_ttext_by_path(info, image_node, "DISPLAYNAME");
995 	if (text)
996 		tprintf(T("Display Name:           %"TS"\n"), text);
997 
998 	text = xml_get_ttext_by_path(info, image_node, "DISPLAYDESCRIPTION");
999 	if (text)
1000 		tprintf(T("Display Description:    %"TS"\n"), text);
1001 
1002 	tprintf(T("Directory Count:        %"PRIu64"\n"),
1003 		xml_get_number_by_path(image_node, "DIRCOUNT"));
1004 
1005 	tprintf(T("File Count:             %"PRIu64"\n"),
1006 		xml_get_number_by_path(image_node, "FILECOUNT"));
1007 
1008 	tprintf(T("Total Bytes:            %"PRIu64"\n"),
1009 		xml_get_number_by_path(image_node, "TOTALBYTES"));
1010 
1011 	tprintf(T("Hard Link Bytes:        %"PRIu64"\n"),
1012 		xml_get_number_by_path(image_node, "HARDLINKBYTES"));
1013 
1014 	wim_timestamp_to_str(xml_get_timestamp_by_path(image_node,
1015 						       "CREATIONTIME"),
1016 			     timebuf, ARRAY_LEN(timebuf));
1017 	tprintf(T("Creation Time:          %"TS"\n"), timebuf);
1018 
1019 	wim_timestamp_to_str(xml_get_timestamp_by_path(image_node,
1020 						       "LASTMODIFICATIONTIME"),
1021 			     timebuf, ARRAY_LEN(timebuf));
1022 	tprintf(T("Last Modification Time: %"TS"\n"), timebuf);
1023 
1024 	print_windows_info(info, image_node);
1025 
1026 	text = xml_get_ttext_by_path(info, image_node, "FLAGS");
1027 	if (text)
1028 		tprintf(T("Flags:                  %"TS"\n"), text);
1029 
1030 	tprintf(T("WIMBoot compatible:     %"TS"\n"),
1031 		xml_get_number_by_path(image_node, "WIMBOOT") ?
1032 			T("yes") : T("no"));
1033 
1034 	tputchar('\n');
1035 }
1036 
1037 /*----------------------------------------------------------------------------*
1038  *                      Reading and writing the XML data                      *
1039  *----------------------------------------------------------------------------*/
1040 
1041 static int
image_node_get_index(xmlNode * node)1042 image_node_get_index(xmlNode *node)
1043 {
1044 	u64 v = node_get_number((const xmlNode *)xmlHasProp(node, "INDEX"), 10);
1045 	return min(v, INT_MAX);
1046 }
1047 
1048 /* Prepare the 'images' array from the XML document tree.  */
1049 static int
setup_images(struct wim_xml_info * info,xmlNode * root)1050 setup_images(struct wim_xml_info *info, xmlNode *root)
1051 {
1052 	xmlNode *child;
1053 	int index;
1054 	int max_index = 0;
1055 	int ret;
1056 
1057 	info->images = NULL;
1058 	info->image_count = 0;
1059 
1060 	node_for_each_child(root, child) {
1061 		if (!node_is_element(child, "IMAGE"))
1062 			continue;
1063 		index = image_node_get_index(child);
1064 		if (unlikely(index < 1 || info->image_count >= MAX_IMAGES))
1065 			goto err_indices;
1066 		max_index = max(max_index, index);
1067 		info->image_count++;
1068 	}
1069 	if (unlikely(max_index != info->image_count))
1070 		goto err_indices;
1071 	ret = WIMLIB_ERR_NOMEM;
1072 	info->images = CALLOC(info->image_count, sizeof(info->images[0]));
1073 	if (unlikely(!info->images))
1074 		goto err;
1075 	node_for_each_child(root, child) {
1076 		if (!node_is_element(child, "IMAGE"))
1077 			continue;
1078 		index = image_node_get_index(child);
1079 		if (unlikely(info->images[index - 1]))
1080 			goto err_indices;
1081 		info->images[index - 1] = child;
1082 	}
1083 	return 0;
1084 
1085 err_indices:
1086 	ERROR("The WIM file's XML document does not contain exactly one IMAGE "
1087 	      "element per image!");
1088 	ret = WIMLIB_ERR_XML;
1089 err:
1090 	FREE(info->images);
1091 	return ret;
1092 }
1093 
1094 /* Reads the XML data from a WIM file.  */
1095 int
read_wim_xml_data(WIMStruct * wim)1096 read_wim_xml_data(WIMStruct *wim)
1097 {
1098 	struct wim_xml_info *info;
1099 	void *buf;
1100 	size_t bufsize;
1101 	xmlDoc *doc;
1102 	xmlNode *root;
1103 	int ret;
1104 
1105 	/* Allocate the 'struct wim_xml_info'.  */
1106 	ret = WIMLIB_ERR_NOMEM;
1107 	info = alloc_wim_xml_info();
1108 	if (!info)
1109 		goto err;
1110 
1111 	/* Read the raw UTF-16LE bytes.  */
1112 	ret = wimlib_get_xml_data(wim, &buf, &bufsize);
1113 	if (ret)
1114 		goto err_free_info;
1115 
1116 	/* Parse the document with libxml2, creating the document tree.  */
1117 	doc = xmlReadMemory(buf, bufsize, NULL, "UTF-16LE", XML_PARSE_NONET);
1118 	FREE(buf);
1119 	buf = NULL;
1120 	if (!doc) {
1121 		ERROR("Unable to parse the WIM file's XML document!");
1122 		ret = WIMLIB_ERR_XML;
1123 		goto err_free_info;
1124 	}
1125 
1126 	/* Verify the root element.  */
1127 	root = xmlDocGetRootElement(doc);
1128 	if (!node_is_element(root, "WIM")) {
1129 		ERROR("The WIM file's XML document has an unexpected format!");
1130 		ret = WIMLIB_ERR_XML;
1131 		goto err_free_doc;
1132 	}
1133 
1134 	/* Verify the WIM file is not encrypted.  */
1135 	if (xml_get_node_by_path(root, "ESD/ENCRYPTED")) {
1136 		ret = WIMLIB_ERR_WIM_IS_ENCRYPTED;
1137 		goto err_free_doc;
1138 	}
1139 
1140 	/* Validate the image elements and set up the images[] array.  */
1141 	ret = setup_images(info, root);
1142 	if (ret)
1143 		goto err_free_doc;
1144 
1145 	/* Save the document and return.  */
1146 	info->doc = doc;
1147 	info->root = root;
1148 	wim->xml_info = info;
1149 	return 0;
1150 
1151 err_free_doc:
1152 	xmlFreeDoc(doc);
1153 err_free_info:
1154 	FREE(info);
1155 err:
1156 	return ret;
1157 }
1158 
1159 /* Swap the INDEX attributes of two IMAGE elements.  */
1160 static void
swap_index_attributes(xmlNode * image_node_1,xmlNode * image_node_2)1161 swap_index_attributes(xmlNode *image_node_1, xmlNode *image_node_2)
1162 {
1163 	xmlAttr *attr_1, *attr_2;
1164 
1165 	if (image_node_1 != image_node_2) {
1166 		attr_1 = unlink_index_attribute(image_node_1);
1167 		attr_2 = unlink_index_attribute(image_node_2);
1168 		xmlAddChild(image_node_1, (xmlNode *)attr_2);
1169 		xmlAddChild(image_node_2, (xmlNode *)attr_1);
1170 	}
1171 }
1172 
1173 static int
prepare_document_for_write(struct wim_xml_info * info,int image,u64 total_bytes,xmlNode ** orig_totalbytes_node_ret)1174 prepare_document_for_write(struct wim_xml_info *info, int image, u64 total_bytes,
1175 			   xmlNode **orig_totalbytes_node_ret)
1176 {
1177 	xmlNode *totalbytes_node = NULL;
1178 
1179 	/* Allocate the new TOTALBYTES element if needed.  */
1180 	if (total_bytes != WIM_TOTALBYTES_USE_EXISTING &&
1181 	    total_bytes != WIM_TOTALBYTES_OMIT) {
1182 		totalbytes_node = new_element_with_u64(NULL, "TOTALBYTES",
1183 						       total_bytes);
1184 		if (!totalbytes_node)
1185 			return WIMLIB_ERR_NOMEM;
1186 	}
1187 
1188 	/* Adjust the IMAGE elements if needed.  */
1189 	if (image != WIMLIB_ALL_IMAGES) {
1190 		/* We're writing a single image only.  Temporarily unlink all
1191 		 * other IMAGE elements from the document.  */
1192 		for (int i = 0; i < info->image_count; i++)
1193 			if (i + 1 != image)
1194 				xmlUnlinkNode(info->images[i]);
1195 
1196 		/* Temporarily set the INDEX attribute of the needed IMAGE
1197 		 * element to 1.  */
1198 		swap_index_attributes(info->images[0], info->images[image - 1]);
1199 	}
1200 
1201 	/* Adjust (add, change, or remove) the TOTALBYTES element if needed.  */
1202 	*orig_totalbytes_node_ret = NULL;
1203 	if (total_bytes != WIM_TOTALBYTES_USE_EXISTING) {
1204 		/* Unlink the previous TOTALBYTES element, if any.  */
1205 		*orig_totalbytes_node_ret = xml_get_node_by_path(info->root,
1206 								 "TOTALBYTES");
1207 		if (*orig_totalbytes_node_ret)
1208 			xmlUnlinkNode(*orig_totalbytes_node_ret);
1209 
1210 		/* Link in the new TOTALBYTES element, if any.  */
1211 		if (totalbytes_node)
1212 			xmlAddChild(info->root, totalbytes_node);
1213 	}
1214 	return 0;
1215 }
1216 
1217 static void
restore_document_after_write(struct wim_xml_info * info,int image,xmlNode * orig_totalbytes_node)1218 restore_document_after_write(struct wim_xml_info *info, int image,
1219 			     xmlNode *orig_totalbytes_node)
1220 {
1221 	/* Restore the IMAGE elements if needed.  */
1222 	if (image != WIMLIB_ALL_IMAGES) {
1223 		/* We wrote a single image only.  Re-link all other IMAGE
1224 		 * elements to the document.  */
1225 		for (int i = 0; i < info->image_count; i++)
1226 			if (i + 1 != image)
1227 				xmlAddChild(info->root, info->images[i]);
1228 
1229 		/* Restore the original INDEX attributes.  */
1230 		swap_index_attributes(info->images[0], info->images[image - 1]);
1231 	}
1232 
1233 	/* Restore the original TOTALBYTES element if needed.  */
1234 	if (orig_totalbytes_node)
1235 		node_replace_child_element(info->root, orig_totalbytes_node);
1236 }
1237 
1238 /*
1239  * Writes the XML data to a WIM file.
1240  *
1241  * 'image' specifies the image(s) to include in the XML data.  Normally it is
1242  * WIMLIB_ALL_IMAGES, but it can also be a 1-based image index.
1243  *
1244  * 'total_bytes' is the number to use in the top-level TOTALBYTES element, or
1245  * WIM_TOTALBYTES_USE_EXISTING to use the existing value from the XML document
1246  * (if any), or WIM_TOTALBYTES_OMIT to omit the TOTALBYTES element entirely.
1247  */
1248 int
write_wim_xml_data(WIMStruct * wim,int image,u64 total_bytes,struct wim_reshdr * out_reshdr,int write_resource_flags)1249 write_wim_xml_data(WIMStruct *wim, int image, u64 total_bytes,
1250 		   struct wim_reshdr *out_reshdr, int write_resource_flags)
1251 {
1252 	struct wim_xml_info *info = wim->xml_info;
1253 	long ret;
1254 	long ret2;
1255 	xmlBuffer *buffer;
1256 	xmlNode *orig_totalbytes_node;
1257 	xmlSaveCtxt *save_ctx;
1258 
1259 	/* Make any needed temporary changes to the document.  */
1260 	ret = prepare_document_for_write(info, image, total_bytes,
1261 					 &orig_totalbytes_node);
1262 	if (ret)
1263 		goto out;
1264 
1265 	/* Create an in-memory buffer to hold the encoded document.  */
1266 	ret = WIMLIB_ERR_NOMEM;
1267 	buffer = xmlBufferCreate();
1268 	if (!buffer)
1269 		goto out_restore_document;
1270 
1271 	/* Encode the document in UTF-16LE, with a byte order mark, and with no
1272 	 * XML declaration.  Some other WIM software requires all of these
1273 	 * characteristics.  */
1274 	ret = WIMLIB_ERR_NOMEM;
1275 	if (xmlBufferCat(buffer, "\xff\xfe"))
1276 		goto out_free_buffer;
1277 	save_ctx = xmlSaveToBuffer(buffer, "UTF-16LE", XML_SAVE_NO_DECL);
1278 	if (!save_ctx)
1279 		goto out_free_buffer;
1280 	ret = xmlSaveDoc(save_ctx, info->doc);
1281 	ret2 = xmlSaveClose(save_ctx);
1282 	if (ret < 0 || ret2 < 0) {
1283 		ERROR("Unable to serialize the WIM file's XML document!");
1284 		ret = WIMLIB_ERR_NOMEM;
1285 		goto out_free_buffer;
1286 	}
1287 
1288 	/* Write the XML data uncompressed.  Although wimlib can handle
1289 	 * compressed XML data, some other WIM software cannot.  */
1290 	ret = write_wim_resource_from_buffer(xmlBufferContent(buffer),
1291 					     xmlBufferLength(buffer),
1292 					     true,
1293 					     &wim->out_fd,
1294 					     WIMLIB_COMPRESSION_TYPE_NONE,
1295 					     0,
1296 					     out_reshdr,
1297 					     NULL,
1298 					     write_resource_flags);
1299 out_free_buffer:
1300 	xmlBufferFree(buffer);
1301 out_restore_document:
1302 	/* Revert any temporary changes we made to the document.  */
1303 	restore_document_after_write(info, image, orig_totalbytes_node);
1304 out:
1305 	return ret;
1306 }
1307 
1308 /*----------------------------------------------------------------------------*
1309  *                           Global setup functions                           *
1310  *----------------------------------------------------------------------------*/
1311 
/* One-time global setup for this file's use of libxml2: calls
 * xmlInitParser().  */
void
xml_global_init(void)
{
	xmlInitParser();
}
1317 
/* Global teardown counterpart of xml_global_init(): calls
 * xmlCleanupParser().  */
void
xml_global_cleanup(void)
{
	xmlCleanupParser();
}
1323 
1324 void
xml_set_memory_allocator(void * (* malloc_func)(size_t),void (* free_func)(void *),void * (* realloc_func)(void *,size_t))1325 xml_set_memory_allocator(void *(*malloc_func)(size_t),
1326 			 void (*free_func)(void *),
1327 			 void *(*realloc_func)(void *, size_t))
1328 {
1329 	xmlMemSetup(free_func, malloc_func, realloc_func, wimlib_strdup);
1330 }
1331 
1332 /*----------------------------------------------------------------------------*
1333  *                           Library API functions                            *
1334  *----------------------------------------------------------------------------*/
1335 
1336 WIMLIBAPI int
wimlib_get_xml_data(WIMStruct * wim,void ** buf_ret,size_t * bufsize_ret)1337 wimlib_get_xml_data(WIMStruct *wim, void **buf_ret, size_t *bufsize_ret)
1338 {
1339 	const struct wim_reshdr *xml_reshdr;
1340 
1341 	if (wim->filename == NULL && filedes_is_seekable(&wim->in_fd))
1342 		return WIMLIB_ERR_NO_FILENAME;
1343 
1344 	if (buf_ret == NULL || bufsize_ret == NULL)
1345 		return WIMLIB_ERR_INVALID_PARAM;
1346 
1347 	xml_reshdr = &wim->hdr.xml_data_reshdr;
1348 
1349 	*bufsize_ret = xml_reshdr->uncompressed_size;
1350 	return wim_reshdr_to_data(xml_reshdr, wim, buf_ret);
1351 }
1352 
1353 WIMLIBAPI int
wimlib_extract_xml_data(WIMStruct * wim,FILE * fp)1354 wimlib_extract_xml_data(WIMStruct *wim, FILE *fp)
1355 {
1356 	int ret;
1357 	void *buf;
1358 	size_t bufsize;
1359 
1360 	ret = wimlib_get_xml_data(wim, &buf, &bufsize);
1361 	if (ret)
1362 		return ret;
1363 
1364 	if (fwrite(buf, 1, bufsize, fp) != bufsize) {
1365 		ERROR_WITH_ERRNO("Failed to extract XML data");
1366 		ret = WIMLIB_ERR_WRITE;
1367 	}
1368 	FREE(buf);
1369 	return ret;
1370 }
1371 
1372 static bool
image_name_in_use(const WIMStruct * wim,const tchar * name,int excluded_image)1373 image_name_in_use(const WIMStruct *wim, const tchar *name, int excluded_image)
1374 {
1375 	const struct wim_xml_info *info = wim->xml_info;
1376 	const xmlChar *name_utf8;
1377 	bool found = false;
1378 
1379 	/* Any number of images can have "no name".  */
1380 	if (!name || !*name)
1381 		return false;
1382 
1383 	/* Check for images that have the specified name.  */
1384 	if (tstr_get_utf8(name, &name_utf8))
1385 		return false;
1386 	for (int i = 0; i < info->image_count && !found; i++) {
1387 		if (i + 1 == excluded_image)
1388 			continue;
1389 		found = xmlStrEqual(name_utf8, xml_get_text_by_path(
1390 						    info->images[i], "NAME"));
1391 	}
1392 	tstr_put_utf8(name_utf8);
1393 	return found;
1394 }
1395 
1396 WIMLIBAPI bool
wimlib_image_name_in_use(const WIMStruct * wim,const tchar * name)1397 wimlib_image_name_in_use(const WIMStruct *wim, const tchar *name)
1398 {
1399 	return image_name_in_use(wim, name, WIMLIB_NO_IMAGE);
1400 }
1401 
1402 WIMLIBAPI const tchar *
wimlib_get_image_name(const WIMStruct * wim,int image)1403 wimlib_get_image_name(const WIMStruct *wim, int image)
1404 {
1405 	const struct wim_xml_info *info = wim->xml_info;
1406 	const tchar *name;
1407 
1408 	if (image < 1 || image > info->image_count)
1409 		return NULL;
1410 	name = wimlib_get_image_property(wim, image, T("NAME"));
1411 	return name ? name : T("");
1412 }
1413 
1414 WIMLIBAPI const tchar *
wimlib_get_image_description(const WIMStruct * wim,int image)1415 wimlib_get_image_description(const WIMStruct *wim, int image)
1416 {
1417 	return wimlib_get_image_property(wim, image, T("DESCRIPTION"));
1418 }
1419 
1420 WIMLIBAPI const tchar *
wimlib_get_image_property(const WIMStruct * wim,int image,const tchar * property_name)1421 wimlib_get_image_property(const WIMStruct *wim, int image,
1422 			  const tchar *property_name)
1423 {
1424 	const xmlChar *name;
1425 	const tchar *value;
1426 	struct wim_xml_info *info = wim->xml_info;
1427 
1428 	if (!property_name || !*property_name)
1429 		return NULL;
1430 	if (image < 1 || image > info->image_count)
1431 		return NULL;
1432 	if (tstr_get_utf8(property_name, &name))
1433 		return NULL;
1434 	value = xml_get_ttext_by_path(info, info->images[image - 1], name);
1435 	tstr_put_utf8(name);
1436 	return value;
1437 }
1438 
1439 WIMLIBAPI int
wimlib_set_image_name(WIMStruct * wim,int image,const tchar * name)1440 wimlib_set_image_name(WIMStruct *wim, int image, const tchar *name)
1441 {
1442 	return wimlib_set_image_property(wim, image, T("NAME"), name);
1443 }
1444 
1445 WIMLIBAPI int
wimlib_set_image_descripton(WIMStruct * wim,int image,const tchar * description)1446 wimlib_set_image_descripton(WIMStruct *wim, int image, const tchar *description)
1447 {
1448 	return wimlib_set_image_property(wim, image, T("DESCRIPTION"), description);
1449 }
1450 
1451 WIMLIBAPI int
wimlib_set_image_flags(WIMStruct * wim,int image,const tchar * flags)1452 wimlib_set_image_flags(WIMStruct *wim, int image, const tchar *flags)
1453 {
1454 	return wimlib_set_image_property(wim, image, T("FLAGS"), flags);
1455 }
1456 
1457 WIMLIBAPI int
wimlib_set_image_property(WIMStruct * wim,int image,const tchar * property_name,const tchar * property_value)1458 wimlib_set_image_property(WIMStruct *wim, int image, const tchar *property_name,
1459 			  const tchar *property_value)
1460 {
1461 	const xmlChar *name;
1462 	struct wim_xml_info *info = wim->xml_info;
1463 	int ret;
1464 
1465 	if (!property_name || !*property_name)
1466 		return WIMLIB_ERR_INVALID_PARAM;
1467 
1468 	if (image < 1 || image > info->image_count)
1469 		return WIMLIB_ERR_INVALID_IMAGE;
1470 
1471 	if (!tstrcmp(property_name, T("NAME")) &&
1472 	    image_name_in_use(wim, property_value, image))
1473 		return WIMLIB_ERR_IMAGE_NAME_COLLISION;
1474 
1475 	ret = tstr_get_utf8(property_name, &name);
1476 	if (ret)
1477 		return ret;
1478 	ret = xml_set_ttext_by_path(info->images[image - 1], name, property_value);
1479 	tstr_put_utf8(name);
1480 	return ret;
1481 }
1482