1 /* Copyright (C) 2009 Wildfire Games.
2  * This file is part of 0 A.D.
3  *
4  * 0 A.D. is free software: you can redistribute it and/or modify
5  * it under the terms of the GNU General Public License as published by
6  * the Free Software Foundation, either version 2 of the License, or
7  * (at your option) any later version.
8  *
9  * 0 A.D. is distributed in the hope that it will be useful,
10  * but WITHOUT ANY WARRANTY; without even the implied warranty of
11  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
12  * GNU General Public License for more details.
13  *
14  * You should have received a copy of the GNU General Public License
15  * along with 0 A.D.  If not, see <http://www.gnu.org/licenses/>.
16  */
17 
18 #include "AtlasObject.h"
19 #include "AtlasObjectImpl.h"
20 
21 #include <cassert>
22 #include <cstring>
23 
24 #include <memory>
25 #include <fstream>
26 
27 #include <libxml/parser.h>
28 
// UTF conversion code adapted from http://www.unicode.org/Public/PROGRAMS/CVTUTF/ConvertUTF.c

// Marker bits OR'ed into the first byte of an encoded sequence, indexed by the
// total number of bytes in that sequence (see toXmlChar). Index 0 is unused.
static const unsigned char firstByteMark[7] = { 0x00, 0x00, 0xC0, 0xE0, 0xF0, 0xF8, 0xFC };
// Number of bytes that follow a given first byte, indexed by the value of that
// first byte (see fromXmlChar). Bytes below 0xC0 map to 0.
static const char trailingBytesForUTF8[256] = {
	0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
	0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
	0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
	0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
	0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
	0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
	1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
	2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2, 3,3,3,3,3,3,3,3,4,4,4,4,5,5,5,5 };
// Value subtracted from the accumulated sum of a sequence's raw bytes, indexed
// by the number of trailing bytes. fromXmlChar adds the bytes without masking
// off their marker bits, and this subtraction removes those bits in one step.
static const unsigned long offsetsFromUTF8[6] = {
	0x00000000UL, 0x00003080UL, 0x000E2080UL,
	0x03C82080UL, 0xFA082080UL, 0x82082080UL };
43 class toXmlChar
44 {
45 public:
toXmlChar(const std::wstring & str)46 	toXmlChar(const std::wstring& str)
47 	{
48 		for (size_t i = 0; i < str.length(); ++i)
49 		{
50 			unsigned short bytesToWrite;
51 			wchar_t ch = str[i];
52 
53 			if (ch < 0x80) bytesToWrite = 1;
54 			else if (ch < 0x800) bytesToWrite = 2;
55 			else if (ch < 0x10000) bytesToWrite = 3;
56 			else if (ch < 0x110000) bytesToWrite = 4;
57 			else bytesToWrite = 3, ch = 0xFFFD; // replacement character
58 
59 			char buf[4];
60 			char* target = &buf[bytesToWrite];
61 			// GCC sometimes warns "array subscript is above array bounds [-Warray-bounds]"
62 			// for the above line, which is a false positive - the C++ standard allows a
63 			// pointer to just after the last element in an array, as long as it's not
64 			// dereferenced (which it isn't here)
65 			switch (bytesToWrite)
66 			{
67 			case 4: *--target = ((ch | 0x80) & 0xBF); ch >>= 6;
68 			case 3: *--target = ((ch | 0x80) & 0xBF); ch >>= 6;
69 			case 2: *--target = ((ch | 0x80) & 0xBF); ch >>= 6;
70 			case 1: *--target = (char)(ch | firstByteMark[bytesToWrite]);
71 			}
72 			data += std::string(buf, bytesToWrite);
73 		}
74 	}
operator const xmlChar*()75 	operator const xmlChar*()
76 	{
77 		return (const xmlChar*)data.c_str();
78 	}
79 
80 private:
81 	std::string data;
82 };
83 
fromXmlChar(const xmlChar * str)84 std::wstring fromXmlChar(const xmlChar* str)
85 {
86 	std::wstring result;
87 	const xmlChar* source = str;
88 	const xmlChar* sourceEnd = str + strlen((const char*)str);
89 	while (source < sourceEnd)
90 	{
91 		unsigned long ch = 0;
92 		int extraBytesToRead = trailingBytesForUTF8[*source];
93 		assert(source + extraBytesToRead < sourceEnd);
94 		switch (extraBytesToRead)
95 		{
96 		case 5: ch += *source++; ch <<= 6;
97 		case 4: ch += *source++; ch <<= 6;
98 		case 3: ch += *source++; ch <<= 6;
99 		case 2: ch += *source++; ch <<= 6;
100 		case 1: ch += *source++; ch <<= 6;
101 		case 0: ch += *source++;
102 		}
103 		ch -= offsetsFromUTF8[extraBytesToRead];
104 		// Make sure it fits in a 16-bit wchar_t
105 		if (ch > 0xFFFF)
106 			ch = 0xFFFD;
107 
108 		result += (wchar_t)ch;
109 	}
110 	return result;
111 }
112 
113 // TODO: replace most of the asserts below (e.g. for when it fails to load
114 // a file) with some proper logging/reporting system
115 
116 static AtSmartPtr<AtNode> ConvertNode(xmlNodePtr node);
117 
LoadFromXML(const std::string & xml)118 AtObj AtlasObject::LoadFromXML(const std::string& xml)
119 {
120 	xmlDocPtr doc = xmlReadMemory(xml.c_str(), xml.length(), "noname.xml", NULL, XML_PARSE_NONET|XML_PARSE_NOCDATA);
121 	if (doc == NULL)
122 		return AtObj();
123 		// TODO: Need to report the error message somehow
124 
125 	xmlNodePtr root = xmlDocGetRootElement(doc);
126 	AtObj obj;
127 	obj.p = ConvertNode(root);
128 
129 	AtObj rootObj;
130 	rootObj.set((const char*)root->name, obj);
131 
132 	xmlFreeDoc(doc);
133 
134 	return rootObj;
135 }
136 
137 // Convert from a DOMElement to an AtNode
ConvertNode(xmlNodePtr node)138 static AtSmartPtr<AtNode> ConvertNode(xmlNodePtr node)
139 {
140 	AtSmartPtr<AtNode> obj (new AtNode());
141 
142 	// Loop through all attributes
143 	for (xmlAttrPtr cur_attr = node->properties; cur_attr; cur_attr = cur_attr->next)
144 	{
145 		std::string name ("@");
146 		name += (const char*)cur_attr->name;
147 		xmlChar* content = xmlNodeGetContent(cur_attr->children);
148 		std::wstring value (fromXmlChar(content));
149 		xmlFree(content);
150 
151 		AtNode* newNode = new AtNode(value.c_str());
152 		obj->children.insert(AtNode::child_pairtype(
153 			name.c_str(), AtNode::Ptr(newNode)
154 		));
155 	}
156 
157 	// Loop through all child elements
158 	for (xmlNodePtr cur_node = node->children; cur_node; cur_node = cur_node->next)
159 	{
160 		if (cur_node->type == XML_ELEMENT_NODE)
161 		{
162 			obj->children.insert(AtNode::child_pairtype(
163 				(const char*)cur_node->name, ConvertNode(cur_node)
164 			));
165 		}
166 		else if (cur_node->type == XML_TEXT_NODE)
167 		{
168 			xmlChar* content = xmlNodeGetContent(cur_node);
169 			std::wstring value (fromXmlChar(content));
170 			xmlFree(content);
171 			obj->value += value;
172 		}
173 	}
174 
175 	// Trim whitespace surrounding the string value
176 	const std::wstring whitespace = L" \t\r\n";
177 	size_t first = obj->value.find_first_not_of(whitespace);
178 	if (first == std::wstring::npos)
179 		obj->value = L"";
180 	else
181 	{
182 		size_t last = obj->value.find_last_not_of(whitespace);
183 		obj->value = obj->value.substr(first, 1+last-first);
184 	}
185 
186 	return obj;
187 }
188 
189 // Build a DOM node from a given AtNode
BuildDOMNode(xmlDocPtr doc,xmlNodePtr node,AtNode::Ptr p)190 static void BuildDOMNode(xmlDocPtr doc, xmlNodePtr node, AtNode::Ptr p)
191 {
192 	if (p)
193 	{
194 		if (p->value.length())
195 			xmlNodeAddContent(node, toXmlChar(p->value));
196 
197 		for (AtNode::child_maptype::const_iterator it = p->children.begin(); it != p->children.end(); ++it)
198 		{
199 			// Test for attribute nodes (whose names start with @)
200 			if (it->first.length() && it->first[0] == '@')
201 			{
202 				assert(it->second);
203 				assert(it->second->children.empty());
204 				xmlNewProp(node, (const xmlChar*)it->first.c_str()+1, toXmlChar(it->second->value));
205 			}
206 			else
207 			{
208 				if (node == NULL) // first node in the document - needs to be made the root node
209 				{
210 					xmlNodePtr root = xmlNewNode(NULL, (const xmlChar*)it->first.c_str());
211 					xmlDocSetRootElement(doc, root);
212 					BuildDOMNode(doc, root, it->second);
213 				}
214 				else
215 				{
216 					xmlNodePtr child = xmlNewChild(node, NULL, (const xmlChar*)it->first.c_str(), NULL);
217 					BuildDOMNode(doc, child, it->second);
218 				}
219 			}
220 		}
221 	}
222 }
223 
SaveToXML(AtObj & obj)224 std::string AtlasObject::SaveToXML(AtObj& obj)
225 {
226 	if (!obj.p || obj.p->children.size() != 1)
227 	{
228 		assert(! "SaveToXML: root must only have one child");
229 		return "";
230 	}
231 
232 	AtNode::Ptr firstChild (obj.p->children.begin()->second);
233 
234 	xmlDocPtr doc = xmlNewDoc((const xmlChar*)"1.0");
235 	BuildDOMNode(doc, NULL, obj.p);
236 
237 	xmlChar* buf;
238 	int size;
239 	xmlDocDumpFormatMemoryEnc(doc, &buf, &size, "utf-8", 1);
240 
241 	std::string ret((const char*)buf, size);
242 
243 	xmlFree(buf);
244 	xmlFreeDoc(doc);
245 
246 	// TODO: handle errors better
247 
248 	return ret;
249 }
250