1 
2  //
3  // XML storage C++ classes version 1.3
4  //
5  // Copyright (c) 2006, 2007, 2008, 2009 Martin Fuchs <martin-fuchs@gmx.net>
6  //
7 
8  /// \file xs-native.cpp
9  /// native internal XMLStorage parser
10 
11 
12 /*
13 
14   All rights reserved.
15 
16   Redistribution and use in source and binary forms, with or without
17   modification, are permitted provided that the following conditions are met:
18 
19   * Redistributions of source code must retain the above copyright
20 	notice, this list of conditions and the following disclaimer.
21   * Redistributions in binary form must reproduce the above copyright
22 	notice, this list of conditions and the following disclaimer in
23 	the documentation and/or other materials provided with the
24 	distribution.
25 
26   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
27   AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
28   IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
29   ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
30   LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
31   CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
32   SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
33   INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
34   CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
35   ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
36   POSSIBILITY OF SUCH DAMAGE.
37 
38 */
39 
#include <precomp.h>
#include <new>	// std::bad_alloc
41 
42 #ifndef XS_NO_COMMENT
43 #define XS_NO_COMMENT	// no #pragma comment(lib, ...) statements in .lib files to enable static linking
44 #endif
45 
46 //#include "xmlstorage.h"
47 
48 
49 #if !defined(XS_USE_EXPAT) && !defined(XS_USE_XERCES)
50 
51 namespace XMLStorage {
52 
53 
~XMLReaderBase()54 XMLReaderBase::~XMLReaderBase()
55 {
56 }
57 
58  /// read XML stream into XML tree below _pos
read()59 void XMLReaderBase::read()
60 {
61 	if (!parse()) {
62 		XMLError error;
63 
64 		error._message = "XML parsing error";
65 		//error._line = ;
66 		//error._column = ;
67 
68 		_errors.push_back(error);
69 	}
70 
71 	finish_read();
72 }
73 
74 
75  /// line buffer for XS-native parser
76 
77 struct Buffer
78 {
BufferXMLStorage::Buffer79 	Buffer()
80 	{
81 		_buffer = (char*) malloc(BUFFER_LEN);
82 		_len = BUFFER_LEN;
83 
84 		reset();
85 	}
86 
~BufferXMLStorage::Buffer87 	~Buffer()
88 	{
89 		free(_buffer);
90 	}
91 
resetXMLStorage::Buffer92 	void reset()
93 	{
94 		_wptr = _buffer;
95 		_buffer_str.erase();
96 	}
97 
appendXMLStorage::Buffer98 	void append(int c)
99 	{
100 		size_t wpos = _wptr-_buffer;
101 
102 		if (wpos >= _len) {
103 			_len <<= 1;
104 			_buffer = (char*) realloc(_buffer, _len);
105 			_wptr = _buffer + wpos;
106 		}
107 
108 		*_wptr++ = static_cast<char>(c);
109 	}
110 
strXMLStorage::Buffer111 	const std::string& str(bool utf8)	// returns UTF-8 encoded buffer content
112 	{
113 #if defined(_WIN32) && !defined(XS_STRING_UTF8)
114 		if (utf8)
115 #endif
116 			_buffer_str.assign(_buffer, _wptr-_buffer);
117 #if defined(_WIN32) && !defined(XS_STRING_UTF8)
118 		else
119 			_buffer_str = get_utf8(_buffer, _wptr-_buffer);
120 #endif
121 
122 		return _buffer_str;
123 	}
124 
lenXMLStorage::Buffer125 	size_t len() const
126 	{
127 		return _wptr - _buffer;
128 	}
129 
has_CDEndXMLStorage::Buffer130 	bool has_CDEnd() const
131 	{
132 		//if (_wptr-_buffer < 3)
133 		//	return false;
134 
135 		return !strncmp(_wptr-3, CDATA_END, 3);
136 	}
137 
get_tagXMLStorage::Buffer138 	XS_String get_tag() const
139 	{
140 		const char* p = _buffer_str.c_str();
141 
142 		if (*p == '<')
143 			++p;
144 
145 		if (*p == '/')
146 			++p;
147 
148 		const char* q = p;
149 
150 		if (*q == '?')
151 			++q;
152 
153 		q = get_xmlsym_end_utf8(q);
154 
155 #ifdef XS_STRING_UTF8
156 		return XS_String(p, q-p);
157 #else
158 		XS_String tag;
159 		assign_utf8(tag, p, q-p);
160 		return tag;
161 #endif
162 	}
163 
164 	 /// read attributes and values
get_attributesXMLStorage::Buffer165 	void get_attributes(XMLNode::AttributeMap& attributes) const
166 	{
167 		const char* p = _buffer_str.c_str();
168 
169 		 // find end of tag name
170 		if (*p == '<')
171 			++p;
172 
173 		if (*p == '/')
174 			++p;
175 		else if (*p == '?')
176 			++p;
177 
178 		p = get_xmlsym_end_utf8(p);
179 
180 		 // read attributes from buffer
181 		while(*p && *p!='>' && *p!='/') {
182 			while(isspace((unsigned char)*p))
183 				++p;
184 
185 			const char* attr_name = p;
186 
187 			p = get_xmlsym_end_utf8(p);
188 
189 			if (*p != '=')
190 				break;	//@TODO error handling
191 
192 			size_t attr_len = p - attr_name;
193 
194 			if (*++p!='"' && *p!='\'')
195 				break;	//@TODO error handling
196 
197 			char delim = *p;
198 			const char* value = ++p;
199 
200 			while(*p && *p!=delim)
201 				++p;
202 
203 			size_t value_len = p - value;
204 
205 			if (*p)
206 				++p;	// '"'
207 
208 #ifdef XS_STRING_UTF8
209 			XS_String name_str(attr_name, attr_len);
210 #else
211 			XS_String name_str;
212 			assign_utf8(name_str, attr_name, attr_len);
213 #endif
214 
215 			attributes[name_str] = DecodeXMLString(std::string(value, value_len));
216 		}
217 	}
218 
219 protected:
220 	char*	_buffer;
221 	char*	_wptr;
222 	size_t	_len;
223 	std::string	_buffer_str;	// UTF-8 encoded
224 };
225 
parse()226 bool XMLReaderBase::parse()
227 {
228 	Buffer buffer;
229 	int c = get();
230 	bool in_comment = false;
231 
232 	while(c != EOF) {
233 		if (in_comment || c=='<') {
234 			buffer.append(c);
235 
236 			 // read start or end tag
237 			for(;;) {
238 				c = get();
239 
240 				if (c == EOF)
241 					break;
242 
243 				buffer.append(c);
244 
245 				if (c == '>')
246 					break;
247 			}
248 
249 			const std::string& b = buffer.str(_utf8);
250 			const char* str = b.c_str();
251 
252 			if (in_comment || !strncmp(str+1, "!--", 3)) {
253 				 // XML comment
254 				DefaultHandler(b);
255 
256 				if (strcmp(str+b.length()-3, "-->"))
257 					in_comment = true;
258 				else
259 					in_comment = false;
260 
261 				c = get();
262 			} else if (str[1] == '/') {
263 				 // end tag
264 
265 				/*@TODO error handling
266 				const XS_String& tag = buffer.get_tag();
267 
268 					if (tag != last_opened_tag) {
269 						ERROR
270 					}
271 				*/
272 
273 				EndElementHandler();
274 
275 				c = get();
276 			} else if (str[1] == '?') {
277 				 // XML declaration
278 				const XS_String& tag = buffer.get_tag();
279 
280 				if (tag == "?xml") {
281 					XMLNode::AttributeMap attributes;
282 					buffer.get_attributes(attributes);
283 
284 					const std::string& version = attributes.get("version");
285 					const std::string& encoding = attributes.get("encoding");
286 
287 					int standalone;
288 					XMLNode::AttributeMap::const_iterator found =	// const_cast for ISO C++ compatibility error of GCC
289 							const_cast<const XMLNode::AttributeMap&>(attributes).find("standalone");
290 					if (found != attributes.end())
291 						standalone = !XS_icmp(found->second.c_str(), XS_TEXT("yes"));
292 					else
293 						standalone = -1;
294 
295 					XmlDeclHandler(version.empty()?NULL:version.c_str(), encoding.empty()?NULL:encoding.c_str(), standalone);
296 
297 					if (!encoding.empty() && !_stricmp(encoding.c_str(), "utf-8"))
298 						_utf8 = true;
299 
300 					c = eat_endl();
301 				} else if (tag == "?xml-stylesheet") {
302 					XMLNode::AttributeMap attributes;
303 					buffer.get_attributes(attributes);
304 
305 					StyleSheet stylesheet(attributes.get("href"), attributes.get("type"), !XS_icmp(attributes.get("alternate"), XS_TEXT("yes")));
306 					stylesheet._title = attributes.get("title");
307 					stylesheet._media = attributes.get("media");
308 					stylesheet._charset = attributes.get("charset");
309 
310 					_format._stylesheets.push_back(stylesheet);
311 
312 					c = eat_endl();
313 				} else {
314 					DefaultHandler(b);
315 					c = get();
316 				}
317 			} else if (str[1] == '!') {
318 				if (!strncmp(str+2, "DOCTYPE ", 8)) {
319 					_format._doctype.parse(str+10);
320 
321 					c = eat_endl();
322 				} else if (!strncmp(str+2, "[CDATA[", 7)) {	// see CDATA_START
323 					 // parse <![CDATA[ ... ]]> strings
324 					while(!buffer.has_CDEnd()) {
325 						c = get();
326 
327 						if (c == EOF)
328 							break;
329 
330 						buffer.append(c);
331 					}
332 
333 					DefaultHandler(buffer.str(_utf8));
334 
335 					c = get();
336 				}
337 			} else {
338 				 // start tag
339 				const XS_String& tag = buffer.get_tag();
340 
341 				if (!tag.empty()) {
342 				    XMLNode::AttributeMap attributes;
343 				    buffer.get_attributes(attributes);
344 
345 				    StartElementHandler(tag, attributes);
346 
347 				    if (str[b.length()-2] == '/')
348 					    EndElementHandler();
349 			    }
350 
351 				c = get();
352 			}
353 		} else {
354 			buffer.append(c);
355 
356 			 // read white space
357 			for(;;) {
358 				 // check for the encoding of the first line end
359 				if (!_endl_defined) {
360 					if (c == '\n') {
361 						_format._endl = "\n";
362 						_endl_defined = true;
363 					} else if (c == '\r') {
364 						_format._endl = "\r\n";
365 						_endl_defined = true;
366 					}
367 				}
368 
369 				c = get();
370 
371 				if (c == EOF)
372 					break;
373 
374 				if (c == '<')
375 					break;
376 
377 				buffer.append(c);
378 			}
379 
380 			DefaultHandler(buffer.str(_utf8));
381 		}
382 
383 		buffer.reset();
384 	}
385 
386 	return true; //TODO return false on invalid XML
387 }
388 
eat_endl()389 int XMLReaderBase::eat_endl()
390 {
391 	int c = get();
392 
393 	if (c == '\r')
394 		c = get();
395 
396 	if (c == '\n')
397 		c = get();
398 
399 	return c;
400 }
401 
402  /// return current parser position as string
get_position() const403 std::string XMLReaderBase::get_position() const
404 {
405 /*@TODO display parser position in case of errors
406 	int line = XML_GetCurrentLineNumber(_parser);
407 	int column = XML_GetCurrentColumnNumber(_parser);
408 
409 	std::ostringstream out;
410 	out << "(" << line << ") : [column " << column << "]";
411 
412 	return out.str();
413 */
414 	return "";
415 }
416 
417 
418 #ifdef XMLNODE_LOCATION
419 
get_location() const420 XMLLocation XMLReaderBase::get_location() const
421 {
422 	return XMLLocation();	//@TODO XMLLocation for XS-native
423 }
424 
str() const425 std::string XMLLocation::str() const
426 {
427 	return "";	//TODO
428 }
429 
430 #endif
431 
432 
433  /// store content, white space and comments
DefaultHandler(const std::string & s)434 void XMLReaderBase::DefaultHandler(const std::string& s)
435 {
436 	_content.append(s);
437 }
438 
439 
440 }	// namespace XMLStorage
441 
442 #endif // !defined(XS_USE_EXPAT) && !defined(XS_USE_XERCES)
443