1
2 //
3 // XML storage C++ classes version 1.3
4 //
5 // Copyright (c) 2006, 2007, 2008, 2009 Martin Fuchs <martin-fuchs@gmx.net>
6 //
7
8 /// \file xs-native.cpp
9 /// native internal XMLStorage parser
10
11
12 /*
13
14 All rights reserved.
15
16 Redistribution and use in source and binary forms, with or without
17 modification, are permitted provided that the following conditions are met:
18
19 * Redistributions of source code must retain the above copyright
20 notice, this list of conditions and the following disclaimer.
21 * Redistributions in binary form must reproduce the above copyright
22 notice, this list of conditions and the following disclaimer in
23 the documentation and/or other materials provided with the
24 distribution.
25
26 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
27 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
28 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
29 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
30 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
31 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
32 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
33 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
34 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
35 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
36 POSSIBILITY OF SUCH DAMAGE.
37
38 */
39
#include <precomp.h>

#include <new>	// std::bad_alloc
41
42 #ifndef XS_NO_COMMENT
43 #define XS_NO_COMMENT // no #pragma comment(lib, ...) statements in .lib files to enable static linking
44 #endif
45
46 //#include "xmlstorage.h"
47
48
49 #if !defined(XS_USE_EXPAT) && !defined(XS_USE_XERCES)
50
51 namespace XMLStorage {
52
53
~XMLReaderBase()54 XMLReaderBase::~XMLReaderBase()
55 {
56 }
57
58 /// read XML stream into XML tree below _pos
read()59 void XMLReaderBase::read()
60 {
61 if (!parse()) {
62 XMLError error;
63
64 error._message = "XML parsing error";
65 //error._line = ;
66 //error._column = ;
67
68 _errors.push_back(error);
69 }
70
71 finish_read();
72 }
73
74
75 /// line buffer for XS-native parser
76
77 struct Buffer
78 {
BufferXMLStorage::Buffer79 Buffer()
80 {
81 _buffer = (char*) malloc(BUFFER_LEN);
82 _len = BUFFER_LEN;
83
84 reset();
85 }
86
~BufferXMLStorage::Buffer87 ~Buffer()
88 {
89 free(_buffer);
90 }
91
resetXMLStorage::Buffer92 void reset()
93 {
94 _wptr = _buffer;
95 _buffer_str.erase();
96 }
97
appendXMLStorage::Buffer98 void append(int c)
99 {
100 size_t wpos = _wptr-_buffer;
101
102 if (wpos >= _len) {
103 _len <<= 1;
104 _buffer = (char*) realloc(_buffer, _len);
105 _wptr = _buffer + wpos;
106 }
107
108 *_wptr++ = static_cast<char>(c);
109 }
110
strXMLStorage::Buffer111 const std::string& str(bool utf8) // returns UTF-8 encoded buffer content
112 {
113 #if defined(_WIN32) && !defined(XS_STRING_UTF8)
114 if (utf8)
115 #endif
116 _buffer_str.assign(_buffer, _wptr-_buffer);
117 #if defined(_WIN32) && !defined(XS_STRING_UTF8)
118 else
119 _buffer_str = get_utf8(_buffer, _wptr-_buffer);
120 #endif
121
122 return _buffer_str;
123 }
124
lenXMLStorage::Buffer125 size_t len() const
126 {
127 return _wptr - _buffer;
128 }
129
has_CDEndXMLStorage::Buffer130 bool has_CDEnd() const
131 {
132 //if (_wptr-_buffer < 3)
133 // return false;
134
135 return !strncmp(_wptr-3, CDATA_END, 3);
136 }
137
get_tagXMLStorage::Buffer138 XS_String get_tag() const
139 {
140 const char* p = _buffer_str.c_str();
141
142 if (*p == '<')
143 ++p;
144
145 if (*p == '/')
146 ++p;
147
148 const char* q = p;
149
150 if (*q == '?')
151 ++q;
152
153 q = get_xmlsym_end_utf8(q);
154
155 #ifdef XS_STRING_UTF8
156 return XS_String(p, q-p);
157 #else
158 XS_String tag;
159 assign_utf8(tag, p, q-p);
160 return tag;
161 #endif
162 }
163
164 /// read attributes and values
get_attributesXMLStorage::Buffer165 void get_attributes(XMLNode::AttributeMap& attributes) const
166 {
167 const char* p = _buffer_str.c_str();
168
169 // find end of tag name
170 if (*p == '<')
171 ++p;
172
173 if (*p == '/')
174 ++p;
175 else if (*p == '?')
176 ++p;
177
178 p = get_xmlsym_end_utf8(p);
179
180 // read attributes from buffer
181 while(*p && *p!='>' && *p!='/') {
182 while(isspace((unsigned char)*p))
183 ++p;
184
185 const char* attr_name = p;
186
187 p = get_xmlsym_end_utf8(p);
188
189 if (*p != '=')
190 break; //@TODO error handling
191
192 size_t attr_len = p - attr_name;
193
194 if (*++p!='"' && *p!='\'')
195 break; //@TODO error handling
196
197 char delim = *p;
198 const char* value = ++p;
199
200 while(*p && *p!=delim)
201 ++p;
202
203 size_t value_len = p - value;
204
205 if (*p)
206 ++p; // '"'
207
208 #ifdef XS_STRING_UTF8
209 XS_String name_str(attr_name, attr_len);
210 #else
211 XS_String name_str;
212 assign_utf8(name_str, attr_name, attr_len);
213 #endif
214
215 attributes[name_str] = DecodeXMLString(std::string(value, value_len));
216 }
217 }
218
219 protected:
220 char* _buffer;
221 char* _wptr;
222 size_t _len;
223 std::string _buffer_str; // UTF-8 encoded
224 };
225
parse()226 bool XMLReaderBase::parse()
227 {
228 Buffer buffer;
229 int c = get();
230 bool in_comment = false;
231
232 while(c != EOF) {
233 if (in_comment || c=='<') {
234 buffer.append(c);
235
236 // read start or end tag
237 for(;;) {
238 c = get();
239
240 if (c == EOF)
241 break;
242
243 buffer.append(c);
244
245 if (c == '>')
246 break;
247 }
248
249 const std::string& b = buffer.str(_utf8);
250 const char* str = b.c_str();
251
252 if (in_comment || !strncmp(str+1, "!--", 3)) {
253 // XML comment
254 DefaultHandler(b);
255
256 if (strcmp(str+b.length()-3, "-->"))
257 in_comment = true;
258 else
259 in_comment = false;
260
261 c = get();
262 } else if (str[1] == '/') {
263 // end tag
264
265 /*@TODO error handling
266 const XS_String& tag = buffer.get_tag();
267
268 if (tag != last_opened_tag) {
269 ERROR
270 }
271 */
272
273 EndElementHandler();
274
275 c = get();
276 } else if (str[1] == '?') {
277 // XML declaration
278 const XS_String& tag = buffer.get_tag();
279
280 if (tag == "?xml") {
281 XMLNode::AttributeMap attributes;
282 buffer.get_attributes(attributes);
283
284 const std::string& version = attributes.get("version");
285 const std::string& encoding = attributes.get("encoding");
286
287 int standalone;
288 XMLNode::AttributeMap::const_iterator found = // const_cast for ISO C++ compatibility error of GCC
289 const_cast<const XMLNode::AttributeMap&>(attributes).find("standalone");
290 if (found != attributes.end())
291 standalone = !XS_icmp(found->second.c_str(), XS_TEXT("yes"));
292 else
293 standalone = -1;
294
295 XmlDeclHandler(version.empty()?NULL:version.c_str(), encoding.empty()?NULL:encoding.c_str(), standalone);
296
297 if (!encoding.empty() && !_stricmp(encoding.c_str(), "utf-8"))
298 _utf8 = true;
299
300 c = eat_endl();
301 } else if (tag == "?xml-stylesheet") {
302 XMLNode::AttributeMap attributes;
303 buffer.get_attributes(attributes);
304
305 StyleSheet stylesheet(attributes.get("href"), attributes.get("type"), !XS_icmp(attributes.get("alternate"), XS_TEXT("yes")));
306 stylesheet._title = attributes.get("title");
307 stylesheet._media = attributes.get("media");
308 stylesheet._charset = attributes.get("charset");
309
310 _format._stylesheets.push_back(stylesheet);
311
312 c = eat_endl();
313 } else {
314 DefaultHandler(b);
315 c = get();
316 }
317 } else if (str[1] == '!') {
318 if (!strncmp(str+2, "DOCTYPE ", 8)) {
319 _format._doctype.parse(str+10);
320
321 c = eat_endl();
322 } else if (!strncmp(str+2, "[CDATA[", 7)) { // see CDATA_START
323 // parse <![CDATA[ ... ]]> strings
324 while(!buffer.has_CDEnd()) {
325 c = get();
326
327 if (c == EOF)
328 break;
329
330 buffer.append(c);
331 }
332
333 DefaultHandler(buffer.str(_utf8));
334
335 c = get();
336 }
337 } else {
338 // start tag
339 const XS_String& tag = buffer.get_tag();
340
341 if (!tag.empty()) {
342 XMLNode::AttributeMap attributes;
343 buffer.get_attributes(attributes);
344
345 StartElementHandler(tag, attributes);
346
347 if (str[b.length()-2] == '/')
348 EndElementHandler();
349 }
350
351 c = get();
352 }
353 } else {
354 buffer.append(c);
355
356 // read white space
357 for(;;) {
358 // check for the encoding of the first line end
359 if (!_endl_defined) {
360 if (c == '\n') {
361 _format._endl = "\n";
362 _endl_defined = true;
363 } else if (c == '\r') {
364 _format._endl = "\r\n";
365 _endl_defined = true;
366 }
367 }
368
369 c = get();
370
371 if (c == EOF)
372 break;
373
374 if (c == '<')
375 break;
376
377 buffer.append(c);
378 }
379
380 DefaultHandler(buffer.str(_utf8));
381 }
382
383 buffer.reset();
384 }
385
386 return true; //TODO return false on invalid XML
387 }
388
eat_endl()389 int XMLReaderBase::eat_endl()
390 {
391 int c = get();
392
393 if (c == '\r')
394 c = get();
395
396 if (c == '\n')
397 c = get();
398
399 return c;
400 }
401
402 /// return current parser position as string
get_position() const403 std::string XMLReaderBase::get_position() const
404 {
405 /*@TODO display parser position in case of errors
406 int line = XML_GetCurrentLineNumber(_parser);
407 int column = XML_GetCurrentColumnNumber(_parser);
408
409 std::ostringstream out;
410 out << "(" << line << ") : [column " << column << "]";
411
412 return out.str();
413 */
414 return "";
415 }
416
417
418 #ifdef XMLNODE_LOCATION
419
get_location() const420 XMLLocation XMLReaderBase::get_location() const
421 {
422 return XMLLocation(); //@TODO XMLLocation for XS-native
423 }
424
str() const425 std::string XMLLocation::str() const
426 {
427 return ""; //TODO
428 }
429
430 #endif
431
432
433 /// store content, white space and comments
DefaultHandler(const std::string & s)434 void XMLReaderBase::DefaultHandler(const std::string& s)
435 {
436 _content.append(s);
437 }
438
439
440 } // namespace XMLStorage
441
442 #endif // !defined(XS_USE_EXPAT) && !defined(XS_USE_XERCES)
443