1 2 // 3 // XML storage C++ classes version 1.3 4 // 5 // Copyright (c) 2006, 2007, 2008, 2009 Martin Fuchs <martin-fuchs@gmx.net> 6 // 7 8 /// \file xs-native.cpp 9 /// native internal XMLStorage parser 10 11 12 /* 13 14 All rights reserved. 15 16 Redistribution and use in source and binary forms, with or without 17 modification, are permitted provided that the following conditions are met: 18 19 * Redistributions of source code must retain the above copyright 20 notice, this list of conditions and the following disclaimer. 21 * Redistributions in binary form must reproduce the above copyright 22 notice, this list of conditions and the following disclaimer in 23 the documentation and/or other materials provided with the 24 distribution. 25 26 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 27 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 28 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 29 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE 30 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 31 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 32 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 33 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 34 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 35 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 36 POSSIBILITY OF SUCH DAMAGE. 37 38 */ 39 40 #include <precomp.h> 41 42 #ifndef XS_NO_COMMENT 43 #define XS_NO_COMMENT // no #pragma comment(lib, ...) statements in .lib files to enable static linking 44 #endif 45 46 //#include "xmlstorage.h" 47 48 49 #if !defined(XS_USE_EXPAT) && !defined(XS_USE_XERCES) 50 51 namespace XMLStorage { 52 53 54 XMLReaderBase::~XMLReaderBase() 55 { 56 } 57 58 /// read XML stream into XML tree below _pos 59 void XMLReaderBase::read() 60 { 61 if (!parse()) { 62 XMLError error; 63 64 error._message = "XML parsing error"; 65 //error._line = ; 66 //error._column = ; 67 68 _errors.push_back(error); 69 } 70 71 finish_read(); 72 } 73 74 75 /// line buffer for XS-native parser 76 77 struct Buffer 78 { 79 Buffer() 80 { 81 _buffer = (char*) malloc(BUFFER_LEN); 82 _len = BUFFER_LEN; 83 84 reset(); 85 } 86 87 ~Buffer() 88 { 89 free(_buffer); 90 } 91 92 void reset() 93 { 94 _wptr = _buffer; 95 _buffer_str.erase(); 96 } 97 98 void append(int c) 99 { 100 size_t wpos = _wptr-_buffer; 101 102 if (wpos >= _len) { 103 _len <<= 1; 104 _buffer = (char*) realloc(_buffer, _len); 105 _wptr = _buffer + wpos; 106 } 107 108 *_wptr++ = static_cast<char>(c); 109 } 110 111 const std::string& str(bool utf8) // returns UTF-8 encoded buffer content 112 { 113 #if defined(_WIN32) && !defined(XS_STRING_UTF8) 114 if (utf8) 115 #endif 116 _buffer_str.assign(_buffer, _wptr-_buffer); 117 #if defined(_WIN32) && !defined(XS_STRING_UTF8) 118 else 119 _buffer_str = get_utf8(_buffer, _wptr-_buffer); 120 #endif 121 122 return _buffer_str; 123 } 124 125 size_t len() const 126 { 127 return _wptr - _buffer; 128 } 129 130 bool has_CDEnd() const 131 { 132 //if (_wptr-_buffer < 3) 133 // return false; 134 135 return !strncmp(_wptr-3, CDATA_END, 3); 136 } 137 138 XS_String get_tag() const 139 { 140 const char* p = _buffer_str.c_str(); 141 142 if (*p == '<') 143 ++p; 144 145 if (*p == '/') 146 ++p; 147 148 const char* q = p; 149 150 if (*q == '?') 151 ++q; 152 153 q = get_xmlsym_end_utf8(q); 154 155 #ifdef XS_STRING_UTF8 156 return XS_String(p, q-p); 157 #else 158 XS_String tag; 159 assign_utf8(tag, p, q-p); 160 return tag; 161 #endif 162 } 163 164 /// read attributes and values 165 void get_attributes(XMLNode::AttributeMap& attributes) const 166 { 167 const char* p = _buffer_str.c_str(); 168 169 // find end of tag name 170 if (*p == '<') 171 ++p; 172 173 if (*p == '/') 174 ++p; 175 else if (*p == '?') 176 ++p; 177 178 p = get_xmlsym_end_utf8(p); 179 180 // read attributes from buffer 181 while(*p && *p!='>' && *p!='/') { 182 while(isspace((unsigned char)*p)) 183 ++p; 184 185 const char* attr_name = p; 186 187 p = get_xmlsym_end_utf8(p); 188 189 if (*p != '=') 190 break; //@TODO error handling 191 192 size_t attr_len = p - attr_name; 193 194 if (*++p!='"' && *p!='\'') 195 break; //@TODO error handling 196 197 char delim = *p; 198 const char* value = ++p; 199 200 while(*p && *p!=delim) 201 ++p; 202 203 size_t value_len = p - value; 204 205 if (*p) 206 ++p; // '"' 207 208 #ifdef XS_STRING_UTF8 209 XS_String name_str(attr_name, attr_len); 210 #else 211 XS_String name_str; 212 assign_utf8(name_str, attr_name, attr_len); 213 #endif 214 215 attributes[name_str] = DecodeXMLString(std::string(value, value_len)); 216 } 217 } 218 219 protected: 220 char* _buffer; 221 char* _wptr; 222 size_t _len; 223 std::string _buffer_str; // UTF-8 encoded 224 }; 225 226 bool XMLReaderBase::parse() 227 { 228 Buffer buffer; 229 int c = get(); 230 bool in_comment = false; 231 232 while(c != EOF) { 233 if (in_comment || c=='<') { 234 buffer.append(c); 235 236 // read start or end tag 237 for(;;) { 238 c = get(); 239 240 if (c == EOF) 241 break; 242 243 buffer.append(c); 244 245 if (c == '>') 246 break; 247 } 248 249 const std::string& b = buffer.str(_utf8); 250 const char* str = b.c_str(); 251 252 if (in_comment || !strncmp(str+1, "!--", 3)) { 253 // XML comment 254 DefaultHandler(b); 255 256 if (strcmp(str+b.length()-3, "-->")) 257 in_comment = true; 258 else 259 in_comment = false; 260 261 c = get(); 262 } else if (str[1] == '/') { 263 // end tag 264 265 /*@TODO error handling 266 const XS_String& tag = buffer.get_tag(); 267 268 if (tag != last_opened_tag) { 269 ERROR 270 } 271 */ 272 273 EndElementHandler(); 274 275 c = get(); 276 } else if (str[1] == '?') { 277 // XML declaration 278 const XS_String& tag = buffer.get_tag(); 279 280 if (tag == "?xml") { 281 XMLNode::AttributeMap attributes; 282 buffer.get_attributes(attributes); 283 284 const std::string& version = attributes.get("version"); 285 const std::string& encoding = attributes.get("encoding"); 286 287 int standalone; 288 XMLNode::AttributeMap::const_iterator found = // const_cast for ISO C++ compatibility error of GCC 289 const_cast<const XMLNode::AttributeMap&>(attributes).find("standalone"); 290 if (found != attributes.end()) 291 standalone = !XS_icmp(found->second.c_str(), XS_TEXT("yes")); 292 else 293 standalone = -1; 294 295 XmlDeclHandler(version.empty()?NULL:version.c_str(), encoding.empty()?NULL:encoding.c_str(), standalone); 296 297 if (!encoding.empty() && !_stricmp(encoding.c_str(), "utf-8")) 298 _utf8 = true; 299 300 c = eat_endl(); 301 } else if (tag == "?xml-stylesheet") { 302 XMLNode::AttributeMap attributes; 303 buffer.get_attributes(attributes); 304 305 StyleSheet stylesheet(attributes.get("href"), attributes.get("type"), !XS_icmp(attributes.get("alternate"), XS_TEXT("yes"))); 306 stylesheet._title = attributes.get("title"); 307 stylesheet._media = attributes.get("media"); 308 stylesheet._charset = attributes.get("charset"); 309 310 _format._stylesheets.push_back(stylesheet); 311 312 c = eat_endl(); 313 } else { 314 DefaultHandler(b); 315 c = get(); 316 } 317 } else if (str[1] == '!') { 318 if (!strncmp(str+2, "DOCTYPE ", 8)) { 319 _format._doctype.parse(str+10); 320 321 c = eat_endl(); 322 } else if (!strncmp(str+2, "[CDATA[", 7)) { // see CDATA_START 323 // parse <![CDATA[ ... ]]> strings 324 while(!buffer.has_CDEnd()) { 325 c = get(); 326 327 if (c == EOF) 328 break; 329 330 buffer.append(c); 331 } 332 333 DefaultHandler(buffer.str(_utf8)); 334 335 c = get(); 336 } 337 } else { 338 // start tag 339 const XS_String& tag = buffer.get_tag(); 340 341 if (!tag.empty()) { 342 XMLNode::AttributeMap attributes; 343 buffer.get_attributes(attributes); 344 345 StartElementHandler(tag, attributes); 346 347 if (str[b.length()-2] == '/') 348 EndElementHandler(); 349 } 350 351 c = get(); 352 } 353 } else { 354 buffer.append(c); 355 356 // read white space 357 for(;;) { 358 // check for the encoding of the first line end 359 if (!_endl_defined) { 360 if (c == '\n') { 361 _format._endl = "\n"; 362 _endl_defined = true; 363 } else if (c == '\r') { 364 _format._endl = "\r\n"; 365 _endl_defined = true; 366 } 367 } 368 369 c = get(); 370 371 if (c == EOF) 372 break; 373 374 if (c == '<') 375 break; 376 377 buffer.append(c); 378 } 379 380 DefaultHandler(buffer.str(_utf8)); 381 } 382 383 buffer.reset(); 384 } 385 386 return true; //TODO return false on invalid XML 387 } 388 389 int XMLReaderBase::eat_endl() 390 { 391 int c = get(); 392 393 if (c == '\r') 394 c = get(); 395 396 if (c == '\n') 397 c = get(); 398 399 return c; 400 } 401 402 /// return current parser position as string 403 std::string XMLReaderBase::get_position() const 404 { 405 /*@TODO display parser position in case of errors 406 int line = XML_GetCurrentLineNumber(_parser); 407 int column = XML_GetCurrentColumnNumber(_parser); 408 409 std::ostringstream out; 410 out << "(" << line << ") : [column " << column << "]"; 411 412 return out.str(); 413 */ 414 return ""; 415 } 416 417 418 #ifdef XMLNODE_LOCATION 419 420 XMLLocation XMLReaderBase::get_location() const 421 { 422 return XMLLocation(); //@TODO XMLLocation for XS-native 423 } 424 425 std::string XMLLocation::str() const 426 { 427 return ""; //TODO 428 } 429 430 #endif 431 432 433 /// store content, white space and comments 434 void XMLReaderBase::DefaultHandler(const std::string& s) 435 { 436 _content.append(s); 437 } 438 439 440 } // namespace XMLStorage 441 442 #endif // !defined(XS_USE_EXPAT) && !defined(XS_USE_XERCES) 443