1 /*************************************************************************** 2 ulxr_wbxmlparse.h - parse wbxml files 3 ------------------- 4 begin : Fri Jan 09 2004 5 copyright : (C) 2002-2007 by Ewald Arnold 6 email : ulxmlrpcpp@ewald-arnold.de 7 8 $Id: ulxr_wbxmlparse.h 940 2006-12-30 18:22:05Z ewald-arnold $ 9 10 ***************************************************************************/ 11 12 /************************************************************************** 13 * 14 * This program is free software; you can redistribute it and/or modify 15 * it under the terms of the GNU Lesser General Public License as 16 * published by the Free Software Foundation; either version 2 of the License, 17 * or (at your option) any later version. 18 * 19 * This program is distributed in the hope that it will be useful, 20 * but WITHOUT ANY WARRANTY; without even the implied warranty of 21 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 22 * GNU General Public License for more details. 23 * 24 * You should have received a copy of the GNU Lesser General Public License 25 * along with this program; if not, write to the Free Software 26 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. 27 * 28 ***************************************************************************/ 29 30 #ifndef ULXR_WBXMLPARSE 31 #define ULXR_WBXMLPARSE 32 33 #include <ulxmlrpcpp/ulxmlrpcpp.h> // always first header 34 35 #include <vector> 36 37 #include <ulxmlrpcpp/ulxr_xmlparse_base.h> 38 39 40 namespace ulxr { 41 42 43 /** Base class for binary XML parsing. 44 * This is a very limitited implementation which only handles a subset 45 * which is enough for ulxmlrpcpp. 46 * @ingroup grp_ulxr_parser 47 */ 48 class ULXR_API_DECL0 WbXmlParser : public XmlParserBase 49 { 50 protected: 51 52 /** Destroys the parser. 53 * The derived class is responsible for cleaning up stack<ParserState*>. 54 */ 55 virtual ~WbXmlParser(); 56 57 public: 58 59 /** Constructs a parser. 60 */ 61 WbXmlParser(); 62 63 /** Parse a piece of xml data. 64 * @param buffer pointer start of next data chunk 65 * @param len len of this chunk 66 * @param isFinal true: last call to parser 67 * @return error condition, 0 = ok 68 */ 69 virtual int parse(const char* buffer, int len, int isFinal); 70 71 /** Gets the code for the current error. 72 * @return error code 73 */ 74 virtual unsigned getErrorCode() const; 75 76 /** Gets the description for an error code 77 * @param code error code 78 * @return pointer to description 79 */ 80 virtual CppString getErrorString(unsigned code) const; 81 82 /** Gets the line number in the xml data. 83 * Because the binary data has nothing like a line number, the current number 84 * of start tags is returned. 85 * @return number of start tags 86 */ 87 virtual int getCurrentLineNumber() const; 88 89 /** Maps error codes to xml-rpc error codes. 90 * Compatibility function due to expat, always return input. 91 * @param xpatcode error code 92 * @return the according xml-rpc error 93 */ 94 virtual int mapToFaultCode(int xpatcode) const; 95 96 enum WbXmlToken 97 { 98 wbxml_SWITCH_PAGE = 0x00, //!< switch code page followed by a byte 99 wbxml_END = 0x01, //!< end of attribut list or element 100 wbxml_ENTITY = 0x02, //!< followed by a multibyte UCS-4 entity 101 wbxml_PI = 0x43, //!< Processing instruction 102 wbxml_OPAQUE = 0xC3, //!< opaque data followed by multibyte-length and data 103 104 wbxml_LITERAL = 0x04, //!< unknown empty tag or attribut name, followed by table index 105 wbxml_LITERAL_C = 0x44, //!< unknown tag with content, followed by table index 106 wbxml_LITERAL_A = 0x84, //!< unknown tag with attributes, followed by table index 107 wbxml_LITERAL_AC = 0xC4, //!< unknown tag with content and attributes, followed by table index 108 109 wbxml_EXT_I_0 = 0x40, //!< inline string extension 0 followed by string + terminator 110 wbxml_EXT_I_1 = 0x41, //!< inline string extension 1 followed by string + terminator 111 wbxml_EXT_I_2 = 0x42, //!< inline string extension 2 followed by string + terminator 112 113 wbxml_EXT_T_0 = 0x80, //!< inline integer extension 0 followed by multibyte 114 wbxml_EXT_T_1 = 0x81, //!< inline integer extension 1 followed by multibyte 115 wbxml_EXT_T_2 = 0x82, //!< inline integer extension 2 followed by multibyte 116 117 wbxml_EXT_0 = 0xC0, //!< single byte extension 0 118 wbxml_EXT_1 = 0xC1, //!< single byte extension 1 119 wbxml_EXT_2 = 0xC2, //!< single byte extension 2 120 121 wbxml_STR_I = 0x03, //!< followed by inline string + terminator 122 wbxml_STR_T = 0x83, //!< string table index followed by multibyte 123 124 wbxml_TAG_FIRST = 0x05, //!< first value for empty elements 125 wbxml_TAG_LAST = 0x3f, //!< last value for empty elements 126 wbxml_TAG_C_FIRST = 0x45, //!< first value for elements with content 127 wbxml_TAG_C_LAST = 0x7F, //!< last value for elements with content 128 wbxml_TAG_A_FIRST = 0x85, //!< first value for elements without content but with attributes 129 wbxml_TAG_A_LAST = 0xBF, //!< last value for elements without content but with attributes 130 wbxml_TAG_AC_FIRST = 0xC5, //!< first value for elements with content and attributes 131 wbxml_TAG_AC_LAST = 0xFF //!< last value for elements with content and attributes 132 }; 133 134 static const char wbxml_START_SEQ_STR[]; 135 static const unsigned wbxml_START_SEQ_LEN; 136 137 protected: 138 139 class Token; 140 class Attribute; 141 142 typedef std::vector<Attribute> Attributes; 143 144 /** Tests if the current opening tag is to be parsed by this 145 * inheritance level or by the parent. 146 * @param token current well known token 147 * @param attr tag attributes 148 * @return true: element has been handled 149 */ 150 bool testStartElement(unsigned token, const Attributes &attr); 151 152 /** C++ callback for an opening XML tag. 153 * @param token current literal tag 154 * @param attr tag attributes 155 */ 156 virtual void startElement(unsigned token, const Attributes &attr) = 0; 157 158 /** C++ callback for a closing XML tag. 159 * @return true: element has been handled 160 */ 161 bool testEndElement(); 162 163 /** C++ callback for a closing XML tag. 164 */ 165 virtual void endElement() = 0; 166 167 /** Parses the content of the current xml element. 168 * @param s the current chunk of text 169 */ 170 virtual void charData(const std::string &s); 171 172 private: 173 174 void init(); 175 176 struct ULXR_API_DECL0 WbXmlState 177 { 178 WbXmlState(unsigned state); 179 180 unsigned state; 181 std::string current_str; 182 }; 183 184 std::stack<WbXmlState> wbxmlstates; 185 186 unsigned wb_version; 187 unsigned wb_pubId; 188 unsigned wb_charset; 189 190 unsigned lastErrorCode; 191 unsigned tag_count; 192 }; 193 194 195 class ULXR_API_DECL0 WbXmlParser::Attribute 196 { 197 public: 198 199 Attribute(const CppString &name, const CppString &value); 200 201 CppString getName() const; 202 203 CppString getValue() const; 204 205 private: 206 207 CppString attName; 208 CppString attValue; 209 }; 210 211 212 #define ULXR_MB_INT_14bit(x) (unsigned char) ((x >> 7) & 0x7F | 0x80) \ 213 ,(unsigned char) (x & 0x7F) 214 215 216 } // namespace ulxr 217 218 219 #endif // ULXR_WBXMLPARSE 220