1 /***************************************************************************
2                    ulxr_wbxmlparse.h  -  parse wbxml files
3                              -------------------
4     begin                : Fri Jan 09 2004
5     copyright            : (C) 2002-2007 by Ewald Arnold
6     email                : ulxmlrpcpp@ewald-arnold.de
7 
8     $Id: ulxr_wbxmlparse.h 940 2006-12-30 18:22:05Z ewald-arnold $
9 
10  ***************************************************************************/
11 
12 /**************************************************************************
13  *
14  * This program is free software; you can redistribute it and/or modify
15  * it under the terms of the GNU Lesser General Public License as
16  * published by the Free Software Foundation; either version 2 of the License,
17  * or (at your option) any later version.
18  *
19  * This program is distributed in the hope that it will be useful,
20  * but WITHOUT ANY WARRANTY; without even the implied warranty of
21  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
22  * GNU General Public License for more details.
23  *
24  * You should have received a copy of the GNU Lesser General Public License
25  * along with this program; if not, write to the Free Software
26  * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
27  *
28  ***************************************************************************/
29 
30 #ifndef ULXR_WBXMLPARSE
31 #define ULXR_WBXMLPARSE
32 
#include <ulxmlrpcpp/ulxmlrpcpp.h>  // always first header

#include <stack>
#include <string>
#include <vector>

#include <ulxmlrpcpp/ulxr_xmlparse_base.h>
38 
39 
40 namespace ulxr {
41 
42 
43 /** Base class for binary XML parsing.
44   * This is a very limitited implementation which only handles a subset
45   * which is enough for ulxmlrpcpp.
46   * @ingroup grp_ulxr_parser
47   */
48 class ULXR_API_DECL0 WbXmlParser : public XmlParserBase
49 {
50  protected:
51 
52  /** Destroys the parser.
53    * The derived class is responsible for cleaning up stack<ParserState*>.
54    */
55    virtual ~WbXmlParser();
56 
57  public:
58 
59  /** Constructs a parser.
60    */
61    WbXmlParser();
62 
63   /** Parse a piece of xml data.
64     * @param buffer   pointer start of next data chunk
65     * @param len      len of this chunk
66     * @param isFinal  true: last call to parser
67     * @return error condition, 0 = ok
68     */
69     virtual int parse(const char* buffer, int len, int isFinal);
70 
71   /** Gets the code for the current error.
72     * @return error code
73     */
74     virtual unsigned getErrorCode() const;
75 
76   /** Gets the description for an error code
77     * @param code  error code
78     * @return  pointer to description
79     */
80     virtual CppString getErrorString(unsigned code) const;
81 
82   /** Gets the line number in the xml data.
83     * Because the binary data has nothing like a line number, the current number
84     * of start tags is returned.
85     * @return  number of start tags
86     */
87     virtual int getCurrentLineNumber() const;
88 
89   /** Maps error codes to xml-rpc error codes.
90     * Compatibility function due to expat, always return input.
91     * @param  xpatcode   error code
92     * @return  the according xml-rpc error
93     */
94     virtual int mapToFaultCode(int xpatcode) const;
95 
96     enum WbXmlToken
97     {
98       wbxml_SWITCH_PAGE  = 0x00,   //!< switch code page followed by a byte
99       wbxml_END          = 0x01,   //!< end of attribut list or element
100       wbxml_ENTITY       = 0x02,   //!< followed by a multibyte UCS-4 entity
101       wbxml_PI           = 0x43,   //!< Processing instruction
102       wbxml_OPAQUE       = 0xC3,   //!< opaque data followed by multibyte-length and data
103 
104       wbxml_LITERAL      = 0x04,   //!< unknown empty tag or attribut name, followed by table index
105       wbxml_LITERAL_C    = 0x44,   //!< unknown tag with content, followed by table index
106       wbxml_LITERAL_A    = 0x84,   //!< unknown tag with attributes, followed by table index
107       wbxml_LITERAL_AC   = 0xC4,   //!< unknown tag with content and attributes, followed by table index
108 
109       wbxml_EXT_I_0      = 0x40,   //!< inline string extension 0 followed by string + terminator
110       wbxml_EXT_I_1      = 0x41,   //!< inline string extension 1 followed by string + terminator
111       wbxml_EXT_I_2      = 0x42,   //!< inline string extension 2 followed by string + terminator
112 
113       wbxml_EXT_T_0      = 0x80,   //!< inline integer extension 0 followed by multibyte
114       wbxml_EXT_T_1      = 0x81,   //!< inline integer extension 1 followed by multibyte
115       wbxml_EXT_T_2      = 0x82,   //!< inline integer extension 2 followed by multibyte
116 
117       wbxml_EXT_0        = 0xC0,   //!< single byte extension 0
118       wbxml_EXT_1        = 0xC1,   //!< single byte extension 1
119       wbxml_EXT_2        = 0xC2,   //!< single byte extension 2
120 
121       wbxml_STR_I        = 0x03,   //!< followed by inline string + terminator
122       wbxml_STR_T        = 0x83,   //!< string table index followed by multibyte
123 
124       wbxml_TAG_FIRST    = 0x05,   //!< first value for empty elements
125       wbxml_TAG_LAST     = 0x3f,   //!< last value for empty elements
126       wbxml_TAG_C_FIRST  = 0x45,   //!< first value for elements with content
127       wbxml_TAG_C_LAST   = 0x7F,   //!< last value for elements with content
128       wbxml_TAG_A_FIRST  = 0x85,   //!< first value for elements without content but with attributes
129       wbxml_TAG_A_LAST   = 0xBF,   //!< last value for elements without content but with attributes
130       wbxml_TAG_AC_FIRST = 0xC5,   //!< first value for elements with content and attributes
131       wbxml_TAG_AC_LAST  = 0xFF    //!< last value for elements with content and attributes
132     };
133 
134     static const char     wbxml_START_SEQ_STR[];
135     static const unsigned wbxml_START_SEQ_LEN;
136 
137   protected:
138 
139     class Token;
140     class Attribute;
141 
142     typedef std::vector<Attribute>  Attributes;
143 
144  /** Tests if the current opening tag is to be parsed by this
145    * inheritance level or by the parent.
146    * @param  token  current well known token
147    * @param  attr   tag attributes
148    * @return true: element has been handled
149    */
150    bool testStartElement(unsigned token, const Attributes &attr);
151 
152  /** C++ callback for an opening XML tag.
153    * @param  token  current literal tag
154    * @param  attr   tag attributes
155    */
156    virtual void startElement(unsigned token, const Attributes &attr) =  0;
157 
158  /** C++ callback for a closing XML tag.
159    * @return true: element has been handled
160    */
161    bool testEndElement();
162 
163  /** C++ callback for a closing XML tag.
164    */
165    virtual void endElement() = 0;
166 
167  /** Parses the content of the current xml element.
168    * @param  s   the current chunk of text
169    */
170    virtual void charData(const std::string &s);
171 
172 private:
173 
174    void init();
175 
176    struct ULXR_API_DECL0 WbXmlState
177    {
178      WbXmlState(unsigned state);
179 
180      unsigned    state;
181      std::string current_str;
182    };
183 
184    std::stack<WbXmlState> wbxmlstates;
185 
186    unsigned    wb_version;
187    unsigned    wb_pubId;
188    unsigned    wb_charset;
189 
190    unsigned    lastErrorCode;
191    unsigned    tag_count;
192 };
193 
194 
195 class ULXR_API_DECL0 WbXmlParser::Attribute
196 {
197   public:
198 
199     Attribute(const CppString &name, const CppString &value);
200 
201     CppString getName() const;
202 
203     CppString getValue() const;
204 
205   private:
206 
207     CppString attName;
208     CppString attValue;
209 };
210 
211 
/** Splits a value of up to 14 bits into two bytes of 7 payload bits each.
  * Expands to TWO comma separated unsigned char expressions, so it is meant
  * for places that accept a list such as an array initializer:
  *   - first byte:  bits 7..13 of x with the top bit (0x80) set
  *   - second byte: bits 0..6 of x with the top bit clear
  * The argument is fully parenthesised, so expressions like (a | b) are
  * encoded correctly. It is evaluated twice, so it must be free of side effects.
  * @param x  integral value to encode, only the lowest 14 bits are used
  */
#define ULXR_MB_INT_14bit(x) \
    static_cast<unsigned char>((((x) >> 7) & 0x7F) | 0x80), \
    static_cast<unsigned char>((x) & 0x7F)
214 
215 
216 }  // namespace ulxr
217 
218 
219 #endif // ULXR_WBXMLPARSE
220