1 /**
2  * @cond doxygenLibsbmlInternal
3  *
4  * @file    ExpatHandler.cpp
5  * @brief   Redirect Expat events to an XMLHandler
6  * @author  Ben Bornstein
7  *
8  * <!--------------------------------------------------------------------------
9  * This file is part of libSBML.  Please visit http://sbml.org for more
10  * information about SBML, and the latest version of libSBML.
11  *
12  * Copyright (C) 2020 jointly by the following organizations:
13  *     1. California Institute of Technology, Pasadena, CA, USA
14  *     2. University of Heidelberg, Heidelberg, Germany
15  *     3. University College London, London, UK
16  *
17  * Copyright (C) 2019 jointly by the following organizations:
18  *     1. California Institute of Technology, Pasadena, CA, USA
19  *     2. University of Heidelberg, Heidelberg, Germany
20  *
21  * Copyright (C) 2013-2018 jointly by the following organizations:
22  *     1. California Institute of Technology, Pasadena, CA, USA
23  *     2. EMBL European Bioinformatics Institute (EMBL-EBI), Hinxton, UK
24  *     3. University of Heidelberg, Heidelberg, Germany
25  *
26  * Copyright (C) 2009-2013 jointly by the following organizations:
27  *     1. California Institute of Technology, Pasadena, CA, USA
28  *     2. EMBL European Bioinformatics Institute (EMBL-EBI), Hinxton, UK
29  *
30  * Copyright (C) 2006-2008 by the California Institute of Technology,
31  *     Pasadena, CA, USA
32  *
33  * Copyright (C) 2002-2005 jointly by the following organizations:
34  *     1. California Institute of Technology, Pasadena, CA, USA
35  *     2. Japan Science and Technology Agency, Japan
36  *
37  * This library is free software; you can redistribute it and/or modify it
38  * under the terms of the GNU Lesser General Public License as published by
39  * the Free Software Foundation.  A copy of the license agreement is provided
40  * in the file named "LICENSE.txt" included with this software distribution and
41  * also available online as http://sbml.org/software/libsbml/license.html
42  * ---------------------------------------------------------------------- -->*/
43 
44 #include <expat.h>
45 
46 #include <sbml/xml/XMLHandler.h>
47 #include <sbml/xml/XMLTriple.h>
48 #include <sbml/xml/XMLToken.h>
49 #include <sbml/xml/XMLError.h>
50 #include <sbml/xml/XMLErrorLog.h>
51 
52 #include <sbml/xml/ExpatAttributes.h>
53 #include <sbml/xml/ExpatHandler.h>
54 
55 #include <sbml/util/util.h>
56 
57 using namespace std;
58 
59 LIBSBML_CPP_NAMESPACE_BEGIN
60 
61 /*
62  * The functions below are internal to this file.  They simply redirect to
63  * the corresponding ExpatHandler method (assuming UserData contains a
64  * pointer to ExpatHandler).  I first saw this redirect scheme used in
65  * Stefan Hoops' ExpatParser class.
66  */
67 
68 static void
XMLDeclHandler(void * userData,const XML_Char * version,const XML_Char * encoding,int)69 XMLDeclHandler (void* userData,
70                 const XML_Char* version,
71                 const XML_Char* encoding,
72                 int)
73 {
74   // call this function even if version or encoding arent set
75   //if (version == 0) return;
76   //if (encoding == 0) return;
77   static_cast<ExpatHandler*>(userData)->XML(version, encoding);
78 }
79 
80 
81 static void
startElement(void * userData,const XML_Char * name,const XML_Char ** attrs)82 startElement (void* userData, const XML_Char* name, const XML_Char** attrs)
83 {
84   static_cast<ExpatHandler*>(userData)->startElement(name, attrs);
85 }
86 
87 
88 static void
startNamespace(void * userData,const XML_Char * prefix,const XML_Char * uri)89 startNamespace (void* userData, const XML_Char* prefix, const XML_Char* uri)
90 {
91   static_cast<ExpatHandler*>(userData)->startNamespace(prefix, uri);
92 }
93 
94 
95 static void
endElement(void * userData,const XML_Char * name)96 endElement (void* userData, const XML_Char* name)
97 {
98   static_cast<ExpatHandler*>(userData)->endElement(name);
99 }
100 
101 
102 static void
characters(void * userData,const XML_Char * chars,int length)103 characters (void* userData, const XML_Char* chars, int length)
104 {
105   static_cast<ExpatHandler*>(userData)->characters(chars, length);
106 }
107 
108 
109 static int
unknownEncodingHandler(void *,const XML_Char *,XML_Encoding *)110 unknownEncodingHandler(void* /*encodingHandlerData*/,
111            const XML_Char* /*name*/,
112            XML_Encoding* /*info*/)
113 {
114   return XML_STATUS_ERROR;
115 }
116 
117 
118 /**
119  * Creates a new ExpatHandler.  Expat events will be redirected to the
120  * given XMLHandler.
121  */
ExpatHandler(XML_Parser parser,XMLHandler & handler)122 ExpatHandler::ExpatHandler (XML_Parser parser, XMLHandler& handler) :
123    mParser ( parser  )
124  , mHandler( handler )
125 {
126   XML_SetXmlDeclHandler      ( mParser, LIBSBML_CPP_NAMESPACE ::XMLDeclHandler    );
127   XML_SetElementHandler      ( mParser, LIBSBML_CPP_NAMESPACE ::startElement,
128                                         LIBSBML_CPP_NAMESPACE ::endElement        );
129   XML_SetCharacterDataHandler( mParser, LIBSBML_CPP_NAMESPACE ::characters        );
130   XML_SetNamespaceDeclHandler( mParser, LIBSBML_CPP_NAMESPACE ::startNamespace, 0 );
131   XML_SetUserData            ( mParser, static_cast<void*>(this)     );
132   XML_SetReturnNSTriplet     ( mParser, 1                            );
133   mHandlerError = NULL;
134   setHasXMLDeclaration(false);
135 }
136 
137 
138 /**
139  * Copy Constructor
140  */
ExpatHandler(const ExpatHandler & other)141 ExpatHandler::ExpatHandler (const ExpatHandler& other)
142   : mParser  (other.mParser)
143   , mHandler (other.mHandler)
144   , mNamespaces (other.mNamespaces)
145   , mHandlerError(NULL)
146 {
147 }
148 
149 
150 /**
151  * Assignment operator
152  */
operator =(const ExpatHandler & other)153 ExpatHandler& ExpatHandler::operator=(const ExpatHandler& other)
154 {
155   if (this == &other) return *this;
156 
157   mParser = other.mParser;
158   mHandler = other.mHandler;
159   mNamespaces = other.mNamespaces;
160   mHandlerError = NULL;
161 
162   return *this;
163 }
164 
165 
166 /**
167  * Destroys this ExpatHandler.
168  */
~ExpatHandler()169 ExpatHandler::~ExpatHandler ()
170 {
171 }
172 
173 
174 /**
175  * Receive notification of the beginning of the document.
176  */
177 void
startDocument()178 ExpatHandler::startDocument ()
179 {
180   mHandler.startDocument();
181 }
182 
183 
184 /**
185  * Receive notification of the XML declaration, i.e.
186  * <?xml version="1.0" encoding="UTF-8"?>
187  */
188 int
XML(const XML_Char * version,const XML_Char * encoding)189 ExpatHandler::XML (const XML_Char* version, const XML_Char* encoding)
190 {
191   setHasXMLDeclaration(true);
192 
193   XML_SetUnknownEncodingHandler( mParser, &unknownEncodingHandler, 0 );
194   if (encoding == NULL)
195   {
196     mHandler.XML(version, "");
197     return XML_STATUS_ERROR;
198   }
199   else if (version == NULL)
200   {
201     mHandler.XML("", encoding);
202     return XML_STATUS_ERROR;
203   }
204   else
205   {
206     mHandler.XML(version, encoding);
207   }
208 
209   return 0;
210 }
211 
212 
213 /**
214  * Receive notification of the start of an element.
215  *
216  * @param  name   the element name.
217  * @param  attrs  the specified or defaulted attributes.
218  */
219 void
startElement(const XML_Char * name,const XML_Char ** attrs)220 ExpatHandler::startElement (const XML_Char* name, const XML_Char** attrs)
221 {
222   const XMLTriple       triple    ( name  );
223   const ExpatAttributes attributes( attrs, name );
224   const XMLToken        element   ( triple, attributes, mNamespaces,
225 			            getLine(), getColumn() );
226 
227   mHandler.startElement(element);
228   mNamespaces.clear();
229 }
230 
231 
232 /**
233  * Receive notification of the start of an XML namespace.
234  *
235  * @param  prefix  the namespace prefix or NULL (for xmlns="...").
236  * @param  uri     the namespace uri    or NULL (for xmlns="").
237  */
238 void
startNamespace(const XML_Char * prefix,const XML_Char * uri)239 ExpatHandler::startNamespace (const XML_Char* prefix, const XML_Char* uri)
240 {
241   // Expat doesn't flag the use of the prefix 'xml' as an error, but
242   // according to the XML Namespaces 1.0 (2nd ed, Aug 2006) specification,
243   // "The prefix xml is by definition bound to the namespace name
244   // http://www.w3.org/XML/1998/namespace. It MAY, but need not, be
245   // declared, and MUST NOT be bound to any other namespace name."
246   // I guess we have to catch this ourselves, then?
247 
248   if (streq(prefix, "xml")
249       && !streq(uri, "http://www.w3.org/XML/1998/namespace"))
250   {
251     mHandlerError = new XMLError(BadXMLPrefixValue,
252                                  "The prefix 'xml' is reserved in XML",
253                                  getLine(), getColumn());
254   }
255   else
256   {
257     mNamespaces.add(uri ? uri : "", prefix ? prefix : "");
258   }
259 }
260 
261 
262 /**
263  * Receive notification of the end of the document.
264  */
265 void
endDocument()266 ExpatHandler::endDocument ()
267 {
268   mHandler.endDocument();
269 }
270 
271 
272 /**
273  * Receive notification of the end of an element.
274  *
275  * @param  name  the element name.
276  */
277 void
endElement(const XML_Char * name)278 ExpatHandler::endElement (const XML_Char* name)
279 {
280   const XMLTriple  triple ( name );
281   const XMLToken   element( triple, getLine(), getColumn() );
282 
283   mHandler.endElement(element);
284 }
285 
286 
287 /**
288  * Receive notification of character data inside an element.
289  *
290  * @param  chars   the characters.
291  * @param  length  the number of characters to use from the character array.
292  */
293 void
characters(const XML_Char * chars,int length)294 ExpatHandler::characters (const XML_Char* chars, int length)
295 {
296   XMLToken data( string(chars, length) );
297   mHandler.characters(data);
298 }
299 
300 
301 /**
302  * @return the column number of the current XML event.
303  */
304 unsigned int
getColumn() const305 ExpatHandler::getColumn () const
306 {
307   return static_cast<unsigned int>( XML_GetCurrentColumnNumber(mParser) );
308 }
309 
310 
311 /**
312  * @return the line number of the current XML event.
313  */
314 unsigned int
getLine() const315 ExpatHandler::getLine () const
316 {
317   return static_cast<unsigned int>( XML_GetCurrentLineNumber(mParser) );
318 }
319 
320 
321 LIBSBML_CPP_NAMESPACE_END
322 /** @endcond */
323