1 /**********************************************************************
2 xml.h Declaration of XMLConversion,
3 declaration and definition of XMLBaseFormat and XMLMoleculeFormat
4 Copyright (C) 2005-2006 by Chris Morley
5 
6 This program is free software; you can redistribute it and/or modify
7 it under the terms of the GNU General Public License as published by
8 the Free Software Foundation version 2 of the License.
9 
10 This program is distributed in the hope that it will be useful,
11 but WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
13 GNU General Public License for more details.
14 ***********************************************************************/
15 
16 #ifndef OB_XML_H
17 #define OB_XML_H
18 
19 #include <typeinfo>
20 
21 #include <openbabel/obconversion.h>
22 #include <openbabel/obmolecformat.h>
23 #include <openbabel/mol.h>
24 
25 #include <libxml/xmlreader.h>
26 #include <libxml/xmlwriter.h>
27 #include <typeinfo>
28 
29 namespace OpenBabel
30 {
31 
32 
  // Forward declaration: XMLConversion's interface (ReadXML, RegisterXMLFormat,
  // the namespace map) uses XMLBaseFormat pointers before that class is defined.
  class XMLBaseFormat;
35 
36   //******************************************************
37   /** \class XMLConversion xml.h <openbabel/xml.h>
38       \brief A subclass for conversion of XML formats
39 
40       An extended OBConversion class which includes a libxml2 reader for use
41       with XML formats. Copies an OBConversion and then extends it
42       with a XML parser. Instances made on the heap are deleted when
43       the original OBConversion object is.
44 
45       This class is not intended to be used externally -- instead use
46       OBConversion which will find both XML and non-XML OBFormats.
47 
48       Instead, this subclass also has support for handling specific
49       needs in XML formats. For example, an XML file may include
50       multiple namespaces, and the conversion should call appropriate
51       XMLBaseFormat formats as needed.
52   **/
53   class XMLConversion : public OBConversion
54     {
55     public:
56       ///Existing OBConversion instance copied
57       XMLConversion(OBConversion* pConv);
58 
59       ///Frees reader and writer if necessary
60       ~XMLConversion();
61 
62       bool SetupReader();///< opens libxml2 reader
63       bool SetupWriter();///< opens libxml2 writer
64 
65       ///Parses the input xml stream and sends each element to the format's callback routines
66       bool ReadXML(XMLBaseFormat* pFormat, OBBase* pOb);
67 
68       ///Read and discard XML text up to the next occurrence of the tag e.g."/molecule>"
69       ///This is left as the current node. Returns 1 on success, 0 if not found, -1 if failed.
70       int SkipXML(const char* ctag);
71 
72       typedef std::map<std::string, XMLBaseFormat*> NsMapType;
73 
74       ///This static function returns a reference to the map
75       ///Avoids "static initialization order fiasco"
Namespaces()76       static NsMapType& Namespaces()
77         {
78           static NsMapType ns;
79           return ns;
80 
81           //static NsMapType* nsm = NULL;
82           //if (!nsm)
83           //  nsm = new NsMapType;
84           //return *nsm;
85         };
86 
87       static void RegisterXMLFormat(XMLBaseFormat* pFormat,
88                                     bool IsDefault=false, const char* uri=nullptr);
89 
90       ///Returns the extended OBConversion class, making it if necessary
91       static XMLConversion* GetDerived(OBConversion* pConv, bool ForReading=true);
92 
93       ///Because OBConversion::Convert is still using the unextended OBConversion object
94       ///we need to obtain the conversion parameters from it when requested
IsLast()95       bool IsLast()
96         { return _pConv->IsLast(); }
GetOutputIndex()97       int GetOutputIndex()
98         { return  _pConv->GetOutputIndex(); }
99 
100 
GetReader()101       xmlTextReaderPtr GetReader() const
102         { return _reader;   };
103 
GetWriter()104       xmlTextWriterPtr GetWriter() const
105         { return _writer;   };
106 
OutputToStream()107       void OutputToStream()
108         {
109           xmlOutputBufferFlush(_buf);
110         }
111 
GetDefaultXMLClass()112       static XMLBaseFormat* GetDefaultXMLClass() //TODO make dependent on object type
113         { return _pDefault;};
114 
LookForNamespace()115       void LookForNamespace()
116         { _LookingForNamespace = true; };
117 
118       ///Static callback functions for xmlReaderForIO()
119       static int ReadStream(void * context, char * buffer, int len);
120       static int WriteStream(void * context, const char * buffer, int len);
121       //static int CloseStream(void* context);
122 
123       std::string GetAttribute(const char* attrname);
124 
125       ///Sets value to element content. Returns false if there is no content.
126       std::string GetContent();
127 
128       ///Sets value to element content as an integer. Returns false if there is no content.
129       bool    GetContentInt(int& value);
130 
131       ///Sets value to element content as an double. Returns false if there is no content.
132       bool GetContentDouble(double& value);
133 
134     private:
135       static XMLBaseFormat* _pDefault;
136       OBConversion* _pConv;
137       std::streampos  _requestedpos, _lastpos;
138       xmlTextReaderPtr _reader;
139       xmlTextWriterPtr _writer;
140       xmlOutputBufferPtr _buf;
141       //    xmlBufferPtr _buf;
142       bool _LookingForNamespace;
143     public:
144       bool _SkipNextRead;
145     };
146 
147   //*************************************************
148   /// \class XMLBaseFormat xml.h <openbabel/xml.h>
149   /// \brief Abstract class containing common functionality for XML formats.
150   class XMLBaseFormat : public OBFormat
151     {
152     protected:
153       XMLConversion* _pxmlConv;
154 
155       //formating for output
156       std::string _prefix;
157       int baseindent, ind;
158       std::string nsdecl;
159       int _embedlevel;
160 
161     public:
~XMLBaseFormat()162       ~XMLBaseFormat(){}
163       virtual const char* NamespaceURI()const=0;
DoElement(const std::string & ElName)164       virtual bool DoElement(const std::string& ElName){return false;};
EndElement(const std::string & ElName)165       virtual bool EndElement(const std::string& ElName){return false;};
166       /// The tag at the end of the chemical object e.g. "/molecule>"
EndTag()167       virtual const char* EndTag(){return ">";};
168 
169     protected:
reader()170       xmlTextReaderPtr reader() const
171         {
172           return _pxmlConv->GetReader();
173         }
174 
writer()175       xmlTextWriterPtr writer() const
176         {
177           return _pxmlConv->GetWriter();
178         }
179 
OutputToStream()180       void OutputToStream()
181         {
182           _pxmlConv->OutputToStream();
183         }
184 
185       ///Skip past first n objects in input stream (or current one with n=0)
186       /// Returns 1 on success, -1 on error and 0 if not implemented
SkipObjects(int n,OBConversion * pConv)187       virtual int SkipObjects(int n, OBConversion* pConv)
188         {
189           //don't implement on base class
190           if(*EndTag()=='>')
191             return 0;
192 
193           //Set up XMLConversion class with reader
194           _pxmlConv = XMLConversion::GetDerived(pConv,true);
195           if(!_pxmlConv)
196             return -1;
197 
198           //always find the end of at least 1 object
199           if(n==0)++n;
200 
201           //Skip n objects, returning -1 if not successful
202           int i;
203           for(i=0; i<n; ++i)
204             if(_pxmlConv->SkipXML(EndTag())!=1)
205               return -1;
206 
207           return 1;
208         }
209 
210     };
211 
212   //*************************************************
213   /// \class XMLMoleculeFormat xml.h <openbabel/xml.h>
214   /// \brief Abstract class for XML formats which represent molecules
215   class XMLMoleculeFormat : public XMLBaseFormat
216     {
217     protected:
218       OBMol* _pmol;
219 
220     public:
~XMLMoleculeFormat()221       ~XMLMoleculeFormat(){}
ReadChemObject(OBConversion * pConv)222       virtual bool ReadChemObject(OBConversion* pConv)
223         {
224           return OBMoleculeFormat::ReadChemObjectImpl(pConv, this);
225         };
226 
WriteChemObject(OBConversion * pConv)227       virtual bool WriteChemObject(OBConversion* pConv)
228         {
229           return OBMoleculeFormat::WriteChemObjectImpl(pConv, this);
230         };
231 
ReadMolecule(OBBase * pOb,OBConversion * pConv)232       virtual bool ReadMolecule(OBBase* pOb, OBConversion* pConv)
233         {
234           _pmol = dynamic_cast<OBMol*>(pOb);
235           if(!_pmol)
236             return false;
237           _pxmlConv = XMLConversion::GetDerived(pConv,true);
238           if(!_pxmlConv)
239             return false;
240           _embedlevel = -1;
241           return _pxmlConv->ReadXML(this,pOb);
242         };
243 
GetType()244       const std::type_info& GetType()
245         {
246           return typeid(OBMol*);
247         };
248 
249     };
250 
251 
252 }//namespace
253 
254 //! \file
255 //! \brief Declaration of XMLConversion,
256 //!  declaration and definition of XMLBaseFormat and XMLMoleculeFormat
257 
258 #endif
259