1 /**********************************************************************
obconversion.h - Handle file conversions. Declaration of OBConversion
(OBFormat is provided by the included <openbabel/format.h>)
3 
4 Copyright (C) 2004-2009 by Chris Morley
5 
6 This file is part of the Open Babel project.
7 For more information, see <http://openbabel.org/>
8 
9 This program is free software; you can redistribute it and/or modify
10 it under the terms of the GNU General Public License as published by
11 the Free Software Foundation version 2 of the License.
12 
13 This program is distributed in the hope that it will be useful,
14 but WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
16 GNU General Public License for more details.
17 ***********************************************************************/
18 
19 #ifndef OB_CONV_H
20 #define OB_CONV_H
21 
22 #include <openbabel/babelconfig.h>
23 
24 #include <iostream>
25 #include <fstream>
26 #include <sstream>
27 #include <cassert>
28 
29 #include <string>
30 #include <vector>
31 #include <map>
32 #ifdef HAVE_STRINGS_H
33 #include <strings.h>
34 #endif
35 
36 #include <openbabel/oberror.h>
37 #include <openbabel/format.h>
38 #include <openbabel/lineend.h>
39 
// Export/import decoration macros for DLL builds. babelconfig.h may
// already have defined them; when it has not, fall back to empty
// definitions so that declarations using them still compile.
#if !defined(OBCONV)
#  define OBCONV
#endif
#if !defined(OBDLL)
#  define OBDLL
#endif
48 
49 namespace OpenBabel {
50 
51   // Needed to preserve deprecated API
52   typedef OBPlugin::PluginIterator Formatpos;
53 
54   OBERROR extern  OBMessageHandler obErrorLog;
55 
56   //*************************************************
57   /// @brief Class to convert from one format to another.
58   // Class introduction in obconversion.cpp
59   class OBCONV OBConversion
60     {
61       /// @nosubgrouping
62     public:
63       /// @name Construction
64       //@{
65       OBConversion(std::istream* is=nullptr, std::ostream* os=nullptr);
66       OBConversion(std::string inFilename, std::string outFilename="");
67       /// @brief Copy constructor.  Stream *ownership* is not copied. Source remains responsible for the memory.
68       OBConversion(const OBConversion& o);
69       /// @brief Assignment.  Stream *ownership* is not copied.  Source remains responsible for the memory.
70       OBConversion& operator=(const OBConversion& rhs);
71 
72       virtual     ~OBConversion();
73       //@}
74       /// @name Collection of formats
75       //@{
76       /// @brief Called once by each format class
77       static int				RegisterFormat(const char* ID, OBFormat* pFormat, const char* MIME = nullptr);
78       /// @brief Searches registered formats
79       static OBFormat*	FindFormat(const char* ID);
80       /// @brief Searches registered formats
81       /// \since version 2.3
82       static OBFormat*  FindFormat(const std::string ID);
83       /// @brief Searches registered formats for an ID the same as the file extension
84       static OBFormat*	FormatFromExt(const char* filename);
85       static OBFormat*	FormatFromExt(const char* filename, bool& isgzip);
86       /// @brief Searches registered formats for an ID the same as the file extension
87       /// \since version 2.3
88       static OBFormat*	FormatFromExt(const std::string filename);
89       static OBFormat*	FormatFromExt(const std::string filename, bool& isgzip);
90       /// @brief Searches registered formats for a MIME the same as the chemical MIME type passed
91       static OBFormat*        FormatFromMIME(const char* MIME);
92 
93       ///Deprecated!.Repeatedly called to recover available Formats
94 #ifndef SWIG
95       static bool	        GetNextFormat(Formatpos& itr, const char*& str,OBFormat*& pFormat);
96 #endif
97       //@}
98 
99       /// @name Information
100       //@{
101       static const char* Description(); //generic conversion options
102       //@}
103 
104       /// These return a filtered stream for reading/writing (possible filters include compression, decompression, and newline transformation)
105       /// @name Parameter get and set
106       //@{
GetInStream()107       std::istream* GetInStream() const {return pInput;};
GetOutStream()108       std::ostream* GetOutStream() const {return pOutput;};
109 
110       /// @brief Set input stream.  If takeOwnership is true, will deallocate when done.
111       /// If isGzipped is true, will treat as a gzipped stream regardless of option settings,
112       //  if false, then will be treated as gzipped stream only if z/zin is set.
113       void          SetInStream(std::istream* pIn, bool takeOwnership=false);
114       void          SetOutStream(std::ostream* pOut, bool takeOwnership=false);
115 
116       /// Sets the formats from their ids, e g CML
117       bool        SetInAndOutFormats(const char* inID, const char* outID, bool ingzip=false, bool outgzip=false);
118       bool        SetInAndOutFormats(OBFormat* pIn, OBFormat* pOut, bool ingzip=false, bool outgzip=false);
119       /// Sets the input format from an id e.g. CML
120       bool	      SetInFormat(const char* inID, bool isgzip=false);
121       bool	      SetInFormat(OBFormat* pIn, bool isgzip=false);
122       /// Sets the output format from an id e.g. CML
123       bool	      SetOutFormat(const char* outID, bool isgzip=false);
124       bool	      SetOutFormat(OBFormat* pOut, bool isgzip=false);
125 
GetInFormat()126       OBFormat*   GetInFormat() const{return pInFormat;};
GetOutFormat()127       OBFormat*   GetOutFormat() const{return pOutFormat;};
GetInGzipped()128       bool GetInGzipped() const{return inFormatGzip;};
GetOutGzipped()129       bool GetOutGzipped() const{return outFormatGzip;};
GetInFilename()130       std::string GetInFilename() const{return InFilename;};
GetOutFilename()131       std::string GetOutFilename() const{return OutFilename;};
132 
133       ///Get the position in the input stream of the object being read
GetInPos()134       std::streampos GetInPos()const{return wInpos;};
135 
136       ///Get the length in the input stream of the object being read
GetInLen()137       size_t GetInLen()const{return wInlen;};
138 
139       /// \return a default title which is the filename
140       const char* GetTitle() const;
141 
142       ///@brief Extension method: deleted in ~OBConversion()
GetAuxConv()143       OBConversion* GetAuxConv() const {return pAuxConv;};
SetAuxConv(OBConversion * pConv)144       void          SetAuxConv(OBConversion* pConv) {pAuxConv=pConv;};
145       //@}
146       /** @name Option handling
147        Three types of Option provide information and control instructions to the
148        conversion process, INOPTIONS, OUTOPTIONS, GENOPTIONS, and are stored in each
149        OBConversion object in separate maps. Each option has an id and an optional
150        text string. They are set individually by AddOption() or (rarely) collectively
151        in SetOptions(). Options cannot be altered but can be replaced with AddOption()
152        and deleted with RemoveOption(), which, however, should be used in an op derived
153        from OBOp (because of iterator invalidation).
154 
155        If the "Convert" interface is used, the GENOPTIONS are acted upon in the
156        OBBase::DoTransformations() functions (currently only OBMol has one). This
157        happens after the object has been input but before it has been output.
158        All the options are available to input and output formats, etc. via the IsOption()
159        function, and the interpretation of any text string needs to be done subsequently.
160 
161        In the commandline interface, options with single character ids are are indicated
162        like -s, and those with multiple character ids like --gen3D. An option may have
163        one or more parameters which appear, space separated, in the option's text string.
164        With babel, unless the option is at the end of the command, it is necessary for
165        the number of its parameters to be exactly that specified in RegisterOptionParam().
166        The default is 0, but if it is more, and babel is likely to be used, this function
167        should be called in the constructor of a format or op.
168        With obabel (or the GUI), it is not necessary to call RegisterOptionParam().
169 
170        New GENOPTIONS can be defined (as plugins) using the class OBOp.
171 
172        It is customary for a format or op to document any INOPTIONS or OUTPTIONS it
173        uses in its Description() function. As well as providing documentation during
174        use, this is also parsed by the GUI to construct its checkboxes,etc., so it is
175        advisable to give new Descriptions the same form as existing ones.
176 
177        Some conversion options, such as -f, -l, -m, are unlikely to be used in
178        programming, but are listed in OBConversion::Description().  The built-in
179        GENOPTIONS for OBMol objects are listed in OBMol::ClassDescription() which
180        is in transform.cpp and also in this documentation under AddOption().
181        */
182       //@{
183       ///@brief Three types of options set on the the command line by -a? , -x? , or -?
184       enum Option_type { INOPTIONS, OUTOPTIONS, GENOPTIONS, ALL };
185 
186       ///@brief Determine whether an option is set. \return NULL if option not and a pointer to the associated text if it is
187       const char* IsOption(const char* opt,Option_type opttyp=OUTOPTIONS);
188 
189       ///@brief Access the map with option name as key and any associated text as value
GetOptions(Option_type opttyp)190       const std::map<std::string,std::string>* GetOptions(Option_type opttyp)
191         { return &OptionsArray[opttyp];};
192 
193       ///@brief Set an option of specified type, with optional text
194       void AddOption(const char* opt, Option_type opttyp=OUTOPTIONS, const char* txt=nullptr);
195 
196       bool RemoveOption(const char* opt, Option_type optype);
197 
198       ///@brief Set several single character options of specified type from string like ab"btext"c"ctext"
199       void SetOptions(const char* options, Option_type opttyp);
200 
201       ///@brief For example -h takes 0 parameters; -f takes 1. Call in a format constructor.
202       static void RegisterOptionParam(std::string name, OBFormat* pFormat,
203                                       int numberParams=0, Option_type typ=OUTOPTIONS);
204 
205       /// \return the number of parameters registered for the option, or 0 if not found
206       static int GetOptionParams(std::string name, Option_type typ);
207       //@}
208 
209       ///@brief Copies the options (by default of all types) from one OBConversion Object to another.
210       void CopyOptions(OBConversion* pSourceConv, Option_type typ=ALL);
211 
212       /// @name Supported file format
213       //@{
214       // @brief Set and return the list of supported input format
215       std::vector<std::string> GetSupportedInputFormat();
216       // @brief Set and return the list of supported output format
217       std::vector<std::string> GetSupportedOutputFormat();
218       //@}
219 
220       /// @name Conversion
221       //@{
222       /// @brief Conversion for single input and output stream
223       int         Convert(std::istream* is, std::ostream* os);
224 
225       /// @brief Conversion with existing streams
226       int         Convert();
227 
228       /// @brief Conversion with multiple input/output files:
229       /// makes input and output streams, and carries out normal, batch, aggregation, and splitting conversion.
230       int					FullConvert(std::vector<std::string>& FileList,
231                               std::string& OutputFileName, std::vector<std::string>& OutputFileList);
232       //@}
233 
234       /// @name Conversion loop control
235       //@{
236       int     AddChemObject(OBBase* pOb);///< @brief Adds to internal array during input
237       OBBase*  GetChemObject(); ///< @brief Retrieve from internal array during output
238       bool     IsLast();///< @brief True if no more objects to be output
239       bool     IsFirstInput();///< @brief True if the first input object is being processed
240       void     SetFirstInput(bool b=true);///< @brief Setwhether or not is the first input
241       int      GetOutputIndex() const ;///< @brief Retrieves number of ChemObjects that have been actually output
242       void     SetOutputIndex(int indx);///< @brief Sets output index (maybe to control whether seen as first object)
243       void     SetMoreFilesToCome();///<@brief Used with multiple input files. Off by default.
244       void     SetOneObjectOnly(bool b=true);///< @brief Used with multiple input files. Off by default.
SetLast(bool b)245       void     SetLast(bool b){SetOneObjectOnly(b);}///< @brief Synonym for SetOneObjectOnly()
IsLastFile()246       bool     IsLastFile(){ return !MoreFilesToCome;}///< @brief True if no more files to be read
247       /// @brief Number of objects read and processed
248       /// Incremented after options are processed, so 0 for first object.  Returns -1 if Convert interface not used.
GetCount()249       int      GetCount()const { return Count; }
250       //@}
251       /// @name Convenience functions
252       //@{
253       ///The default format is set in a single OBFormat class (generally it is OBMol)
GetDefaultFormat()254       static OBFormat* GetDefaultFormat(){return OBFormat::FindType(nullptr);};
255 
256       /// @brief Outputs an object of a class derived from OBBase.
257 
258       /// Part of "API" interface.
259       /// The output stream can be specified and the change is retained in the OBConversion instance
260       bool				Write(OBBase* pOb, std::ostream* pout=nullptr);
261 
262       /// @brief Outputs an object of a class derived from OBBase as a string
263 
264       /// Part of "API" interface.
265       /// The output stream is temporarily changed to the string and then restored
266       /// This method is primarily intended for scripting languages without "stream" classes
267       /// The optional "trimWhitespace" parameter allows trailing whitespace to be removed
268       /// (e.g., in a SMILES string or InChI, etc.)
269       std::string                     WriteString(OBBase* pOb, bool trimWhitespace = false);
270 
271       /// @brief Outputs an object of a class derived from OBBase as a file (with the supplied path)
272 
273       /// Part of "API" interface.
274       /// The output stream is changed to the supplied file and the change is retained in the
275       /// OBConversion instance.
276       /// This method is primarily intended for scripting languages without "stream" classes
277       bool                            WriteFile(OBBase* pOb, std::string filePath);
278 
279       /// @brief Manually closes and deletes the output stream
280       /// The file is closed anyway when in the OBConversion destructor or when WriteFile
281       /// is called again.
282       /// \since version 2.1
283       void CloseOutFile();
284 
285       /// @brief Reads an object of a class derived from OBBase into pOb.
286 
287       /// Part of "API" interface.
288       /// The input stream can be specified and the change is retained in the OBConversion instance
289       /// \return false and pOb=NULL on error
290       bool	Read(OBBase* pOb, std::istream* pin=nullptr);
291 
292       /// Part of "API" interface.
293       /// The input stream can be specified and the change is retained in the OBConversion instance
294       /// \return NULL on error
295 //      OBBase*	ReadObject(std::istream* pin=NULL);
296 
297       /// @brief Reads an object of a class derived from OBBase into pOb from the supplied string
298 
299       /// Part of "API" interface.
300       /// \return false and pOb=NULL on error
301       /// This method is primarily intended for scripting languages without "stream" classes
302       /// Any existing input stream will be replaced by stringstream.
303       bool	ReadString(OBBase* pOb, std::string input);
304 
305       /// @brief Reads an object of a class derived from OBBase into pOb from the file specified
306 
307       /// Part of "API" interface.
308       /// The output stream is changed to the supplied file and the change is retained in the
309       /// OBConversion instance. For multi-molecule files, the remaining molecules
310       /// can be read by repeatedly calling the Read() method.
311       /// \return false and pOb=NULL on error
312       /// This method is primarily intended for scripting languages without "stream" classes
313       bool	ReadFile(OBBase* pOb, std::string filePath);
314 
315       /// Part of the "Convert" interface.
316       /// Open the files and update the streams in the OBConversion object.
317       /// This method is primarily intended for scripting languages without "stream" classes
318       /// and will usually followed by a call to Convert().
319       /// Will set format from file extension if format has not already been set.
320       /// Files will be opened even if format cannot be determined, but not if file path is empty.
321       /// \return false if unsuccessful.
322       bool OpenInAndOutFiles(std::string infilepath, std::string outfilepath);
323 
324       /// @brief Sends a message like "2 molecules converted" to clog
325       /// The type of object is taken from the TargetClassDescription
326       /// of the specified class (or the output format if not specified)and
327       /// is appropriately singular or plural.
328       void ReportNumberConverted(int count, OBFormat* pFormat=nullptr);
329 
330       /// \return the number of objects in the inputstream,
331       /// or -1 if error or if SkipObjects for the input format is not implemented
332       /// Adjusts for the value of -f and -l options (first and last objects).
333       int NumInputObjects();
334 
335 
336 protected:
337       ///Replaces * in BaseName by InFile without extension and path
338       static std::string BatchFileName(std::string& BaseName, std::string& InFile);
339       ///Replaces * in BaseName by Count
340       static std::string IncrementedFileName(std::string& BaseName, const int Count);
341       ///Checks for misunderstandings when using the -m option
342       static bool CheckForUnintendedBatch(const std::string& infile, const std::string& outfile);
343 
344       void ClearInStreams();
345       //@}
346 
347     protected:
348 
349       //helper class for saving stream state
350       struct StreamState
351       {
352           std::ios *pStream; //active stream
353           std::vector<std::ios *> ownedStreams; //streams we own the memory to
354 
StreamStateStreamState355           StreamState(): pStream(nullptr) {}
~StreamStateStreamState356           ~StreamState()
357           {
358             assert(ownedStreams.size() == 0); //should be popped
359           }
360 
361           void pushInput(OBConversion& conv);
362           void popInput(OBConversion& conv);
363 
364           void pushOutput(OBConversion& conv);
365           void popOutput(OBConversion& conv);
366 
isSetStreamState367           bool isSet() const { return pStream != nullptr; }
368       };
369 
370       bool             SetStartAndEnd();
371 //      static FMapType& FormatsMap();///<contains ID and pointer to all OBFormat classes
372 //      static FMapType& FormatsMIMEMap();///<contains MIME and pointer to all OBFormat classes
373       typedef std::map<std::string,int> OPAMapType;
374       static OPAMapType& OptionParamArray(Option_type typ);
375       bool             OpenAndSetFormat(bool SetFormat, std::ifstream* is, std::stringstream* ss=nullptr);
376 
377       std::string	  InFilename, OutFilename; //OutFileName added v2.4.0
378 
379       typedef   FilteringInputStream< LineEndingExtractor > LEInStream;
380 
381       std::istream *pInput; //input stream, may be filtered
382       std::vector<std::istream *> ownedInStreams; //streams we own the memory to
383 
384       std::ostream *pOutput; //output stream, may have filters applied
385       std::vector<std::ostream *> ownedOutStreams; //streams we own the memory to
386 
387 
388       static OBFormat*  pDefaultFormat;
389       OBFormat* 	  pInFormat;
390       OBFormat*	  pOutFormat;
391 
392       std::map<std::string,std::string> OptionsArray[3];
393 
394       int		  Index;
395       unsigned int	  StartNumber;
396       unsigned int	  EndNumber;
397       int	          Count;
398       bool			m_IsFirstInput;
399       bool		  m_IsLast;
400       bool		  MoreFilesToCome;
401       bool		  OneObjectOnly;
402       bool		  ReadyToInput;
403       bool      SkippedMolecules;    /// skip molecules using -f and -l
404 
405       //unlike the z and zin options, these are not sticky - setting formats will reset them
406       bool inFormatGzip;
407       bool outFormatGzip;
408 
409       OBBase*		  pOb1;
410       std::streampos wInpos; ///<position in the input stream of the object being written
411       std::streampos rInpos; ///<position in the input stream of the object being read
412       size_t wInlen; ///<length in the input stream of the object being written
413       size_t rInlen; ///<length in the input stream of the object being read
414 
415       OBConversion* pAuxConv;///<Way to extend OBConversion
416 
417       std::vector<std::string> SupportedInputFormat; ///< list of supported input format
418       std::vector<std::string> SupportedOutputFormat; ///< list of supported output format
419 
420     };
421 
422 } //namespace OpenBabel
423 #endif //OB_CONV_H
424 
425 //! \file
//! \brief Handle file conversions. Declaration of OBConversion.
427 
428 
429