1 /********************************************************************** 2 obconversion.h - Handle file conversions. Declaration of OBFormat, OBConversion 3 4 Copyright (C) 2004-2009 by Chris Morley 5 6 This file is part of the Open Babel project. 7 For more information, see <http://openbabel.org/> 8 9 This program is free software; you can redistribute it and/or modify 10 it under the terms of the GNU General Public License as published by 11 the Free Software Foundation version 2 of the License. 12 13 This program is distributed in the hope that it will be useful, 14 but WITHOUT ANY WARRANTY; without even the implied warranty of 15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 16 GNU General Public License for more details. 17 ***********************************************************************/ 18 19 #ifndef OB_CONV_H 20 #define OB_CONV_H 21 22 #include <openbabel/babelconfig.h> 23 24 #include <iostream> 25 #include <fstream> 26 #include <sstream> 27 #include <cassert> 28 29 #include <string> 30 #include <vector> 31 #include <map> 32 #ifdef HAVE_STRINGS_H 33 #include <strings.h> 34 #endif 35 36 #include <openbabel/oberror.h> 37 #include <openbabel/format.h> 38 #include <openbabel/lineend.h> 39 40 // These macros are used in DLL builds. If they have not 41 // been set in babelconfig.h, define them as nothing. 42 #ifndef OBCONV 43 #define OBCONV 44 #endif 45 #ifndef OBDLL 46 #define OBDLL 47 #endif 48 49 namespace OpenBabel { 50 51 // Needed to preserve deprecated API 52 typedef OBPlugin::PluginIterator Formatpos; 53 54 OBERROR extern OBMessageHandler obErrorLog; 55 56 //************************************************* 57 /// @brief Class to convert from one format to another. 58 // Class introduction in obconversion.cpp 59 class OBCONV OBConversion 60 { 61 /// @nosubgrouping 62 public: 63 /// @name Construction 64 //@{ 65 OBConversion(std::istream* is=nullptr, std::ostream* os=nullptr); 66 OBConversion(std::string inFilename, std::string outFilename=""); 67 /// @brief Copy constructor. Stream *ownership* is not copied. Source remains responsible for the memory. 68 OBConversion(const OBConversion& o); 69 /// @brief Assignment. Stream *ownership* is not copied. Source remains responsible for the memory. 70 OBConversion& operator=(const OBConversion& rhs); 71 72 virtual ~OBConversion(); 73 //@} 74 /// @name Collection of formats 75 //@{ 76 /// @brief Called once by each format class 77 static int RegisterFormat(const char* ID, OBFormat* pFormat, const char* MIME = nullptr); 78 /// @brief Searches registered formats 79 static OBFormat* FindFormat(const char* ID); 80 /// @brief Searches registered formats 81 /// \since version 2.3 82 static OBFormat* FindFormat(const std::string ID); 83 /// @brief Searches registered formats for an ID the same as the file extension 84 static OBFormat* FormatFromExt(const char* filename); 85 static OBFormat* FormatFromExt(const char* filename, bool& isgzip); 86 /// @brief Searches registered formats for an ID the same as the file extension 87 /// \since version 2.3 88 static OBFormat* FormatFromExt(const std::string filename); 89 static OBFormat* FormatFromExt(const std::string filename, bool& isgzip); 90 /// @brief Searches registered formats for a MIME the same as the chemical MIME type passed 91 static OBFormat* FormatFromMIME(const char* MIME); 92 93 ///Deprecated!.Repeatedly called to recover available Formats 94 #ifndef SWIG 95 static bool GetNextFormat(Formatpos& itr, const char*& str,OBFormat*& pFormat); 96 #endif 97 //@} 98 99 /// @name Information 100 //@{ 101 static const char* Description(); //generic conversion options 102 //@} 103 104 /// These return a filtered stream for reading/writing (possible filters include compression, decompression, and newline transformation) 105 /// @name Parameter get and set 106 //@{ GetInStream()107 std::istream* GetInStream() const {return pInput;}; GetOutStream()108 std::ostream* GetOutStream() const {return pOutput;}; 109 110 /// @brief Set input stream. If takeOwnership is true, will deallocate when done. 111 /// If isGzipped is true, will treat as a gzipped stream regardless of option settings, 112 // if false, then will be treated as gzipped stream only if z/zin is set. 113 void SetInStream(std::istream* pIn, bool takeOwnership=false); 114 void SetOutStream(std::ostream* pOut, bool takeOwnership=false); 115 116 /// Sets the formats from their ids, e g CML 117 bool SetInAndOutFormats(const char* inID, const char* outID, bool ingzip=false, bool outgzip=false); 118 bool SetInAndOutFormats(OBFormat* pIn, OBFormat* pOut, bool ingzip=false, bool outgzip=false); 119 /// Sets the input format from an id e.g. CML 120 bool SetInFormat(const char* inID, bool isgzip=false); 121 bool SetInFormat(OBFormat* pIn, bool isgzip=false); 122 /// Sets the output format from an id e.g. CML 123 bool SetOutFormat(const char* outID, bool isgzip=false); 124 bool SetOutFormat(OBFormat* pOut, bool isgzip=false); 125 GetInFormat()126 OBFormat* GetInFormat() const{return pInFormat;}; GetOutFormat()127 OBFormat* GetOutFormat() const{return pOutFormat;}; GetInGzipped()128 bool GetInGzipped() const{return inFormatGzip;}; GetOutGzipped()129 bool GetOutGzipped() const{return outFormatGzip;}; GetInFilename()130 std::string GetInFilename() const{return InFilename;}; GetOutFilename()131 std::string GetOutFilename() const{return OutFilename;}; 132 133 ///Get the position in the input stream of the object being read GetInPos()134 std::streampos GetInPos()const{return wInpos;}; 135 136 ///Get the length in the input stream of the object being read GetInLen()137 size_t GetInLen()const{return wInlen;}; 138 139 /// \return a default title which is the filename 140 const char* GetTitle() const; 141 142 ///@brief Extension method: deleted in ~OBConversion() GetAuxConv()143 OBConversion* GetAuxConv() const {return pAuxConv;}; SetAuxConv(OBConversion * pConv)144 void SetAuxConv(OBConversion* pConv) {pAuxConv=pConv;}; 145 //@} 146 /** @name Option handling 147 Three types of Option provide information and control instructions to the 148 conversion process, INOPTIONS, OUTOPTIONS, GENOPTIONS, and are stored in each 149 OBConversion object in separate maps. Each option has an id and an optional 150 text string. They are set individually by AddOption() or (rarely) collectively 151 in SetOptions(). Options cannot be altered but can be replaced with AddOption() 152 and deleted with RemoveOption(), which, however, should be used in an op derived 153 from OBOp (because of iterator invalidation). 154 155 If the "Convert" interface is used, the GENOPTIONS are acted upon in the 156 OBBase::DoTransformations() functions (currently only OBMol has one). This 157 happens after the object has been input but before it has been output. 158 All the options are available to input and output formats, etc. via the IsOption() 159 function, and the interpretation of any text string needs to be done subsequently. 160 161 In the commandline interface, options with single character ids are are indicated 162 like -s, and those with multiple character ids like --gen3D. An option may have 163 one or more parameters which appear, space separated, in the option's text string. 164 With babel, unless the option is at the end of the command, it is necessary for 165 the number of its parameters to be exactly that specified in RegisterOptionParam(). 166 The default is 0, but if it is more, and babel is likely to be used, this function 167 should be called in the constructor of a format or op. 168 With obabel (or the GUI), it is not necessary to call RegisterOptionParam(). 169 170 New GENOPTIONS can be defined (as plugins) using the class OBOp. 171 172 It is customary for a format or op to document any INOPTIONS or OUTPTIONS it 173 uses in its Description() function. As well as providing documentation during 174 use, this is also parsed by the GUI to construct its checkboxes,etc., so it is 175 advisable to give new Descriptions the same form as existing ones. 176 177 Some conversion options, such as -f, -l, -m, are unlikely to be used in 178 programming, but are listed in OBConversion::Description(). The built-in 179 GENOPTIONS for OBMol objects are listed in OBMol::ClassDescription() which 180 is in transform.cpp and also in this documentation under AddOption(). 181 */ 182 //@{ 183 ///@brief Three types of options set on the the command line by -a? , -x? , or -? 184 enum Option_type { INOPTIONS, OUTOPTIONS, GENOPTIONS, ALL }; 185 186 ///@brief Determine whether an option is set. \return NULL if option not and a pointer to the associated text if it is 187 const char* IsOption(const char* opt,Option_type opttyp=OUTOPTIONS); 188 189 ///@brief Access the map with option name as key and any associated text as value GetOptions(Option_type opttyp)190 const std::map<std::string,std::string>* GetOptions(Option_type opttyp) 191 { return &OptionsArray[opttyp];}; 192 193 ///@brief Set an option of specified type, with optional text 194 void AddOption(const char* opt, Option_type opttyp=OUTOPTIONS, const char* txt=nullptr); 195 196 bool RemoveOption(const char* opt, Option_type optype); 197 198 ///@brief Set several single character options of specified type from string like ab"btext"c"ctext" 199 void SetOptions(const char* options, Option_type opttyp); 200 201 ///@brief For example -h takes 0 parameters; -f takes 1. Call in a format constructor. 202 static void RegisterOptionParam(std::string name, OBFormat* pFormat, 203 int numberParams=0, Option_type typ=OUTOPTIONS); 204 205 /// \return the number of parameters registered for the option, or 0 if not found 206 static int GetOptionParams(std::string name, Option_type typ); 207 //@} 208 209 ///@brief Copies the options (by default of all types) from one OBConversion Object to another. 210 void CopyOptions(OBConversion* pSourceConv, Option_type typ=ALL); 211 212 /// @name Supported file format 213 //@{ 214 // @brief Set and return the list of supported input format 215 std::vector<std::string> GetSupportedInputFormat(); 216 // @brief Set and return the list of supported output format 217 std::vector<std::string> GetSupportedOutputFormat(); 218 //@} 219 220 /// @name Conversion 221 //@{ 222 /// @brief Conversion for single input and output stream 223 int Convert(std::istream* is, std::ostream* os); 224 225 /// @brief Conversion with existing streams 226 int Convert(); 227 228 /// @brief Conversion with multiple input/output files: 229 /// makes input and output streams, and carries out normal, batch, aggregation, and splitting conversion. 230 int FullConvert(std::vector<std::string>& FileList, 231 std::string& OutputFileName, std::vector<std::string>& OutputFileList); 232 //@} 233 234 /// @name Conversion loop control 235 //@{ 236 int AddChemObject(OBBase* pOb);///< @brief Adds to internal array during input 237 OBBase* GetChemObject(); ///< @brief Retrieve from internal array during output 238 bool IsLast();///< @brief True if no more objects to be output 239 bool IsFirstInput();///< @brief True if the first input object is being processed 240 void SetFirstInput(bool b=true);///< @brief Setwhether or not is the first input 241 int GetOutputIndex() const ;///< @brief Retrieves number of ChemObjects that have been actually output 242 void SetOutputIndex(int indx);///< @brief Sets output index (maybe to control whether seen as first object) 243 void SetMoreFilesToCome();///<@brief Used with multiple input files. Off by default. 244 void SetOneObjectOnly(bool b=true);///< @brief Used with multiple input files. Off by default. SetLast(bool b)245 void SetLast(bool b){SetOneObjectOnly(b);}///< @brief Synonym for SetOneObjectOnly() IsLastFile()246 bool IsLastFile(){ return !MoreFilesToCome;}///< @brief True if no more files to be read 247 /// @brief Number of objects read and processed 248 /// Incremented after options are processed, so 0 for first object. Returns -1 if Convert interface not used. GetCount()249 int GetCount()const { return Count; } 250 //@} 251 /// @name Convenience functions 252 //@{ 253 ///The default format is set in a single OBFormat class (generally it is OBMol) GetDefaultFormat()254 static OBFormat* GetDefaultFormat(){return OBFormat::FindType(nullptr);}; 255 256 /// @brief Outputs an object of a class derived from OBBase. 257 258 /// Part of "API" interface. 259 /// The output stream can be specified and the change is retained in the OBConversion instance 260 bool Write(OBBase* pOb, std::ostream* pout=nullptr); 261 262 /// @brief Outputs an object of a class derived from OBBase as a string 263 264 /// Part of "API" interface. 265 /// The output stream is temporarily changed to the string and then restored 266 /// This method is primarily intended for scripting languages without "stream" classes 267 /// The optional "trimWhitespace" parameter allows trailing whitespace to be removed 268 /// (e.g., in a SMILES string or InChI, etc.) 269 std::string WriteString(OBBase* pOb, bool trimWhitespace = false); 270 271 /// @brief Outputs an object of a class derived from OBBase as a file (with the supplied path) 272 273 /// Part of "API" interface. 274 /// The output stream is changed to the supplied file and the change is retained in the 275 /// OBConversion instance. 276 /// This method is primarily intended for scripting languages without "stream" classes 277 bool WriteFile(OBBase* pOb, std::string filePath); 278 279 /// @brief Manually closes and deletes the output stream 280 /// The file is closed anyway when in the OBConversion destructor or when WriteFile 281 /// is called again. 282 /// \since version 2.1 283 void CloseOutFile(); 284 285 /// @brief Reads an object of a class derived from OBBase into pOb. 286 287 /// Part of "API" interface. 288 /// The input stream can be specified and the change is retained in the OBConversion instance 289 /// \return false and pOb=NULL on error 290 bool Read(OBBase* pOb, std::istream* pin=nullptr); 291 292 /// Part of "API" interface. 293 /// The input stream can be specified and the change is retained in the OBConversion instance 294 /// \return NULL on error 295 // OBBase* ReadObject(std::istream* pin=NULL); 296 297 /// @brief Reads an object of a class derived from OBBase into pOb from the supplied string 298 299 /// Part of "API" interface. 300 /// \return false and pOb=NULL on error 301 /// This method is primarily intended for scripting languages without "stream" classes 302 /// Any existing input stream will be replaced by stringstream. 303 bool ReadString(OBBase* pOb, std::string input); 304 305 /// @brief Reads an object of a class derived from OBBase into pOb from the file specified 306 307 /// Part of "API" interface. 308 /// The output stream is changed to the supplied file and the change is retained in the 309 /// OBConversion instance. For multi-molecule files, the remaining molecules 310 /// can be read by repeatedly calling the Read() method. 311 /// \return false and pOb=NULL on error 312 /// This method is primarily intended for scripting languages without "stream" classes 313 bool ReadFile(OBBase* pOb, std::string filePath); 314 315 /// Part of the "Convert" interface. 316 /// Open the files and update the streams in the OBConversion object. 317 /// This method is primarily intended for scripting languages without "stream" classes 318 /// and will usually followed by a call to Convert(). 319 /// Will set format from file extension if format has not already been set. 320 /// Files will be opened even if format cannot be determined, but not if file path is empty. 321 /// \return false if unsuccessful. 322 bool OpenInAndOutFiles(std::string infilepath, std::string outfilepath); 323 324 /// @brief Sends a message like "2 molecules converted" to clog 325 /// The type of object is taken from the TargetClassDescription 326 /// of the specified class (or the output format if not specified)and 327 /// is appropriately singular or plural. 328 void ReportNumberConverted(int count, OBFormat* pFormat=nullptr); 329 330 /// \return the number of objects in the inputstream, 331 /// or -1 if error or if SkipObjects for the input format is not implemented 332 /// Adjusts for the value of -f and -l options (first and last objects). 333 int NumInputObjects(); 334 335 336 protected: 337 ///Replaces * in BaseName by InFile without extension and path 338 static std::string BatchFileName(std::string& BaseName, std::string& InFile); 339 ///Replaces * in BaseName by Count 340 static std::string IncrementedFileName(std::string& BaseName, const int Count); 341 ///Checks for misunderstandings when using the -m option 342 static bool CheckForUnintendedBatch(const std::string& infile, const std::string& outfile); 343 344 void ClearInStreams(); 345 //@} 346 347 protected: 348 349 //helper class for saving stream state 350 struct StreamState 351 { 352 std::ios *pStream; //active stream 353 std::vector<std::ios *> ownedStreams; //streams we own the memory to 354 StreamStateStreamState355 StreamState(): pStream(nullptr) {} ~StreamStateStreamState356 ~StreamState() 357 { 358 assert(ownedStreams.size() == 0); //should be popped 359 } 360 361 void pushInput(OBConversion& conv); 362 void popInput(OBConversion& conv); 363 364 void pushOutput(OBConversion& conv); 365 void popOutput(OBConversion& conv); 366 isSetStreamState367 bool isSet() const { return pStream != nullptr; } 368 }; 369 370 bool SetStartAndEnd(); 371 // static FMapType& FormatsMap();///<contains ID and pointer to all OBFormat classes 372 // static FMapType& FormatsMIMEMap();///<contains MIME and pointer to all OBFormat classes 373 typedef std::map<std::string,int> OPAMapType; 374 static OPAMapType& OptionParamArray(Option_type typ); 375 bool OpenAndSetFormat(bool SetFormat, std::ifstream* is, std::stringstream* ss=nullptr); 376 377 std::string InFilename, OutFilename; //OutFileName added v2.4.0 378 379 typedef FilteringInputStream< LineEndingExtractor > LEInStream; 380 381 std::istream *pInput; //input stream, may be filtered 382 std::vector<std::istream *> ownedInStreams; //streams we own the memory to 383 384 std::ostream *pOutput; //output stream, may have filters applied 385 std::vector<std::ostream *> ownedOutStreams; //streams we own the memory to 386 387 388 static OBFormat* pDefaultFormat; 389 OBFormat* pInFormat; 390 OBFormat* pOutFormat; 391 392 std::map<std::string,std::string> OptionsArray[3]; 393 394 int Index; 395 unsigned int StartNumber; 396 unsigned int EndNumber; 397 int Count; 398 bool m_IsFirstInput; 399 bool m_IsLast; 400 bool MoreFilesToCome; 401 bool OneObjectOnly; 402 bool ReadyToInput; 403 bool SkippedMolecules; /// skip molecules using -f and -l 404 405 //unlike the z and zin options, these are not sticky - setting formats will reset them 406 bool inFormatGzip; 407 bool outFormatGzip; 408 409 OBBase* pOb1; 410 std::streampos wInpos; ///<position in the input stream of the object being written 411 std::streampos rInpos; ///<position in the input stream of the object being read 412 size_t wInlen; ///<length in the input stream of the object being written 413 size_t rInlen; ///<length in the input stream of the object being read 414 415 OBConversion* pAuxConv;///<Way to extend OBConversion 416 417 std::vector<std::string> SupportedInputFormat; ///< list of supported input format 418 std::vector<std::string> SupportedOutputFormat; ///< list of supported output format 419 420 }; 421 422 } //namespace OpenBabel 423 #endif //OB_CONV_H 424 425 //! \file 426 //! \brief Handle file conversions. Declaration of OBFormat, OBConversion. 427 428 429