1 /**********************************************************************
2 obmolecformat.cpp - Implementation of subclass of OBFormat for conversion of OBMol.
3 
4 Copyright (C) 2005 Chris Morley
5 
6 This file is part of the Open Babel project.
7 For more information, see <http://openbabel.org/>
8 
9 This program is free software; you can redistribute it and/or modify
10 it under the terms of the GNU General Public License as published by
11 the Free Software Foundation version 2 of the License.
12 
13 This program is distributed in the hope that it will be useful,
14 but WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
16 GNU General Public License for more details.
17 ***********************************************************************/
18 #include <openbabel/babelconfig.h>
19 #include <openbabel/obmolecformat.h>
20 #include <openbabel/mol.h>
21 #ifdef HAVE_SHARED_POINTER
22   #include <openbabel/reaction.h>
23 #endif
24 
25 #include <algorithm>
26 
27 using namespace std;
28 namespace OpenBabel
29 {
30   bool OBMoleculeFormat::OptionsRegistered=false;
31   std::map<std::string, OBMol*> OBMoleculeFormat::IMols;
32   OBMol* OBMoleculeFormat::_jmol;
33   std::vector<OBMol> OBMoleculeFormat::MolArray;
34   bool OBMoleculeFormat::StoredMolsReady=false;
35 
ReadChemObjectImpl(OBConversion * pConv,OBFormat * pFormat)36   bool OBMoleculeFormat::ReadChemObjectImpl(OBConversion* pConv, OBFormat* pFormat)
37   {
38     std::istream *ifs = pConv->GetInStream();
39     if (!ifs || !ifs->good())
40       return false;
41 
42     OBMol* pmol = new OBMol;
43 
44     if(pConv->IsOption("C",OBConversion::GENOPTIONS))
45       return DeferMolOutput(pmol, pConv, pFormat);
46 
47     bool ret=true;
48    if(pConv->IsOption("separate",OBConversion::GENOPTIONS))
49    {
50      //On first call, separate molecule and put fragments in MolArray.
51      //On subsequent calls, remove a fragment from MolArray and send it for writing
52      //Done this way so that each fragment can be written to its own file (with -m option)
53      if(!StoredMolsReady)
54      {
55        while(ret) //do all the molecules in the file
56        {
57          ret = pFormat->ReadMolecule(pmol,pConv);
58 
59          if(ret && (pmol->NumAtoms() > 0 || (pFormat->Flags()&ZEROATOMSOK)))
60          {
61            vector<OBMol> SepArray = pmol->Separate(); //use un-transformed molecule
62            //Add an appropriate title to each fragment
63            if(SepArray.size()>1)
64              for (unsigned int i=0; i<SepArray.size(); ++i)
65              {
66                stringstream ss;
67                ss << pmol->GetTitle() << '#' << i+1;
68                string title = ss.str();
69                SepArray[i].SetTitle(title);
70              }
71            else
72               SepArray[0].SetTitle(pmol->GetTitle());
73 
74            copy(SepArray.begin(),SepArray.end(),back_inserter(MolArray));
75          }
76        }
77        reverse(MolArray.begin(),MolArray.end());
78        StoredMolsReady = true;
79        //Clear the flags of the input stream(which may have found eof) to ensure will
80        //try to read anothe molecule and allow the stored ones to be sent for output.
81        pConv->GetInStream()->clear();
82      }
83 
84      if(MolArray.empty()) //normal end of fragments
85        ret =false;
86      else
87      {
88        // Copying is needed because the OBMol passed to AddChemObject will be deleted.
89        // The OBMol in the vector is deleted here.
90        OBMol* pMolCopy = new OBMol( MolArray.back());
91        MolArray.pop_back();
92        ret = pConv->AddChemObject(
93            pMolCopy->DoTransformations(pConv->GetOptions(OBConversion::GENOPTIONS), pConv))!=0;
94      }
95      if(!ret)
96        StoredMolsReady = false;
97 
98      delete pmol;
99      return ret;
100    }
101 
102     ret=pFormat->ReadMolecule(pmol,pConv);
103 
104     OBMol* ptmol = nullptr;
105     //Molecule is valid if it has some atoms
106     //or it represents a reaction
107     //or the format allows zero-atom molecules and it has a title or properties
108     if(ret && (pmol->NumAtoms() > 0
109       || pmol->IsReaction()
110       || (pFormat->Flags()&ZEROATOMSOK && (*pmol->GetTitle() || pmol->HasData(1)))))
111     {
112       ptmol = static_cast<OBMol*>(pmol->DoTransformations(pConv->GetOptions(OBConversion::GENOPTIONS),pConv));
113       if(ptmol && (pConv->IsOption("j",OBConversion::GENOPTIONS)
114                 || pConv->IsOption("join",OBConversion::GENOPTIONS)))
115       {
116         //With j option, accumulate all mols in one stored in this class
117         if(pConv->IsFirstInput())
118           _jmol = new OBMol;
119         pConv->AddChemObject(_jmol);
120         //will be discarded in WriteChemObjectImpl until the last input mol. This complication
121         //is needed to allow joined molecules to be from different files. pOb1 in AddChem Object
122         //is zeroed at the end of a file and _jmol is in danger of not being output.
123         *_jmol += *ptmol;
124         delete ptmol;
125         return true;
126       }
127     }
128     else
129       delete pmol;
130 
131     // Normal operation - send molecule to be written
132     ret = ret && (pConv->AddChemObject(ptmol)!=0); //success of both writing and reading
133     return ret;
134   }
135 
WriteChemObjectImpl(OBConversion * pConv,OBFormat * pFormat)136   bool OBMoleculeFormat::WriteChemObjectImpl(OBConversion* pConv, OBFormat* pFormat)
137   {
138     if(pConv->IsOption("C",OBConversion::GENOPTIONS))
139       return OutputDeferredMols(pConv);
140     if(pConv->IsOption("j",OBConversion::GENOPTIONS)
141         || pConv->IsOption("join",OBConversion::GENOPTIONS))
142       {
143         //arrives here at the end of a file
144         if(!pConv->IsLast())
145           return true;
146         bool ret=pFormat->WriteMolecule(_jmol,pConv);
147         pConv->SetOutputIndex(1);
148         delete _jmol;
149         return ret;
150       }
151 
152 
153     //Retrieve the target OBMol
154     OBBase* pOb = pConv->GetChemObject();
155 
156     OBMol* pmol = dynamic_cast<OBMol*> (pOb);
157     bool ret=false;
158     if(pmol)
159       {
160         if(pmol->NumAtoms()==0)
161           {
162             std::string auditMsg = "OpenBabel::Molecule ";
163             auditMsg += pmol->GetTitle();
164             auditMsg += " has 0 atoms";
165             obErrorLog.ThrowError(__FUNCTION__,
166                                   auditMsg,
167                                   obInfo);
168           }
169         ret=true;
170 
171         ret = DoOutputOptions(pOb, pConv);
172 
173         if(ret)
174           ret = pFormat->WriteMolecule(pmol,pConv);
175     }
176 
177 #ifdef HAVE_SHARED_POINTER
178     //If sent a OBReaction* (rather than a OBMol*) output the consituent molecules
179     OBReaction* pReact = dynamic_cast<OBReaction*> (pOb);
180     if(pReact)
181       ret = OutputMolsFromReaction(pReact, pConv, pFormat);
182 #endif
183     delete pOb;
184     return ret;
185   }
186 
DoOutputOptions(OBBase * pOb,OBConversion * pConv)187   bool OBMoleculeFormat::DoOutputOptions(OBBase* pOb, OBConversion* pConv)
188   {
189     if(pConv->IsOption("addoutindex", OBConversion::GENOPTIONS)) {
190       stringstream ss;
191       ss << pOb->GetTitle() << " " << pConv->GetOutputIndex();
192       pOb->SetTitle(ss.str().c_str());
193     }
194 
195     OBMol* pmol = dynamic_cast<OBMol*> (pOb);
196     if(pmol) {
197       if(pConv->IsOption("writeconformers", OBConversion::GENOPTIONS)) {
198         //The last conformer is written in the calling function
199         int c = 0;
200         for (; c < pmol->NumConformers()-1; ++c) {
201           pmol->SetConformer(c);
202           if(!pConv->GetOutFormat()->WriteMolecule(pmol, pConv))
203             break;
204         }
205         pmol->SetConformer(c);
206       }
207     }
208     return true;
209   }
210 
211   /*! Instead of sending molecules for output via AddChemObject(), they are
212     saved in here in OBMoleculeFormat or discarded. By default they are
213     saved only if they are in the first input file. Parts of subsequent
214     molecules, such as chemical structure, coordinates and OBGenericData
215     can replace the parts in molecules with the same title that have already
216     been stored, subject to a set of rules. After all input files have been
217     read, the stored molecules (possibly now having augmented properties) are
218     sent to the output format.
219 
220     Is a static function with *this as parameter so that it can be called from other
221     format classes like XMLMoleculeFormat which are not derived from OBMoleculeFormat.
222   */
DeferMolOutput(OBMol * pmol,OBConversion * pConv,OBFormat * pF)223   bool OBMoleculeFormat::DeferMolOutput(OBMol* pmol, OBConversion* pConv, OBFormat* pF )
224   {
225     static bool IsFirstFile;
226     bool OnlyMolsInFirstFile=true;
227 
228     if(pConv->IsFirstInput())
229       {
230         IsFirstFile=true;
231         IMols.clear();
232         pConv->AddOption("OutputAtEnd", OBConversion::GENOPTIONS);
233       }
234     else
235       {
236         if((std::streamoff)pConv->GetInStream()->tellg()<=0)
237           IsFirstFile=false;//File has changed
238       }
239 
240     if (!pF->ReadMolecule(pmol,pConv))
241       {
242         delete pmol;
243         return false;
244       }
245     const char* ptitle = pmol->GetTitle();
246     if(*ptitle==0)
247       obErrorLog.ThrowError(__FUNCTION__, "Molecule with no title ignored", obWarning);
248     else
249       {
250         string title(ptitle);
251         string::size_type pos = title.find_first_of("\t\r\n"); //some title have other data appended
252         if(pos!=string::npos)
253           title.erase(pos);
254 
255         map<std::string, OBMol*>::iterator itr;
256         itr = IMols.find(title);
257         if(itr!=IMols.end())
258           {
259             //Molecule with the same title has been input previously: update it
260             OBMol* pNewMol = MakeCombinedMolecule(itr->second, pmol);
261             if(pNewMol)
262               {
263                 delete itr->second;
264                 IMols[title] = pNewMol;
265               }
266             else
267               {
268                 //error: cleanup and return false
269                 delete pmol;
270                 return DeleteDeferredMols();
271               }
272           }
273         else
274           {
275             //Molecule not already saved in IMols: save it if in first file
276             if(!OnlyMolsInFirstFile || IsFirstFile)
277               {
278                 IMols[title] = pmol;
279                 return true; //don't delete pmol
280               }
281           }
282       }
283     delete pmol;
284     return true;
285   }
286 
287   /*! Makes a new OBMol on the heap by combining two molecules according to the rule below.
288     If both have OBGenericData of the same type, or OBPairData with the
289     same attribute,  the version from pFirst is used.
290     Returns a pointer to a new OBMol which will need deleting by the calling program
291     (probably by being sent to an output format).
292     If the molecules cannot be regarded as being the same structure a NULL
293     pointer is returned and an error message logged.
294 
295     pFirst and pSecond and the objects they point to are not changed. (const
296     modifiers difficult because class OBMol not designed appropriately)
297 
298     Combining molecules: rules for each of the three parts
299     Title:
300     Use the title of pFirst unless it has none, when use that of pSecond.
301     Warning if neither molecule has a title.
302 
303     Structure
304     - a structure with atoms replaces one with no atoms
305     - a structure with bonds replaces one with no bonds,
306     provided the formula is the same, else an error.
307     - structures with atoms and bonds are compared by InChI; error if not the same.
308     - a structure with 3D coordinates replaces one with 2D coordinates
309     - a structure with 2D coordinates replace one with 0D coordinates
310 
311     OBGenericData
312     OBPairData
313   */
MakeCombinedMolecule(OBMol * pFirst,OBMol * pSecond)314   OBMol* OBMoleculeFormat::MakeCombinedMolecule(OBMol* pFirst, OBMol* pSecond)
315   {
316     //Decide on which OBMol provides the new title
317     string title("No title");
318     if(*pFirst->GetTitle()!=0)
319       title = pFirst->GetTitle();
320     else
321       {
322         if(*pSecond->GetTitle()!=0)
323           title = pSecond->GetTitle();
324         else
325           obErrorLog.ThrowError(__FUNCTION__,"Combined molecule has no title", obWarning);
326       }
327 
328     //Decide on which OBMol provides the new structure
329     bool swap=false;
330     if(pFirst->NumAtoms()==0 && pSecond->NumAtoms()!=0)
331       swap=true;
332     else if(pSecond->NumAtoms()!=0)
333       {
334         if(pFirst->GetSpacedFormula()!=pSecond->GetSpacedFormula())
335           {
336             obErrorLog.ThrowError(__FUNCTION__,
337                                   "Molecules with name = " + title + " have different formula",obError);
338             return nullptr;
339           }
340         else
341           {
342             if(pSecond->NumBonds()!=0 && pFirst->NumBonds()==0)
343               swap=true;
344             else
345               {
346                 //Compare by inchi; error if different NOT YET IMPLEMENTED
347                 //Use the one with the higher dimension
348                 if(pSecond->GetDimension() > pFirst->GetDimension())
349                   swap=true;
350               }
351           }
352       }
353 
354     OBMol* pNewMol = new OBMol;
355     pNewMol->SetTitle(title);
356 
357     OBMol* pMain = swap ? pSecond : pFirst;
358     OBMol* pOther = swap ? pFirst : pSecond;
359 
360     *pNewMol = *pMain; //Now copies all data
361 
362     //Copy some OBGenericData from the OBMol which did not provide the structure
363     vector<OBGenericData*>::iterator igd;
364     for(igd=pOther->BeginData();igd!=pOther->EndData();++igd)
365       {
366         //copy only if not already data of the same type from molecule already copied to pNewMol
367         unsigned datatype = (*igd)->GetDataType();
368         OBGenericData* pData = pNewMol->GetData(datatype);
369         if(datatype==OBGenericDataType::PairData)
370           {
371             if(pData->GetAttribute() == (*igd)->GetAttribute())
372               continue;
373           }
374         else if (pNewMol->GetData(datatype) != nullptr)
375           continue;
376 
377         OBGenericData* pCopiedData = (*igd)->Clone(pNewMol);
378         pNewMol->SetData(pCopiedData);
379       }
380     return pNewMol;
381   }
382 
OutputDeferredMols(OBConversion * pConv)383   bool OBMoleculeFormat::OutputDeferredMols(OBConversion* pConv)
384   {
385     std::map<std::string, OBMol*>::iterator itr, lastitr;
386     bool ret=false;
387     int i=1;
388     lastitr = IMols.end();
389     --lastitr;
390     pConv->SetOneObjectOnly(false);
391     for(itr=IMols.begin();itr!=IMols.end();++itr,++i)
392       {
393         if(!(itr->second)->DoTransformations(pConv->GetOptions(OBConversion::GENOPTIONS), pConv))
394           continue;
395         pConv->SetOutputIndex(i);
396         if(itr==lastitr)
397           pConv->SetOneObjectOnly(); //to set IsLast
398 
399         ret = pConv->GetOutFormat()->WriteMolecule(itr->second, pConv);
400 
401         delete itr->second; //always delete OBMol object
402         itr->second = nullptr; // so can be deleted in DeleteDeferredMols()
403         if (!ret) break;
404       }
405     DeleteDeferredMols();//cleans up in case there have been errors
406     return ret;
407   }
408 
DeleteDeferredMols()409   bool OBMoleculeFormat::DeleteDeferredMols()
410   {
411     //Empties IMols, deteting the OBMol objects whose pointers are stored there
412     std::map<std::string, OBMol*>::iterator itr;
413     for(itr=IMols.begin();itr!=IMols.end();++itr)
414       {
415         delete itr->second; //usually NULL
416       }
417     IMols.clear();
418     return false;
419   }
420 
421   ///////////////////////////////////////////////////////////////////
422 #ifdef HAVE_SHARED_POINTER
OutputMolsFromReaction(OBReaction * pReact,OBConversion * pConv,OBFormat * pFormat)423   bool OBMoleculeFormat::OutputMolsFromReaction
424     (OBReaction* pReact, OBConversion* pConv, OBFormat* pFormat)
425   {
426     //Output all the constituent molecules of the reaction
427 
428     //Collect the molecules first, just for convenience
429     vector<obsharedptr<OBMol> > mols;
430     for(int i=0;i<pReact->NumReactants();i++)
431       mols.push_back(pReact->GetReactant(i));
432     for(int i=0;i<pReact->NumProducts();i++)
433       mols.push_back(pReact->GetProduct(i));
434     for (int i = 0; i<pReact->NumAgents(); i++)
435       mols.push_back(pReact->GetAgent(i));
436 
437     if(pReact->GetTransitionState())
438       mols.push_back(pReact->GetTransitionState());
439 
440     pConv->SetOutputIndex(pConv->GetOutputIndex() - 1); // The OBReaction object is not output
441     if((pFormat->Flags() & WRITEONEONLY) && mols.size()>1)
442     {
443       stringstream ss;
444       ss << "There are " << mols.size() << " molecules to be output,"
445          << "but this format is for single molecules only";
446       obErrorLog.ThrowError(__FUNCTION__, ss.str(), obWarning);
447       mols.resize(1);
448     }
449     bool ok = true;
450     for(unsigned int i=0;i<mols.size() && ok;++i)
451     {
452       if(mols[i])
453       {
454         //Have to do set these manually because not using "Convert" interface
455         pConv->SetLast(i==mols.size()-1);
456         pConv->SetOutputIndex(pConv->GetOutputIndex()+1);
457         ok = pFormat->WriteMolecule(
458           mols[i]->DoTransformations(pConv->GetOptions(OBConversion::GENOPTIONS), pConv),pConv);
459       }
460     }
461     return ok;
462   }
463 #endif
464   //////////////////////////////////////////////////////////////////
465   /** Attempts to read the index file datafilename.obindx successively
466       from the following directories:
467       - the current directory
468       - that in the environment variable BABEL_DATADIR or in the macro BABEL_DATADIR
469       if the environment variable is not set
470       - in a subdirectory of the BABEL_DATADIR directory with the version of OpenBabel as its name
471       An index of type NameIndexType is then constructed. NameIndexType is defined
472       in obmolecformat.h and may be a std::tr1::unordered_map (a hash_map) or std::map.
473       In any case it is searched by
474       @code
475       NameIndexType::iterator itr = index.find(molecule_name);
476       if(itr!=index.end())
477       unsigned pos_in_datafile = itr->second;
478       @endcode
479       pos_in_datafile is used as a parameter in seekg() to read from the datafile
480 
481       If no index is found, it is constructed from the datafile by reading all of
482       it using the format pInFormat, and written to the directory containing the datafile.
483       This means that this function can be used without worrying whether there is an index.
484       It will be slow to execute the first time, but subsequent uses get the speed benefit
485       of indexed access to the datafile.
486 
487       The serialization and de-serialization of the NameIndexType is entirely in
488       this routine and could possibly be improved. Currently re-hashing is done
489       every time the index is read.
490   **/
491 
ReadNameIndex(NameIndexType & index,const string & datafilename,OBFormat * pInFormat)492   bool OBMoleculeFormat::ReadNameIndex(NameIndexType& index,
493                                        const string& datafilename, OBFormat* pInFormat)
494   {
495     struct headertype
496     {
497       char filename[256];
498       size_t size;
499     } header;
500 
501     NameIndexType::iterator itr;
502 
503     ifstream indexstream;
504     OpenDatafile(indexstream, datafilename + ".obindx");
505     if(!indexstream)
506       {
507         //Need to prepare the index
508         ifstream datastream;
509         string datafilepath = OpenDatafile(datastream, datafilename);
510         if(!datastream)
511           {
512             obErrorLog.ThrowError(__FUNCTION__,
513                                   datafilename + " was not found or could not be opened",  obError);
514             return false;
515           }
516 
517         OBConversion Conv(&datastream, nullptr);
518         Conv.SetInFormat(pInFormat);
519         OBMol mol;
520         streampos pos;
521         while(Conv.Read(&mol))
522           {
523             string name = mol.GetTitle();
524             if(!name.empty())
525               index.insert(make_pair(name, pos));
526             mol.Clear();
527             pos = datastream.tellg();
528           }
529         obErrorLog.ThrowError(__FUNCTION__,
530                               "Prepared an index for " + datafilepath, obAuditMsg);
531         //Save index to file
532         ofstream dofs((datafilepath + ".obindx").c_str(), ios_base::out|ios_base::binary);
533         if(!dofs) return false;
534 
535         strncpy(header.filename,datafilename.c_str(), sizeof(header.filename));
536         header.filename[sizeof(header.filename) - 1] = '\0';
537         header.size = index.size();
538         dofs.write((const char*)&header, sizeof(headertype));
539 
540         for(itr=index.begin();itr!=index.end();++itr)
541           {
542             //#chars; chars;  ofset(4bytes).
543             const char n = static_cast<char> (itr->first.size());
544             dofs.put(n);
545             dofs.write(itr->first.c_str(),n);
546             dofs.write((const char*)&itr->second,sizeof(unsigned));
547           }
548       }
549     else
550       {
551         //Read index data from file and put into hash_map
552         indexstream.read((char*)&header,sizeof(headertype));
553         itr=index.begin(); // for hint
554         for(unsigned int i=0;i<header.size;++i)
555           {
556             char len;
557             indexstream.get(len);
558             string title(len, 0);
559             unsigned pos;
560             indexstream.read(&title[0],len);
561             indexstream.read((char*)&pos,sizeof(unsigned));
562             index.insert(itr, make_pair(title,pos));
563           }
564       }
565     return true;
566   }
567 
568 } //namespace OpenBabel
569 
570 //! \file obmolecformat.cpp
571 //! \brief Subclass of OBFormat for conversion of OBMol.
572