1 /********************************************************************** 2 obmolecformat.cpp - Implementation of subclass of OBFormat for conversion of OBMol. 3 4 Copyright (C) 2005 Chris Morley 5 6 This file is part of the Open Babel project. 7 For more information, see <http://openbabel.org/> 8 9 This program is free software; you can redistribute it and/or modify 10 it under the terms of the GNU General Public License as published by 11 the Free Software Foundation version 2 of the License. 12 13 This program is distributed in the hope that it will be useful, 14 but WITHOUT ANY WARRANTY; without even the implied warranty of 15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 16 GNU General Public License for more details. 17 ***********************************************************************/ 18 #include <openbabel/babelconfig.h> 19 #include <openbabel/obmolecformat.h> 20 #include <openbabel/mol.h> 21 #ifdef HAVE_SHARED_POINTER 22 #include <openbabel/reaction.h> 23 #endif 24 25 #include <algorithm> 26 27 using namespace std; 28 namespace OpenBabel 29 { 30 bool OBMoleculeFormat::OptionsRegistered=false; 31 std::map<std::string, OBMol*> OBMoleculeFormat::IMols; 32 OBMol* OBMoleculeFormat::_jmol; 33 std::vector<OBMol> OBMoleculeFormat::MolArray; 34 bool OBMoleculeFormat::StoredMolsReady=false; 35 ReadChemObjectImpl(OBConversion * pConv,OBFormat * pFormat)36 bool OBMoleculeFormat::ReadChemObjectImpl(OBConversion* pConv, OBFormat* pFormat) 37 { 38 std::istream *ifs = pConv->GetInStream(); 39 if (!ifs || !ifs->good()) 40 return false; 41 42 OBMol* pmol = new OBMol; 43 44 if(pConv->IsOption("C",OBConversion::GENOPTIONS)) 45 return DeferMolOutput(pmol, pConv, pFormat); 46 47 bool ret=true; 48 if(pConv->IsOption("separate",OBConversion::GENOPTIONS)) 49 { 50 //On first call, separate molecule and put fragments in MolArray. 
51 //On subsequent calls, remove a fragment from MolArray and send it for writing 52 //Done this way so that each fragment can be written to its own file (with -m option) 53 if(!StoredMolsReady) 54 { 55 while(ret) //do all the molecules in the file 56 { 57 ret = pFormat->ReadMolecule(pmol,pConv); 58 59 if(ret && (pmol->NumAtoms() > 0 || (pFormat->Flags()&ZEROATOMSOK))) 60 { 61 vector<OBMol> SepArray = pmol->Separate(); //use un-transformed molecule 62 //Add an appropriate title to each fragment 63 if(SepArray.size()>1) 64 for (unsigned int i=0; i<SepArray.size(); ++i) 65 { 66 stringstream ss; 67 ss << pmol->GetTitle() << '#' << i+1; 68 string title = ss.str(); 69 SepArray[i].SetTitle(title); 70 } 71 else 72 SepArray[0].SetTitle(pmol->GetTitle()); 73 74 copy(SepArray.begin(),SepArray.end(),back_inserter(MolArray)); 75 } 76 } 77 reverse(MolArray.begin(),MolArray.end()); 78 StoredMolsReady = true; 79 //Clear the flags of the input stream(which may have found eof) to ensure will 80 //try to read anothe molecule and allow the stored ones to be sent for output. 81 pConv->GetInStream()->clear(); 82 } 83 84 if(MolArray.empty()) //normal end of fragments 85 ret =false; 86 else 87 { 88 // Copying is needed because the OBMol passed to AddChemObject will be deleted. 89 // The OBMol in the vector is deleted here. 
90 OBMol* pMolCopy = new OBMol( MolArray.back()); 91 MolArray.pop_back(); 92 ret = pConv->AddChemObject( 93 pMolCopy->DoTransformations(pConv->GetOptions(OBConversion::GENOPTIONS), pConv))!=0; 94 } 95 if(!ret) 96 StoredMolsReady = false; 97 98 delete pmol; 99 return ret; 100 } 101 102 ret=pFormat->ReadMolecule(pmol,pConv); 103 104 OBMol* ptmol = nullptr; 105 //Molecule is valid if it has some atoms 106 //or it represents a reaction 107 //or the format allows zero-atom molecules and it has a title or properties 108 if(ret && (pmol->NumAtoms() > 0 109 || pmol->IsReaction() 110 || (pFormat->Flags()&ZEROATOMSOK && (*pmol->GetTitle() || pmol->HasData(1))))) 111 { 112 ptmol = static_cast<OBMol*>(pmol->DoTransformations(pConv->GetOptions(OBConversion::GENOPTIONS),pConv)); 113 if(ptmol && (pConv->IsOption("j",OBConversion::GENOPTIONS) 114 || pConv->IsOption("join",OBConversion::GENOPTIONS))) 115 { 116 //With j option, accumulate all mols in one stored in this class 117 if(pConv->IsFirstInput()) 118 _jmol = new OBMol; 119 pConv->AddChemObject(_jmol); 120 //will be discarded in WriteChemObjectImpl until the last input mol. This complication 121 //is needed to allow joined molecules to be from different files. pOb1 in AddChem Object 122 //is zeroed at the end of a file and _jmol is in danger of not being output. 
123 *_jmol += *ptmol; 124 delete ptmol; 125 return true; 126 } 127 } 128 else 129 delete pmol; 130 131 // Normal operation - send molecule to be written 132 ret = ret && (pConv->AddChemObject(ptmol)!=0); //success of both writing and reading 133 return ret; 134 } 135 WriteChemObjectImpl(OBConversion * pConv,OBFormat * pFormat)136 bool OBMoleculeFormat::WriteChemObjectImpl(OBConversion* pConv, OBFormat* pFormat) 137 { 138 if(pConv->IsOption("C",OBConversion::GENOPTIONS)) 139 return OutputDeferredMols(pConv); 140 if(pConv->IsOption("j",OBConversion::GENOPTIONS) 141 || pConv->IsOption("join",OBConversion::GENOPTIONS)) 142 { 143 //arrives here at the end of a file 144 if(!pConv->IsLast()) 145 return true; 146 bool ret=pFormat->WriteMolecule(_jmol,pConv); 147 pConv->SetOutputIndex(1); 148 delete _jmol; 149 return ret; 150 } 151 152 153 //Retrieve the target OBMol 154 OBBase* pOb = pConv->GetChemObject(); 155 156 OBMol* pmol = dynamic_cast<OBMol*> (pOb); 157 bool ret=false; 158 if(pmol) 159 { 160 if(pmol->NumAtoms()==0) 161 { 162 std::string auditMsg = "OpenBabel::Molecule "; 163 auditMsg += pmol->GetTitle(); 164 auditMsg += " has 0 atoms"; 165 obErrorLog.ThrowError(__FUNCTION__, 166 auditMsg, 167 obInfo); 168 } 169 ret=true; 170 171 ret = DoOutputOptions(pOb, pConv); 172 173 if(ret) 174 ret = pFormat->WriteMolecule(pmol,pConv); 175 } 176 177 #ifdef HAVE_SHARED_POINTER 178 //If sent a OBReaction* (rather than a OBMol*) output the consituent molecules 179 OBReaction* pReact = dynamic_cast<OBReaction*> (pOb); 180 if(pReact) 181 ret = OutputMolsFromReaction(pReact, pConv, pFormat); 182 #endif 183 delete pOb; 184 return ret; 185 } 186 DoOutputOptions(OBBase * pOb,OBConversion * pConv)187 bool OBMoleculeFormat::DoOutputOptions(OBBase* pOb, OBConversion* pConv) 188 { 189 if(pConv->IsOption("addoutindex", OBConversion::GENOPTIONS)) { 190 stringstream ss; 191 ss << pOb->GetTitle() << " " << pConv->GetOutputIndex(); 192 pOb->SetTitle(ss.str().c_str()); 193 } 194 195 OBMol* pmol = 
dynamic_cast<OBMol*> (pOb); 196 if(pmol) { 197 if(pConv->IsOption("writeconformers", OBConversion::GENOPTIONS)) { 198 //The last conformer is written in the calling function 199 int c = 0; 200 for (; c < pmol->NumConformers()-1; ++c) { 201 pmol->SetConformer(c); 202 if(!pConv->GetOutFormat()->WriteMolecule(pmol, pConv)) 203 break; 204 } 205 pmol->SetConformer(c); 206 } 207 } 208 return true; 209 } 210 211 /*! Instead of sending molecules for output via AddChemObject(), they are 212 saved in here in OBMoleculeFormat or discarded. By default they are 213 saved only if they are in the first input file. Parts of subsequent 214 molecules, such as chemical structure, coordinates and OBGenericData 215 can replace the parts in molecules with the same title that have already 216 been stored, subject to a set of rules. After all input files have been 217 read, the stored molecules (possibly now having augmented properties) are 218 sent to the output format. 219 220 Is a static function with *this as parameter so that it can be called from other 221 format classes like XMLMoleculeFormat which are not derived from OBMoleculeFormat. 
222 */ DeferMolOutput(OBMol * pmol,OBConversion * pConv,OBFormat * pF)223 bool OBMoleculeFormat::DeferMolOutput(OBMol* pmol, OBConversion* pConv, OBFormat* pF ) 224 { 225 static bool IsFirstFile; 226 bool OnlyMolsInFirstFile=true; 227 228 if(pConv->IsFirstInput()) 229 { 230 IsFirstFile=true; 231 IMols.clear(); 232 pConv->AddOption("OutputAtEnd", OBConversion::GENOPTIONS); 233 } 234 else 235 { 236 if((std::streamoff)pConv->GetInStream()->tellg()<=0) 237 IsFirstFile=false;//File has changed 238 } 239 240 if (!pF->ReadMolecule(pmol,pConv)) 241 { 242 delete pmol; 243 return false; 244 } 245 const char* ptitle = pmol->GetTitle(); 246 if(*ptitle==0) 247 obErrorLog.ThrowError(__FUNCTION__, "Molecule with no title ignored", obWarning); 248 else 249 { 250 string title(ptitle); 251 string::size_type pos = title.find_first_of("\t\r\n"); //some title have other data appended 252 if(pos!=string::npos) 253 title.erase(pos); 254 255 map<std::string, OBMol*>::iterator itr; 256 itr = IMols.find(title); 257 if(itr!=IMols.end()) 258 { 259 //Molecule with the same title has been input previously: update it 260 OBMol* pNewMol = MakeCombinedMolecule(itr->second, pmol); 261 if(pNewMol) 262 { 263 delete itr->second; 264 IMols[title] = pNewMol; 265 } 266 else 267 { 268 //error: cleanup and return false 269 delete pmol; 270 return DeleteDeferredMols(); 271 } 272 } 273 else 274 { 275 //Molecule not already saved in IMols: save it if in first file 276 if(!OnlyMolsInFirstFile || IsFirstFile) 277 { 278 IMols[title] = pmol; 279 return true; //don't delete pmol 280 } 281 } 282 } 283 delete pmol; 284 return true; 285 } 286 287 /*! Makes a new OBMol on the heap by combining two molecules according to the rule below. 288 If both have OBGenericData of the same type, or OBPairData with the 289 same attribute, the version from pFirst is used. 290 Returns a pointer to a new OBMol which will need deleting by the calling program 291 (probably by being sent to an output format). 
292 If the molecules cannot be regarded as being the same structure a NULL 293 pointer is returned and an error message logged. 294 295 pFirst and pSecond and the objects they point to are not changed. (const 296 modifiers difficult because class OBMol not designed appropriately) 297 298 Combining molecules: rules for each of the three parts 299 Title: 300 Use the title of pFirst unless it has none, when use that of pSecond. 301 Warning if neither molecule has a title. 302 303 Structure 304 - a structure with atoms replaces one with no atoms 305 - a structure with bonds replaces one with no bonds, 306 provided the formula is the same, else an error. 307 - structures with atoms and bonds are compared by InChI; error if not the same. 308 - a structure with 3D coordinates replaces one with 2D coordinates 309 - a structure with 2D coordinates replace one with 0D coordinates 310 311 OBGenericData 312 OBPairData 313 */ MakeCombinedMolecule(OBMol * pFirst,OBMol * pSecond)314 OBMol* OBMoleculeFormat::MakeCombinedMolecule(OBMol* pFirst, OBMol* pSecond) 315 { 316 //Decide on which OBMol provides the new title 317 string title("No title"); 318 if(*pFirst->GetTitle()!=0) 319 title = pFirst->GetTitle(); 320 else 321 { 322 if(*pSecond->GetTitle()!=0) 323 title = pSecond->GetTitle(); 324 else 325 obErrorLog.ThrowError(__FUNCTION__,"Combined molecule has no title", obWarning); 326 } 327 328 //Decide on which OBMol provides the new structure 329 bool swap=false; 330 if(pFirst->NumAtoms()==0 && pSecond->NumAtoms()!=0) 331 swap=true; 332 else if(pSecond->NumAtoms()!=0) 333 { 334 if(pFirst->GetSpacedFormula()!=pSecond->GetSpacedFormula()) 335 { 336 obErrorLog.ThrowError(__FUNCTION__, 337 "Molecules with name = " + title + " have different formula",obError); 338 return nullptr; 339 } 340 else 341 { 342 if(pSecond->NumBonds()!=0 && pFirst->NumBonds()==0) 343 swap=true; 344 else 345 { 346 //Compare by inchi; error if different NOT YET IMPLEMENTED 347 //Use the one with the higher 
dimension 348 if(pSecond->GetDimension() > pFirst->GetDimension()) 349 swap=true; 350 } 351 } 352 } 353 354 OBMol* pNewMol = new OBMol; 355 pNewMol->SetTitle(title); 356 357 OBMol* pMain = swap ? pSecond : pFirst; 358 OBMol* pOther = swap ? pFirst : pSecond; 359 360 *pNewMol = *pMain; //Now copies all data 361 362 //Copy some OBGenericData from the OBMol which did not provide the structure 363 vector<OBGenericData*>::iterator igd; 364 for(igd=pOther->BeginData();igd!=pOther->EndData();++igd) 365 { 366 //copy only if not already data of the same type from molecule already copied to pNewMol 367 unsigned datatype = (*igd)->GetDataType(); 368 OBGenericData* pData = pNewMol->GetData(datatype); 369 if(datatype==OBGenericDataType::PairData) 370 { 371 if(pData->GetAttribute() == (*igd)->GetAttribute()) 372 continue; 373 } 374 else if (pNewMol->GetData(datatype) != nullptr) 375 continue; 376 377 OBGenericData* pCopiedData = (*igd)->Clone(pNewMol); 378 pNewMol->SetData(pCopiedData); 379 } 380 return pNewMol; 381 } 382 OutputDeferredMols(OBConversion * pConv)383 bool OBMoleculeFormat::OutputDeferredMols(OBConversion* pConv) 384 { 385 std::map<std::string, OBMol*>::iterator itr, lastitr; 386 bool ret=false; 387 int i=1; 388 lastitr = IMols.end(); 389 --lastitr; 390 pConv->SetOneObjectOnly(false); 391 for(itr=IMols.begin();itr!=IMols.end();++itr,++i) 392 { 393 if(!(itr->second)->DoTransformations(pConv->GetOptions(OBConversion::GENOPTIONS), pConv)) 394 continue; 395 pConv->SetOutputIndex(i); 396 if(itr==lastitr) 397 pConv->SetOneObjectOnly(); //to set IsLast 398 399 ret = pConv->GetOutFormat()->WriteMolecule(itr->second, pConv); 400 401 delete itr->second; //always delete OBMol object 402 itr->second = nullptr; // so can be deleted in DeleteDeferredMols() 403 if (!ret) break; 404 } 405 DeleteDeferredMols();//cleans up in case there have been errors 406 return ret; 407 } 408 DeleteDeferredMols()409 bool OBMoleculeFormat::DeleteDeferredMols() 410 { 411 //Empties IMols, deteting 
the OBMol objects whose pointers are stored there 412 std::map<std::string, OBMol*>::iterator itr; 413 for(itr=IMols.begin();itr!=IMols.end();++itr) 414 { 415 delete itr->second; //usually NULL 416 } 417 IMols.clear(); 418 return false; 419 } 420 421 /////////////////////////////////////////////////////////////////// 422 #ifdef HAVE_SHARED_POINTER OutputMolsFromReaction(OBReaction * pReact,OBConversion * pConv,OBFormat * pFormat)423 bool OBMoleculeFormat::OutputMolsFromReaction 424 (OBReaction* pReact, OBConversion* pConv, OBFormat* pFormat) 425 { 426 //Output all the constituent molecules of the reaction 427 428 //Collect the molecules first, just for convenience 429 vector<obsharedptr<OBMol> > mols; 430 for(int i=0;i<pReact->NumReactants();i++) 431 mols.push_back(pReact->GetReactant(i)); 432 for(int i=0;i<pReact->NumProducts();i++) 433 mols.push_back(pReact->GetProduct(i)); 434 for (int i = 0; i<pReact->NumAgents(); i++) 435 mols.push_back(pReact->GetAgent(i)); 436 437 if(pReact->GetTransitionState()) 438 mols.push_back(pReact->GetTransitionState()); 439 440 pConv->SetOutputIndex(pConv->GetOutputIndex() - 1); // The OBReaction object is not output 441 if((pFormat->Flags() & WRITEONEONLY) && mols.size()>1) 442 { 443 stringstream ss; 444 ss << "There are " << mols.size() << " molecules to be output," 445 << "but this format is for single molecules only"; 446 obErrorLog.ThrowError(__FUNCTION__, ss.str(), obWarning); 447 mols.resize(1); 448 } 449 bool ok = true; 450 for(unsigned int i=0;i<mols.size() && ok;++i) 451 { 452 if(mols[i]) 453 { 454 //Have to do set these manually because not using "Convert" interface 455 pConv->SetLast(i==mols.size()-1); 456 pConv->SetOutputIndex(pConv->GetOutputIndex()+1); 457 ok = pFormat->WriteMolecule( 458 mols[i]->DoTransformations(pConv->GetOptions(OBConversion::GENOPTIONS), pConv),pConv); 459 } 460 } 461 return ok; 462 } 463 #endif 464 ////////////////////////////////////////////////////////////////// 465 /** Attempts to read the 
index file datafilename.obindx successively
      from the following directories:
      - the current directory
      - that in the environment variable BABEL_DATADIR or in the macro BABEL_DATADIR
      if the environment variable is not set
      - in a subdirectory of the BABEL_DATADIR directory with the version of OpenBabel as its name
      An index of type NameIndexType is then constructed. NameIndexType is defined
      in obmolecformat.h and may be a std::tr1::unordered_map (a hash_map) or std::map.
      In any case it is searched by
      @code
      NameIndexType::iterator itr = index.find(molecule_name);
      if(itr!=index.end())
      unsigned pos_in_datafile = itr->second;
      @endcode
      pos_in_datafile is used as a parameter in seekg() to read from the datafile.

      If no index is found, it is constructed from the datafile by reading all of
      it using the format pInFormat, and written to the directory containing the datafile.
      This means that this function can be used without worrying whether there is an index.
      It will be slow to execute the first time, but subsequent uses get the speed benefit
      of indexed access to the datafile.

      The serialization and de-serialization of the NameIndexType is entirely in
      this routine and could possibly be improved. Currently re-hashing is done
      every time the index is read.
490 **/ 491 ReadNameIndex(NameIndexType & index,const string & datafilename,OBFormat * pInFormat)492 bool OBMoleculeFormat::ReadNameIndex(NameIndexType& index, 493 const string& datafilename, OBFormat* pInFormat) 494 { 495 struct headertype 496 { 497 char filename[256]; 498 size_t size; 499 } header; 500 501 NameIndexType::iterator itr; 502 503 ifstream indexstream; 504 OpenDatafile(indexstream, datafilename + ".obindx"); 505 if(!indexstream) 506 { 507 //Need to prepare the index 508 ifstream datastream; 509 string datafilepath = OpenDatafile(datastream, datafilename); 510 if(!datastream) 511 { 512 obErrorLog.ThrowError(__FUNCTION__, 513 datafilename + " was not found or could not be opened", obError); 514 return false; 515 } 516 517 OBConversion Conv(&datastream, nullptr); 518 Conv.SetInFormat(pInFormat); 519 OBMol mol; 520 streampos pos; 521 while(Conv.Read(&mol)) 522 { 523 string name = mol.GetTitle(); 524 if(!name.empty()) 525 index.insert(make_pair(name, pos)); 526 mol.Clear(); 527 pos = datastream.tellg(); 528 } 529 obErrorLog.ThrowError(__FUNCTION__, 530 "Prepared an index for " + datafilepath, obAuditMsg); 531 //Save index to file 532 ofstream dofs((datafilepath + ".obindx").c_str(), ios_base::out|ios_base::binary); 533 if(!dofs) return false; 534 535 strncpy(header.filename,datafilename.c_str(), sizeof(header.filename)); 536 header.filename[sizeof(header.filename) - 1] = '\0'; 537 header.size = index.size(); 538 dofs.write((const char*)&header, sizeof(headertype)); 539 540 for(itr=index.begin();itr!=index.end();++itr) 541 { 542 //#chars; chars; ofset(4bytes). 
543 const char n = static_cast<char> (itr->first.size()); 544 dofs.put(n); 545 dofs.write(itr->first.c_str(),n); 546 dofs.write((const char*)&itr->second,sizeof(unsigned)); 547 } 548 } 549 else 550 { 551 //Read index data from file and put into hash_map 552 indexstream.read((char*)&header,sizeof(headertype)); 553 itr=index.begin(); // for hint 554 for(unsigned int i=0;i<header.size;++i) 555 { 556 char len; 557 indexstream.get(len); 558 string title(len, 0); 559 unsigned pos; 560 indexstream.read(&title[0],len); 561 indexstream.read((char*)&pos,sizeof(unsigned)); 562 index.insert(itr, make_pair(title,pos)); 563 } 564 } 565 return true; 566 } 567 568 } //namespace OpenBabel 569 570 //! \file obmolecformat.cpp 571 //! \brief Subclass of OBFormat for conversion of OBMol. 572