1 /********************************************************************** 2 Copyright (C) 1998-2001 by OpenEye Scientific Software, Inc. 3 Some portions Copyright (C) 2001-2007 by Geoffrey R. Hutchison 4 Some portions Copyright (C) 2004 by Chris Morley 5 6 This program is free software; you can redistribute it and/or modify 7 it under the terms of the GNU General Public License as published by 8 the Free Software Foundation version 2 of the License. 9 10 This program is distributed in the hope that it will be useful, 11 but WITHOUT ANY WARRANTY; without even the implied warranty of 12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 13 GNU General Public License for more details. 14 ***********************************************************************/ 15 #include <openbabel/babelconfig.h> 16 17 #include <openbabel/obmolecformat.h> 18 #include <openbabel/mol.h> 19 #include <openbabel/atom.h> 20 #include <openbabel/bond.h> 21 #include <openbabel/obiter.h> 22 #include <openbabel/elements.h> 23 #include <openbabel/generic.h> 24 #include <openbabel/kekulize.h> 25 #include <openbabel/obfunctions.h> 26 #include <openbabel/data.h> 27 #include <cstdlib> 28 29 using namespace std; 30 namespace OpenBabel 31 { 32 //The routine WriteSmiOrderedMol2() in the original mol2.cpp is presumably 33 //another output format, but was not made available in version 100.1.2, nor 34 //is it here. 35 36 class MOL2Format : public OBMoleculeFormat 37 { 38 public: 39 //Register this format type ID MOL2Format()40 MOL2Format() 41 { 42 OBConversion::RegisterFormat("mol2",this, "chemical/x-mol2"); 43 OBConversion::RegisterFormat("ml2",this); 44 OBConversion::RegisterFormat("sy2",this); 45 OBConversion::RegisterOptionParam("c", this, 0, OBConversion::INOPTIONS); 46 OBConversion::RegisterOptionParam("c", this, 0, OBConversion::OUTOPTIONS); 47 OBConversion::RegisterOptionParam("l", this, 0, OBConversion::OUTOPTIONS); 48 OBConversion::RegisterOptionParam("u", this, 0, OBConversion::OUTOPTIONS); 49 } 50 Description()51 virtual const char* Description() //required 52 { 53 return 54 "Sybyl Mol2 format\n" 55 "Read Options e.g. -ac\n" 56 " c Read UCSF Dock scores saved in comments preceding molecules\n\n" 57 "Write Options e.g. -xl\n" 58 " l Output ignores residue information (only ligands)\n" 59 " c Write UCSF Dock scores saved in comments preceding molecules\n" 60 " u Do not write formal charge information in UNITY records\n\n"; 61 }; 62 SpecificationURL()63 virtual const char* SpecificationURL() 64 { 65 return "http://www.tripos.com/data/support/mol2.pdf"; 66 }; //optional 67 GetMIMEType()68 virtual const char* GetMIMEType() 69 { return "chemical/x-mol2"; }; 70 71 virtual int SkipObjects(int n, OBConversion* pConv); 72 73 //*** This section identical for most OBMol conversions *** 74 //////////////////////////////////////////////////// 75 /// The "API" interface functions 76 virtual bool ReadMolecule(OBBase* pOb, OBConversion* pConv); 77 virtual bool WriteMolecule(OBBase* pOb, OBConversion* pConv); 78 }; 79 //*** 80 81 //Make an instance of the format class 82 MOL2Format theMOL2Format; 83 84 // Helper function for ReadMolecule 85 // \return Is this atom a sulfur in a (di)thiocarboxyl (-CS2, -COS, CS2H or COSH) group? IsThiocarboxylSulfur(OBAtom * queryatom)86 static bool IsThiocarboxylSulfur(OBAtom* queryatom) 87 { 88 if (queryatom->GetAtomicNum() != OBElements::Sulfur) 89 return(false); 90 if (queryatom->GetHvyDegree() != 1) 91 return(false); 92 93 OBAtom *atom = nullptr; 94 OBBond *bond; 95 OBBondIterator i; 96 97 for (bond = queryatom->BeginBond(i); bond; bond = queryatom->NextBond(i)) 98 if ((bond->GetNbrAtom(queryatom))->GetAtomicNum() == OBElements::Carbon) 99 { 100 atom = bond->GetNbrAtom(queryatom); 101 break; 102 } 103 if (!atom) 104 return(false); 105 if (!(atom->CountFreeSulfurs() == 2) 106 && !(atom->CountFreeOxygens() == 1 && atom->CountFreeSulfurs() == 1)) 107 return(false); 108 109 //atom is connected to a carbon that has a total 110 //of 2 attached free sulfurs or 1 free oxygen and 1 free sulfur 111 return(true); 112 } 113 IsOxygenOrSulfur(OBAtom * atom)114 static bool IsOxygenOrSulfur(OBAtom *atom) 115 { 116 switch (atom->GetAtomicNum()) { 117 case 8: case 16: return true; 118 default: return false; 119 } 120 } 121 GetAtomicNumAndIsotope(const char * symbol,int * isotope)122 static unsigned int GetAtomicNumAndIsotope(const char* symbol, int *isotope) 123 { 124 const char* p = symbol; 125 switch (p[0]) { 126 case 'D': 127 if (p[1] == '\0') { 128 *isotope = 2; 129 return 1; 130 } 131 break; 132 case 'T': 133 if (p[1] == '\0') { 134 *isotope = 3; 135 return 1; 136 } 137 break; 138 } 139 return OBElements::GetAtomicNum(symbol); 140 } 141 142 //read from ifs until next rti is found and return it read_until_rti(istream & ifs)143 static string read_until_rti(istream & ifs) 144 { 145 char buffer[BUFF_SIZE]; 146 for (;;) 147 { 148 if (!ifs.getline(buffer,BUFF_SIZE)) 149 return ""; 150 if (!strncmp(buffer,"@<TRIPOS>",9)) 151 return string(buffer); 152 } 153 } 154 ///////////////////////////////////////////////////////////////// ReadMolecule(OBBase * pOb,OBConversion * pConv)155 bool MOL2Format::ReadMolecule(OBBase* pOb, OBConversion* pConv) 156 { 157 158 OBMol* pmol = pOb->CastAndClear<OBMol>(); 159 if (pmol == nullptr) 160 return false; 161 162 //Define some references so we can use the old parameter names 163 istream &ifs = *pConv->GetInStream(); 164 OBMol &mol = *pmol; 165 166 //Old code follows... 167 bool foundAtomLine = false; 168 char buffer[BUFF_SIZE]; 169 char *comment = nullptr; 170 string str,str1; 171 vector<string> vstr; 172 int len; 173 174 // Prevent reperception 175 mol.SetChainsPerceived(); 176 177 mol.BeginModify(); 178 179 for (;;) 180 { 181 if (!ifs.getline(buffer,BUFF_SIZE)) 182 return(false); 183 if (pConv->IsOption("c", OBConversion::INOPTIONS) != nullptr && EQn(buffer, "###########", 10)) 184 { 185 char attr[32], val[32]; 186 sscanf(buffer, "########## %[^:]:%s", attr, val); 187 OBPairData *dd = new OBPairData; 188 dd->SetAttribute(attr); 189 dd->SetValue(val); 190 dd->SetOrigin(fileformatInput); 191 mol.SetData(dd); 192 } 193 if (EQn(buffer,"@<TRIPOS>MOLECULE",17)) 194 break; 195 } 196 197 // OK, just read MOLECULE line 198 int lcount; 199 int natoms,nbonds; 200 bool hasPartialCharges = true; 201 for (lcount=0;;lcount++) 202 { 203 if (!ifs.getline(buffer,BUFF_SIZE)) 204 return(false); 205 if (EQn(buffer,"@<TRIPOS>ATOM",13)) 206 { 207 foundAtomLine = true; 208 break; 209 } 210 211 if (lcount == 0) 212 { 213 tokenize(vstr,buffer); 214 if (!vstr.empty()) 215 mol.SetTitle(buffer); 216 } 217 else if (lcount == 1) 218 sscanf(buffer,"%d%d",&natoms,&nbonds); 219 else if (lcount == 3) // charge descriptions 220 { 221 // Annotate origin of partial charges 222 OBPairData *dp = new OBPairData; 223 dp->SetAttribute("PartialCharges"); 224 dp->SetValue(buffer); 225 dp->SetOrigin(fileformatInput); 226 mol.SetData(dp); 227 228 if (strncasecmp(buffer, "NO_CHARGES", 10) == 0) 229 hasPartialCharges = false; 230 } 231 else if (lcount == 4) //energy (?) 232 { 233 tokenize(vstr,buffer); 234 if (!vstr.empty() && vstr.size() == 3) 235 if (vstr[0] == "Energy") 236 mol.SetEnergy(atof(vstr[2].c_str())); 237 } 238 else if (lcount == 5) //comment 239 { 240 if ( buffer[0] ) 241 { 242 len = (int) strlen(buffer)+1; 243 //! @todo allow better multi-line comments 244 // which don't allow ill-formed data to consume memory 245 // Thanks to Andrew Dalke for the pointer 246 if (comment != nullptr) 247 delete [] comment; 248 comment = new char [len]; 249 memcpy(comment,buffer,len); 250 } 251 } 252 } 253 254 if (!foundAtomLine) 255 { 256 mol.EndModify(); 257 mol.Clear(); 258 obErrorLog.ThrowError(__FUNCTION__, "Unable to read Mol2 format file. No atoms found.", obWarning); 259 return(false); 260 } 261 262 mol.ReserveAtoms(natoms); 263 264 int i; 265 vector3 v; 266 OBAtom atom; 267 double x,y,z,pcharge; 268 char temp_type[BUFF_SIZE], resname[BUFF_SIZE], atmid[BUFF_SIZE]; 269 int elemno, resnum = -1; 270 int isotope = 0; 271 bool has_explicit_hydrogen = false; 272 bool has_residue_information = false; 273 274 ttab.SetFromType("SYB"); 275 for (i = 0;i < natoms;i++) 276 { 277 if (!ifs.getline(buffer,BUFF_SIZE)) 278 return(false); 279 sscanf(buffer," %*s %1024s %lf %lf %lf %1024s %d %1024s %lf", 280 atmid, &x,&y,&z, temp_type, &resnum, resname, &pcharge); 281 282 atom.SetVector(x, y, z); 283 atom.SetFormalCharge(0); 284 285 // Handle "CL" and "BR" and other mis-typed atoms 286 str = temp_type; 287 if (strncmp(temp_type, "CL", 2) == 0) { 288 str = "Cl"; 289 } else if (strncmp(temp_type,"BR",2) == 0) { 290 str = "Br"; 291 } else if (strncmp(temp_type,"S.o2", 4) == 0) { 292 str = "S.O2"; 293 } else if (strncmp(temp_type,"S.o", 3) == 0) { 294 str = "S.O"; 295 } else if (strncmp(temp_type,"SI", 2) == 0) { 296 str = "Si"; 297 // The following cases are entries which are not in openbabel/data/types.txt 298 // and should probably be added there 299 } else if (strncmp(temp_type,"S.1", 3) == 0) { 300 str = "S.2"; // no idea what the best type might be here 301 } else if (strncmp(temp_type,"P.", 2) == 0) { 302 str = "P.3"; 303 } else if (strncasecmp(temp_type,"Ti.", 3) == 0) { // e.g. Ti.th 304 str = "Ti"; 305 } else if (strncasecmp(temp_type,"Ru.", 3) == 0) { // e.g. Ru.oh 306 str = "Ru"; 307 // Fixes PR#3557898 308 } else if (strncmp(temp_type, "N.4", 3) == 0) { 309 atom.SetFormalCharge(1); 310 } 311 312 ttab.SetToType("ATN"); 313 ttab.Translate(str1,str); 314 elemno = atoi(str1.c_str()); 315 ttab.SetToType("IDX"); 316 317 // We might have missed some SI or FE type things above, so here's 318 // another check 319 if( !elemno && isupper(temp_type[1]) ) 320 { 321 temp_type[1] = (char)tolower(temp_type[1]); 322 str = temp_type; 323 ttab.SetToType("ATN"); 324 ttab.Translate(str1,str); 325 elemno = atoi(str1.c_str()); 326 ttab.SetToType("IDX"); 327 } 328 // One last check if there isn't a period in the type, 329 // it's a malformed atom type, but it may be the element symbol 330 // GaussView does this (PR#1739905) 331 if ( !elemno ) { 332 // check if it's "Du" or "Xx" and the element is in the atom name 333 if (str == "Du" || str == "Xx") { 334 str = atmid; 335 for (unsigned int i = 0; i < str.length(); ++i) 336 if (!isalpha(str[i])) { 337 str.erase(i); 338 break; // we've erased the end of the string 339 } 340 } 341 342 343 std::stringstream errorMsg; 344 errorMsg << "This Mol2 file is non-standard. Problem with molecule: " 345 << mol.GetTitle() 346 << " Cannot interpret atom types correctly, instead attempting to interpret atom type: " 347 << str << " as elements instead."; 348 obErrorLog.ThrowError(__FUNCTION__, errorMsg.str(), obWarning); 349 350 string::size_type dotPos = str.find('.'); 351 if (dotPos == string::npos) { 352 elemno = GetAtomicNumAndIsotope(str.c_str(), &isotope); 353 } 354 } 355 356 atom.SetAtomicNum(elemno); 357 if (isotope) 358 atom.SetIsotope(isotope); 359 else if (elemno == 1) 360 has_explicit_hydrogen = true; 361 ttab.SetToType("INT"); 362 ttab.Translate(str1,str); 363 atom.SetType(str1); 364 atom.SetPartialCharge(pcharge); 365 // MMFF94 has different atom types for Cu(I) and Cu(II) 366 // as well as for Fe(II) and Fe(III), so the correct formal 367 // charge is needed for correct atom type assignment 368 if (str1 == "Cu" || str1 == "Fe") 369 atom.SetFormalCharge((int)pcharge); 370 if (!mol.AddAtom(atom)) 371 return(false); 372 if (!IsNearZero(pcharge)) 373 hasPartialCharges = true; 374 375 // Add residue information if it exists 376 if (resnum != -1 && resnum != 0 && 377 strlen(resname) != 0 && strncmp(resname,"<1>", 3) != 0) 378 { 379 has_residue_information = true; 380 OBResidue *res = (mol.NumResidues() > 0) ? 381 mol.GetResidue(mol.NumResidues()-1) : nullptr; 382 if (res == nullptr || res->GetName() != resname || 383 res->GetNum() != resnum) 384 { 385 vector<OBResidue*>::iterator ri; 386 for (res = mol.BeginResidue(ri) ; res ; res = mol.NextResidue(ri)) 387 if (res->GetName() == resname && 388 res->GetNum() == resnum) 389 break; 390 391 if (res == nullptr) 392 { 393 res = mol.NewResidue(); 394 res->SetName(resname); 395 res->SetNum(resnum); 396 } 397 } 398 OBAtom *atomPtr = mol.GetAtom(mol.NumAtoms()); 399 res->AddAtom(atomPtr); 400 res->SetAtomID(atomPtr, atmid); 401 } // end adding residue info 402 } 403 404 string nextrti; 405 do { nextrti = read_until_rti(ifs); } 406 while(nextrti != "@<TRIPOS>UNITY_ATOM_ATTR" && nextrti != "@<TRIPOS>BOND" && nextrti.length() > 0); 407 408 if(nextrti == "@<TRIPOS>UNITY_ATOM_ATTR") 409 { //read in formal charge information, must be done before Kekulization 410 int aid = 0, num = 0; 411 while (ifs.peek() != '@' && ifs.getline(buffer,BUFF_SIZE)) 412 { 413 sscanf(buffer,"%d %d",&aid, &num); 414 for(int i = 0; i < num; i++) 415 { 416 if (!ifs.getline(buffer,BUFF_SIZE)) 417 return(false); 418 if(strncmp(buffer, "charge", 6) == 0) 419 { 420 int charge = 0; 421 sscanf(buffer,"%*s %d",&charge); 422 if(aid <= mol.NumAtoms()) 423 { 424 OBAtom *atom = mol.GetAtom(aid); 425 atom->SetFormalCharge(charge); 426 } 427 } 428 } 429 } 430 } 431 432 while(nextrti != "@<TRIPOS>BOND" && nextrti.length() > 0) 433 nextrti = read_until_rti(ifs); 434 435 if(nextrti != "@<TRIPOS>BOND") 436 return false; 437 438 int start, end; 439 bool needs_kekulization = false; 440 for (i = 0; i < nbonds; i++) 441 { 442 if (!ifs.getline(buffer,BUFF_SIZE)) 443 return(false); 444 445 sscanf(buffer,"%*d %d %d %1024s",&start,&end,temp_type); 446 str = temp_type; 447 unsigned int flags = 0; 448 int order; 449 if (str == "ar" || str == "AR" || str == "Ar") { 450 order = 1; 451 flags = OB_AROMATIC_BOND; 452 needs_kekulization = true; 453 } 454 else if (str == "AM" || str == "am" || str == "Am") 455 order = 1; 456 else 457 order = atoi(str.c_str()); 458 459 mol.AddBond(start, end, order, flags); 460 } 461 462 // TODO: Add a test case for the statement below of Paolo Tosco 463 // - I am currently assuming that is not a problem for the 464 // the current kekulization code, but it needs to be 465 // checked 466 // "Make a pass to ensure that there are no double bonds 467 // between atoms which are also involved in aromatic bonds 468 // as that may ill-condition kekulization (fixes potential 469 // issues with molecules like CEWYIM30 (MMFF94 validation suite)" 470 471 mol.SetAromaticPerceived(); // don't trigger reperception 472 473 if (has_explicit_hydrogen) { 474 FOR_ATOMS_OF_MOL(atom, mol) { 475 unsigned int total_valence = atom->GetTotalDegree(); 476 switch (atom->GetAtomicNum()) { 477 case 8: 478 if (total_valence != 1) continue; 479 if (strcmp(atom->GetType(), "O2") != 0) continue; // TODO: the O.co2 type is lost by this point 480 { 481 OBAtomBondIter bit(&*atom); 482 if (!bit->IsAromatic() && bit->GetBondOrder() == 1) 483 atom->SetFormalCharge(-1); // set -1 charge on dangling O.co2 484 } 485 break; 486 case 17: // Cl 487 if (total_valence == 0) 488 atom->SetFormalCharge(-1); 489 break; 490 } 491 } 492 } 493 494 // Kekulization is necessary if an aromatic bond is present 495 if (needs_kekulization) { 496 // "de-aromatize" carboxylates and (di)thiocarboxylates 497 // The typical case (in our test suite anyway) is a carboxylate binding to 498 // a metal ion. The two O's have charges of -0.5 and the bond orders are aromatic 499 FOR_ATOMS_OF_MOL(atom, mol) { 500 OBAtom* oxygenOrSulfur = &*atom; 501 // Look first for a terminal O/S 502 if (!IsOxygenOrSulfur(oxygenOrSulfur) || oxygenOrSulfur->GetTotalDegree() != 1) continue; 503 OBAtomBondIter bitA(oxygenOrSulfur); 504 OBBond *bondA = &*bitA; 505 if (!bondA->IsAromatic()) continue; 506 // Look for the carbon 507 OBAtom *carbon = bondA->GetNbrAtom(oxygenOrSulfur); 508 if (carbon->GetAtomicNum() != 6) continue; 509 // Look for the other oxygen or sulfur 510 OBAtom* otherOxygenOrSulfur = nullptr; 511 OBBond* bondB = nullptr; 512 FOR_BONDS_OF_ATOM(bitB, carbon) { 513 if (&*bitB == bondA || !bitB->IsAromatic()) continue; 514 OBAtom* nbr = bitB->GetNbrAtom(carbon); 515 if (IsOxygenOrSulfur(nbr) && nbr->GetTotalDegree() == 1) { 516 otherOxygenOrSulfur = nbr; 517 bondB = &*bitB; 518 } 519 } 520 if (!otherOxygenOrSulfur) continue; 521 if(otherOxygenOrSulfur->GetFormalCharge() != 0) continue; //formal charge already set on one 522 523 // Now set as C(=O)O 524 bondA->SetAromatic(false); 525 oxygenOrSulfur->SetFormalCharge(-1); 526 527 bondB->SetAromatic(false); 528 bondB->SetBondOrder(2); 529 } 530 531 // First of all, set the atoms at the ends of the aromatic bonds to also 532 // be aromatic. This information is required for OBKekulize. 533 FOR_BONDS_OF_MOL(bond, mol) { 534 if (bond->IsAromatic()) { 535 bond->GetBeginAtom()->SetAromatic(); 536 bond->GetEndAtom()->SetAromatic(); 537 } 538 } 539 bool ok = OBKekulize(&mol); 540 if (!ok) { 541 stringstream errorMsg; 542 errorMsg << "Failed to kekulize aromatic bonds in MOL2 file"; 543 std::string title = mol.GetTitle(); 544 if (!title.empty()) 545 errorMsg << " (title is " << title << ")"; 546 errorMsg << endl; 547 obErrorLog.ThrowError(__FUNCTION__, errorMsg.str(), obWarning); 548 // return false; Should we return false for a kekulization failure? 549 } 550 } 551 552 mol.EndModify(); 553 554 // Suggestion by Liu Zhiguo 2008-01-26 555 // Mol2 files define atom types -- there is no need to re-perceive 556 mol.SetAtomTypesPerceived(); 557 558 if (has_residue_information) 559 mol.SetChainsPerceived(); 560 561 if (!has_explicit_hydrogen) { 562 // Guess how many hydrogens are present on each atom based on typical valencies 563 // TODO: implement the MOL2 valence model (if it exists) 564 FOR_ATOMS_OF_MOL(matom, mol) { 565 if (matom->GetImplicitHCount() == 0) 566 OBAtomAssignTypicalImplicitHydrogens(&*matom); 567 } 568 } 569 570 //must add generic data after end modify - otherwise it will be blown away 571 if (comment) 572 { 573 OBCommentData *cd = new OBCommentData; 574 cd->SetData(comment); 575 cd->SetOrigin(fileformatInput); 576 mol.SetData(cd); 577 delete [] comment; 578 comment = nullptr; 579 } 580 if (hasPartialCharges) 581 mol.SetPartialChargesPerceived(); 582 583 /* Disabled due to PR#3048758 -- seekg is very slow with gzipped mol2 584 // continue untill EOF or untill next molecule record 585 streampos pos; 586 for(;;) 587 { 588 pos = ifs.tellg(); 589 if (!ifs.getline(buffer,BUFF_SIZE)) 590 break; 591 if (EQn(buffer,"@<TRIPOS>MOLECULE",17)) 592 break; 593 } 594 595 ifs.seekg(pos); // go back to the end of the molecule 596 */ 597 598 return(true); 599 } 600 601 //////////////////////////////////////////////////////////////// 602 WriteMolecule(OBBase * pOb,OBConversion * pConv)603 bool MOL2Format::WriteMolecule(OBBase* pOb, OBConversion* pConv) 604 { 605 OBMol* pmol = dynamic_cast<OBMol*>(pOb); 606 if (pmol == nullptr) 607 return false; 608 609 //Define some references so we can use the old parameter names 610 ostream &ofs = *pConv->GetOutStream(); 611 OBMol &mol = *pmol; 612 bool ligandsOnly = pConv->IsOption("l", OBConversion::OUTOPTIONS) != nullptr; 613 bool skipFormalCharge = pConv->IsOption("u", OBConversion::OUTOPTIONS) != nullptr; 614 615 //The old code follows.... 616 string str,str1; 617 char buffer[BUFF_SIZE],label[BUFF_SIZE]; 618 char rnum[BUFF_SIZE],rlabel[BUFF_SIZE]; 619 620 //Check if UCSF Dock style coments are on 621 if (pConv->IsOption("c", OBConversion::OUTOPTIONS) != nullptr) { 622 vector<OBGenericData*>::iterator k; 623 vector<OBGenericData*> vdata = mol.GetData(); 624 ofs << endl; 625 for (k = vdata.begin();k != vdata.end();++k) { 626 if ((*k)->GetDataType() == OBGenericDataType::PairData 627 && (*k)->GetOrigin()!=local //internal OBPairData is not written 628 && (*k)->GetAttribute()!="PartialCharges") 629 { 630 ofs << "##########\t" << (*k)->GetAttribute() << ":\t" << ((OBPairData*)(*k))->GetValue() << endl; 631 } 632 } 633 ofs << endl; 634 } 635 636 ofs << "@<TRIPOS>MOLECULE" << endl; 637 str = mol.GetTitle(); 638 if (str.empty()) 639 ofs << "*****" << endl; 640 else 641 ofs << str << endl; 642 643 snprintf(buffer, BUFF_SIZE," %d %d 0 0 0", mol.NumAtoms(),mol.NumBonds()); 644 ofs << buffer << endl; 645 ofs << "SMALL" << endl; // TODO: detect if we have protein, biopolymer, etc. 646 647 OBPairData *dp = (OBPairData*)mol.GetData("PartialCharges"); 648 if (dp != nullptr) { 649 // Tripos spec says: 650 // NO_CHARGES, DEL_RE, GASTEIGER, GAST_HUCK, HUCKEL, PULLMAN, 651 // GAUSS80_CHARGES, AMPAC_CHARGES, MULLIKEN_CHARGES, DICT_ CHARGES, 652 // MMFF94_CHARGES, USER_CHARGES 653 if (strcasecmp(dp->GetValue().c_str(),"Mulliken") == 0) 654 ofs << "MULLIKEN_CHARGES" << endl; 655 else if (strcasecmp(dp->GetValue().c_str(),"MMFF94") == 0) 656 ofs << "MMFF94_CHARGES" << endl; 657 else if (strcasecmp(dp->GetValue().c_str(),"ESP") == 0) 658 ofs << "USER_CHARGES" << endl; 659 else if (strcasecmp(dp->GetValue().c_str(),"Gasteiger") == 0) 660 ofs << "GASTEIGER" << endl; 661 else // ideally, code should pick from the Tripos types 662 ofs << "USER_CHARGES" << endl; 663 } 664 else { // No idea what these charges are... all our code sets "PartialCharges" 665 ofs << "GASTEIGER" << endl; 666 } 667 668 // ofs << "Energy = " << mol.GetEnergy() << endl; 669 670 if (mol.HasData(OBGenericDataType::CommentData)) 671 { 672 ofs << "****\n"; // comment line printed, so we need to add "no status bits set" 673 OBCommentData *cd = (OBCommentData*)mol.GetData(OBGenericDataType::CommentData); 674 ofs << cd->GetData(); 675 } 676 677 ofs << endl; 678 ofs << "@<TRIPOS>ATOM" << endl; 679 680 OBAtom *atom; 681 OBResidue *res; 682 683 vector<OBAtom*>::iterator i; 684 std::map<int, int> labelcount; 685 686 ttab.SetFromType("INT"); 687 ttab.SetToType("SYB"); 688 689 bool hasFormalCharges = false; 690 for (atom = mol.BeginAtom(i);atom;atom = mol.NextAtom(i)) 691 { 692 693 // 694 // Use sequentially numbered atom names if no residues 695 // 696 697 snprintf(label,BUFF_SIZE, "%s%d", 698 OBElements::GetSymbol(atom->GetAtomicNum()), 699 ++labelcount[atom->GetAtomicNum()]); 700 strcpy(rlabel,"<1>"); 701 strcpy(rnum,"1"); 702 703 str = atom->GetType(); 704 ttab.Translate(str1,str); 705 706 if (atom->GetFormalCharge() != 0) hasFormalCharges = true; 707 // 708 // Use original atom names if there are residues 709 // 710 711 if (!ligandsOnly && (res = atom->GetResidue()) ) 712 { 713 // use original atom names defined by residue 714 snprintf(label,BUFF_SIZE,"%s",(char*)res->GetAtomID(atom).c_str()); 715 // make sure that residue name includes its number 716 snprintf(rlabel,BUFF_SIZE,"%s%d",res->GetName().c_str(), res->GetNum()); 717 snprintf(rnum,BUFF_SIZE,"%d",res->GetNum()); 718 } 719 720 snprintf(buffer,BUFF_SIZE,"%7d %-6s %9.4f %9.4f %9.4f %-5s %3s %-8s %9.4f", 721 atom->GetIdx(),label, 722 atom->GetX(),atom->GetY(),atom->GetZ(), 723 str1.c_str(), 724 rnum,rlabel, 725 atom->GetPartialCharge()); 726 ofs << buffer << endl; 727 } 728 729 //store formal charge info; put before bonds so we don't have 730 //to read past the end of the molecule to realize it is there 731 if(hasFormalCharges && !skipFormalCharge) { 732 //dkoes - to enable roundtriping of charges 733 ofs << "@<TRIPOS>UNITY_ATOM_ATTR\n"; 734 for (atom = mol.BeginAtom(i);atom;atom = mol.NextAtom(i)) 735 { 736 int charge = atom->GetFormalCharge(); 737 if (charge != 0) 738 { 739 ofs << atom->GetIdx() << " 1\n"; //one attribute 740 ofs << "charge " << charge << "\n"; //namely charge 741 } 742 } 743 } 744 745 ofs << "@<TRIPOS>BOND" << endl; 746 OBBond *bond; 747 vector<OBBond*>::iterator j; 748 string s1, s2; 749 for (bond = mol.BeginBond(j);bond;bond = mol.NextBond(j)) 750 { 751 s1 = bond->GetBeginAtom()->GetType(); 752 s2 = bond->GetEndAtom()->GetType(); 753 if (bond->IsAromatic() || s1 == "O.co2" || s2 == "O.co2") 754 strcpy(label,"ar"); 755 else if (bond->IsAmide()) 756 strcpy(label,"am"); 757 else 758 snprintf(label,BUFF_SIZE,"%d",bond->GetBondOrder()); 759 760 snprintf(buffer, BUFF_SIZE,"%6d %5d %5d %2s", 761 bond->GetIdx()+1,bond->GetBeginAtomIdx(),bond->GetEndAtomIdx(), 762 label); 763 ofs << buffer << endl; 764 } 765 // NO trailing blank line (PR#1868929). 766 // ofs << endl; 767 768 return(true); 769 } 770 SkipObjects(int n,OBConversion * pConv)771 int MOL2Format::SkipObjects(int n, OBConversion* pConv) 772 { 773 const char txt[] = "@<TRIPOS>MOLECULE"; 774 istream& ifs = *pConv->GetInStream(); 775 if(!ifs) 776 return -1; 777 if(n>0 && ifs.peek()==txt[0]) 778 ifs.ignore(); // move past '@' so that next mol will be found 779 do { 780 ignore(ifs, txt); 781 } while(ifs && (--n)>0); 782 783 if(!ifs.eof()) 784 ifs.seekg(1-sizeof(txt), ios::cur);//1 for '/0' 785 char ch = ifs.peek(); 786 return 1; 787 } 788 789 } //namespace OpenBabel 790