1 /********************************************************************** 2 data.cpp - Global data and resource file parsers. 3 4 Copyright (C) 1998-2001 by OpenEye Scientific Software, Inc. 5 Some portions Copyright (C) 2001-2008 by Geoffrey R. Hutchison 6 7 This file is part of the Open Babel project. 8 For more information, see <http://openbabel.org/> 9 10 This program is free software; you can redistribute it and/or modify 11 it under the terms of the GNU General Public License as published by 12 the Free Software Foundation version 2 of the License. 13 14 This program is distributed in the hope that it will be useful, 15 but WITHOUT ANY WARRANTY; without even the implied warranty of 16 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 17 GNU General Public License for more details. 18 ***********************************************************************/ 19 20 #ifdef WIN32 21 #pragma warning (disable : 4786) 22 #endif 23 #include <cstdlib> 24 #include <openbabel/babelconfig.h> 25 #include <openbabel/data.h> 26 #include <openbabel/data_utilities.h> 27 #include <openbabel/mol.h> 28 #include <openbabel/atom.h> 29 #include <openbabel/bond.h> 30 #include <openbabel/locale.h> 31 #include <openbabel/oberror.h> 32 #include <openbabel/elements.h> 33 34 // data headers with default parameters 35 #include "types.h" 36 #include "resdata.h" 37 38 39 #if !HAVE_STRNCASECMP 40 extern "C" int strncasecmp(const char *s1, const char *s2, size_t n); 41 #endif 42 43 using namespace std; 44 45 namespace OpenBabel 46 { 47 // Initialize the globals (declared in data.h) 48 OBTypeTable ttab; 49 OBResidueData resdat; 50 OBAtomicHeatOfFormationTable(void)51 OBAtomicHeatOfFormationTable::OBAtomicHeatOfFormationTable(void) 52 { 53 _init = false; 54 _dir = BABEL_DATADIR; 55 _envvar = "BABEL_DATADIR"; 56 _filename = "atomization-energies.txt"; 57 _subdir = "data"; 58 Init(); 59 } 60 UnitNameToConversionFactor(const char * unit)61 static double UnitNameToConversionFactor(const char* unit) { 62 const char* p = unit; 63 switch(p[0]) { 64 case 'e': 65 if (p[1]=='V' && p[2]=='\0') 66 return ELECTRONVOLT_TO_KCALPERMOL; // eV 67 if (p[1]=='l' && p[2]=='e' && p[3]=='c' && p[4]=='t' && p[5]=='r' && p[6]=='o' && p[7]=='n' && 68 p[8]=='v' && p[9]=='o' && p[10]=='l' && p[11]=='t' && p[12]=='\0') 69 return ELECTRONVOLT_TO_KCALPERMOL; // electronvolt 70 break; 71 case 'k': 72 if (p[1]=='J' && p[2]=='/' && p[3]=='m' && p[4]=='o' && p[5]=='l' && p[6]=='\0') 73 return KJPERMOL_TO_KCALPERMOL; // kJ/mol 74 if (p[1]=='c' && p[2]=='a' && p[3]=='l' && p[4]=='/' && p[5]=='m' && p[6]=='o' && p[7]=='l' && p[8]=='\0') 75 return 1.0; // kcal/mol 76 break; 77 case 'H': 78 if (p[1]=='a' && p[2]=='r' && p[3]=='t' && p[4]=='r' && p[5]=='e' && p[6]=='e' && p[7]=='\0') 79 return HARTEE_TO_KCALPERMOL; // Hartree 80 break; 81 case 'J': 82 if (p[1]=='/' && p[2]=='m' && p[3]=='o' && p[4]=='l' && p[5]==' ' && p[6]=='K' && p[7]=='\0') 83 return KJPERMOL_TO_KCALPERMOL; // J/mol K 84 break; 85 case 'R': 86 if (p[1]=='y' && p[2]=='d' && p[3]=='b' && p[4]=='e' && p[5]=='r' && p[6]=='g' && p[7]=='\0') 87 return RYDBERG_TO_KCALPERMOL; // Rydberg 88 break; 89 } 90 91 std::stringstream errorMsg; 92 errorMsg << "WARNING: Unknown energy unit in thermochemistry file\n"; 93 obErrorLog.ThrowError(__FUNCTION__, errorMsg.str() , obWarning); 94 95 return 1.0; 96 } 97 ParseLine(const char * line)98 void OBAtomicHeatOfFormationTable::ParseLine(const char *line) 99 { 100 char *ptr; 101 vector<string> vs; 102 OBAtomHOF *oba; 103 104 ptr = const_cast<char*>( strchr(line,'#')); 105 if (nullptr != ptr) 106 ptr[0] = '\0'; 107 if (strlen(line) > 0) 108 { 109 tokenize(vs,line,"|"); 110 if (vs.size() >= 8) 111 { 112 oba = new OBAtomHOF(vs[0], 113 atoi(vs[1].c_str()), 114 vs[2], 115 vs[3], 116 atof(vs[4].c_str()), 117 atof(vs[5].c_str()), 118 atoi(vs[6].c_str()), 119 vs[7]); 120 _atomhof.push_back(*oba); 121 } 122 } 123 } 124 GetHeatOfFormation(std::string elem,int charge,std::string meth,double T,double * dhof0,double * dhofT,double * S0T)125 int OBAtomicHeatOfFormationTable::GetHeatOfFormation(std::string elem, 126 int charge, 127 std::string meth, 128 double T, 129 double *dhof0, 130 double *dhofT, 131 double *S0T) 132 { 133 int found; 134 double Ttol = 0.05; /* Kelvin */ 135 double Vmodel, Vdhf, S0, HexpT; 136 std::vector<OBAtomHOF>::iterator it; 137 char desc[128]; 138 139 found = 0; 140 Vmodel = Vdhf = S0 = HexpT = 0; 141 snprintf(desc,sizeof(desc),"%s(0K)",meth.c_str()); 142 143 for(it = _atomhof.begin(); it != _atomhof.end(); ++it) 144 { 145 if ((0 == it->Element().compare(elem)) && 146 (it->Charge() == charge)) 147 { 148 double eFac = UnitNameToConversionFactor(it->Unit().c_str()); 149 if (fabs(T - it->T()) < Ttol) 150 { 151 if (0 == it->Method().compare("exp")) 152 { 153 if (0 == it->Desc().compare("H(0)-H(T)")) 154 { 155 HexpT += it->Value()*eFac; 156 found++; 157 } 158 else if (0 == it->Desc().compare("S0(T)")) 159 { 160 S0 += it->Value(); 161 found++; 162 } 163 } 164 } 165 else if (0 == it->T()) 166 { 167 if ((0 == it->Method().compare(meth)) && 168 (0 == it->Desc().compare(desc))) 169 { 170 Vmodel += it->Value()*eFac; 171 found++; 172 } 173 if (0 == it->Method().compare("exp")) 174 { 175 if (0 == it->Desc().compare("DHf(T)")) 176 { 177 Vdhf += it->Value()*eFac; 178 found++; 179 } 180 } 181 } 182 } 183 } 184 185 if (found == 4) 186 { 187 *dhof0 = Vdhf-Vmodel; 188 *dhofT = Vdhf-Vmodel-HexpT; 189 *S0T = -S0/4.184; 190 return 1; 191 } 192 return 0; 193 } 194 195 /** \class OBTypeTable data.h <openbabel/data.h> 196 \brief Atom Type Translation Table 197 198 Molecular file formats frequently store information about atoms in an 199 atom type field. Some formats store only the element for each atom, 200 while others include hybridization and local environments, such as the 201 Sybyl mol2 atom type field. The OBTypeTable class acts as a translation 202 table to convert atom types between a number of different molecular 203 file formats. The constructor for OBTypeTable automatically reads the 204 text file types.txt. An instance of 205 OBTypeTable (ttab) is declared external in data.cpp and is referenced as 206 extern OBTypeTable ttab in mol.h. The following code demonstrates how 207 to use the OBTypeTable class to translate the internal representation 208 of atom types in an OBMol Internal to Sybyl Mol2 atom types. 209 210 \code 211 ttab.SetFromType("INT"); 212 ttab.SetToType("SYB"); 213 OBAtom *atom; 214 vector<OBAtom*>::iterator i; 215 string src,dst; 216 for (atom = mol.BeginAtom(i);atom;atom = mol.EndAtom(i)) 217 { 218 src = atom->GetType(); 219 ttab.Translate(dst,src); 220 cout << "atom number " << atom->GetIdx() << "has mol2 type " << dst << endl; 221 } 222 \endcode 223 224 Current atom types include (defined in the top line of the data file types.txt): 225 - INT (Open Babel internal codes) 226 - ATN (atomic numbers) 227 - HYB (hybridization) 228 - MMD (MacroModel) 229 - MM2 (MM2 force field) 230 - XYZ (element symbols from XYZ file format) 231 - ALC (Alchemy) 232 - HAD (H added) 233 - MCML (MacMolecule) 234 - C3D (Chem3D) 235 - SYB (Sybyl mol2) 236 - MOL (Sybyl mol) 237 - MAP (Gasteiger partial charge types) 238 - DRE (Dreiding) 239 - XED (XED format) 240 - DOK (Dock) 241 - M3D (Molecular Arts M3D) 242 - SBN (Sybyl descriptor types for MPD files) 243 - PCM (PC Model) 244 */ 245 OBTypeTable()246 OBTypeTable::OBTypeTable() 247 { 248 _init = false; 249 _dir = BABEL_DATADIR; 250 _envvar = "BABEL_DATADIR"; 251 _filename = "types.txt"; 252 _subdir = "data"; 253 _dataptr = TypesData; 254 _linecount = 0; 255 _from = _to = -1; 256 } 257 ParseLine(const char * buffer)258 void OBTypeTable::ParseLine(const char *buffer) 259 { 260 if (buffer[0] == '#') 261 return; // just a comment line 262 263 if (_linecount == 0) { 264 tokenize(_colnames,buffer); 265 _ncols = _colnames.size(); 266 } 267 else 268 { 269 vector<string> vc; 270 tokenize(vc,buffer); 271 if (vc.size() == (unsigned)_ncols) 272 _table.push_back(vc); 273 else 274 { 275 stringstream errorMsg; 276 errorMsg << " Could not parse line in type translation table types.txt -- incorect number of columns"; 277 errorMsg << " found " << vc.size() << " expected " << _ncols << "."; 278 obErrorLog.ThrowError(__FUNCTION__, errorMsg.str(), obInfo); 279 } 280 } 281 _linecount++; 282 } 283 SetFromType(const char * from)284 bool OBTypeTable::SetFromType(const char* from) 285 { 286 if (!_init) 287 Init(); 288 289 string tmp = from; 290 291 unsigned int i; 292 for (i = 0;i < _colnames.size();++i) 293 if (tmp == _colnames[i]) 294 { 295 _from = i; 296 return(true); 297 } 298 299 obErrorLog.ThrowError(__FUNCTION__, "Requested type column not found", obInfo); 300 301 return(false); 302 } 303 SetToType(const char * to)304 bool OBTypeTable::SetToType(const char* to) 305 { 306 if (!_init) 307 Init(); 308 309 string tmp = to; 310 311 unsigned int i; 312 for (i = 0;i < _colnames.size();++i) 313 if (tmp == _colnames[i]) 314 { 315 _to = i; 316 return(true); 317 } 318 319 obErrorLog.ThrowError(__FUNCTION__, "Requested type column not found", obInfo); 320 321 return(false); 322 } 323 324 //! Translates atom types (to, from), checking for size of destination 325 //! string and null-terminating as needed 326 //! \deprecated Because there is no guarantee on the length of an atom type 327 //! you should consider using std::string instead Translate(char * to,const char * from)328 bool OBTypeTable::Translate(char *to, const char *from) 329 { 330 if (!_init) 331 Init(); 332 333 bool rval; 334 string sto,sfrom; 335 sfrom = from; 336 rval = Translate(sto,sfrom); 337 strncpy(to,(char*)sto.c_str(), OBATOM_TYPE_LEN - 1); 338 to[OBATOM_TYPE_LEN - 1] = '\0'; 339 340 return(rval); 341 } 342 Translate(string & to,const string & from)343 bool OBTypeTable::Translate(string &to, const string &from) 344 { 345 if (!_init) 346 Init(); 347 348 if (from == "") 349 return(false); 350 351 if (_from >= 0 && _to >= 0 && 352 _from < (signed)_table.size() && _to < (signed)_table.size()) 353 { 354 vector<vector<string> >::iterator i; 355 for (i = _table.begin();i != _table.end();++i) 356 if ((signed)(*i).size() > _from && (*i)[_from] == from) 357 { 358 to = (*i)[_to]; 359 return(true); 360 } 361 } 362 363 // Throw an error, copy the string and return false 364 obErrorLog.ThrowError(__FUNCTION__, "Cannot perform atom type translation: table cannot find requested types.", obWarning); 365 to = from; 366 return(false); 367 } 368 Translate(const string & from)369 std::string OBTypeTable::Translate(const string &from) 370 { 371 if (!_init) 372 Init(); 373 374 if (from.empty()) 375 return(""); 376 377 if (_from >= 0 && _to >= 0 && 378 _from < (signed)_table.size() && _to < (signed)_table.size()) 379 { 380 vector<vector<string> >::iterator i; 381 for (i = _table.begin();i != _table.end();++i) 382 if ((signed)(*i).size() > _from && (*i)[_from] == from) 383 { 384 return (*i)[_to]; 385 } 386 } 387 388 // Throw an error, copy the string and return false 389 obErrorLog.ThrowError(__FUNCTION__, "Cannot perform atom type translation: table cannot find requested types.", obWarning); 390 return(""); 391 } 392 GetFromType()393 std::string OBTypeTable::GetFromType() 394 { 395 if (!_init) 396 Init(); 397 398 if (_from > 0 && _from < (signed)_table.size()) 399 return( _colnames[_from] ); 400 else 401 return( _colnames[0] ); 402 } 403 GetToType()404 std::string OBTypeTable::GetToType() 405 { 406 if (!_init) 407 Init(); 408 409 if (_to > 0 && _to < (signed)_table.size()) 410 return( _colnames[_to] ); 411 else 412 return( _colnames[0] ); 413 } 414 Toupper(string & s)415 void Toupper(string &s) 416 { 417 unsigned int i; 418 for (i = 0;i < s.size();++i) 419 s[i] = toupper(s[i]); 420 } 421 Tolower(string & s)422 void Tolower(string &s) 423 { 424 unsigned int i; 425 for (i = 0;i < s.size();++i) 426 s[i] = tolower(s[i]); 427 } 428 429 /////////////////////////////////////////////////////////////////////// OBResidueData()430 OBResidueData::OBResidueData() 431 { 432 _init = false; 433 _dir = BABEL_DATADIR; 434 _envvar = "BABEL_DATADIR"; 435 _filename = "resdata.txt"; 436 _subdir = "data"; 437 _dataptr = ResidueData; 438 } 439 AssignBonds(OBMol & mol)440 bool OBResidueData::AssignBonds(OBMol &mol) 441 { 442 if (!_init) 443 Init(); 444 445 OBAtom *a1,*a2; 446 OBResidue *r1,*r2; 447 vector<OBAtom*>::iterator i,j; 448 vector3 v; 449 450 int bo; 451 string skipres = ""; // Residue Number to skip 452 string rname = ""; 453 //assign residue bonds 454 for (a1 = mol.BeginAtom(i);a1;a1 = mol.NextAtom(i)) 455 { 456 r1 = a1->GetResidue(); 457 if (r1 == nullptr) // atoms may not have residues 458 continue; 459 460 if (skipres.length() && r1->GetNumString() == skipres) 461 continue; 462 463 if (r1->GetName() != rname) 464 { 465 skipres = SetResName(r1->GetName()) ? "" : r1->GetNumString(); 466 rname = r1->GetName(); 467 } 468 //assign bonds for each atom 469 for (j=i,a2 = mol.NextAtom(j);a2;a2 = mol.NextAtom(j)) 470 { 471 r2 = a2->GetResidue(); 472 if (r2 == nullptr) // atoms may not have residues 473 continue; 474 475 if (r1->GetNumString() != r2->GetNumString()) 476 break; 477 if (r1->GetName() != r2->GetName()) 478 break; 479 if (r1->GetChain() != r2->GetChain()) 480 break; // Fixes PR#2889763 - Fabian 481 482 if ((bo = LookupBO(r1->GetAtomID(a1),r2->GetAtomID(a2)))) 483 { 484 // Suggested by Liu Zhiguo 2007-08-13 485 // for predefined residues, don't perceive connection 486 // by distance 487 // v = a1->GetVector() - a2->GetVector(); 488 // if (v.length_2() < 3.5) //check by distance 489 mol.AddBond(a1->GetIdx(),a2->GetIdx(),bo); 490 } 491 } 492 } 493 494 int hyb; 495 string type; 496 497 //types and hybridization 498 rname = ""; // name of current residue 499 skipres = ""; // don't skip any residues right now 500 for (a1 = mol.BeginAtom(i);a1;a1 = mol.NextAtom(i)) 501 { 502 if (a1->GetAtomicNum() == OBElements::Oxygen && !a1->GetExplicitDegree()) 503 { 504 a1->SetType("O3"); 505 continue; 506 } 507 if (a1->GetAtomicNum() == OBElements::Hydrogen) 508 { 509 a1->SetType("H"); 510 continue; 511 } 512 513 //***valence rule for O- 514 if (a1->GetAtomicNum() == OBElements::Oxygen && a1->GetExplicitDegree() == 1) 515 { 516 OBBond *bond; 517 bond = (OBBond*)*(a1->BeginBonds()); 518 if (bond->GetBondOrder() == 2) 519 { 520 a1->SetType("O2"); 521 a1->SetHyb(2); 522 } 523 else if (bond->GetBondOrder() == 1) 524 { 525 // Leave the protonation/deprotonation to phmodel.txt 526 a1->SetType("O3"); 527 a1->SetHyb(3); 528 // PR#3203039 -- Fix from Magnus Lundborg 529 // a1->SetFormalCharge(0); 530 } 531 continue; 532 } 533 534 r1 = a1->GetResidue(); 535 if (r1 == nullptr) continue; // atoms may not have residues 536 if (skipres.length() && r1->GetNumString() == skipres) 537 continue; 538 539 if (r1->GetName() != rname) 540 { 541 // if SetResName fails, skip this residue 542 skipres = SetResName(r1->GetName()) ? "" : r1->GetNumString(); 543 rname = r1->GetName(); 544 } 545 546 if (LookupType(r1->GetAtomID(a1),type,hyb)) 547 { 548 a1->SetType(type); 549 a1->SetHyb(hyb); 550 } 551 else // try to figure it out by bond order ??? 552 {} 553 } 554 555 return(true); 556 } 557 ParseLine(const char * buffer)558 void OBResidueData::ParseLine(const char *buffer) 559 { 560 int bo; 561 string s; 562 vector<string> vs; 563 564 if (buffer[0] == '#') 565 return; 566 567 tokenize(vs,buffer); 568 if (!vs.empty()) 569 { 570 if (vs[0] == "BOND") 571 { 572 s = (vs[1] < vs[2]) ? vs[1] + " " + vs[2] : 573 vs[2] + " " + vs[1]; 574 bo = atoi(vs[3].c_str()); 575 _vtmp.push_back(pair<string,int> (s,bo)); 576 } 577 578 if (vs[0] == "ATOM" && vs.size() == 4) 579 { 580 _vatmtmp.push_back(vs[1]); 581 _vatmtmp.push_back(vs[2]); 582 _vatmtmp.push_back(vs[3]); 583 } 584 585 if (vs[0] == "RES") 586 _resname.push_back(vs[1]); 587 588 if (vs[0]== "END") 589 { 590 _resatoms.push_back(_vatmtmp); 591 _resbonds.push_back(_vtmp); 592 _vtmp.clear(); 593 _vatmtmp.clear(); 594 } 595 } 596 } 597 SetResName(const string & s)598 bool OBResidueData::SetResName(const string &s) 599 { 600 if (!_init) 601 Init(); 602 603 unsigned int i; 604 605 for (i = 0;i < _resname.size();++i) 606 if (_resname[i] == s) 607 { 608 _resnum = i; 609 return(true); 610 } 611 612 _resnum = -1; 613 return(false); 614 } 615 LookupBO(const string & s)616 int OBResidueData::LookupBO(const string &s) 617 { 618 if (_resnum == -1) 619 return(0); 620 621 unsigned int i; 622 for (i = 0;i < _resbonds[_resnum].size();++i) 623 if (_resbonds[_resnum][i].first == s) 624 return(_resbonds[_resnum][i].second); 625 626 return(0); 627 } 628 LookupBO(const string & s1,const string & s2)629 int OBResidueData::LookupBO(const string &s1, const string &s2) 630 { 631 if (_resnum == -1) 632 return(0); 633 string s; 634 635 s = (s1 < s2) ? s1 + " " + s2 : s2 + " " + s1; 636 637 unsigned int i; 638 for (i = 0;i < _resbonds[_resnum].size();++i) 639 if (_resbonds[_resnum][i].first == s) 640 return(_resbonds[_resnum][i].second); 641 642 return(0); 643 } 644 LookupType(const string & atmid,string & type,int & hyb)645 bool OBResidueData::LookupType(const string &atmid,string &type,int &hyb) 646 { 647 if (_resnum == -1) 648 return(false); 649 650 string s; 651 vector<string>::iterator i; 652 653 for (i = _resatoms[_resnum].begin();i != _resatoms[_resnum].end();i+=3) 654 if (atmid == *i) 655 { 656 ++i; 657 type = *i; 658 ++i; 659 hyb = atoi((*i).c_str()); 660 return(true); 661 } 662 663 return(false); 664 } 665 Init()666 void OBGlobalDataBase::Init() 667 { 668 if (_init) 669 return; 670 _init = true; 671 672 ifstream ifs; 673 char charBuffer[BUFF_SIZE]; 674 675 // Set the locale for number parsing to avoid locale issues: PR#1785463 676 obLocale.SetLocale(); 677 678 // Check return value from OpenDatafile 679 // Suggestion from Zhiguo Liu 680 string fn_open = OpenDatafile(ifs, _filename, _envvar); 681 682 // Check _subdir directory 683 if (fn_open == "") 684 string fn_open = OpenDatafile(ifs, _filename, _subdir); 685 686 if (fn_open != "" && (ifs)) 687 { 688 while(ifs.getline(charBuffer,BUFF_SIZE)) 689 ParseLine(charBuffer); 690 } 691 692 else 693 // If all else fails, use the compiled in values 694 if (_dataptr) 695 { 696 obErrorLog.ThrowError(__FUNCTION__, "Cannot open " + _filename + " defaulting to compiled data.", obDebug); 697 698 const char *p1,*p2; 699 for (p1 = p2 = _dataptr;*p2 != '\0';++p2) 700 if (*p2 == '\n') 701 { 702 strncpy(charBuffer, p1, (p2 - p1)); 703 charBuffer[(p2 - p1)] = '\0'; 704 ParseLine(charBuffer); 705 p1 = ++p2; 706 } 707 } 708 else 709 { 710 string s = "Unable to open data file '"; 711 s += _filename; 712 s += "'"; 713 obErrorLog.ThrowError(__FUNCTION__, s, obWarning); 714 } 715 716 // return the locale to the original one 717 obLocale.RestoreLocale(); 718 719 if (ifs) 720 ifs.close(); 721 722 if (GetSize() == 0) 723 { 724 string s = "Cannot initialize database '"; 725 s += _filename; 726 s += "' which may cause further errors."; 727 obErrorLog.ThrowError(__FUNCTION__, s, obWarning); 728 } 729 730 } 731 732 } // end namespace OpenBabel 733 734 //! \file data.cpp 735 //! \brief Global data and resource file parsers. 736