1 /**********************************************************************
2 data.cpp - Global data and resource file parsers.
3 
4 Copyright (C) 1998-2001 by OpenEye Scientific Software, Inc.
5 Some portions Copyright (C) 2001-2008 by Geoffrey R. Hutchison
6 
7 This file is part of the Open Babel project.
8 For more information, see <http://openbabel.org/>
9 
10 This program is free software; you can redistribute it and/or modify
11 it under the terms of the GNU General Public License as published by
12 the Free Software Foundation version 2 of the License.
13 
14 This program is distributed in the hope that it will be useful,
15 but WITHOUT ANY WARRANTY; without even the implied warranty of
16 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
17 GNU General Public License for more details.
18 ***********************************************************************/
19 
20 #ifdef WIN32
21 #pragma warning (disable : 4786)
22 #endif
23 #include <cstdlib>
24 #include <openbabel/babelconfig.h>
25 #include <openbabel/data.h>
26 #include <openbabel/data_utilities.h>
27 #include <openbabel/mol.h>
28 #include <openbabel/atom.h>
29 #include <openbabel/bond.h>
30 #include <openbabel/locale.h>
31 #include <openbabel/oberror.h>
32 #include <openbabel/elements.h>
33 
34 // data headers with default parameters
35 #include "types.h"
36 #include "resdata.h"
37 
38 
39 #if !HAVE_STRNCASECMP
40 extern "C" int strncasecmp(const char *s1, const char *s2, size_t n);
41 #endif
42 
43 using namespace std;
44 
45 namespace OpenBabel
46 {
47   // Initialize the globals (declared in data.h)
48   OBTypeTable ttab;
49   OBResidueData resdat;
50 
OBAtomicHeatOfFormationTable(void)51   OBAtomicHeatOfFormationTable::OBAtomicHeatOfFormationTable(void)
52   {
53     _init = false;
54     _dir = BABEL_DATADIR;
55     _envvar = "BABEL_DATADIR";
56     _filename = "atomization-energies.txt";
57     _subdir = "data";
58     Init();
59   }
60 
UnitNameToConversionFactor(const char * unit)61   static double UnitNameToConversionFactor(const char* unit) {
62     const char* p = unit;
63     switch(p[0]) {
64     case 'e':
65       if (p[1]=='V' && p[2]=='\0')
66         return ELECTRONVOLT_TO_KCALPERMOL; // eV
67       if (p[1]=='l' && p[2]=='e' && p[3]=='c' && p[4]=='t' && p[5]=='r' && p[6]=='o' && p[7]=='n' &&
68           p[8]=='v' && p[9]=='o' && p[10]=='l' && p[11]=='t' && p[12]=='\0')
69         return ELECTRONVOLT_TO_KCALPERMOL; // electronvolt
70       break;
71     case 'k':
72       if (p[1]=='J' && p[2]=='/' && p[3]=='m' && p[4]=='o' && p[5]=='l' && p[6]=='\0')
73         return KJPERMOL_TO_KCALPERMOL; // kJ/mol
74       if (p[1]=='c' && p[2]=='a' && p[3]=='l' && p[4]=='/' && p[5]=='m' && p[6]=='o' && p[7]=='l' && p[8]=='\0')
75         return 1.0; // kcal/mol
76       break;
77     case 'H':
78       if (p[1]=='a' && p[2]=='r' && p[3]=='t' && p[4]=='r' && p[5]=='e' && p[6]=='e' && p[7]=='\0')
79         return HARTEE_TO_KCALPERMOL; // Hartree
80       break;
81     case 'J':
82       if (p[1]=='/' && p[2]=='m' && p[3]=='o' && p[4]=='l' && p[5]==' ' && p[6]=='K' && p[7]=='\0')
83         return KJPERMOL_TO_KCALPERMOL; // J/mol K
84       break;
85     case 'R':
86       if (p[1]=='y' && p[2]=='d' && p[3]=='b' && p[4]=='e' && p[5]=='r' && p[6]=='g' && p[7]=='\0')
87         return RYDBERG_TO_KCALPERMOL; // Rydberg
88       break;
89     }
90 
91     std::stringstream errorMsg;
92     errorMsg << "WARNING: Unknown energy unit in thermochemistry file\n";
93     obErrorLog.ThrowError(__FUNCTION__, errorMsg.str() , obWarning);
94 
95     return 1.0;
96   }
97 
ParseLine(const char * line)98   void OBAtomicHeatOfFormationTable::ParseLine(const char *line)
99   {
100     char *ptr;
101     vector<string> vs;
102     OBAtomHOF *oba;
103 
104     ptr = const_cast<char*>( strchr(line,'#'));
105     if (nullptr != ptr)
106       ptr[0] = '\0';
107     if (strlen(line) > 0)
108       {
109         tokenize(vs,line,"|");
110         if (vs.size() >= 8)
111           {
112               oba = new OBAtomHOF(vs[0],
113                                   atoi(vs[1].c_str()),
114                                   vs[2],
115                                   vs[3],
116                                   atof(vs[4].c_str()),
117                                   atof(vs[5].c_str()),
118                                   atoi(vs[6].c_str()),
119                                   vs[7]);
120             _atomhof.push_back(*oba);
121           }
122       }
123   }
124 
GetHeatOfFormation(std::string elem,int charge,std::string meth,double T,double * dhof0,double * dhofT,double * S0T)125   int OBAtomicHeatOfFormationTable::GetHeatOfFormation(std::string elem,
126                                                        int charge,
127                                                        std::string meth,
128                                                        double T,
129                                                        double *dhof0,
130                                                        double *dhofT,
131                                                        double *S0T)
132   {
133     int    found;
134     double Ttol = 0.05; /* Kelvin */
135     double Vmodel, Vdhf, S0, HexpT;
136     std::vector<OBAtomHOF>::iterator it;
137     char desc[128];
138 
139     found = 0;
140     Vmodel = Vdhf = S0 = HexpT = 0;
141     snprintf(desc,sizeof(desc),"%s(0K)",meth.c_str());
142 
143     for(it = _atomhof.begin(); it != _atomhof.end(); ++it)
144     {
145         if ((0 == it->Element().compare(elem)) &&
146             (it->Charge() == charge))
147         {
148             double eFac = UnitNameToConversionFactor(it->Unit().c_str());
149             if (fabs(T - it->T()) < Ttol)
150             {
151                 if (0 == it->Method().compare("exp"))
152                 {
153                     if (0 == it->Desc().compare("H(0)-H(T)"))
154                     {
155                         HexpT += it->Value()*eFac;
156                         found++;
157                     }
158                     else if (0 == it->Desc().compare("S0(T)"))
159                     {
160                         S0 += it->Value();
161                         found++;
162                     }
163                 }
164             }
165             else if (0 == it->T())
166             {
167                 if ((0 == it->Method().compare(meth)) &&
168                     (0 == it->Desc().compare(desc)))
169                 {
170                     Vmodel += it->Value()*eFac;
171                     found++;
172                 }
173                 if (0 == it->Method().compare("exp"))
174                 {
175                     if (0 == it->Desc().compare("DHf(T)"))
176                     {
177                         Vdhf += it->Value()*eFac;
178                         found++;
179                     }
180                 }
181             }
182         }
183     }
184 
185     if (found == 4)
186     {
187         *dhof0 = Vdhf-Vmodel;
188         *dhofT = Vdhf-Vmodel-HexpT;
189         *S0T   = -S0/4.184;
190         return 1;
191     }
192     return 0;
193   }
194 
195   /** \class OBTypeTable data.h <openbabel/data.h>
196       \brief Atom Type Translation Table
197 
198       Molecular file formats frequently store information about atoms in an
199       atom type field. Some formats store only the element for each atom,
200       while others include hybridization and local environments, such as the
201       Sybyl mol2 atom type field. The OBTypeTable class acts as a translation
202       table to convert atom types between a number of different molecular
203       file formats. The constructor for OBTypeTable automatically reads the
204       text file types.txt. An instance of
205       OBTypeTable (ttab) is declared external in data.cpp and is referenced as
206       extern OBTypeTable ttab in mol.h.  The following code demonstrates how
207       to use the OBTypeTable class to translate the internal representation
208       of atom types in an OBMol Internal to Sybyl Mol2 atom types.
209 
210       \code
211       ttab.SetFromType("INT");
212       ttab.SetToType("SYB");
213       OBAtom *atom;
214       vector<OBAtom*>::iterator i;
215       string src,dst;
216       for (atom = mol.BeginAtom(i);atom;atom = mol.EndAtom(i))
217       {
218          src = atom->GetType();
219          ttab.Translate(dst,src);
220          cout << "atom number " << atom->GetIdx() << "has mol2 type " << dst << endl;
221       }
222       \endcode
223 
224       Current atom types include (defined in the top line of the data file types.txt):
225       - INT (Open Babel internal codes)
226       - ATN (atomic numbers)
227       - HYB (hybridization)
228       - MMD (MacroModel)
229       - MM2 (MM2 force field)
230       - XYZ (element symbols from XYZ file format)
231       - ALC (Alchemy)
232       - HAD (H added)
233       - MCML (MacMolecule)
234       - C3D (Chem3D)
235       - SYB (Sybyl mol2)
236       - MOL (Sybyl mol)
237       - MAP (Gasteiger partial charge types)
238       - DRE (Dreiding)
239       - XED (XED format)
240       - DOK (Dock)
241       - M3D (Molecular Arts M3D)
242       - SBN (Sybyl descriptor types for MPD files)
243       - PCM (PC Model)
244   */
245 
OBTypeTable()246   OBTypeTable::OBTypeTable()
247   {
248     _init = false;
249     _dir = BABEL_DATADIR;
250     _envvar = "BABEL_DATADIR";
251     _filename = "types.txt";
252     _subdir = "data";
253     _dataptr = TypesData;
254     _linecount = 0;
255     _from = _to = -1;
256   }
257 
ParseLine(const char * buffer)258   void OBTypeTable::ParseLine(const char *buffer)
259   {
260     if (buffer[0] == '#')
261       return; // just a comment line
262 
263     if (_linecount == 0) {
264       tokenize(_colnames,buffer);
265       _ncols = _colnames.size();
266     }
267     else
268       {
269         vector<string> vc;
270         tokenize(vc,buffer);
271         if (vc.size() == (unsigned)_ncols)
272           _table.push_back(vc);
273         else
274           {
275             stringstream errorMsg;
276             errorMsg << " Could not parse line in type translation table types.txt -- incorect number of columns";
277             errorMsg << " found " << vc.size() << " expected " << _ncols << ".";
278             obErrorLog.ThrowError(__FUNCTION__, errorMsg.str(), obInfo);
279           }
280       }
281     _linecount++;
282   }
283 
SetFromType(const char * from)284   bool OBTypeTable::SetFromType(const char* from)
285   {
286     if (!_init)
287       Init();
288 
289     string tmp = from;
290 
291     unsigned int i;
292     for (i = 0;i < _colnames.size();++i)
293       if (tmp == _colnames[i])
294         {
295           _from = i;
296           return(true);
297         }
298 
299     obErrorLog.ThrowError(__FUNCTION__, "Requested type column not found", obInfo);
300 
301     return(false);
302   }
303 
SetToType(const char * to)304   bool OBTypeTable::SetToType(const char* to)
305   {
306     if (!_init)
307       Init();
308 
309     string tmp = to;
310 
311     unsigned int i;
312     for (i = 0;i < _colnames.size();++i)
313       if (tmp == _colnames[i])
314         {
315           _to = i;
316           return(true);
317         }
318 
319     obErrorLog.ThrowError(__FUNCTION__, "Requested type column not found", obInfo);
320 
321     return(false);
322   }
323 
324   //! Translates atom types (to, from), checking for size of destination
325   //!  string and null-terminating as needed
326   //! \deprecated Because there is no guarantee on the length of an atom type
327   //!  you should consider using std::string instead
Translate(char * to,const char * from)328   bool OBTypeTable::Translate(char *to, const char *from)
329   {
330     if (!_init)
331       Init();
332 
333     bool rval;
334     string sto,sfrom;
335     sfrom = from;
336     rval = Translate(sto,sfrom);
337     strncpy(to,(char*)sto.c_str(), OBATOM_TYPE_LEN - 1);
338     to[OBATOM_TYPE_LEN - 1] = '\0';
339 
340     return(rval);
341   }
342 
Translate(string & to,const string & from)343   bool OBTypeTable::Translate(string &to, const string &from)
344   {
345     if (!_init)
346       Init();
347 
348     if (from == "")
349       return(false);
350 
351     if (_from >= 0 && _to >= 0 &&
352         _from < (signed)_table.size() && _to < (signed)_table.size())
353       {
354         vector<vector<string> >::iterator i;
355         for (i = _table.begin();i != _table.end();++i)
356           if ((signed)(*i).size() > _from &&  (*i)[_from] == from)
357             {
358               to = (*i)[_to];
359               return(true);
360             }
361       }
362 
363     // Throw an error, copy the string and return false
364     obErrorLog.ThrowError(__FUNCTION__, "Cannot perform atom type translation: table cannot find requested types.", obWarning);
365     to = from;
366     return(false);
367   }
368 
Translate(const string & from)369   std::string OBTypeTable::Translate(const string &from)
370   {
371     if (!_init)
372       Init();
373 
374     if (from.empty())
375       return("");
376 
377     if (_from >= 0 && _to >= 0 &&
378         _from < (signed)_table.size() && _to < (signed)_table.size())
379       {
380         vector<vector<string> >::iterator i;
381         for (i = _table.begin();i != _table.end();++i)
382           if ((signed)(*i).size() > _from &&  (*i)[_from] == from)
383             {
384               return (*i)[_to];
385             }
386       }
387 
388     // Throw an error, copy the string and return false
389     obErrorLog.ThrowError(__FUNCTION__, "Cannot perform atom type translation: table cannot find requested types.", obWarning);
390     return("");
391   }
392 
GetFromType()393   std::string OBTypeTable::GetFromType()
394   {
395     if (!_init)
396       Init();
397 
398     if (_from > 0 && _from < (signed)_table.size())
399       return( _colnames[_from] );
400     else
401       return( _colnames[0] );
402   }
403 
GetToType()404   std::string OBTypeTable::GetToType()
405   {
406     if (!_init)
407       Init();
408 
409     if (_to > 0 && _to < (signed)_table.size())
410       return( _colnames[_to] );
411     else
412       return( _colnames[0] );
413   }
414 
Toupper(string & s)415   void Toupper(string &s)
416   {
417     unsigned int i;
418     for (i = 0;i < s.size();++i)
419       s[i] = toupper(s[i]);
420   }
421 
Tolower(string & s)422   void Tolower(string &s)
423   {
424     unsigned int i;
425     for (i = 0;i < s.size();++i)
426       s[i] = tolower(s[i]);
427   }
428 
429   ///////////////////////////////////////////////////////////////////////
OBResidueData()430   OBResidueData::OBResidueData()
431   {
432     _init = false;
433     _dir = BABEL_DATADIR;
434     _envvar = "BABEL_DATADIR";
435     _filename = "resdata.txt";
436     _subdir = "data";
437     _dataptr = ResidueData;
438   }
439 
AssignBonds(OBMol & mol)440   bool OBResidueData::AssignBonds(OBMol &mol)
441   {
442     if (!_init)
443       Init();
444 
445     OBAtom *a1,*a2;
446     OBResidue *r1,*r2;
447     vector<OBAtom*>::iterator i,j;
448     vector3 v;
449 
450     int bo;
451     string skipres = ""; // Residue Number to skip
452     string rname = "";
453     //assign residue bonds
454     for (a1 = mol.BeginAtom(i);a1;a1 = mol.NextAtom(i))
455       {
456         r1 = a1->GetResidue();
457         if (r1 == nullptr) // atoms may not have residues
458           continue;
459 
460         if (skipres.length() && r1->GetNumString() == skipres)
461           continue;
462 
463         if (r1->GetName() != rname)
464           {
465             skipres = SetResName(r1->GetName()) ? "" : r1->GetNumString();
466             rname = r1->GetName();
467           }
468         //assign bonds for each atom
469         for (j=i,a2 = mol.NextAtom(j);a2;a2 = mol.NextAtom(j))
470           {
471             r2 = a2->GetResidue();
472             if (r2 == nullptr) // atoms may not have residues
473               continue;
474 
475             if (r1->GetNumString() != r2->GetNumString())
476               break;
477             if (r1->GetName() != r2->GetName())
478               break;
479             if (r1->GetChain() != r2->GetChain())
480               break; // Fixes PR#2889763 - Fabian
481 
482             if ((bo = LookupBO(r1->GetAtomID(a1),r2->GetAtomID(a2))))
483               {
484                 // Suggested by Liu Zhiguo 2007-08-13
485                 // for predefined residues, don't perceive connection
486                 // by distance
487                 //                v = a1->GetVector() - a2->GetVector();
488                 //                if (v.length_2() < 3.5) //check by distance
489                   mol.AddBond(a1->GetIdx(),a2->GetIdx(),bo);
490               }
491           }
492       }
493 
494     int hyb;
495     string type;
496 
497     //types and hybridization
498     rname = ""; // name of current residue
499     skipres = ""; // don't skip any residues right now
500     for (a1 = mol.BeginAtom(i);a1;a1 = mol.NextAtom(i))
501       {
502         if (a1->GetAtomicNum() == OBElements::Oxygen && !a1->GetExplicitDegree())
503           {
504             a1->SetType("O3");
505             continue;
506           }
507         if (a1->GetAtomicNum() == OBElements::Hydrogen)
508           {
509             a1->SetType("H");
510             continue;
511           }
512 
513         //***valence rule for O-
514         if (a1->GetAtomicNum() == OBElements::Oxygen && a1->GetExplicitDegree() == 1)
515           {
516             OBBond *bond;
517             bond = (OBBond*)*(a1->BeginBonds());
518             if (bond->GetBondOrder() == 2)
519               {
520                 a1->SetType("O2");
521                 a1->SetHyb(2);
522               }
523             else if (bond->GetBondOrder() == 1)
524               {
525                 // Leave the protonation/deprotonation to phmodel.txt
526                 a1->SetType("O3");
527                 a1->SetHyb(3);
528                 // PR#3203039 -- Fix from Magnus Lundborg
529                 //                a1->SetFormalCharge(0);
530               }
531             continue;
532           }
533 
534         r1 = a1->GetResidue();
535         if (r1 == nullptr) continue; // atoms may not have residues
536         if (skipres.length() && r1->GetNumString() == skipres)
537           continue;
538 
539         if (r1->GetName() != rname)
540           {
541             // if SetResName fails, skip this residue
542             skipres = SetResName(r1->GetName()) ? "" : r1->GetNumString();
543             rname = r1->GetName();
544           }
545 
546         if (LookupType(r1->GetAtomID(a1),type,hyb))
547           {
548             a1->SetType(type);
549             a1->SetHyb(hyb);
550           }
551         else // try to figure it out by bond order ???
552           {}
553       }
554 
555     return(true);
556   }
557 
ParseLine(const char * buffer)558   void OBResidueData::ParseLine(const char *buffer)
559   {
560     int bo;
561     string s;
562     vector<string> vs;
563 
564     if (buffer[0] == '#')
565       return;
566 
567     tokenize(vs,buffer);
568     if (!vs.empty())
569       {
570         if (vs[0] == "BOND")
571           {
572             s = (vs[1] < vs[2]) ? vs[1] + " " + vs[2] :
573               vs[2] + " " + vs[1];
574             bo = atoi(vs[3].c_str());
575             _vtmp.push_back(pair<string,int> (s,bo));
576           }
577 
578         if (vs[0] == "ATOM" && vs.size() == 4)
579           {
580             _vatmtmp.push_back(vs[1]);
581             _vatmtmp.push_back(vs[2]);
582             _vatmtmp.push_back(vs[3]);
583           }
584 
585         if (vs[0] == "RES")
586           _resname.push_back(vs[1]);
587 
588         if (vs[0]== "END")
589           {
590             _resatoms.push_back(_vatmtmp);
591             _resbonds.push_back(_vtmp);
592             _vtmp.clear();
593             _vatmtmp.clear();
594           }
595       }
596   }
597 
SetResName(const string & s)598   bool OBResidueData::SetResName(const string &s)
599   {
600     if (!_init)
601       Init();
602 
603     unsigned int i;
604 
605     for (i = 0;i < _resname.size();++i)
606       if (_resname[i] == s)
607         {
608           _resnum = i;
609           return(true);
610         }
611 
612     _resnum = -1;
613     return(false);
614   }
615 
LookupBO(const string & s)616   int OBResidueData::LookupBO(const string &s)
617   {
618     if (_resnum == -1)
619       return(0);
620 
621     unsigned int i;
622     for (i = 0;i < _resbonds[_resnum].size();++i)
623       if (_resbonds[_resnum][i].first == s)
624         return(_resbonds[_resnum][i].second);
625 
626     return(0);
627   }
628 
LookupBO(const string & s1,const string & s2)629   int OBResidueData::LookupBO(const string &s1, const string &s2)
630   {
631     if (_resnum == -1)
632       return(0);
633     string s;
634 
635     s = (s1 < s2) ? s1 + " " + s2 : s2 + " " + s1;
636 
637     unsigned int i;
638     for (i = 0;i < _resbonds[_resnum].size();++i)
639       if (_resbonds[_resnum][i].first == s)
640         return(_resbonds[_resnum][i].second);
641 
642     return(0);
643   }
644 
LookupType(const string & atmid,string & type,int & hyb)645   bool OBResidueData::LookupType(const string &atmid,string &type,int &hyb)
646   {
647     if (_resnum == -1)
648       return(false);
649 
650     string s;
651     vector<string>::iterator i;
652 
653     for (i = _resatoms[_resnum].begin();i != _resatoms[_resnum].end();i+=3)
654       if (atmid == *i)
655         {
656           ++i;
657           type = *i;
658           ++i;
659           hyb = atoi((*i).c_str());
660           return(true);
661         }
662 
663     return(false);
664   }
665 
Init()666   void OBGlobalDataBase::Init()
667   {
668     if (_init)
669       return;
670     _init = true;
671 
672     ifstream ifs;
673     char charBuffer[BUFF_SIZE];
674 
675     // Set the locale for number parsing to avoid locale issues: PR#1785463
676     obLocale.SetLocale();
677 
678     // Check return value from OpenDatafile
679     // Suggestion from Zhiguo Liu
680     string fn_open = OpenDatafile(ifs, _filename, _envvar);
681 
682     // Check _subdir directory
683     if (fn_open == "")
684       string fn_open = OpenDatafile(ifs, _filename, _subdir);
685 
686     if (fn_open != "" && (ifs))
687       {
688         while(ifs.getline(charBuffer,BUFF_SIZE))
689           ParseLine(charBuffer);
690       }
691 
692     else
693       // If all else fails, use the compiled in values
694       if (_dataptr)
695         {
696           obErrorLog.ThrowError(__FUNCTION__, "Cannot open " + _filename + " defaulting to compiled data.", obDebug);
697 
698           const char *p1,*p2;
699           for (p1 = p2 = _dataptr;*p2 != '\0';++p2)
700             if (*p2 == '\n')
701               {
702                 strncpy(charBuffer, p1, (p2 - p1));
703                 charBuffer[(p2 - p1)] = '\0';
704                 ParseLine(charBuffer);
705                 p1 = ++p2;
706               }
707         }
708       else
709         {
710           string s = "Unable to open data file '";
711           s += _filename;
712           s += "'";
713           obErrorLog.ThrowError(__FUNCTION__, s, obWarning);
714         }
715 
716     // return the locale to the original one
717     obLocale.RestoreLocale();
718 
719     if (ifs)
720       ifs.close();
721 
722     if (GetSize() == 0)
723       {
724         string s = "Cannot initialize database '";
725         s += _filename;
726         s += "' which may cause further errors.";
727         obErrorLog.ThrowError(__FUNCTION__, s, obWarning);
728       }
729 
730   }
731 
732 } // end namespace OpenBabel
733 
734 //! \file data.cpp
735 //! \brief Global data and resource file parsers.
736