1 /**********************************************************************
2 Copyright (C) 2000 by OpenEye Scientific Software, Inc.
3 Some portions Copyright (C) 2001-2006 by Geoffrey R. Hutchison
4 Some portions Copyright (C) 2004 by Chris Morley
5 
6 This program is free software; you can redistribute it and/or modify
7 it under the terms of the GNU General Public License as published by
8 the Free Software Foundation version 2 of the License.
9 
10 This program is distributed in the hope that it will be useful,
11 but WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
13 GNU General Public License for more details.
14 ***********************************************************************/
15 #include <openbabel/babelconfig.h>
16 
17 #include <openbabel/obmolecformat.h>
18 #include <openbabel/mol.h>
19 #include <openbabel/atom.h>
20 #include <openbabel/elements.h>
21 #include <openbabel/bond.h>
22 #include <openbabel/data.h>
23 #include <openbabel/generic.h>
24 
25 #include <openbabel/forcefield.h>
26 #include <cstdlib>
27 
28 using namespace std;
29 namespace OpenBabel
30 {
31 
32   class TinkerFormat : public OBMoleculeFormat
33   {
34   public:
35     //Register this format type ID
TinkerFormat()36     TinkerFormat()
37     {
38       OBConversion::RegisterFormat("txyz",this);
39     }
40 
Description()41     virtual const char* Description() //required
42     {
43       return
44         "Tinker XYZ format\n"
45         "The cartesian XYZ file format used by the molecular mechanics package TINKER.\n"
46         "By default, the MM2 atom types are used for writing files but MM3 atom types\n"
47         "are provided as an option. Another option provides the ability to take the\n"
48         "atom type from the atom class (e.g. as used in SMILES, or set via the API).\n\n"
49 
50         "Read Options e.g. -as\n"
51         "  s  Generate single bonds only\n\n"
52         "Write Options e.g. -xm\n"
53         "  m  Write an input file for the CNDO/INDO program.\n"
54         "  c  Write atom types using custom atom classes, if available\n"
55         "  3  Write atom types for the MM3 forcefield.\n\n";
56     };
57 
SpecificationURL()58     virtual const char* SpecificationURL()
59     {return "http://dasher.wustl.edu/tinker/";}; //optional
60 
61     //Flags() can return be any the following combined by | or be omitted if none apply
62     // NOTREADABLE  READONEONLY  NOTWRITABLE  WRITEONEONLY
Flags()63     virtual unsigned int Flags()
64     {
65       return READONEONLY | WRITEONEONLY;
66     };
67 
68     ////////////////////////////////////////////////////
69     /// The "API" interface functions
70     virtual bool ReadMolecule(OBBase* pOb, OBConversion* pConv);
71     virtual bool WriteMolecule(OBBase* pOb, OBConversion* pConv);
72 
73   };
74 
75   //Make an instance of the format class
76   TinkerFormat theTinkerFormat;
77 
78   // Sets the MM3 atom type based on the MM2 atom type
79   int SetMM3Type(OBAtom *atom);
80 
81   /////////////////////////////////////////////////////////////////
82 
ReadMolecule(OBBase * pOb,OBConversion * pConv)83   bool TinkerFormat::ReadMolecule(OBBase* pOb, OBConversion* pConv)
84   {
85     OBMol* pmol = pOb->CastAndClear<OBMol>();
86     if (pmol == nullptr)
87         return false;
88 
89     //Define some references so we can use the old parameter names
90     istream &ifs = *pConv->GetInStream();
91     OBMol &mol = *pmol;
92     const char* title = pConv->GetTitle();
93 
94     int natoms;
95     char buffer[BUFF_SIZE];
96     vector<string> vs;
97 
98     ifs.getline(buffer, BUFF_SIZE);
99     tokenize(vs,buffer);
100     if (vs.size() < 2)
101       return false;
102     natoms = atoi(vs[0].c_str());
103 
104     // title is 2nd token (usually add tokens for the atom types)
105     mol.SetTitle(vs[1]);
106 
107     mol.ReserveAtoms(natoms);
108     mol.BeginModify();
109 
110     string str;
111     double x,y,z;
112     OBAtom *atom;
113 
114     for (int i = 1; i <= natoms; ++i)
115     {
116         if (!ifs.getline(buffer,BUFF_SIZE))
117             return(false);
118         tokenize(vs,buffer);
119         // e.g. "2  C      2.476285    0.121331   -0.001070     2     1     3    14"
120         if (vs.size() < 5)
121             return(false);
122 
123         atom = mol.NewAtom();
124         x = atof((char*)vs[2].c_str());
125         y = atof((char*)vs[3].c_str());
126         z = atof((char*)vs[4].c_str());
127         atom->SetVector(x,y,z); //set coordinates
128 
129         //set atomic number
130         atom->SetAtomicNum(OBElements::GetAtomicNum(vs[1].c_str()));
131 
132         // add bonding
133         if (vs.size() > 6)
134           for (unsigned int j = 6; j < vs.size(); ++j)
135             mol.AddBond(mol.NumAtoms(), atoi((char *)vs[j].c_str()), 1); // we don't know the bond order
136 
137     }
138     if (!pConv->IsOption("s",OBConversion::INOPTIONS))
139       mol.PerceiveBondOrders();
140 
141     // clean out remaining blank lines
142     std::streampos ipos;
143     do
144     {
145       ipos = ifs.tellg();
146       ifs.getline(buffer,BUFF_SIZE);
147     }
148     while(strlen(buffer) == 0 && !ifs.eof() );
149     ifs.seekg(ipos);
150 
151     mol.EndModify();
152     mol.SetTitle(title);
153     return(true);
154   }
155 
WriteMolecule(OBBase * pOb,OBConversion * pConv)156   bool TinkerFormat::WriteMolecule(OBBase* pOb, OBConversion* pConv)
157   {
158     OBMol* pmol = dynamic_cast<OBMol*>(pOb);
159     if (pmol == nullptr)
160       return false;
161 
162     //Define some references so we can use the old parameter names
163     ostream &ofs = *pConv->GetOutStream();
164     OBMol &mol = *pmol;
165     bool mm2Types = false;
166     bool mmffTypes = pConv->IsOption("m", OBConversion::OUTOPTIONS) != nullptr;
167     bool mm3Types = pConv->IsOption("3", OBConversion::OUTOPTIONS) != nullptr;
168     bool classTypes = pConv->IsOption("c", OBConversion::OUTOPTIONS) != nullptr;
169 
170     unsigned int i;
171     char buffer[BUFF_SIZE];
172     OBBond *bond;
173     vector<OBBond*>::iterator j;
174 
175     // Before we try output of MMFF94 atom types, check if it works
176     OBForceField *ff = OpenBabel::OBForceField::FindForceField("MMFF94");
177     if (mmffTypes && ff && ff->Setup(mol))
178       mmffTypes = ff->GetAtomTypes(mol);
179     else
180       mmffTypes = false; // either the force field isn't available, or it doesn't work
181 
182     if (!mmffTypes && !mm3Types && !classTypes) {
183       snprintf(buffer, BUFF_SIZE, "%6d %-20s   MM2 parameters\n",mol.NumAtoms(),mol.GetTitle());
184       mm2Types = true;
185     }
186     else if (mm3Types)
187       snprintf(buffer, BUFF_SIZE, "%6d %-20s   MM3 parameters\n",mol.NumAtoms(),mol.GetTitle());
188     else if (classTypes)
189       snprintf(buffer, BUFF_SIZE, "%6d %-20s   Custom parameters\n",mol.NumAtoms(),mol.GetTitle());
190     else
191       snprintf(buffer, BUFF_SIZE, "%6d %-20s   MMFF94 parameters\n",mol.NumAtoms(),mol.GetTitle());
192     ofs << buffer;
193 
194     ttab.SetFromType("INT");
195 
196     OBAtom *atom;
197     string str,str1;
198     int atomType;
199     for(i = 1;i <= mol.NumAtoms(); i++)
200       {
201         atom = mol.GetAtom(i);
202         str = atom->GetType();
203         atomType = 0; // Something is very wrong if this doesn't get set below
204 
205         if (mm2Types) {
206           ttab.SetToType("MM2");
207           ttab.Translate(str1,str);
208           atomType = atoi((char*)str1.c_str());
209         }
210         if (mmffTypes) {
211           // Override the MM2 typing
212           OBPairData *type = (OpenBabel::OBPairData*)atom->GetData("FFAtomType");
213           if (type) {
214             str1 = type->GetValue().c_str();
215             atomType = atoi((char*)str1.c_str());
216           }
217         }
218         if (mm3Types) {
219           // convert to integer for MM3 typing
220           atomType = SetMM3Type(atom);
221         }
222         if (classTypes) {
223           // Atom classes are set by the user, so use those
224           OBGenericData *data = atom->GetData("Atom Class");
225           if (data) {
226             OBPairInteger* acdata = dynamic_cast<OBPairInteger*>(data); // Could replace with C-style cast if willing to live dangerously
227             if (acdata) {
228               int ac = acdata->GetGenericValue();
229               if (ac >= 0)
230                 atomType = ac;
231             }
232           }
233         }
234 
235         snprintf(buffer, BUFF_SIZE, "%6d %2s  %12.6f%12.6f%12.6f %5d",
236                  i,
237                  OBElements::GetSymbol(atom->GetAtomicNum()),
238                  atom->GetX(),
239                  atom->GetY(),
240                  atom->GetZ(),
241                  atomType);
242         ofs << buffer;
243 
244         for (bond = atom->BeginBond(j); bond; bond = atom->NextBond(j))
245           {
246             snprintf(buffer, BUFF_SIZE, "%6d", (bond->GetNbrAtom(atom))->GetIdx());
247             ofs << buffer;
248           }
249 
250         ofs << endl;
251       }
252 
253     return(true);
254   }
255 
SetMM3Type(OBAtom * atom)256   int SetMM3Type(OBAtom *atom)
257   {
258     OBAtom *b; // neighbor
259     OBBondIterator i, j;
260     int countNeighborO, countNeighborS, countNeighborN, countNeighborC;
261     countNeighborO = countNeighborS = countNeighborN = countNeighborC = 0;
262 
263     // The MM2 typing isn't very good, so we do this ourselves for the most common atom types
264     switch (atom->GetAtomicNum()) {
265     case 1: // Hydrogen
266       b = atom->BeginNbrAtom(j);
267       if (b->IsCarboxylOxygen())
268         return 24;
269       if (b->GetAtomicNum() == OBElements::Sulfur)
270         return 44;
271       if (b->GetAtomicNum() == OBElements::Nitrogen) {
272         if (b->IsAmideNitrogen())
273           return 28;
274         if (b->GetExplicitDegree() > 3)
275           return 48;// ammonium
276         return 23; // default amine/imine
277       }
278       if (b->GetAtomicNum() == OBElements::Carbon && b->GetHyb() == 1)
279         return 124; // acetylene
280 
281       if (b->GetAtomicNum() == OBElements::Oxygen) {
282         if (b->HasAlphaBetaUnsat())
283           return 73; // includes non-enol/phenol, but has the right spirit
284         return 21; // default alcohol
285       }
286 
287       return 5; // default H
288       break;
289 
290     case 2: // Helium
291       return 51; break;
292     case 3: // Li
293       return 163; break;
294 
295     case 5: // B
296       if (atom->GetExplicitDegree() >= 4)
297         return 27; // tetrahedral
298       return 26; break;
299 
300     case 6: // C
301       if (atom->IsInRingSize(3)) { // cyclopropane / cyclopropene
302         if (atom->GetHyb() == 3)
303           return 22;
304         if (atom->GetHyb() == 2) {
305           if (atom->CountFreeOxygens() == 1) // propanone
306             return 67;
307           return 38; // propane
308         }
309       }
310       if (atom->IsInRingSize(4)) { // cyclobutane or cyclobutene
311         if (atom->GetHyb() == 3)
312           return 56;
313         if (atom->GetHyb() == 2) {
314           if (atom->CountFreeOxygens() == 1) // butanone
315             return 58;
316           return 57; // regular cyclobutane
317         }
318       }
319 
320       if (atom->CountBondsOfOrder(2) == 2) { // allene
321         if (atom->CountFreeOxygens() == 1) // ketene
322           return 106;
323         return 68;
324       }
325 
326       if (atom->GetFormalCharge() == +1)
327         return 30;
328       if (atom->GetSpinMultiplicity() == 2)
329         return 29;
330 
331       if (atom->GetHyb() == 3)
332         return 1;
333       else if (atom->GetHyb() == 2) {
334         if (atom->CountFreeOxygens() >= 1)
335           return 3;
336         return 2;
337       }
338       else if (atom->GetHyb() == 1)
339         return 4;
340       break;
341 
342     case 7: // N
343       // TODO
344       if (atom->IsAmideNitrogen())
345         return 151;
346       if (atom->IsAromatic()) {
347         if (atom->GetFormalCharge() == 1)
348           return 111;
349         if (atom->IsInRingSize(5)) // pyrrole
350           return 40;
351         if (atom->IsInRingSize(6)) // pyridine
352           return 37;
353       }
354 
355       if (atom->CountFreeOxygens() == 2) // nitro
356         return 46;
357 
358       if (atom->GetHyb() == 3) {
359         if (atom->GetExplicitDegree() > 3)
360           return 39; // ammonium
361         return 8;
362       }
363       else if (atom->GetHyb() == 2)
364         return 9;
365       else if (atom->GetHyb() == 1)
366         return 10;
367       break;
368 
369     case 8: // O
370       //TODO
371       if (atom->IsPhosphateOxygen())
372         return 159;
373       if (atom->IsCarboxylOxygen())
374         return 75;
375       if (atom->IsInRingSize(3))
376         return 49; // epoxy
377 
378       b = atom->BeginNbrAtom(j);
379       if (atom->HasBondOfOrder(2) && b->GetAtomicNum() == OBElements::Carbon) { // O=C
380         return 7;
381       }
382 
383       if (atom->IsAromatic())
384         return 41; // furan
385       return 6;
386       break;
387 
388     case 9: // F
389       return 11; break;
390     case 10: // Ne
391       return 52; break;
392     case 12: // Mg
393       return 59; break;
394     case 14: // Si
395       return 19; break;
396 
397     case 15: // P
398       if (atom->CountFreeOxygens() > 0)
399         return 153; // phosphate
400       if (atom->GetExplicitValence() > 3)
401         return 60; // phosphorus V
402       return 25; break;
403 
404     case 16: // S
405       if (atom->IsAromatic())
406         return 42; // thiophene
407       if (atom->GetFormalCharge() == 1)
408         return 16; // sulfonium
409 
410     // look at the neighbors
411     for (b = atom->BeginNbrAtom(j); b; b = atom->NextNbrAtom(j))
412       {
413         switch (b->GetAtomicNum()) {
414         case 6:
415           if (b->GetHyb() == 2) // S=C
416             countNeighborC++; break;
417         case 7:
418           countNeighborN++; break;
419         case 8:
420           if (b->GetHvyDegree() == 1)
421             countNeighborO++;
422           break;
423         case 16:
424           countNeighborS++; break;
425         default:
426           continue;
427         }
428       }
429 
430       if (countNeighborO == 1)
431         return 17; // sulfoxide
432       if (countNeighborO >= 2) {
433         if (countNeighborN)
434           return 154; // sulfonamide
435         return 18; // sulfone or sulfate
436       }
437       if (countNeighborC)
438         return 74; // S=C
439       if (countNeighborS == 1)
440         return 104; // S-S disulfide
441       else if (countNeighborS > 1)
442         return 105;
443 
444       return 15; break;
445 
446     case 17: // Cl
447       return 12; break;
448     case 18: // Ar
449       return 153; break;
450     case 20: // Ca
451       return 125; break;
452     case 26: // Fe
453       if (atom->GetFormalCharge() == 2)
454         return 61;
455       return 62; break;
456     case 27: // Co
457       if (atom->GetFormalCharge() == 2)
458         return 65;
459       return 66; break;
460     case 28: // Ni
461       if (atom->GetFormalCharge() == 2)
462         return 63;
463       return 64; break;
464 
465     case 32: // Ge
466       return 31; break;
467     case 34: // Se
468       return 34; break;
469     case 35: // Br
470       return 13; break;
471     case 36: // Kr
472       return 54; break;
473     case 38: // Sr
474       return 126; break;
475     case 50: // Sn
476       return 32; break;
477     case 52: // Te
478       return 35; break;
479     case 53: // I
480       return 14; break;
481     case 54: // Xe
482       return 55; break;
483 
484     case 56: // Ba
485       return 127; break;
486     case 57:
487       return 128; break;
488     case 58:
489       return 129; break;
490     case 59:
491       return 130; break;
492     case 60:
493       return 131; break;
494     case 61:
495       return 132; break;
496     case 62:
497       return 133; break;
498     case 63:
499       return 134; break;
500     case 64:
501       return 135; break;
502     case 65:
503       return 136; break;
504     case 66:
505       return 137; break;
506     case 67:
507       return 138; break;
508     case 68:
509       return 139; break;
510     case 69:
511       return 140; break;
512     case 70:
513       return 141; break;
514     case 71:
515       return 142; break;
516 
517     case 82: // Pb
518       return 33; break;
519 
520 
521     default:
522       break;
523     }
524     return 0;
525   }
526 
527 } //namespace OpenBabel
528