1 /**********************************************************************
2 Copyright (C) 1998-2001 by OpenEye Scientific Software, Inc.
3 Some portions Copyright (C) 2001-2007 by Geoffrey R. Hutchison
4 Some portions Copyright (C) 2004 by Chris Morley
5 
6 This program is free software; you can redistribute it and/or modify
7 it under the terms of the GNU General Public License as published by
8 the Free Software Foundation version 2 of the License.
9 
10 This program is distributed in the hope that it will be useful,
11 but WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
13 GNU General Public License for more details.
14 ***********************************************************************/
15 #include <openbabel/babelconfig.h>
16 
17 #include <openbabel/obmolecformat.h>
18 #include <openbabel/mol.h>
19 #include <openbabel/atom.h>
20 #include <openbabel/bond.h>
21 #include <openbabel/obiter.h>
22 #include <openbabel/elements.h>
23 #include <openbabel/generic.h>
24 #include <openbabel/kekulize.h>
25 #include <openbabel/obfunctions.h>
26 #include <openbabel/data.h>
27 #include <cstdlib>
28 
29 using namespace std;
30 namespace OpenBabel
31 {
32   //The routine WriteSmiOrderedMol2() in the original mol2.cpp is presumably
33   //another output format, but was not made available in version 100.1.2, nor
34   //is it here.
35 
36   class MOL2Format : public OBMoleculeFormat
37   {
38   public:
39     //Register this format type ID
MOL2Format()40     MOL2Format()
41     {
42       OBConversion::RegisterFormat("mol2",this, "chemical/x-mol2");
43       OBConversion::RegisterFormat("ml2",this);
44       OBConversion::RegisterFormat("sy2",this);
45       OBConversion::RegisterOptionParam("c", this, 0, OBConversion::INOPTIONS);
46       OBConversion::RegisterOptionParam("c", this, 0, OBConversion::OUTOPTIONS);
47       OBConversion::RegisterOptionParam("l", this, 0, OBConversion::OUTOPTIONS);
48       OBConversion::RegisterOptionParam("u", this, 0, OBConversion::OUTOPTIONS);
49     }
50 
Description()51     virtual const char* Description() //required
52     {
53       return
54         "Sybyl Mol2 format\n"
55         "Read Options e.g. -ac\n"
56         "  c               Read UCSF Dock scores saved in comments preceding molecules\n\n"
57         "Write Options e.g. -xl\n"
58         "  l               Output ignores residue information (only ligands)\n"
59         "  c               Write UCSF Dock scores saved in comments preceding molecules\n"
60         "  u               Do not write formal charge information in UNITY records\n\n";
61     };
62 
SpecificationURL()63     virtual const char* SpecificationURL()
64     {
65       return "http://www.tripos.com/data/support/mol2.pdf";
66     }; //optional
67 
GetMIMEType()68     virtual const char* GetMIMEType()
69     { return "chemical/x-mol2"; };
70 
71     virtual int SkipObjects(int n, OBConversion* pConv);
72 
73     //*** This section identical for most OBMol conversions ***
74     ////////////////////////////////////////////////////
75     /// The "API" interface functions
76     virtual bool ReadMolecule(OBBase* pOb, OBConversion* pConv);
77     virtual bool WriteMolecule(OBBase* pOb, OBConversion* pConv);
78   };
79   //***
80 
81   //Make an instance of the format class
82   MOL2Format theMOL2Format;
83 
84   // Helper function for ReadMolecule
85   // \return Is this atom a sulfur in a (di)thiocarboxyl (-CS2, -COS, CS2H or COSH) group?
IsThiocarboxylSulfur(OBAtom * queryatom)86   static bool IsThiocarboxylSulfur(OBAtom* queryatom)
87   {
88     if (queryatom->GetAtomicNum() != OBElements::Sulfur)
89       return(false);
90     if (queryatom->GetHvyDegree() != 1)
91       return(false);
92 
93     OBAtom *atom = nullptr;
94     OBBond *bond;
95     OBBondIterator i;
96 
97     for (bond = queryatom->BeginBond(i); bond; bond = queryatom->NextBond(i))
98       if ((bond->GetNbrAtom(queryatom))->GetAtomicNum() == OBElements::Carbon)
99       {
100         atom = bond->GetNbrAtom(queryatom);
101         break;
102       }
103     if (!atom)
104       return(false);
105     if (!(atom->CountFreeSulfurs() == 2)
106       && !(atom->CountFreeOxygens() == 1 && atom->CountFreeSulfurs() == 1))
107       return(false);
108 
109     //atom is connected to a carbon that has a total
110     //of 2 attached free sulfurs or 1 free oxygen and 1 free sulfur
111     return(true);
112   }
113 
IsOxygenOrSulfur(OBAtom * atom)114   static bool IsOxygenOrSulfur(OBAtom *atom)
115   {
116     switch (atom->GetAtomicNum()) {
117     case 8: case 16: return true;
118     default: return false;
119     }
120   }
121 
GetAtomicNumAndIsotope(const char * symbol,int * isotope)122   static unsigned int GetAtomicNumAndIsotope(const char* symbol, int *isotope)
123   {
124     const char* p = symbol;
125     switch (p[0]) {
126     case 'D':
127       if (p[1] == '\0') {
128         *isotope = 2;
129         return 1;
130       }
131       break;
132     case 'T':
133       if (p[1] == '\0') {
134         *isotope = 3;
135         return 1;
136       }
137       break;
138     }
139     return OBElements::GetAtomicNum(symbol);
140   }
141 
142   //read from ifs until next rti is found and return it
read_until_rti(istream & ifs)143   static string read_until_rti(istream & ifs)
144   {
145       char buffer[BUFF_SIZE];
146       for (;;)
147       {
148         if (!ifs.getline(buffer,BUFF_SIZE))
149           return "";
150         if (!strncmp(buffer,"@<TRIPOS>",9))
151           return string(buffer);
152       }
153   }
154   /////////////////////////////////////////////////////////////////
ReadMolecule(OBBase * pOb,OBConversion * pConv)155   bool MOL2Format::ReadMolecule(OBBase* pOb, OBConversion* pConv)
156   {
157 
158     OBMol* pmol = pOb->CastAndClear<OBMol>();
159     if (pmol == nullptr)
160       return false;
161 
162     //Define some references so we can use the old parameter names
163     istream &ifs = *pConv->GetInStream();
164     OBMol &mol = *pmol;
165 
166     //Old code follows...
167     bool foundAtomLine = false;
168     char buffer[BUFF_SIZE];
169     char *comment = nullptr;
170     string str,str1;
171     vector<string> vstr;
172     int len;
173 
174     // Prevent reperception
175     mol.SetChainsPerceived();
176 
177     mol.BeginModify();
178 
179     for (;;)
180       {
181         if (!ifs.getline(buffer,BUFF_SIZE))
182           return(false);
183         if (pConv->IsOption("c", OBConversion::INOPTIONS) != nullptr && EQn(buffer, "###########", 10))
184           {
185             char attr[32], val[32];
186             sscanf(buffer, "########## %[^:]:%s", attr, val);
187             OBPairData *dd = new OBPairData;
188             dd->SetAttribute(attr);
189             dd->SetValue(val);
190             dd->SetOrigin(fileformatInput);
191             mol.SetData(dd);
192           }
193         if (EQn(buffer,"@<TRIPOS>MOLECULE",17))
194           break;
195       }
196 
197     // OK, just read MOLECULE line
198     int lcount;
199     int natoms,nbonds;
200     bool hasPartialCharges = true;
201     for (lcount=0;;lcount++)
202       {
203         if (!ifs.getline(buffer,BUFF_SIZE))
204           return(false);
205         if (EQn(buffer,"@<TRIPOS>ATOM",13))
206           {
207             foundAtomLine = true;
208             break;
209           }
210 
211         if (lcount == 0)
212           {
213             tokenize(vstr,buffer);
214             if (!vstr.empty())
215               mol.SetTitle(buffer);
216           }
217         else if (lcount == 1)
218           sscanf(buffer,"%d%d",&natoms,&nbonds);
219         else if (lcount == 3) // charge descriptions
220           {
221             // Annotate origin of partial charges
222             OBPairData *dp = new OBPairData;
223             dp->SetAttribute("PartialCharges");
224             dp->SetValue(buffer);
225             dp->SetOrigin(fileformatInput);
226             mol.SetData(dp);
227 
228             if (strncasecmp(buffer, "NO_CHARGES", 10) == 0)
229               hasPartialCharges = false;
230           }
231         else if (lcount == 4) //energy (?)
232           {
233             tokenize(vstr,buffer);
234             if (!vstr.empty() && vstr.size() == 3)
235               if (vstr[0] == "Energy")
236                 mol.SetEnergy(atof(vstr[2].c_str()));
237           }
238         else if (lcount == 5) //comment
239           {
240             if ( buffer[0] )
241               {
242                 len = (int) strlen(buffer)+1;
243                 //! @todo allow better multi-line comments
244                 // which don't allow ill-formed data to consume memory
245                 // Thanks to Andrew Dalke for the pointer
246                 if (comment != nullptr)
247                   delete [] comment;
248                 comment = new char [len];
249                 memcpy(comment,buffer,len);
250               }
251           }
252       }
253 
254     if (!foundAtomLine)
255       {
256         mol.EndModify();
257         mol.Clear();
258         obErrorLog.ThrowError(__FUNCTION__, "Unable to read Mol2 format file. No atoms found.", obWarning);
259         return(false);
260       }
261 
262     mol.ReserveAtoms(natoms);
263 
264     int i;
265     vector3 v;
266     OBAtom atom;
267     double x,y,z,pcharge;
268     char temp_type[BUFF_SIZE], resname[BUFF_SIZE], atmid[BUFF_SIZE];
269     int elemno, resnum = -1;
270     int isotope = 0;
271     bool has_explicit_hydrogen = false;
272     bool has_residue_information = false;
273 
274     ttab.SetFromType("SYB");
275     for (i = 0;i < natoms;i++)
276       {
277         if (!ifs.getline(buffer,BUFF_SIZE))
278           return(false);
279         sscanf(buffer," %*s %1024s %lf %lf %lf %1024s %d %1024s %lf",
280                atmid, &x,&y,&z, temp_type, &resnum, resname, &pcharge);
281 
282         atom.SetVector(x, y, z);
283         atom.SetFormalCharge(0);
284 
285         // Handle "CL" and "BR" and other mis-typed atoms
286         str = temp_type;
287         if (strncmp(temp_type, "CL", 2) == 0) {
288           str = "Cl";
289         } else  if (strncmp(temp_type,"BR",2) == 0) {
290           str = "Br";
291         } else if (strncmp(temp_type,"S.o2", 4) == 0) {
292           str = "S.O2";
293         } else if (strncmp(temp_type,"S.o", 3) == 0) {
294           str = "S.O";
295         } else if (strncmp(temp_type,"SI", 2) == 0) {
296           str = "Si";
297         // The following cases are entries which are not in openbabel/data/types.txt
298         // and should probably be added there
299         } else if (strncmp(temp_type,"S.1", 3) == 0) {
300           str = "S.2"; // no idea what the best type might be here
301         } else if (strncmp(temp_type,"P.", 2) == 0) {
302           str = "P.3";
303         } else if (strncasecmp(temp_type,"Ti.", 3) == 0) { // e.g. Ti.th
304           str = "Ti";
305         } else if (strncasecmp(temp_type,"Ru.", 3) == 0) { // e.g. Ru.oh
306           str = "Ru";
307         // Fixes PR#3557898
308         } else if (strncmp(temp_type, "N.4", 3) == 0) {
309           atom.SetFormalCharge(1);
310         }
311 
312         ttab.SetToType("ATN");
313         ttab.Translate(str1,str);
314         elemno = atoi(str1.c_str());
315         ttab.SetToType("IDX");
316 
317         // We might have missed some SI or FE type things above, so here's
318         // another check
319         if( !elemno && isupper(temp_type[1]) )
320           {
321             temp_type[1] = (char)tolower(temp_type[1]);
322             str = temp_type;
323             ttab.SetToType("ATN");
324             ttab.Translate(str1,str);
325             elemno = atoi(str1.c_str());
326             ttab.SetToType("IDX");
327           }
328         // One last check if there isn't a period in the type,
329         // it's a malformed atom type, but it may be the element symbol
330         // GaussView does this (PR#1739905)
331         if ( !elemno ) {
332           // check if it's "Du" or "Xx" and the element is in the atom name
333           if (str == "Du" || str == "Xx") {
334             str = atmid;
335             for (unsigned int i = 0; i < str.length(); ++i)
336               if (!isalpha(str[i])) {
337                 str.erase(i);
338                 break; // we've erased the end of the string
339               }
340           }
341 
342 
343           std::stringstream errorMsg;
344           errorMsg << "This Mol2 file is non-standard. Problem with molecule: "
345                    << mol.GetTitle()
346                    << " Cannot interpret atom types correctly, instead attempting to interpret atom type: "
347                    << str << " as elements instead.";
348           obErrorLog.ThrowError(__FUNCTION__, errorMsg.str(), obWarning);
349 
350           string::size_type dotPos = str.find('.');
351           if (dotPos == string::npos) {
352             elemno = GetAtomicNumAndIsotope(str.c_str(), &isotope);
353           }
354         }
355 
356         atom.SetAtomicNum(elemno);
357         if (isotope)
358           atom.SetIsotope(isotope);
359         else if (elemno == 1)
360           has_explicit_hydrogen = true;
361         ttab.SetToType("INT");
362         ttab.Translate(str1,str);
363         atom.SetType(str1);
364         atom.SetPartialCharge(pcharge);
365         // MMFF94 has different atom types for Cu(I) and Cu(II)
366         // as well as for Fe(II) and Fe(III), so the correct formal
367         // charge is needed for correct atom type assignment
368         if (str1 == "Cu" || str1 == "Fe")
369           atom.SetFormalCharge((int)pcharge);
370         if (!mol.AddAtom(atom))
371           return(false);
372         if (!IsNearZero(pcharge))
373           hasPartialCharges = true;
374 
375         // Add residue information if it exists
376         if (resnum != -1 && resnum != 0 &&
377             strlen(resname) != 0 && strncmp(resname,"<1>", 3) != 0)
378           {
379             has_residue_information = true;
380             OBResidue *res  = (mol.NumResidues() > 0) ?
381               mol.GetResidue(mol.NumResidues()-1) : nullptr;
382             if (res == nullptr || res->GetName() != resname ||
383                 res->GetNum() != resnum)
384               {
385                 vector<OBResidue*>::iterator ri;
386                 for (res = mol.BeginResidue(ri) ; res ; res = mol.NextResidue(ri))
387                   if (res->GetName() == resname &&
388                       res->GetNum() == resnum)
389                     break;
390 
391                 if (res == nullptr)
392                   {
393                     res = mol.NewResidue();
394                     res->SetName(resname);
395                     res->SetNum(resnum);
396                   }
397               }
398             OBAtom *atomPtr = mol.GetAtom(mol.NumAtoms());
399             res->AddAtom(atomPtr);
400             res->SetAtomID(atomPtr, atmid);
401           } // end adding residue info
402       }
403 
404     string nextrti;
405     do { nextrti = read_until_rti(ifs); }
406     while(nextrti != "@<TRIPOS>UNITY_ATOM_ATTR" && nextrti != "@<TRIPOS>BOND" && nextrti.length() > 0);
407 
408     if(nextrti == "@<TRIPOS>UNITY_ATOM_ATTR")
409     { //read in formal charge information, must be done before Kekulization
410         int aid = 0, num = 0;
411         while (ifs.peek() != '@' && ifs.getline(buffer,BUFF_SIZE))
412         {
413           sscanf(buffer,"%d %d",&aid, &num);
414           for(int i = 0; i < num; i++)
415           {
416             if (!ifs.getline(buffer,BUFF_SIZE))
417               return(false);
418             if(strncmp(buffer, "charge", 6) == 0)
419             {
420               int charge = 0;
421               sscanf(buffer,"%*s %d",&charge);
422               if(aid <= mol.NumAtoms())
423               {
424                 OBAtom *atom = mol.GetAtom(aid);
425                 atom->SetFormalCharge(charge);
426               }
427             }
428           }
429         }
430     }
431 
432     while(nextrti != "@<TRIPOS>BOND" && nextrti.length() > 0)
433       nextrti = read_until_rti(ifs);
434 
435     if(nextrti != "@<TRIPOS>BOND")
436       return false;
437 
438     int start, end;
439     bool needs_kekulization = false;
440     for (i = 0; i < nbonds; i++)
441       {
442         if (!ifs.getline(buffer,BUFF_SIZE))
443           return(false);
444 
445         sscanf(buffer,"%*d %d %d %1024s",&start,&end,temp_type);
446         str = temp_type;
447         unsigned int flags = 0;
448         int order;
449         if (str == "ar" || str == "AR" || str == "Ar") {
450           order = 1;
451           flags = OB_AROMATIC_BOND;
452           needs_kekulization = true;
453         }
454         else if (str == "AM" || str == "am" || str == "Am")
455           order = 1;
456         else
457           order = atoi(str.c_str());
458 
459         mol.AddBond(start, end, order, flags);
460       }
461 
462     // TODO: Add a test case for the statement below of Paolo Tosco
463     //       - I am currently assuming that is not a problem for the
464     //         the current kekulization code, but it needs to be
465     //         checked
466     //   "Make a pass to ensure that there are no double bonds
467     //    between atoms which are also involved in aromatic bonds
468     //    as that may ill-condition kekulization (fixes potential
469     //    issues with molecules like CEWYIM30 (MMFF94 validation suite)"
470 
471     mol.SetAromaticPerceived(); // don't trigger reperception
472 
473     if (has_explicit_hydrogen) {
474       FOR_ATOMS_OF_MOL(atom, mol) {
475         unsigned int total_valence = atom->GetTotalDegree();
476         switch (atom->GetAtomicNum()) {
477         case 8:
478           if (total_valence != 1) continue;
479           if (strcmp(atom->GetType(), "O2") != 0) continue; // TODO: the O.co2 type is lost by this point
480           {
481             OBAtomBondIter bit(&*atom);
482             if (!bit->IsAromatic() && bit->GetBondOrder() == 1)
483               atom->SetFormalCharge(-1); // set -1 charge on dangling O.co2
484           }
485           break;
486         case 17: // Cl
487           if (total_valence == 0)
488             atom->SetFormalCharge(-1);
489           break;
490         }
491       }
492     }
493 
494     // Kekulization is necessary if an aromatic bond is present
495     if (needs_kekulization) {
496       // "de-aromatize" carboxylates and (di)thiocarboxylates
497       // The typical case (in our test suite anyway) is a carboxylate binding to
498       // a metal ion. The two O's have charges of -0.5 and the bond orders are aromatic
499       FOR_ATOMS_OF_MOL(atom, mol) {
500         OBAtom* oxygenOrSulfur = &*atom;
501         // Look first for a terminal O/S
502         if (!IsOxygenOrSulfur(oxygenOrSulfur) || oxygenOrSulfur->GetTotalDegree() != 1) continue;
503         OBAtomBondIter bitA(oxygenOrSulfur);
504         OBBond *bondA = &*bitA;
505         if (!bondA->IsAromatic()) continue;
506         // Look for the carbon
507         OBAtom *carbon = bondA->GetNbrAtom(oxygenOrSulfur);
508         if (carbon->GetAtomicNum() != 6) continue;
509         // Look for the other oxygen or sulfur
510         OBAtom* otherOxygenOrSulfur = nullptr;
511         OBBond* bondB = nullptr;
512         FOR_BONDS_OF_ATOM(bitB, carbon) {
513           if (&*bitB == bondA || !bitB->IsAromatic()) continue;
514           OBAtom* nbr = bitB->GetNbrAtom(carbon);
515           if (IsOxygenOrSulfur(nbr) && nbr->GetTotalDegree() == 1) {
516             otherOxygenOrSulfur = nbr;
517             bondB = &*bitB;
518           }
519         }
520         if (!otherOxygenOrSulfur) continue;
521         if(otherOxygenOrSulfur->GetFormalCharge() != 0) continue; //formal charge already set on one
522 
523         // Now set as C(=O)O
524         bondA->SetAromatic(false);
525         oxygenOrSulfur->SetFormalCharge(-1);
526 
527         bondB->SetAromatic(false);
528         bondB->SetBondOrder(2);
529       }
530 
531       // First of all, set the atoms at the ends of the aromatic bonds to also
532       // be aromatic. This information is required for OBKekulize.
533       FOR_BONDS_OF_MOL(bond, mol) {
534         if (bond->IsAromatic()) {
535           bond->GetBeginAtom()->SetAromatic();
536           bond->GetEndAtom()->SetAromatic();
537         }
538       }
539       bool ok = OBKekulize(&mol);
540       if (!ok) {
541         stringstream errorMsg;
542         errorMsg << "Failed to kekulize aromatic bonds in MOL2 file";
543         std::string title = mol.GetTitle();
544         if (!title.empty())
545           errorMsg << " (title is " << title << ")";
546         errorMsg << endl;
547         obErrorLog.ThrowError(__FUNCTION__, errorMsg.str(), obWarning);
548         // return false; Should we return false for a kekulization failure?
549       }
550     }
551 
552     mol.EndModify();
553 
554     // Suggestion by Liu Zhiguo 2008-01-26
555     // Mol2 files define atom types -- there is no need to re-perceive
556     mol.SetAtomTypesPerceived();
557 
558     if (has_residue_information)
559       mol.SetChainsPerceived();
560 
561     if (!has_explicit_hydrogen) {
562       // Guess how many hydrogens are present on each atom based on typical valencies
563       // TODO: implement the MOL2 valence model (if it exists)
564       FOR_ATOMS_OF_MOL(matom, mol) {
565         if (matom->GetImplicitHCount() == 0)
566           OBAtomAssignTypicalImplicitHydrogens(&*matom);
567       }
568     }
569 
570     //must add generic data after end modify - otherwise it will be blown away
571     if (comment)
572       {
573         OBCommentData *cd = new OBCommentData;
574         cd->SetData(comment);
575         cd->SetOrigin(fileformatInput);
576         mol.SetData(cd);
577         delete [] comment;
578         comment = nullptr;
579       }
580     if (hasPartialCharges)
581       mol.SetPartialChargesPerceived();
582 
583     /* Disabled due to PR#3048758 -- seekg is very slow with gzipped mol2
584     // continue untill EOF or untill next molecule record
585     streampos pos;
586     for(;;)
587       {
588         pos = ifs.tellg();
589         if (!ifs.getline(buffer,BUFF_SIZE))
590           break;
591         if (EQn(buffer,"@<TRIPOS>MOLECULE",17))
592           break;
593       }
594 
595     ifs.seekg(pos); // go back to the end of the molecule
596     */
597 
598     return(true);
599   }
600 
601   ////////////////////////////////////////////////////////////////
602 
WriteMolecule(OBBase * pOb,OBConversion * pConv)603   bool MOL2Format::WriteMolecule(OBBase* pOb, OBConversion* pConv)
604   {
605     OBMol* pmol = dynamic_cast<OBMol*>(pOb);
606     if (pmol == nullptr)
607       return false;
608 
609     //Define some references so we can use the old parameter names
610     ostream &ofs = *pConv->GetOutStream();
611     OBMol &mol = *pmol;
612     bool ligandsOnly = pConv->IsOption("l", OBConversion::OUTOPTIONS) != nullptr;
613     bool skipFormalCharge = pConv->IsOption("u", OBConversion::OUTOPTIONS) != nullptr;
614 
615     //The old code follows....
616     string str,str1;
617     char buffer[BUFF_SIZE],label[BUFF_SIZE];
618     char rnum[BUFF_SIZE],rlabel[BUFF_SIZE];
619 
620     //Check if UCSF Dock style coments are on
621     if (pConv->IsOption("c", OBConversion::OUTOPTIONS) != nullptr) {
622         vector<OBGenericData*>::iterator k;
623         vector<OBGenericData*> vdata = mol.GetData();
624         ofs << endl;
625         for (k = vdata.begin();k != vdata.end();++k) {
626             if ((*k)->GetDataType() == OBGenericDataType::PairData
627             && (*k)->GetOrigin()!=local //internal OBPairData is not written
628             && (*k)->GetAttribute()!="PartialCharges")
629             {
630                 ofs << "##########\t" << (*k)->GetAttribute() << ":\t" << ((OBPairData*)(*k))->GetValue() << endl;
631             }
632         }
633         ofs << endl;
634     }
635 
636     ofs << "@<TRIPOS>MOLECULE" << endl;
637     str = mol.GetTitle();
638     if (str.empty())
639       ofs << "*****" << endl;
640     else
641       ofs << str << endl;
642 
643     snprintf(buffer, BUFF_SIZE," %d %d 0 0 0", mol.NumAtoms(),mol.NumBonds());
644     ofs << buffer << endl;
645     ofs << "SMALL" << endl; // TODO: detect if we have protein, biopolymer, etc.
646 
647     OBPairData *dp = (OBPairData*)mol.GetData("PartialCharges");
648     if (dp != nullptr) {
649         // Tripos spec says:
650         // NO_CHARGES, DEL_RE, GASTEIGER, GAST_HUCK, HUCKEL, PULLMAN,
651         // GAUSS80_CHARGES, AMPAC_CHARGES, MULLIKEN_CHARGES, DICT_ CHARGES,
652         // MMFF94_CHARGES, USER_CHARGES
653       if (strcasecmp(dp->GetValue().c_str(),"Mulliken") == 0)
654         ofs << "MULLIKEN_CHARGES" << endl;
655       else if (strcasecmp(dp->GetValue().c_str(),"MMFF94") == 0)
656         ofs << "MMFF94_CHARGES" << endl;
657       else if (strcasecmp(dp->GetValue().c_str(),"ESP") == 0)
658         ofs << "USER_CHARGES" << endl;
659       else if (strcasecmp(dp->GetValue().c_str(),"Gasteiger") == 0)
660         ofs << "GASTEIGER" << endl;
661       else // ideally, code should pick from the Tripos types
662         ofs << "USER_CHARGES" << endl;
663     }
664     else { // No idea what these charges are... all our code sets "PartialCharges"
665         ofs << "GASTEIGER" << endl;
666     }
667 
668     //    ofs << "Energy = " << mol.GetEnergy() << endl;
669 
670     if (mol.HasData(OBGenericDataType::CommentData))
671       {
672         ofs << "****\n"; // comment line printed, so we need to add "no status bits set"
673         OBCommentData *cd = (OBCommentData*)mol.GetData(OBGenericDataType::CommentData);
674         ofs << cd->GetData();
675       }
676 
677     ofs << endl;
678     ofs << "@<TRIPOS>ATOM" << endl;
679 
680     OBAtom *atom;
681     OBResidue *res;
682 
683     vector<OBAtom*>::iterator i;
684     std::map<int, int> labelcount;
685 
686     ttab.SetFromType("INT");
687     ttab.SetToType("SYB");
688 
689     bool hasFormalCharges = false;
690     for (atom = mol.BeginAtom(i);atom;atom = mol.NextAtom(i))
691       {
692 
693         //
694         //  Use sequentially numbered atom names if no residues
695         //
696 
697         snprintf(label,BUFF_SIZE, "%s%d",
698                  OBElements::GetSymbol(atom->GetAtomicNum()),
699                  ++labelcount[atom->GetAtomicNum()]);
700         strcpy(rlabel,"<1>");
701         strcpy(rnum,"1");
702 
703         str = atom->GetType();
704         ttab.Translate(str1,str);
705 
706         if (atom->GetFormalCharge() != 0) hasFormalCharges = true;
707         //
708         //  Use original atom names if there are residues
709         //
710 
711         if (!ligandsOnly && (res = atom->GetResidue()) )
712           {
713             // use original atom names defined by residue
714             snprintf(label,BUFF_SIZE,"%s",(char*)res->GetAtomID(atom).c_str());
715             // make sure that residue name includes its number
716             snprintf(rlabel,BUFF_SIZE,"%s%d",res->GetName().c_str(), res->GetNum());
717             snprintf(rnum,BUFF_SIZE,"%d",res->GetNum());
718           }
719 
720         snprintf(buffer,BUFF_SIZE,"%7d %-6s   %9.4f %9.4f %9.4f %-5s %3s  %-8s %9.4f",
721                  atom->GetIdx(),label,
722                  atom->GetX(),atom->GetY(),atom->GetZ(),
723                  str1.c_str(),
724                  rnum,rlabel,
725                  atom->GetPartialCharge());
726         ofs << buffer << endl;
727       }
728 
729     //store formal charge info; put before bonds so we don't have
730     //to read past the end of the molecule to realize it is there
731     if(hasFormalCharges && !skipFormalCharge) {
732       //dkoes - to enable roundtriping of charges
733       ofs << "@<TRIPOS>UNITY_ATOM_ATTR\n";
734       for (atom = mol.BeginAtom(i);atom;atom = mol.NextAtom(i))
735       {
736         int charge = atom->GetFormalCharge();
737         if (charge != 0)
738         {
739           ofs << atom->GetIdx() << " 1\n"; //one attribute
740           ofs << "charge " << charge << "\n"; //namely charge
741         }
742       }
743     }
744 
745     ofs << "@<TRIPOS>BOND" << endl;
746     OBBond *bond;
747     vector<OBBond*>::iterator j;
748     string s1, s2;
749     for (bond = mol.BeginBond(j);bond;bond = mol.NextBond(j))
750       {
751         s1 = bond->GetBeginAtom()->GetType();
752         s2 = bond->GetEndAtom()->GetType();
753         if (bond->IsAromatic() || s1 == "O.co2" || s2 == "O.co2")
754           strcpy(label,"ar");
755         else if (bond->IsAmide())
756           strcpy(label,"am");
757         else
758           snprintf(label,BUFF_SIZE,"%d",bond->GetBondOrder());
759 
760         snprintf(buffer, BUFF_SIZE,"%6d %5d %5d   %2s",
761                  bond->GetIdx()+1,bond->GetBeginAtomIdx(),bond->GetEndAtomIdx(),
762                  label);
763         ofs << buffer << endl;
764       }
765     // NO trailing blank line (PR#1868929).
766     //    ofs << endl;
767 
768     return(true);
769   }
770 
SkipObjects(int n,OBConversion * pConv)771   int MOL2Format::SkipObjects(int n, OBConversion* pConv)
772   {
773     const char txt[] = "@<TRIPOS>MOLECULE";
774     istream& ifs = *pConv->GetInStream();
775     if(!ifs)
776       return -1;
777     if(n>0 && ifs.peek()==txt[0])
778       ifs.ignore(); // move past '@' so that next mol will be found
779     do {
780       ignore(ifs, txt);
781     } while(ifs && (--n)>0);
782 
783     if(!ifs.eof())
784       ifs.seekg(1-sizeof(txt), ios::cur);//1 for '/0'
785     char ch = ifs.peek();
786    return 1;
787   }
788 
789 } //namespace OpenBabel
790