1 /***********************************************************************
2 mpdformat.cpp - Write only format to produce descriptors of molecules
3 
4 Copyright (C) 2005 Nick England
5 
6 This file is part of the Open Babel project.
7 For more information, see <http://openbabel.org/>
8 
9 This program is free software; you can redistribute it and/or modify
10 it under the terms of the GNU General Public License as published by
11 the Free Software Foundation version 2 of the License.
12 
13 This program is distributed in the hope that it will be useful,
14 but WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
16 GNU General Public License for more details.
17 ***********************************************************************/
18 // Output format is #Origatomtype;#layer-#frequency-#atomtype;#l-#f-#aty;...<tab>Next atom<newline>next molecule
19 
20 #include <openbabel/babelconfig.h>
21 
22 #include <openbabel/obmolecformat.h>
23 #include <openbabel/mol.h>
24 #include <openbabel/atom.h>
25 #include <openbabel/elements.h>
26 #include <openbabel/data.h>
27 #include <cstdlib>
28 
29 
// Number of neighbour shells counted around each atom. It is the first
// dimension of the layer table and the bound of every depth loop in this file.
#define LAYER_DEPTH 2 // cannot increase past 2 without adding more *nbr atom pointers and loops
// Number of counters per shell, i.e. the second dimension of the layer table.
// Numeric atom type codes are used directly as indices into a row.
#define LAYER_SIZE 184 // number of types needed for types system used
// Written after the atom's own type and after every layer-freq-type entry.
#define SEP_0 ";"  // separator between types
// Also written between the file name prefix and the molecule title (option n).
#define SEP_1 "-"  // separator for data layer-freq-type
// Also written after the molecule name at the start of a record.
#define SEP_2 '\t' // separator for atoms
35 
36 using namespace std;
37 namespace OpenBabel
38 {
39   class MPDFormat : public OBMoleculeFormat
40   {
41   public:
42     //Register this format type ID
MPDFormat()43     MPDFormat()
44     {
45       OBConversion::RegisterFormat("mpd",this);
46       OBConversion::RegisterOptionParam("n", this);
47       OBConversion::RegisterOptionParam("c", this);
48       OBConversion::RegisterOptionParam("i", this);
49     }
50 
Description()51     virtual const char* Description() //required
52     {
53       return
54         "MolPrint2D format\n"
55         "An implementation of the circular fingerprint MolPrint2D\n"
56         "MolPrint2D is an atom-environment fingerprint developed by Bender et al [bmg2004]_\n"
57         "which has been used in QSAR studies and for measuring molecular similarity.\n\n"
58 
59         "The format of the output is as follows::\n\n"
60         "   [Molec_name]\\t[atomtype];[layer]-[frequency]-[neighbour_type];\n\n"
61         "Example for the SMILES string ``CC(=O)Cl``::\n\n"
62         "   acid chloride   1;1-1-2;2-1-9;2-1-15;   2;1-1-1;1-1-9;1-1-15;\n"
63         "                   9;1-1-2;2-1-1;2-1-15;   15;1-1-2;2-1-1;2-1-9;\n\n"
64 
65 ".. [bmg2004] Andreas Bender, Hamse Y. Mussa, and Robert C. Glen. **Molecular\n"
66 "             Similarity Searching Using Atom Environments, Information-Based\n"
67 "             Feature Selection, and a Naive Bayesian Classifier.**\n"
68 "             *J. Chem. Inf. Comput. Sci.* **2004**, *44*, 170-178.\n"
69 "             [`Link <https://doi.org/10.1021/ci034207y>`_]\n\n"
70 
71            " Write Options: e.g. -xnc\n"
72            "  n prefix molecule names with name of file \n"
73            "  c use XML style separators instead \n"
74            "  i use IDX atom types of babel internal \n\n";
75     };
76 
SpecificationURL()77     virtual const char* SpecificationURL()
78     {
79       return "https://doi.org/10.1021/ci034207y";
80     }; //optional
81 
82 
Flags()83     virtual unsigned int Flags() //Flags() can return be any the following combined by | or be omitted if none apply
84     {                            // NOTREADABLE  READONEONLY  NOTWRITABLE  WRITEONEONLY
85       return NOTREADABLE;
86     };
87 
88     //*** This section identical for most OBMol conversions ***
89     ////////////////////////////////////////////////////
90     /// The "API" interface functions
91     virtual bool WriteMolecule(OBBase* pOb, OBConversion* pConv);
92     void ClearLayer(int a[][LAYER_SIZE]);
93     void PrintLayer(int a[][LAYER_SIZE],ostream &ofs);
94     void PrintXML(int layer_a[][LAYER_SIZE],ostream &ofs);
95     int MyType(string a);
96   };
97   //***
98 
  // Make the single global instance of the format class. Constructing it runs
  // the MPDFormat constructor, which registers the "mpd" format and its options.
  MPDFormat theMPDFormat;
101 
ClearLayer(int layer_a[][LAYER_SIZE])102   void MPDFormat::ClearLayer(int layer_a[][LAYER_SIZE])
103   {
104     for(int n=0;n<LAYER_DEPTH;n++)
105       {
106         for(int m=0;m<LAYER_SIZE;m++)
107           {
108             layer_a[n][m]=0;
109           }
110       }
111   }
112 
PrintLayer(int layer_a[][LAYER_SIZE],ostream & ofs)113   void MPDFormat::PrintLayer(int layer_a[][LAYER_SIZE],ostream &ofs)
114   {
115     int freq=0;
116     for(int n=0;n<LAYER_DEPTH;n++)
117       {
118         for(int m=0;m<LAYER_SIZE;m++)
119           {
120             freq=layer_a[n][m];
121             if (freq == 0) continue;
122             ofs << n+1 << SEP_1 << freq << SEP_1 << m << SEP_0;
123             layer_a[n][m]=0;
124           }
125       }
126     ofs << SEP_2;
127   }
PrintXML(int layer_a[][LAYER_SIZE],ostream & ofs)128   void MPDFormat::PrintXML(int layer_a[][LAYER_SIZE],ostream &ofs)
129   {
130     int freq=0;
131     string outType;
132     for(int n=0;n<LAYER_DEPTH;n++)
133       {
134         for(int m=0;m<LAYER_SIZE;m++)
135           {
136             freq=layer_a[n][m];
137             if (freq == 0) continue;
138             ofs << "<layer depth=\"" << n+1 << "\" "
139                 << "frequency=\"" << freq <<"\" "<<"type=\""<< m <<"\"/>";
140             layer_a[n][m]=0;
141           }
142       }
143     ofs << "</atom>";
144   }
145   /*int MPDFormat::MyType(string a)
146     {
147     int o=0;
148     if (strcmp("C.3",a.c_str())==0) o=1;
149     else if(strcmp("C.2",a.c_str())==0) o=2;
150     else if(strcmp( "C.1",a.c_str())==0) o=4;
151     else if(strcmp( "C.ar",a.c_str())==0) o=3;
152     else if(strcmp( "C.cat",a.c_str())==0) o=33;
153     else if(strcmp( "N.3",a.c_str())==0) o=5;
154     else if(strcmp( "N.2",a.c_str())==0) o=6;
155     else if(strcmp( "N.1",a.c_str())==0) o=7;
156     else if(strcmp( "N.ar",a.c_str())==0) o=11;
157     else if(strcmp( "N.am",a.c_str())==0) o=28;
158     else if(strcmp( "N.pl3",a.c_str())==0) o=19;
159     else if(strcmp( "N.4",a.c_str())==0) o=31;
160     else if(strcmp( "O.3",a.c_str())==0) o=8;
161     else if(strcmp( "O.2",a.c_str())==0) o=9;
162     else if(strcmp( "O.co2",a.c_str())==0) o=32;
163     else if(strcmp( "O.spc",a.c_str())==0) o=8;
164     else if(strcmp( "O.t3p",a.c_str())==0) o=8;
165     else if(strcmp( "S.3",a.c_str())==0) o=10;
166     else if(strcmp( "S.2",a.c_str())==0) o=18;
167     else if(strcmp( "S.o",a.c_str())==0) o=29;
168     else if(strcmp( "S.o2",a.c_str())==0) o=30;
169     else if(strcmp( "P.3",a.c_str())==0) o=12;
170     else if(strcmp( "H",a.c_str())==0) o=13;
171     else if(strcmp( "H.spc",a.c_str())==0) o=13;
172     else if(strcmp( "H.t3p",a.c_str())==0) o=13;
173     else if(strcmp( "F",a.c_str())==0) o=16;
174     else if(strcmp( "Cl",a.c_str())==0) o=15;
175     else if(strcmp( "Br",a.c_str())==0) o=14;
176     else if(strcmp( "I",a.c_str())==0) o=17;
177     else if(strcmp( "Si",a.c_str())==0) o=27;
178     else if(strcmp( "LP",a.c_str())==0) o=20;
179     else if(strcmp( "Du",a.c_str())==0) o=26;
180     else if(strcmp( "Na",a.c_str())==0) o=21;
181     else if(strcmp( "K",a.c_str())==0) o=22;
182     else if(strcmp( "Ca",a.c_str())==0) o=23;
183     else if(strcmp( "Li",a.c_str())==0) o=24;
184     else if(strcmp( "Al",a.c_str())==0) o=25;
185     else o=26;
186     return (o);
187     }
188   */
189 
190 
191   ///////////////////////////////////////////////////
192   /* Now the Write molecule code */
193 
WriteMolecule(OBBase * pOb,OBConversion * pConv)194   bool MPDFormat::WriteMolecule(OBBase* pOb, OBConversion* pConv)
195   {
196     OBMol* pmol = dynamic_cast<OBMol*>(pOb);
197     if (pmol == nullptr)
198       return false;
199 
200 
201 
202     //Define some references so we can use the old parameter names
203     ostream &ofs = *pConv->GetOutStream();
204     OBMol &mol = *pmol;
205 
206     OBAtom *atom,*nbr,*nbr2; // define atom and neghbour atom pointers
207     string str,src,name;     // str used for output, src for handling
208     unsigned int orig,otyp;  // orig holds first index for removal from layer 2, otype for output
209     //    char buffer[BUFF_SIZE];
210     bool xml_true=false, pre_true=false, idx_true=false;
211     ttab.SetFromType("INT");
212     ttab.SetToType("SBN");
213     int layer[LAYER_DEPTH][LAYER_SIZE]; // layer stores the frequencies of each atom type
214     ClearLayer(layer);
215 
216 		if(pConv->IsOption("n")) // appending file name to molecule names
217       {
218         name = pConv->GetInFilename();     // string name holds the filename for appending
219         unsigned int dotpos=name.find(".");         // removes the extension(s) from the filename
220         if (dotpos < name.length())name.erase(dotpos);
221         pre_true = true;
222       }
223 
224 		if(pConv->IsOption("c")) // outputting in XML format
225 			xml_true=true;
226 
227 		if(pConv->IsOption("i")) // using IDX not SBN
228       {
229         idx_true=true;
230         ttab.SetToType("IDX");
231       }
232 
233     str = mol.GetTitle();
234     if(xml_true==true) // <xml>
235       {
236         ofs << "<molecule id=\"";
237         if(pre_true==true)ofs << name;
238         if (str.empty())
239           {
240             ofs << pConv->GetOutputIndex() << "\">";
241           }
242         else ofs << str << pConv->GetOutputIndex() << "\">";
243       } // </xml>
244     else{
245       if (str.empty())
246         {
247           if (pre_true==true) {ofs << name << SEP_1;}
248           ofs << "***" << pConv->GetOutputIndex()<< SEP_2;
249         }
250       else
251         { if (pre_true==true){ofs << name << SEP_1;}
252         ofs << str << SEP_2;
253         }
254     }
255     vector<OBAtom*>::iterator i; // iterate over all atoms
256     for (atom = mol.BeginAtom(i);atom;atom = mol.NextAtom(i))
257       {
258         src = atom->GetType();
259         ttab.Translate(str,src);
260         // if (idx_true==true){
261         otyp = atoi(str.c_str());
262         //}
263         //  else {otyp=MyType(str);}
264         orig = atom->GetIdx();
265         if(xml_true==true){ ofs << "<atom type=\"" << otyp << "\">";}
266         else ofs << otyp << SEP_0;
267 
268         vector<OBBond*>::iterator j; // iterate over its neighbours
269         for (nbr = atom->BeginNbrAtom(j);nbr;nbr = atom->NextNbrAtom(j))
270           {
271             src = nbr->GetType();
272             ttab.Translate(str,src);
273             // if (idx_true==true){
274             otyp = atoi(str.c_str());
275             //}
276             //  else {otyp=MyType(str);}
277             layer[0][otyp]=layer[0][otyp]+1;
278 
279             vector<OBBond*>::iterator k; // iterate again over neighbours
280             for (nbr2 = nbr->BeginNbrAtom(k);nbr2;nbr2 = nbr->NextNbrAtom(k))
281               {
282                 if (nbr2->GetIdx()==orig) continue;
283                 src = nbr2->GetType();
284                 ttab.Translate(str,src);
285                 // if (idx_true==true){
286                 otyp = atoi(str.c_str());
287                 //}
288                 //  else {otyp=MyType(str);}
289                 layer[1][otyp]=layer[1][otyp]+1;
290               } // end k
291           } // end j
292         if(xml_true==true)PrintXML(layer,ofs);
293         else PrintLayer(layer,ofs);
294         //ClearLayer(layer);
295       } // end i
296     if(xml_true==true)ofs << "</molecule>";
297     ofs << endl;
298     return(true);
299   } // writemolecule
300 
301 } // namespace openbabel
302