1 /*********************************************************************** 2 mpdformat.cpp - Write only format to produce descriptors of molecules 3 4 Copyright (C) 2005 Nick England 5 6 This file is part of the Open Babel project. 7 For more information, see <http://openbabel.org/> 8 9 This program is free software; you can redistribute it and/or modify 10 it under the terms of the GNU General Public License as published by 11 the Free Software Foundation version 2 of the License. 12 13 This program is distributed in the hope that it will be useful, 14 but WITHOUT ANY WARRANTY; without even the implied warranty of 15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 16 GNU General Public License for more details. 17 ***********************************************************************/ 18 // Output format is #Origatomtype;#layer-#frequency-#atomtype;#l-#f-#aty;...<tab>Next atom<newline>next molecule 19 20 #include <openbabel/babelconfig.h> 21 22 #include <openbabel/obmolecformat.h> 23 #include <openbabel/mol.h> 24 #include <openbabel/atom.h> 25 #include <openbabel/elements.h> 26 #include <openbabel/data.h> 27 #include <cstdlib> 28 29 30 #define LAYER_DEPTH 2 // cannot increase past 2 without adding more *nbr atom pointers and loops 31 #define LAYER_SIZE 184 // number of types needed for types system used 32 #define SEP_0 ";" // separator between types 33 #define SEP_1 "-" // separator for data layer-freq-type 34 #define SEP_2 '\t' // separator for atoms 35 36 using namespace std; 37 namespace OpenBabel 38 { 39 class MPDFormat : public OBMoleculeFormat 40 { 41 public: 42 //Register this format type ID MPDFormat()43 MPDFormat() 44 { 45 OBConversion::RegisterFormat("mpd",this); 46 OBConversion::RegisterOptionParam("n", this); 47 OBConversion::RegisterOptionParam("c", this); 48 OBConversion::RegisterOptionParam("i", this); 49 } 50 Description()51 virtual const char* Description() //required 52 { 53 return 54 "MolPrint2D format\n" 55 "An implementation of the circular fingerprint MolPrint2D\n" 56 "MolPrint2D is an atom-environment fingerprint developed by Bender et al [bmg2004]_\n" 57 "which has been used in QSAR studies and for measuring molecular similarity.\n\n" 58 59 "The format of the output is as follows::\n\n" 60 " [Molec_name]\\t[atomtype];[layer]-[frequency]-[neighbour_type];\n\n" 61 "Example for the SMILES string ``CC(=O)Cl``::\n\n" 62 " acid chloride 1;1-1-2;2-1-9;2-1-15; 2;1-1-1;1-1-9;1-1-15;\n" 63 " 9;1-1-2;2-1-1;2-1-15; 15;1-1-2;2-1-1;2-1-9;\n\n" 64 65 ".. [bmg2004] Andreas Bender, Hamse Y. Mussa, and Robert C. Glen. **Molecular\n" 66 " Similarity Searching Using Atom Environments, Information-Based\n" 67 " Feature Selection, and a Naive Bayesian Classifier.**\n" 68 " *J. Chem. Inf. Comput. Sci.* **2004**, *44*, 170-178.\n" 69 " [`Link <https://doi.org/10.1021/ci034207y>`_]\n\n" 70 71 " Write Options: e.g. -xnc\n" 72 " n prefix molecule names with name of file \n" 73 " c use XML style separators instead \n" 74 " i use IDX atom types of babel internal \n\n"; 75 }; 76 SpecificationURL()77 virtual const char* SpecificationURL() 78 { 79 return "https://doi.org/10.1021/ci034207y"; 80 }; //optional 81 82 Flags()83 virtual unsigned int Flags() //Flags() can return be any the following combined by | or be omitted if none apply 84 { // NOTREADABLE READONEONLY NOTWRITABLE WRITEONEONLY 85 return NOTREADABLE; 86 }; 87 88 //*** This section identical for most OBMol conversions *** 89 //////////////////////////////////////////////////// 90 /// The "API" interface functions 91 virtual bool WriteMolecule(OBBase* pOb, OBConversion* pConv); 92 void ClearLayer(int a[][LAYER_SIZE]); 93 void PrintLayer(int a[][LAYER_SIZE],ostream &ofs); 94 void PrintXML(int layer_a[][LAYER_SIZE],ostream &ofs); 95 int MyType(string a); 96 }; 97 //*** 98 99 //Make an instance of the format class 100 MPDFormat theMPDFormat; 101 ClearLayer(int layer_a[][LAYER_SIZE])102 void MPDFormat::ClearLayer(int layer_a[][LAYER_SIZE]) 103 { 104 for(int n=0;n<LAYER_DEPTH;n++) 105 { 106 for(int m=0;m<LAYER_SIZE;m++) 107 { 108 layer_a[n][m]=0; 109 } 110 } 111 } 112 PrintLayer(int layer_a[][LAYER_SIZE],ostream & ofs)113 void MPDFormat::PrintLayer(int layer_a[][LAYER_SIZE],ostream &ofs) 114 { 115 int freq=0; 116 for(int n=0;n<LAYER_DEPTH;n++) 117 { 118 for(int m=0;m<LAYER_SIZE;m++) 119 { 120 freq=layer_a[n][m]; 121 if (freq == 0) continue; 122 ofs << n+1 << SEP_1 << freq << SEP_1 << m << SEP_0; 123 layer_a[n][m]=0; 124 } 125 } 126 ofs << SEP_2; 127 } PrintXML(int layer_a[][LAYER_SIZE],ostream & ofs)128 void MPDFormat::PrintXML(int layer_a[][LAYER_SIZE],ostream &ofs) 129 { 130 int freq=0; 131 string outType; 132 for(int n=0;n<LAYER_DEPTH;n++) 133 { 134 for(int m=0;m<LAYER_SIZE;m++) 135 { 136 freq=layer_a[n][m]; 137 if (freq == 0) continue; 138 ofs << "<layer depth=\"" << n+1 << "\" " 139 << "frequency=\"" << freq <<"\" "<<"type=\""<< m <<"\"/>"; 140 layer_a[n][m]=0; 141 } 142 } 143 ofs << "</atom>"; 144 } 145 /*int MPDFormat::MyType(string a) 146 { 147 int o=0; 148 if (strcmp("C.3",a.c_str())==0) o=1; 149 else if(strcmp("C.2",a.c_str())==0) o=2; 150 else if(strcmp( "C.1",a.c_str())==0) o=4; 151 else if(strcmp( "C.ar",a.c_str())==0) o=3; 152 else if(strcmp( "C.cat",a.c_str())==0) o=33; 153 else if(strcmp( "N.3",a.c_str())==0) o=5; 154 else if(strcmp( "N.2",a.c_str())==0) o=6; 155 else if(strcmp( "N.1",a.c_str())==0) o=7; 156 else if(strcmp( "N.ar",a.c_str())==0) o=11; 157 else if(strcmp( "N.am",a.c_str())==0) o=28; 158 else if(strcmp( "N.pl3",a.c_str())==0) o=19; 159 else if(strcmp( "N.4",a.c_str())==0) o=31; 160 else if(strcmp( "O.3",a.c_str())==0) o=8; 161 else if(strcmp( "O.2",a.c_str())==0) o=9; 162 else if(strcmp( "O.co2",a.c_str())==0) o=32; 163 else if(strcmp( "O.spc",a.c_str())==0) o=8; 164 else if(strcmp( "O.t3p",a.c_str())==0) o=8; 165 else if(strcmp( "S.3",a.c_str())==0) o=10; 166 else if(strcmp( "S.2",a.c_str())==0) o=18; 167 else if(strcmp( "S.o",a.c_str())==0) o=29; 168 else if(strcmp( "S.o2",a.c_str())==0) o=30; 169 else if(strcmp( "P.3",a.c_str())==0) o=12; 170 else if(strcmp( "H",a.c_str())==0) o=13; 171 else if(strcmp( "H.spc",a.c_str())==0) o=13; 172 else if(strcmp( "H.t3p",a.c_str())==0) o=13; 173 else if(strcmp( "F",a.c_str())==0) o=16; 174 else if(strcmp( "Cl",a.c_str())==0) o=15; 175 else if(strcmp( "Br",a.c_str())==0) o=14; 176 else if(strcmp( "I",a.c_str())==0) o=17; 177 else if(strcmp( "Si",a.c_str())==0) o=27; 178 else if(strcmp( "LP",a.c_str())==0) o=20; 179 else if(strcmp( "Du",a.c_str())==0) o=26; 180 else if(strcmp( "Na",a.c_str())==0) o=21; 181 else if(strcmp( "K",a.c_str())==0) o=22; 182 else if(strcmp( "Ca",a.c_str())==0) o=23; 183 else if(strcmp( "Li",a.c_str())==0) o=24; 184 else if(strcmp( "Al",a.c_str())==0) o=25; 185 else o=26; 186 return (o); 187 } 188 */ 189 190 191 /////////////////////////////////////////////////// 192 /* Now the Write molecule code */ 193 WriteMolecule(OBBase * pOb,OBConversion * pConv)194 bool MPDFormat::WriteMolecule(OBBase* pOb, OBConversion* pConv) 195 { 196 OBMol* pmol = dynamic_cast<OBMol*>(pOb); 197 if (pmol == nullptr) 198 return false; 199 200 201 202 //Define some references so we can use the old parameter names 203 ostream &ofs = *pConv->GetOutStream(); 204 OBMol &mol = *pmol; 205 206 OBAtom *atom,*nbr,*nbr2; // define atom and neghbour atom pointers 207 string str,src,name; // str used for output, src for handling 208 unsigned int orig,otyp; // orig holds first index for removal from layer 2, otype for output 209 // char buffer[BUFF_SIZE]; 210 bool xml_true=false, pre_true=false, idx_true=false; 211 ttab.SetFromType("INT"); 212 ttab.SetToType("SBN"); 213 int layer[LAYER_DEPTH][LAYER_SIZE]; // layer stores the frequencies of each atom type 214 ClearLayer(layer); 215 216 if(pConv->IsOption("n")) // appending file name to molecule names 217 { 218 name = pConv->GetInFilename(); // string name holds the filename for appending 219 unsigned int dotpos=name.find("."); // removes the extension(s) from the filename 220 if (dotpos < name.length())name.erase(dotpos); 221 pre_true = true; 222 } 223 224 if(pConv->IsOption("c")) // outputting in XML format 225 xml_true=true; 226 227 if(pConv->IsOption("i")) // using IDX not SBN 228 { 229 idx_true=true; 230 ttab.SetToType("IDX"); 231 } 232 233 str = mol.GetTitle(); 234 if(xml_true==true) // <xml> 235 { 236 ofs << "<molecule id=\""; 237 if(pre_true==true)ofs << name; 238 if (str.empty()) 239 { 240 ofs << pConv->GetOutputIndex() << "\">"; 241 } 242 else ofs << str << pConv->GetOutputIndex() << "\">"; 243 } // </xml> 244 else{ 245 if (str.empty()) 246 { 247 if (pre_true==true) {ofs << name << SEP_1;} 248 ofs << "***" << pConv->GetOutputIndex()<< SEP_2; 249 } 250 else 251 { if (pre_true==true){ofs << name << SEP_1;} 252 ofs << str << SEP_2; 253 } 254 } 255 vector<OBAtom*>::iterator i; // iterate over all atoms 256 for (atom = mol.BeginAtom(i);atom;atom = mol.NextAtom(i)) 257 { 258 src = atom->GetType(); 259 ttab.Translate(str,src); 260 // if (idx_true==true){ 261 otyp = atoi(str.c_str()); 262 //} 263 // else {otyp=MyType(str);} 264 orig = atom->GetIdx(); 265 if(xml_true==true){ ofs << "<atom type=\"" << otyp << "\">";} 266 else ofs << otyp << SEP_0; 267 268 vector<OBBond*>::iterator j; // iterate over its neighbours 269 for (nbr = atom->BeginNbrAtom(j);nbr;nbr = atom->NextNbrAtom(j)) 270 { 271 src = nbr->GetType(); 272 ttab.Translate(str,src); 273 // if (idx_true==true){ 274 otyp = atoi(str.c_str()); 275 //} 276 // else {otyp=MyType(str);} 277 layer[0][otyp]=layer[0][otyp]+1; 278 279 vector<OBBond*>::iterator k; // iterate again over neighbours 280 for (nbr2 = nbr->BeginNbrAtom(k);nbr2;nbr2 = nbr->NextNbrAtom(k)) 281 { 282 if (nbr2->GetIdx()==orig) continue; 283 src = nbr2->GetType(); 284 ttab.Translate(str,src); 285 // if (idx_true==true){ 286 otyp = atoi(str.c_str()); 287 //} 288 // else {otyp=MyType(str);} 289 layer[1][otyp]=layer[1][otyp]+1; 290 } // end k 291 } // end j 292 if(xml_true==true)PrintXML(layer,ofs); 293 else PrintLayer(layer,ofs); 294 //ClearLayer(layer); 295 } // end i 296 if(xml_true==true)ofs << "</molecule>"; 297 ofs << endl; 298 return(true); 299 } // writemolecule 300 301 } // namespace openbabel 302