1 /**********************************************************************
2 obrms = Returns the rms between two chemically identical structures
3 Derived from obfit.
4 *
5 This file is part of the Open Babel project.
6 For more information, see <http://openbabel.org/>
7
8 This program is free software; you can redistribute it and/or modify
9 it under the terms of the GNU General Public License as published by
10 the Free Software Foundation version 2 of the License.
11
12 This program is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
16 ***********************************************************************/
17
18 /*
19 Require a fixed molecule and a set of molecules to compare against.
20 example of command line:
21 obrms ref.sdf test.sdf
22 */
23
24 #undef _GLIBCXX_DEBUG /* unless you have boost libraries built with this, you do _not_ want this defined */
25 // used to set import/export for Cygwin DLLs
26 #ifdef WIN32
27 #define USING_OBDLL
28 #endif
29 #include <cstdlib>
30 #include <openbabel/babelconfig.h>
31 #include <openbabel/mol.h>
32 #include <openbabel/parsmart.h>
33 #include <openbabel/obconversion.h>
34 #include <openbabel/query.h>
35 #include <openbabel/isomorphism.h>
36 #include <openbabel/shared_ptr.h>
37 #include <openbabel/obutil.h>
38
39 #include "getopt.h"
40
41 #include <sstream>
42
43 using namespace std;
44 using namespace OpenBabel;
45
46 class AtomDistanceSorter
47 {
48 vector3 ref;
49 public:
AtomDistanceSorter(OBAtom * r)50 AtomDistanceSorter(OBAtom *r) :
51 ref(r->GetVector())
52 {
53
54 }
operator ()(OBAtom * l,OBAtom * r) const55 bool operator()(OBAtom *l, OBAtom *r) const
56 {
57 double ld = ref.distSq(l->GetVector());
58 double rd = ref.distSq(r->GetVector());
59
60 return ld < rd;
61 }
62 };
63 /* class to perform graph matching between two molecules.
64 * Is initialized with the reference molecule.
65 * Will figure out the atom correspondences and compute the rmsd between the
66 * ref mol and a provided test mol.
67 */
68 class Matcher
69 {
70 const OBMol *ref;
71 obsharedptr<OBQuery> query;
72 obsharedptr<OBIsomorphismMapper> mapper;
73
74 class MapRMSDFunctor: public OBIsomorphismMapper::Functor
75 {
76 private:
77 const OBMol *ref;
78 OBMol& test;
79 double bestRMSD;
80 bool minimize;
81 public:
82 //will modify t if min is true
MapRMSDFunctor(const OBMol * r,OBMol & t,bool min=false)83 MapRMSDFunctor(const OBMol* r, OBMol& t, bool min = false) :
84 ref(r), test(t), bestRMSD(HUGE_VAL), minimize(min)
85 {
86 }
87
operator ()(OBIsomorphismMapper::Mapping & map)88 bool operator()(OBIsomorphismMapper::Mapping &map)
89 {
90 unsigned N = map.size();
91 double *refcoord = (double*)alloca(sizeof(double)*N * 3);
92 double *testcoord = (double*)alloca(sizeof(double)*N * 3);
93
94 for (unsigned i = 0; i < N; i++)
95 {
96 //obmol indices are 1-indexed while the mapper is zero indexed
97 const OBAtom *ratom = ref->GetAtom(map[i].first + 1);
98 const OBAtom *tatom = test.GetAtom(map[i].second + 1);
99 assert(ratom && tatom);
100
101 for (unsigned c = 0; c < 3; c++)
102 {
103 refcoord[3 * i + c] = ratom->GetVector()[c];
104 testcoord[3 * i + c] = tatom->GetVector()[c];
105 }
106 }
107
108 if (minimize)
109 {
110 double rmatrix[3][3] =
111 { 0 };
112
113 double rave[3] =
114 { 0, 0, 0 };
115 double tave[3] =
116 { 0, 0, 0 };
117 //center
118 for (unsigned i = 0; i < N; i++)
119 {
120 for (unsigned c = 0; c < 3; c++)
121 {
122 rave[c] += refcoord[3 * i + c];
123 tave[c] += testcoord[3 * i + c];
124 }
125 }
126
127 for (unsigned c = 0; c < 3; c++)
128 {
129 rave[c] /= N;
130 tave[c] /= N;
131 }
132
133 for (unsigned i = 0; i < N; i++)
134 {
135 for (unsigned c = 0; c < 3; c++)
136 {
137 refcoord[3 * i + c] -= rave[c];
138 testcoord[3 * i + c] -= tave[c];
139 }
140 }
141
142 qtrfit(refcoord, testcoord, N, rmatrix);
143 rotate_coords(testcoord, rmatrix, N);
144
145 for (unsigned i = 0; i < N; i++)
146 {
147 //with minimize on, change coordinates
148 OBAtom *tatom = test.GetAtom(map[i].second + 1);
149 tatom->SetVector(testcoord[3*i]+rave[0], testcoord[3*i+1]+rave[1], testcoord[3*i+2]+rave[2]);
150 }
151 }
152
153 double rmsd = calc_rms(refcoord, testcoord, N);
154
155 if (rmsd < bestRMSD)
156 bestRMSD = rmsd;
157 // check all possible mappings
158 return false;
159 }
160
getRMSD() const161 double getRMSD() const
162 {
163 return bestRMSD;
164 }
165 };
166
167 public:
Matcher(OBMol & mol)168 Matcher(OBMol& mol) : ref(&mol)
169 {
170 query = obsharedptr<OBQuery>(CompileMoleculeQuery(&mol));
171 mapper = obsharedptr<OBIsomorphismMapper>(OBIsomorphismMapper::GetInstance(query.get()));
172 }
173
174
175 //computes a correspondence between the ref mol and test (exhaustively)
176 //and returns the rmsd; returns infinity if unmatchable
computeRMSD(OBMol & test,bool minimize=false)177 double computeRMSD(OBMol& test, bool minimize = false)
178 {
179 MapRMSDFunctor funct(ref, test, minimize);
180
181 mapper->MapGeneric(funct, &test);
182 return funct.getRMSD();
183 }
184 };
185
186 //preprocess molecule into a standardized state for heavy atom rmsd computation
processMol(OBMol & mol)187 static void processMol(OBMol& mol)
188 {
189 //isomorphismmapper wants isomorphic atoms to have the same aromatic and ring state,
190 //but these proporties aren't reliable enough to be trusted in evaluating molecules
191 //should be considered the same based solely on connectivity
192
193 mol.DeleteHydrogens(); //heavy atom rmsd
194 for (OBAtomIterator aitr = mol.BeginAtoms(); aitr != mol.EndAtoms(); aitr++)
195 {
196 OBAtom *a = *aitr;
197 a->SetAromatic(false);
198 a->SetInRing();
199 }
200 for (OBBondIterator bitr = mol.BeginBonds(); bitr != mol.EndBonds(); bitr++)
201 {
202 OBBond *b = *bitr;
203 b->SetAromatic(false);
204 b->SetBondOrder(1);
205 b->SetInRing();
206 }
207 //avoid recomputations
208 mol.SetHybridizationPerceived();
209 mol.SetRingAtomsAndBondsPerceived();
210 mol.SetAromaticPerceived();
211 }
212 ///////////////////////////////////////////////////////////////////////////////
213 //! \brief compute rms between chemically identical molecules
main(int argc,char ** argv)214 int main(int argc, char **argv)
215 {
216 bool firstOnly = false;
217 bool minimize = false;
218 bool separate = false;
219 bool help = false;
220 bool docross = false;
221 string fileRef;
222 string fileTest;
223 string fileOut;
224
225 const char *helpmsg =
226 "obrms: Computes the heavy-atom RMSD of identical compound structures.\n"
227 "Usage: obrms reference_file [test_file]\n"
228 "Options:\n"
229 "\t -o, --out re-oriented test structure output\n"
230 "\t -f, --firstonly use only the first structure in the reference file\n"
231 "\t -m, --minimize compute minimum RMSD\n"
232 "\t -x, --cross compute all n^2 RMSDs between molecules of reference file\n"
233 "\t -s, --separate separate reference file into constituent molecules and report best RMSD\n"
234 "\t -h, --help help message\n";
235 struct option long_options[] = {
236 {"firstonly", no_argument, nullptr, 'f'},
237 {"minimize", no_argument, nullptr, 'm'},
238 {"cross", no_argument, nullptr, 'x'},
239 {"separate", no_argument, nullptr, 's'},
240 {"out", required_argument, nullptr, 'o'},
241 {"help", no_argument, nullptr, 'h'},
242 {nullptr, 0, nullptr, 0}
243 };
244 int option_index = 0;
245 int c = 0;
246 while ((c = getopt_long(argc, argv, "hfmxso:", long_options, &option_index) ) != -1) {
247 switch(c) {
248 case 'o':
249 fileOut = optarg;
250 break;
251 case 'f':
252 firstOnly = true;
253 break;
254 case 'm':
255 minimize = true;
256 break;
257 case 'x':
258 docross = true;
259 break;
260 case 's':
261 separate = true;
262 break;
263 case 'h':
264 cout << helpmsg;
265 exit(0);
266 break;
267 default:
268 cerr << "Unrecognized option: " << c << "\n";
269 exit(-1);
270 }
271 }
272
273 if(optind < argc) {
274 fileRef = argv[optind];
275 optind++;
276 }
277 if(optind < argc) {
278 fileTest = argv[optind];
279 optind++;
280 }
281
282 if(optind < argc) {
283 cerr << "Unrecognized argument: " << argv[optind];
284 exit(-1);
285 }
286
287 if(!docross && fileTest.size() == 0) {
288 cerr << helpmsg;
289 cerr << "Command line parse error: test file is required but missing\n";
290 exit(-1);
291 }
292
293 //open mols
294 OBConversion refconv(fileRef);
295
296 OBConversion outconv;
297 OBFormat *outFormat = outconv.FormatFromExt(fileOut);
298 if(fileOut.size() > 0)
299 {
300 if(!outFormat || !outconv.SetInAndOutFormats(outFormat, outFormat))
301 {
302 cerr << "Do not understand output format!" << endl;
303 exit(-1);
304 }
305 }
306
307 std::ofstream out;
308 if(fileOut.size() > 0)
309 {
310 out.open(fileOut.c_str());
311 }
312
313
314 //read reference
315 OBMol molref;
316
317 if(docross) {
318 //load in the entire reference file
319 vector<OBMol> refmols;
320 while (refconv.Read(&molref))
321 {
322 processMol(molref);
323 refmols.push_back(molref);
324 }
325
326 for(unsigned i = 0, n = refmols.size() ; i < n; i++) {
327 OBMol& ref = refmols[i];
328 Matcher matcher(ref);
329 cout << ref.GetTitle();
330 for(unsigned j = 0; j < n; j++) {
331 OBMol& moltest = refmols[j];
332 double rmsd = matcher.computeRMSD(moltest, minimize);
333 cout << ", " << rmsd;
334 }
335 cout << "\n";
336 }
337
338 } else {
339
340 //check comparison file
341 while (refconv.Read(&molref))
342 {
343 vector<OBMol> refmols;
344 if(separate) {
345 refmols = molref.Separate();
346 } else {
347 refmols.push_back(molref);
348 }
349
350 vector<Matcher> matchers;
351 for(unsigned i = 0, n = refmols.size(); i < n; i++) {
352 processMol(refmols[i]);
353 Matcher matcher(refmols[i]); // create the matcher
354 matchers.push_back(matcher);
355 }
356
357 OBConversion testconv(fileTest);
358 OBMol moltest;
359 while (testconv.Read(&moltest))
360 {
361 if (moltest.Empty())
362 break;
363
364 processMol(moltest);
365
366 double bestRMSD = HUGE_VAL;
367 for(unsigned i = 0, n = matchers.size(); i < n; i++) {
368 double rmsd = matchers[i].computeRMSD(moltest, minimize);
369 if(rmsd < bestRMSD) bestRMSD = rmsd;
370 }
371
372 cout << "RMSD " << molref.GetTitle() << ":" << moltest.GetTitle() << " " << bestRMSD << "\n";
373
374 if(out)
375 {
376 outconv.Write(&moltest, &out);
377 }
378 if (!firstOnly) //one test molecule will be read for each reference molecule
379 break;
380 }
381 if(firstOnly) //done with first reference mol
382 break;
383 }
384 }
385 return (0);
386 }
387