1 /**********************************************************************
2  obrms = Returns the rms between two chemically identical structures
3  Derived from obfit.
4  *
5  This file is part of the Open Babel project.
6  For more information, see <http://openbabel.org/>
7 
8  This program is free software; you can redistribute it and/or modify
9  it under the terms of the GNU General Public License as published by
10  the Free Software Foundation version 2 of the License.
11 
12  This program is distributed in the hope that it will be useful,
13  but WITHOUT ANY WARRANTY; without even the implied warranty of
14  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15  GNU General Public License for more details.
16  ***********************************************************************/
17 
18 /*
19  Require a fixed molecule and a set of molecules to compare against.
20  example of command line:
21  obrms ref.sdf test.sdf
22  */
23 
24 #undef _GLIBCXX_DEBUG /* unless you have boost libraries built with this, you do _not_ want this defined */
25 // used to set import/export for Cygwin DLLs
26 #ifdef WIN32
27 #define USING_OBDLL
28 #endif
29 #include <cstdlib>
30 #include <openbabel/babelconfig.h>
31 #include <openbabel/mol.h>
32 #include <openbabel/parsmart.h>
33 #include <openbabel/obconversion.h>
34 #include <openbabel/query.h>
35 #include <openbabel/isomorphism.h>
36 #include <openbabel/shared_ptr.h>
37 #include <openbabel/obutil.h>
38 
39 #include "getopt.h"
40 
41 #include <sstream>
42 
43 using namespace std;
44 using namespace OpenBabel;
45 
46 class AtomDistanceSorter
47 {
48 	vector3 ref;
49 	public:
AtomDistanceSorter(OBAtom * r)50 	AtomDistanceSorter(OBAtom *r) :
51 			ref(r->GetVector())
52 	{
53 
54 	}
operator ()(OBAtom * l,OBAtom * r) const55 	bool operator()(OBAtom *l, OBAtom *r) const
56 			{
57 		double ld = ref.distSq(l->GetVector());
58 		double rd = ref.distSq(r->GetVector());
59 
60 		return ld < rd;
61 	}
62 };
63 /* class to perform graph matching between two molecules.
64  * Is initialized with the reference molecule.
65  * Will figure out the atom correspondences and compute the rmsd between the
66  * ref mol and a provided test mol.
67  */
68 class Matcher
69 {
70 	const OBMol *ref;
71 	obsharedptr<OBQuery> query;
72 	obsharedptr<OBIsomorphismMapper> mapper;
73 
74 	class MapRMSDFunctor: public OBIsomorphismMapper::Functor
75 	{
76 	private:
77 		const OBMol *ref;
78 		OBMol& test;
79 		double bestRMSD;
80 		bool minimize;
81 		public:
82 		//will modify t if min is true
MapRMSDFunctor(const OBMol * r,OBMol & t,bool min=false)83 		MapRMSDFunctor(const OBMol* r, OBMol& t, bool min = false) :
84 				ref(r), test(t), bestRMSD(HUGE_VAL), minimize(min)
85 		{
86 		}
87 
operator ()(OBIsomorphismMapper::Mapping & map)88 		bool operator()(OBIsomorphismMapper::Mapping &map)
89 		{
90 			unsigned N = map.size();
91 			double *refcoord = (double*)alloca(sizeof(double)*N * 3);
92 			double *testcoord = (double*)alloca(sizeof(double)*N * 3);
93 
94 			for (unsigned i = 0; i < N; i++)
95 			{
96 				//obmol indices are 1-indexed while the mapper is zero indexed
97 				const OBAtom *ratom = ref->GetAtom(map[i].first + 1);
98 				const OBAtom *tatom = test.GetAtom(map[i].second + 1);
99 				assert(ratom && tatom);
100 
101 				for (unsigned c = 0; c < 3; c++)
102 				{
103 					refcoord[3 * i + c] = ratom->GetVector()[c];
104 					testcoord[3 * i + c] = tatom->GetVector()[c];
105 				}
106 			}
107 
108 			if (minimize)
109 			{
110 				double rmatrix[3][3] =
111 						{ 0 };
112 
113 				double rave[3] =
114 						{ 0, 0, 0 };
115 				double tave[3] =
116 						{ 0, 0, 0 };
117 				//center
118 				for (unsigned i = 0; i < N; i++)
119 				{
120 					for (unsigned c = 0; c < 3; c++)
121 					{
122 						rave[c] += refcoord[3 * i + c];
123 						tave[c] += testcoord[3 * i + c];
124 					}
125 				}
126 
127 				for (unsigned c = 0; c < 3; c++)
128 				{
129 					rave[c] /= N;
130 					tave[c] /= N;
131 				}
132 
133 				for (unsigned i = 0; i < N; i++)
134 				{
135 					for (unsigned c = 0; c < 3; c++)
136 					{
137 						refcoord[3 * i + c] -= rave[c];
138 						testcoord[3 * i + c] -= tave[c];
139 					}
140 				}
141 
142 				qtrfit(refcoord, testcoord, N, rmatrix);
143 				rotate_coords(testcoord, rmatrix, N);
144 
145 				for (unsigned i = 0; i < N; i++)
146 				{
147 					//with minimize on, change coordinates
148 					OBAtom *tatom = test.GetAtom(map[i].second + 1);
149 					tatom->SetVector(testcoord[3*i]+rave[0], testcoord[3*i+1]+rave[1], testcoord[3*i+2]+rave[2]);
150 				}
151 			}
152 
153 			double rmsd = calc_rms(refcoord, testcoord, N);
154 
155 			if (rmsd < bestRMSD)
156 				bestRMSD = rmsd;
157 			// check all possible mappings
158 			return false;
159 		}
160 
getRMSD() const161 		double getRMSD() const
162 		{
163 			return bestRMSD;
164 		}
165 	};
166 
167 public:
Matcher(OBMol & mol)168 	Matcher(OBMol& mol) : ref(&mol)
169 	{
170 		query = obsharedptr<OBQuery>(CompileMoleculeQuery(&mol));
171 		mapper = obsharedptr<OBIsomorphismMapper>(OBIsomorphismMapper::GetInstance(query.get()));
172 	}
173 
174 
175 	//computes a correspondence between the ref mol and test (exhaustively)
176 	//and returns the rmsd; returns infinity if unmatchable
computeRMSD(OBMol & test,bool minimize=false)177 	double computeRMSD(OBMol& test, bool minimize = false)
178 	{
179 		MapRMSDFunctor funct(ref, test, minimize);
180 
181 		mapper->MapGeneric(funct, &test);
182 		return funct.getRMSD();
183 	}
184 };
185 
186 //preprocess molecule into a standardized state for heavy atom rmsd computation
processMol(OBMol & mol)187 static void processMol(OBMol& mol)
188 {
189 	//isomorphismmapper wants isomorphic atoms to have the same aromatic and ring state,
190 	//but these proporties aren't reliable enough to be trusted in evaluating molecules
191 	//should be considered the same based solely on connectivity
192 
193 	mol.DeleteHydrogens(); //heavy atom rmsd
194 	for (OBAtomIterator aitr = mol.BeginAtoms(); aitr != mol.EndAtoms(); aitr++)
195 	{
196 		OBAtom *a = *aitr;
197 		a->SetAromatic(false);
198 		a->SetInRing();
199 	}
200 	for (OBBondIterator bitr = mol.BeginBonds(); bitr != mol.EndBonds(); bitr++)
201 	{
202 		OBBond *b = *bitr;
203 		b->SetAromatic(false);
204 		b->SetBondOrder(1);
205 		b->SetInRing();
206 	}
207 	//avoid recomputations
208 	mol.SetHybridizationPerceived();
209 	mol.SetRingAtomsAndBondsPerceived();
210 	mol.SetAromaticPerceived();
211 }
212 ///////////////////////////////////////////////////////////////////////////////
213 //! \brief compute rms between chemically identical molecules
main(int argc,char ** argv)214 int main(int argc, char **argv)
215 {
216 	bool firstOnly = false;
217 	bool minimize = false;
218 	bool separate = false;
219 	bool help = false;
220 	bool docross = false;
221 	string fileRef;
222 	string fileTest;
223 	string fileOut;
224 
225 	const char *helpmsg =
226 	 "obrms: Computes the heavy-atom RMSD of identical compound structures.\n"
227 	  "Usage: obrms reference_file [test_file]\n"
228 	  "Options:\n"
229     "\t -o, --out        re-oriented test structure output\n"
230 	  "\t -f, --firstonly  use only the first structure in the reference file\n"
231 	  "\t -m, --minimize   compute minimum RMSD\n"
232 	  "\t -x, --cross      compute all n^2 RMSDs between molecules of reference file\n"
233 	  "\t -s, --separate   separate reference file into constituent molecules and report best RMSD\n"
234 	  "\t -h, --help       help message\n";
235 	struct option long_options[] = {
236 	    {"firstonly", no_argument, nullptr, 'f'},
237 	    {"minimize", no_argument, nullptr, 'm'},
238 	    {"cross", no_argument, nullptr, 'x'},
239 	    {"separate", no_argument, nullptr, 's'},
240 	    {"out", required_argument, nullptr, 'o'},
241 	    {"help", no_argument, nullptr, 'h'},
242 	    {nullptr, 0, nullptr, 0}
243 	};
244 	int option_index = 0;
245 	int c = 0;
246 	while ((c = getopt_long(argc, argv, "hfmxso:", long_options, &option_index) ) != -1) {
247 	  switch(c) {
248 	    case 'o':
249 	      fileOut = optarg;
250 	      break;
251 	    case 'f':
252 	      firstOnly = true;
253 	      break;
254 	    case 'm':
255 	      minimize = true;
256 	      break;
257 	    case 'x':
258 	      docross = true;
259 	      break;
260 	    case 's':
261 	      separate = true;
262 	      break;
263 	    case 'h':
264 	      cout << helpmsg;
265 	      exit(0);
266 	      break;
267 	    default:
268 	      cerr << "Unrecognized option: " << c << "\n";
269 	      exit(-1);
270 	  }
271 	}
272 
273 	if(optind < argc) {
274 	  fileRef = argv[optind];
275 	  optind++;
276 	}
277 	if(optind < argc) {
278 	  fileTest = argv[optind];
279 	  optind++;
280 	}
281 
282 	if(optind < argc) {
283 	  cerr << "Unrecognized argument: " << argv[optind];
284 	  exit(-1);
285 	}
286 
287 	if(!docross && fileTest.size() == 0) {
288     cerr << helpmsg;
289 	  cerr << "Command line parse error: test file is required but missing\n";
290 	  exit(-1);
291 	}
292 
293 	//open mols
294 	OBConversion refconv(fileRef);
295 
296 	OBConversion outconv;
297 	OBFormat *outFormat = outconv.FormatFromExt(fileOut);
298 	if(fileOut.size() > 0)
299 	{
300 		if(!outFormat || !outconv.SetInAndOutFormats(outFormat, outFormat))
301 		{
302 			cerr << "Do not understand output format!" << endl;
303 			exit(-1);
304 		}
305 	}
306 
307 	std::ofstream out;
308 	if(fileOut.size() > 0)
309 	{
310 		out.open(fileOut.c_str());
311 	}
312 
313 
314   //read reference
315   OBMol molref;
316 
317 	if(docross) {
318 	  //load in the entire reference file
319     vector<OBMol> refmols;
320     while (refconv.Read(&molref))
321     {
322        processMol(molref);
323        refmols.push_back(molref);
324     }
325 
326     for(unsigned i = 0, n = refmols.size() ; i < n; i++) {
327       OBMol& ref = refmols[i];
328       Matcher matcher(ref);
329       cout << ref.GetTitle();
330       for(unsigned j = 0; j < n; j++) {
331         OBMol& moltest = refmols[j];
332         double rmsd = matcher.computeRMSD(moltest, minimize);
333         cout << ", " << rmsd;
334       }
335       cout << "\n";
336     }
337 
338 	} else {
339 
340 	  //check comparison file
341     while (refconv.Read(&molref))
342     {
343       vector<OBMol> refmols;
344       if(separate) {
345         refmols = molref.Separate();
346       } else {
347         refmols.push_back(molref);
348       }
349 
350       vector<Matcher> matchers;
351       for(unsigned i = 0, n = refmols.size(); i < n; i++) {
352         processMol(refmols[i]);
353         Matcher matcher(refmols[i]); // create the matcher
354         matchers.push_back(matcher);
355       }
356 
357       OBConversion testconv(fileTest);
358       OBMol moltest;
359       while (testconv.Read(&moltest))
360       {
361         if (moltest.Empty())
362           break;
363 
364         processMol(moltest);
365 
366         double bestRMSD = HUGE_VAL;
367         for(unsigned i = 0, n = matchers.size(); i < n; i++) {
368           double rmsd = matchers[i].computeRMSD(moltest, minimize);
369           if(rmsd < bestRMSD) bestRMSD = rmsd;
370         }
371 
372         cout << "RMSD " << molref.GetTitle() << ":" <<  moltest.GetTitle() << " " << bestRMSD << "\n";
373 
374         if(out)
375         {
376           outconv.Write(&moltest, &out);
377         }
378         if (!firstOnly) //one test molecule will be read for each reference molecule
379           break;
380       }
381       if(firstOnly) //done with first reference mol
382         break;
383     }
384 	}
385 	return (0);
386 }
387