1 // $Id$
2 //
3 // Copyright (C) 2003-2008 Greg Landrum and Rational Discovery LLC
4 //
5 // @@ All Rights Reserved @@
6 // This file is part of the RDKit.
7 // The contents are covered by the terms of the BSD license
8 // which is included in the file license.txt, found at the root
9 // of the RDKit source tree.
10 //
11 #include <GraphMol/ROMol.h>
12 #include <GraphMol/Atom.h>
13 #include <GraphMol/Bond.h>
14 #include <GraphMol/BondIterators.h>
15 #include <GraphMol/MolOps.h>
16 #include <memory>
17 #include <boost/shared_array.hpp>
18 #include <algorithm>
19 #include <cstring>
20
21 namespace RDKit {
22
23 const int LOCAL_INF = (int)1e8;
24
25 // local utility namespace
26 namespace {
27 /* ----------------------------------------------
28
29 This implements the Floyd-Warshall all-pairs-shortest-paths
30 algorithm as described on pg 564 of the White Book (Cormen, Leiserson, Rivest)
31
32 Arguments:
33 adjMat: the adjacency matrix. This is overwritten with the shortest
34 paths, should be dim x dim
35 dim: the size of adjMat
36 pathMat: the path matrix, should be dim x dim
37
38 -----------------------------------------------*/
39 template <class T>
FloydWarshall(int dim,T * adjMat,int * pathMat)40 void FloydWarshall(int dim, T *adjMat, int *pathMat) {
41 int k, i, j;
42 T *currD, *lastD, *tTemp;
43 int *currP, *lastP, *iTemp;
44
45 currD = new T[dim * dim];
46 currP = new int[dim * dim];
47 lastD = new T[dim * dim];
48 lastP = new int[dim * dim];
49
50 memcpy(static_cast<void *>(lastD), static_cast<void *>(adjMat),
51 dim * dim * sizeof(T));
52
53 // initialize the paths
54 for (i = 0; i < dim; i++) {
55 int itab = i * dim;
56 for (j = 0; j < dim; j++) {
57 if (i == j || adjMat[itab + j] == LOCAL_INF) {
58 pathMat[itab + j] = -1;
59 } else {
60 pathMat[itab + j] = i;
61 }
62 }
63 }
64 memcpy(static_cast<void *>(lastP), static_cast<void *>(pathMat),
65 dim * dim * sizeof(int));
66
67 for (k = 0; k < dim; k++) {
68 int ktab = k * dim;
69 for (i = 0; i < dim; i++) {
70 int itab = i * dim;
71 for (j = 0; j < dim; j++) {
72 T v1 = lastD[itab + j];
73 T v2 = lastD[itab + k] + lastD[ktab + j];
74 if (v1 <= v2) {
75 currD[itab + j] = v1;
76 currP[itab + j] = lastP[itab + j];
77 } else {
78 currD[itab + j] = v2;
79 currP[itab + j] = lastP[ktab + j];
80 }
81 }
82 }
83 tTemp = currD;
84 currD = lastD;
85 lastD = tTemp;
86
87 iTemp = currP;
88 currP = lastP;
89 lastP = iTemp;
90 }
91 memcpy(static_cast<void *>(adjMat), static_cast<void *>(lastD),
92 dim * dim * sizeof(T));
93 memcpy(static_cast<void *>(pathMat), static_cast<void *>(lastP),
94 dim * dim * sizeof(int));
95
96 delete[] currD;
97 delete[] currP;
98 delete[] lastD;
99 delete[] lastP;
100 }
101
102 template <class T>
FloydWarshall(int dim,T * adjMat,int * pathMat,const std::vector<int> & activeAtoms)103 void FloydWarshall(int dim, T *adjMat, int *pathMat,
104 const std::vector<int> &activeAtoms) {
105 T *currD, *lastD, *tTemp;
106 int *currP, *lastP, *iTemp;
107
108 currD = new T[dim * dim];
109 currP = new int[dim * dim];
110 lastD = new T[dim * dim];
111 lastP = new int[dim * dim];
112
113 memcpy(static_cast<void *>(lastD), static_cast<void *>(adjMat),
114 dim * dim * sizeof(T));
115
116 // initialize the paths
117 for (auto ai : activeAtoms) {
118 int itab = ai * dim;
119 for (int activeAtom : activeAtoms) {
120 if (ai == activeAtom || adjMat[itab + activeAtom] == LOCAL_INF) {
121 pathMat[itab + activeAtom] = -1;
122 } else {
123 pathMat[itab + activeAtom] = ai;
124 }
125 }
126 }
127 memcpy(static_cast<void *>(lastP), static_cast<void *>(pathMat),
128 dim * dim * sizeof(int));
129
130 for (auto ak : activeAtoms) {
131 int ktab = ak * dim;
132 for (auto ai : activeAtoms) {
133 int itab = ai * dim;
134 for (int activeAtom : activeAtoms) {
135 T v1 = lastD[itab + activeAtom];
136 T v2 = lastD[itab + ak] + lastD[ktab + activeAtom];
137 if (v1 <= v2) {
138 currD[itab + activeAtom] = v1;
139 currP[itab + activeAtom] = lastP[itab + activeAtom];
140 } else {
141 currD[itab + activeAtom] = v2;
142 currP[itab + activeAtom] = lastP[ktab + activeAtom];
143 }
144 }
145 }
146 tTemp = currD;
147 currD = lastD;
148 lastD = tTemp;
149
150 iTemp = currP;
151 currP = lastP;
152 lastP = iTemp;
153 }
154 memcpy(static_cast<void *>(adjMat), static_cast<void *>(lastD),
155 dim * dim * sizeof(T));
156 memcpy(static_cast<void *>(pathMat), static_cast<void *>(lastP),
157 dim * dim * sizeof(int));
158
159 delete[] currD;
160 delete[] currP;
161 delete[] lastD;
162 delete[] lastP;
163 }
164 } // end of local utility namespace
165
166 namespace MolOps {
getDistanceMat(const ROMol & mol,bool useBO,bool useAtomWts,bool force,const char * propNamePrefix)167 double *getDistanceMat(const ROMol &mol, bool useBO, bool useAtomWts,
168 bool force, const char *propNamePrefix) {
169 std::string propName;
170 boost::shared_array<double> sptr;
171 if (propNamePrefix) {
172 propName = propNamePrefix;
173 } else {
174 propName = "";
175 }
176 propName += "DistanceMatrix";
177 // make sure we don't use the nonBO cache for the BO matrix and vice versa:
178 if (useBO) {
179 propName += "BO";
180 }
181 if (!force && mol.hasProp(propName)) {
182 mol.getProp(propName, sptr);
183 return sptr.get();
184 }
185 int nAts = mol.getNumAtoms();
186 auto *dMat = new double[nAts * nAts];
187 int i, j;
188 // initialize off diagonals to LOCAL_INF and diagonals to 0
189 for (i = 0; i < nAts * nAts; i++) {
190 dMat[i] = LOCAL_INF;
191 }
192 for (i = 0; i < nAts; i++) {
193 dMat[i * nAts + i] = 0.0;
194 }
195
196 ROMol::EDGE_ITER firstB, lastB;
197 boost::tie(firstB, lastB) = mol.getEdges();
198 while (firstB != lastB) {
199 const Bond* bond = mol[*firstB];
200 i = bond->getBeginAtomIdx();
201 j = bond->getEndAtomIdx();
202 double contrib;
203 if (useBO) {
204 if (!bond->getIsAromatic()) {
205 contrib = 1. / bond->getBondTypeAsDouble();
206 } else {
207 contrib = 2. / 3.;
208 }
209 } else {
210 contrib = 1.0;
211 }
212 dMat[i * nAts + j] = contrib;
213 dMat[j * nAts + i] = contrib;
214 ++firstB;
215 }
216
217 auto *pathMat = new int[nAts * nAts];
218 memset(static_cast<void *>(pathMat), 0, nAts * nAts * sizeof(int));
219 FloydWarshall(nAts, dMat, pathMat);
220
221 if (useAtomWts) {
222 for (i = 0; i < nAts; i++) {
223 int anum = mol.getAtomWithIdx(i)->getAtomicNum();
224 dMat[i * nAts + i] = 6.0 / anum;
225 }
226 }
227 sptr.reset(dMat);
228 mol.setProp(propName, sptr, true);
229 boost::shared_array<int> iSptr(pathMat);
230 mol.setProp(propName + "_Paths", iSptr, true);
231
232 return dMat;
233 };
234
getDistanceMat(const ROMol & mol,const std::vector<int> & activeAtoms,const std::vector<const Bond * > & bonds,bool useBO,bool useAtomWts)235 double *getDistanceMat(const ROMol &mol, const std::vector<int> &activeAtoms,
236 const std::vector<const Bond *> &bonds, bool useBO,
237 bool useAtomWts) {
238 const int nAts = rdcast<int>(activeAtoms.size());
239
240 auto *dMat = new double[nAts * nAts];
241 int i, j;
242 // initialize off diagonals to LOCAL_INF and diagonals to 0
243 for (i = 0; i < nAts * nAts; i++) {
244 dMat[i] = LOCAL_INF;
245 }
246 for (i = 0; i < nAts; i++) {
247 dMat[i * nAts + i] = 0.0;
248 }
249
250 for (auto bond : bonds) {
251 i = rdcast<int>(std::find(activeAtoms.begin(), activeAtoms.end(),
252 static_cast<int>(bond->getBeginAtomIdx())) -
253 activeAtoms.begin());
254 j = rdcast<int>(std::find(activeAtoms.begin(), activeAtoms.end(),
255 static_cast<int>(bond->getEndAtomIdx())) -
256 activeAtoms.begin());
257 double contrib;
258 if (useBO) {
259 if (!bond->getIsAromatic()) {
260 contrib = 1. / bond->getBondTypeAsDouble();
261 } else {
262 contrib = 2. / 3.;
263 }
264 } else {
265 contrib = 1.0;
266 }
267 dMat[i * nAts + j] = contrib;
268 dMat[j * nAts + i] = contrib;
269 }
270
271 auto *pathMat = new int[nAts * nAts];
272 memset(static_cast<void *>(pathMat), 0, nAts * nAts * sizeof(int));
273 FloydWarshall(nAts, dMat, pathMat);
274 delete[] pathMat;
275
276 if (useAtomWts) {
277 for (i = 0; i < nAts; i++) {
278 int anum = mol.getAtomWithIdx(activeAtoms[i])->getAtomicNum();
279 dMat[i * nAts + i] = 6.0 / anum;
280 }
281 }
282 return dMat;
283 };
284
285 // NOTE: do *not* delete results
getAdjacencyMatrix(const ROMol & mol,bool useBO,int emptyVal,bool force,const char * propNamePrefix,const boost::dynamic_bitset<> * bondsToUse)286 double *getAdjacencyMatrix(const ROMol &mol, bool useBO, int emptyVal,
287 bool force, const char *propNamePrefix,
288 const boost::dynamic_bitset<> *bondsToUse) {
289 std::string propName;
290 boost::shared_array<double> sptr;
291 if (propNamePrefix) {
292 propName = propNamePrefix;
293 } else {
294 propName = "";
295 }
296 propName += "AdjacencyMatrix";
297 if (!force && mol.hasProp(propName)) {
298 mol.getProp(propName, sptr);
299 return sptr.get();
300 }
301
302 int nAts = mol.getNumAtoms();
303 auto *res = new double[nAts * nAts];
304 memset(static_cast<void *>(res), emptyVal, nAts * nAts * sizeof(double));
305
306 for (ROMol::ConstBondIterator bondIt = mol.beginBonds();
307 bondIt != mol.endBonds(); bondIt++) {
308 if (bondsToUse && !(*bondsToUse)[(*bondIt)->getIdx()]) {
309 continue;
310 }
311 if (!useBO) {
312 int beg = (*bondIt)->getBeginAtomIdx();
313 int end = (*bondIt)->getEndAtomIdx();
314 res[beg * nAts + end] = 1;
315 res[end * nAts + beg] = 1;
316 } else {
317 int begIdx = (*bondIt)->getBeginAtomIdx();
318 int endIdx = (*bondIt)->getEndAtomIdx();
319 Atom const *beg = mol.getAtomWithIdx(begIdx);
320 Atom const *end = mol.getAtomWithIdx(endIdx);
321 res[begIdx * nAts + endIdx] = (*bondIt)->getValenceContrib(beg);
322 res[endIdx * nAts + begIdx] = (*bondIt)->getValenceContrib(end);
323 }
324 }
325 sptr.reset(res);
326 mol.setProp(propName, sptr, true);
327
328 return res;
329 };
330
getShortestPath(const ROMol & mol,int aid1,int aid2)331 INT_LIST getShortestPath(const ROMol &mol, int aid1, int aid2) {
332 int nats = mol.getNumAtoms();
333 RANGE_CHECK(0, aid1, nats - 1);
334 RANGE_CHECK(0, aid2, nats - 1);
335 CHECK_INVARIANT(aid1 != aid2, "");
336
337 INT_VECT pred(nats, -1); // set all atoms to unprocessed state
338 pred[aid1] = -2; // marks begin
339 pred[aid2] = -3; // marks end
340
341 std::deque<int> bfsQ;
342
343 bfsQ.push_back(aid1);
344 bool done = false;
345 ROMol::ADJ_ITER nbrIdx, endNbrs;
346 while ((!done) && (bfsQ.size() > 0)) {
347 int curAid = bfsQ.front();
348 boost::tie(nbrIdx, endNbrs) =
349 mol.getAtomNeighbors(mol.getAtomWithIdx(curAid));
350 while (!done && nbrIdx != endNbrs) {
351 switch (pred[*nbrIdx]) {
352 case -1:
353 pred[*nbrIdx] = curAid;
354 bfsQ.push_back(rdcast<int>(*nbrIdx));
355 break;
356 case -3: // end found
357 pred[*nbrIdx] = curAid;
358 done = true;
359 break;
360 default: // already processed (or begin)
361 break;
362 }
363 ++nbrIdx;
364 }
365 bfsQ.pop_front();
366 }
367
368 INT_LIST res;
369 if (done) {
370 done = false;
371 int prev = aid2;
372 res.push_back(aid2);
373 while (!done) {
374 prev = pred[prev];
375 if (prev != aid1) {
376 res.push_front(prev);
377 } else {
378 done = true;
379 }
380 }
381 res.push_front(aid1);
382 }
383 return res;
384 }
385
get3DDistanceMat(const ROMol & mol,int confId,bool useAtomWts,bool force,const char * propNamePrefix)386 double *get3DDistanceMat(const ROMol &mol, int confId, bool useAtomWts,
387 bool force, const char *propNamePrefix) {
388 const Conformer &conf = mol.getConformer(confId);
389 std::string propName;
390 boost::shared_array<double> sptr;
391 if (propNamePrefix) {
392 propName = propNamePrefix;
393 } else {
394 propName = "_";
395 }
396 if (propName != "") {
397 propName += "3DDistanceMatrix_Conf" + std::to_string(conf.getId());
398 if (!force && mol.hasProp(propName)) {
399 mol.getProp(propName, sptr);
400 return sptr.get();
401 }
402 }
403
404 unsigned int nAts = mol.getNumAtoms();
405 auto *dMat = new double[nAts * nAts];
406
407 for (unsigned int i = 0; i < nAts; ++i) {
408 if (useAtomWts) {
409 dMat[i * nAts + i] = 6.0 / mol.getAtomWithIdx(i)->getAtomicNum();
410 } else {
411 dMat[i * nAts + i] = 0.0;
412 }
413 for (unsigned int j = i + 1; j < nAts; ++j) {
414 double dist = (conf.getAtomPos(i) - conf.getAtomPos(j)).length();
415 dMat[i * nAts + j] = dist;
416 dMat[j * nAts + i] = dist;
417 }
418 }
419
420 if (propName != "") {
421 sptr.reset(dMat);
422 mol.setProp(propName, sptr, true);
423 }
424 return dMat;
425 }
426 } // end of namespace MolOps
427 } // end of namespace RDKit
428