1 // $Id$
2 //
3 //  Copyright (C) 2003-2008 Greg Landrum and Rational Discovery LLC
4 //
5 //   @@ All Rights Reserved @@
6 //  This file is part of the RDKit.
7 //  The contents are covered by the terms of the BSD license
8 //  which is included in the file license.txt, found at the root
9 //  of the RDKit source tree.
10 //
11 #include <GraphMol/ROMol.h>
12 #include <GraphMol/Atom.h>
13 #include <GraphMol/Bond.h>
14 #include <GraphMol/BondIterators.h>
15 #include <GraphMol/MolOps.h>
16 #include <memory>
17 #include <boost/shared_array.hpp>
18 #include <algorithm>
19 #include <cstring>
20 
21 namespace RDKit {
22 
23 const int LOCAL_INF = (int)1e8;
24 
25 // local utility namespace
26 namespace {
27 /* ----------------------------------------------
28 
29 This implements the Floyd-Warshall all-pairs-shortest-paths
30 algorithm as described on pg 564 of the White Book (Cormen, Leiserson, Rivest)
31 
32 Arguments:
33 adjMat: the adjacency matrix.  This is overwritten with the shortest
34 paths, should be dim x dim
35 dim: the size of adjMat
36 pathMat: the path matrix, should be dim x dim
37 
38 -----------------------------------------------*/
39 template <class T>
FloydWarshall(int dim,T * adjMat,int * pathMat)40 void FloydWarshall(int dim, T *adjMat, int *pathMat) {
41   int k, i, j;
42   T *currD, *lastD, *tTemp;
43   int *currP, *lastP, *iTemp;
44 
45   currD = new T[dim * dim];
46   currP = new int[dim * dim];
47   lastD = new T[dim * dim];
48   lastP = new int[dim * dim];
49 
50   memcpy(static_cast<void *>(lastD), static_cast<void *>(adjMat),
51          dim * dim * sizeof(T));
52 
53   // initialize the paths
54   for (i = 0; i < dim; i++) {
55     int itab = i * dim;
56     for (j = 0; j < dim; j++) {
57       if (i == j || adjMat[itab + j] == LOCAL_INF) {
58         pathMat[itab + j] = -1;
59       } else {
60         pathMat[itab + j] = i;
61       }
62     }
63   }
64   memcpy(static_cast<void *>(lastP), static_cast<void *>(pathMat),
65          dim * dim * sizeof(int));
66 
67   for (k = 0; k < dim; k++) {
68     int ktab = k * dim;
69     for (i = 0; i < dim; i++) {
70       int itab = i * dim;
71       for (j = 0; j < dim; j++) {
72         T v1 = lastD[itab + j];
73         T v2 = lastD[itab + k] + lastD[ktab + j];
74         if (v1 <= v2) {
75           currD[itab + j] = v1;
76           currP[itab + j] = lastP[itab + j];
77         } else {
78           currD[itab + j] = v2;
79           currP[itab + j] = lastP[ktab + j];
80         }
81       }
82     }
83     tTemp = currD;
84     currD = lastD;
85     lastD = tTemp;
86 
87     iTemp = currP;
88     currP = lastP;
89     lastP = iTemp;
90   }
91   memcpy(static_cast<void *>(adjMat), static_cast<void *>(lastD),
92          dim * dim * sizeof(T));
93   memcpy(static_cast<void *>(pathMat), static_cast<void *>(lastP),
94          dim * dim * sizeof(int));
95 
96   delete[] currD;
97   delete[] currP;
98   delete[] lastD;
99   delete[] lastP;
100 }
101 
102 template <class T>
FloydWarshall(int dim,T * adjMat,int * pathMat,const std::vector<int> & activeAtoms)103 void FloydWarshall(int dim, T *adjMat, int *pathMat,
104                    const std::vector<int> &activeAtoms) {
105   T *currD, *lastD, *tTemp;
106   int *currP, *lastP, *iTemp;
107 
108   currD = new T[dim * dim];
109   currP = new int[dim * dim];
110   lastD = new T[dim * dim];
111   lastP = new int[dim * dim];
112 
113   memcpy(static_cast<void *>(lastD), static_cast<void *>(adjMat),
114          dim * dim * sizeof(T));
115 
116   // initialize the paths
117   for (auto ai : activeAtoms) {
118     int itab = ai * dim;
119     for (int activeAtom : activeAtoms) {
120       if (ai == activeAtom || adjMat[itab + activeAtom] == LOCAL_INF) {
121         pathMat[itab + activeAtom] = -1;
122       } else {
123         pathMat[itab + activeAtom] = ai;
124       }
125     }
126   }
127   memcpy(static_cast<void *>(lastP), static_cast<void *>(pathMat),
128          dim * dim * sizeof(int));
129 
130   for (auto ak : activeAtoms) {
131     int ktab = ak * dim;
132     for (auto ai : activeAtoms) {
133       int itab = ai * dim;
134       for (int activeAtom : activeAtoms) {
135         T v1 = lastD[itab + activeAtom];
136         T v2 = lastD[itab + ak] + lastD[ktab + activeAtom];
137         if (v1 <= v2) {
138           currD[itab + activeAtom] = v1;
139           currP[itab + activeAtom] = lastP[itab + activeAtom];
140         } else {
141           currD[itab + activeAtom] = v2;
142           currP[itab + activeAtom] = lastP[ktab + activeAtom];
143         }
144       }
145     }
146     tTemp = currD;
147     currD = lastD;
148     lastD = tTemp;
149 
150     iTemp = currP;
151     currP = lastP;
152     lastP = iTemp;
153   }
154   memcpy(static_cast<void *>(adjMat), static_cast<void *>(lastD),
155          dim * dim * sizeof(T));
156   memcpy(static_cast<void *>(pathMat), static_cast<void *>(lastP),
157          dim * dim * sizeof(int));
158 
159   delete[] currD;
160   delete[] currP;
161   delete[] lastD;
162   delete[] lastP;
163 }
164 }  // end of local utility namespace
165 
166 namespace MolOps {
getDistanceMat(const ROMol & mol,bool useBO,bool useAtomWts,bool force,const char * propNamePrefix)167 double *getDistanceMat(const ROMol &mol, bool useBO, bool useAtomWts,
168                        bool force, const char *propNamePrefix) {
169   std::string propName;
170   boost::shared_array<double> sptr;
171   if (propNamePrefix) {
172     propName = propNamePrefix;
173   } else {
174     propName = "";
175   }
176   propName += "DistanceMatrix";
177   // make sure we don't use the nonBO cache for the BO matrix and vice versa:
178   if (useBO) {
179     propName += "BO";
180   }
181   if (!force && mol.hasProp(propName)) {
182     mol.getProp(propName, sptr);
183     return sptr.get();
184   }
185   int nAts = mol.getNumAtoms();
186   auto *dMat = new double[nAts * nAts];
187   int i, j;
188   // initialize off diagonals to LOCAL_INF and diagonals to 0
189   for (i = 0; i < nAts * nAts; i++) {
190     dMat[i] = LOCAL_INF;
191   }
192   for (i = 0; i < nAts; i++) {
193     dMat[i * nAts + i] = 0.0;
194   }
195 
196   ROMol::EDGE_ITER firstB, lastB;
197   boost::tie(firstB, lastB) = mol.getEdges();
198   while (firstB != lastB) {
199     const Bond* bond = mol[*firstB];
200     i = bond->getBeginAtomIdx();
201     j = bond->getEndAtomIdx();
202     double contrib;
203     if (useBO) {
204       if (!bond->getIsAromatic()) {
205         contrib = 1. / bond->getBondTypeAsDouble();
206       } else {
207         contrib = 2. / 3.;
208       }
209     } else {
210       contrib = 1.0;
211     }
212     dMat[i * nAts + j] = contrib;
213     dMat[j * nAts + i] = contrib;
214     ++firstB;
215   }
216 
217   auto *pathMat = new int[nAts * nAts];
218   memset(static_cast<void *>(pathMat), 0, nAts * nAts * sizeof(int));
219   FloydWarshall(nAts, dMat, pathMat);
220 
221   if (useAtomWts) {
222     for (i = 0; i < nAts; i++) {
223       int anum = mol.getAtomWithIdx(i)->getAtomicNum();
224       dMat[i * nAts + i] = 6.0 / anum;
225     }
226   }
227   sptr.reset(dMat);
228   mol.setProp(propName, sptr, true);
229   boost::shared_array<int> iSptr(pathMat);
230   mol.setProp(propName + "_Paths", iSptr, true);
231 
232   return dMat;
233 };
234 
getDistanceMat(const ROMol & mol,const std::vector<int> & activeAtoms,const std::vector<const Bond * > & bonds,bool useBO,bool useAtomWts)235 double *getDistanceMat(const ROMol &mol, const std::vector<int> &activeAtoms,
236                        const std::vector<const Bond *> &bonds, bool useBO,
237                        bool useAtomWts) {
238   const int nAts = rdcast<int>(activeAtoms.size());
239 
240   auto *dMat = new double[nAts * nAts];
241   int i, j;
242   // initialize off diagonals to LOCAL_INF and diagonals to 0
243   for (i = 0; i < nAts * nAts; i++) {
244     dMat[i] = LOCAL_INF;
245   }
246   for (i = 0; i < nAts; i++) {
247     dMat[i * nAts + i] = 0.0;
248   }
249 
250   for (auto bond : bonds) {
251     i = rdcast<int>(std::find(activeAtoms.begin(), activeAtoms.end(),
252                               static_cast<int>(bond->getBeginAtomIdx())) -
253                     activeAtoms.begin());
254     j = rdcast<int>(std::find(activeAtoms.begin(), activeAtoms.end(),
255                               static_cast<int>(bond->getEndAtomIdx())) -
256                     activeAtoms.begin());
257     double contrib;
258     if (useBO) {
259       if (!bond->getIsAromatic()) {
260         contrib = 1. / bond->getBondTypeAsDouble();
261       } else {
262         contrib = 2. / 3.;
263       }
264     } else {
265       contrib = 1.0;
266     }
267     dMat[i * nAts + j] = contrib;
268     dMat[j * nAts + i] = contrib;
269   }
270 
271   auto *pathMat = new int[nAts * nAts];
272   memset(static_cast<void *>(pathMat), 0, nAts * nAts * sizeof(int));
273   FloydWarshall(nAts, dMat, pathMat);
274   delete[] pathMat;
275 
276   if (useAtomWts) {
277     for (i = 0; i < nAts; i++) {
278       int anum = mol.getAtomWithIdx(activeAtoms[i])->getAtomicNum();
279       dMat[i * nAts + i] = 6.0 / anum;
280     }
281   }
282   return dMat;
283 };
284 
285 // NOTE: do *not* delete results
getAdjacencyMatrix(const ROMol & mol,bool useBO,int emptyVal,bool force,const char * propNamePrefix,const boost::dynamic_bitset<> * bondsToUse)286 double *getAdjacencyMatrix(const ROMol &mol, bool useBO, int emptyVal,
287                            bool force, const char *propNamePrefix,
288                            const boost::dynamic_bitset<> *bondsToUse) {
289   std::string propName;
290   boost::shared_array<double> sptr;
291   if (propNamePrefix) {
292     propName = propNamePrefix;
293   } else {
294     propName = "";
295   }
296   propName += "AdjacencyMatrix";
297   if (!force && mol.hasProp(propName)) {
298     mol.getProp(propName, sptr);
299     return sptr.get();
300   }
301 
302   int nAts = mol.getNumAtoms();
303   auto *res = new double[nAts * nAts];
304   memset(static_cast<void *>(res), emptyVal, nAts * nAts * sizeof(double));
305 
306   for (ROMol::ConstBondIterator bondIt = mol.beginBonds();
307        bondIt != mol.endBonds(); bondIt++) {
308     if (bondsToUse && !(*bondsToUse)[(*bondIt)->getIdx()]) {
309       continue;
310     }
311     if (!useBO) {
312       int beg = (*bondIt)->getBeginAtomIdx();
313       int end = (*bondIt)->getEndAtomIdx();
314       res[beg * nAts + end] = 1;
315       res[end * nAts + beg] = 1;
316     } else {
317       int begIdx = (*bondIt)->getBeginAtomIdx();
318       int endIdx = (*bondIt)->getEndAtomIdx();
319       Atom const *beg = mol.getAtomWithIdx(begIdx);
320       Atom const *end = mol.getAtomWithIdx(endIdx);
321       res[begIdx * nAts + endIdx] = (*bondIt)->getValenceContrib(beg);
322       res[endIdx * nAts + begIdx] = (*bondIt)->getValenceContrib(end);
323     }
324   }
325   sptr.reset(res);
326   mol.setProp(propName, sptr, true);
327 
328   return res;
329 };
330 
getShortestPath(const ROMol & mol,int aid1,int aid2)331 INT_LIST getShortestPath(const ROMol &mol, int aid1, int aid2) {
332   int nats = mol.getNumAtoms();
333   RANGE_CHECK(0, aid1, nats - 1);
334   RANGE_CHECK(0, aid2, nats - 1);
335   CHECK_INVARIANT(aid1 != aid2, "");
336 
337   INT_VECT pred(nats, -1);  // set all atoms to unprocessed state
338   pred[aid1] = -2;          // marks begin
339   pred[aid2] = -3;          // marks end
340 
341   std::deque<int> bfsQ;
342 
343   bfsQ.push_back(aid1);
344   bool done = false;
345   ROMol::ADJ_ITER nbrIdx, endNbrs;
346   while ((!done) && (bfsQ.size() > 0)) {
347     int curAid = bfsQ.front();
348     boost::tie(nbrIdx, endNbrs) =
349         mol.getAtomNeighbors(mol.getAtomWithIdx(curAid));
350     while (!done && nbrIdx != endNbrs) {
351       switch (pred[*nbrIdx]) {
352         case -1:
353           pred[*nbrIdx] = curAid;
354           bfsQ.push_back(rdcast<int>(*nbrIdx));
355           break;
356         case -3:  // end found
357           pred[*nbrIdx] = curAid;
358           done = true;
359           break;
360         default:  // already processed (or begin)
361           break;
362       }
363       ++nbrIdx;
364     }
365     bfsQ.pop_front();
366   }
367 
368   INT_LIST res;
369   if (done) {
370     done = false;
371     int prev = aid2;
372     res.push_back(aid2);
373     while (!done) {
374       prev = pred[prev];
375       if (prev != aid1) {
376         res.push_front(prev);
377       } else {
378         done = true;
379       }
380     }
381     res.push_front(aid1);
382   }
383   return res;
384 }
385 
get3DDistanceMat(const ROMol & mol,int confId,bool useAtomWts,bool force,const char * propNamePrefix)386 double *get3DDistanceMat(const ROMol &mol, int confId, bool useAtomWts,
387                          bool force, const char *propNamePrefix) {
388   const Conformer &conf = mol.getConformer(confId);
389   std::string propName;
390   boost::shared_array<double> sptr;
391   if (propNamePrefix) {
392     propName = propNamePrefix;
393   } else {
394     propName = "_";
395   }
396   if (propName != "") {
397     propName += "3DDistanceMatrix_Conf" + std::to_string(conf.getId());
398     if (!force && mol.hasProp(propName)) {
399       mol.getProp(propName, sptr);
400       return sptr.get();
401     }
402   }
403 
404   unsigned int nAts = mol.getNumAtoms();
405   auto *dMat = new double[nAts * nAts];
406 
407   for (unsigned int i = 0; i < nAts; ++i) {
408     if (useAtomWts) {
409       dMat[i * nAts + i] = 6.0 / mol.getAtomWithIdx(i)->getAtomicNum();
410     } else {
411       dMat[i * nAts + i] = 0.0;
412     }
413     for (unsigned int j = i + 1; j < nAts; ++j) {
414       double dist = (conf.getAtomPos(i) - conf.getAtomPos(j)).length();
415       dMat[i * nAts + j] = dist;
416       dMat[j * nAts + i] = dist;
417     }
418   }
419 
420   if (propName != "") {
421     sptr.reset(dMat);
422     mol.setProp(propName, sptr, true);
423   }
424   return dMat;
425 }
426 }  // end of namespace MolOps
427 }  // end of namespace RDKit
428