1 //
2 //  Copyright (C) 2013-2014 Paolo Tosco
3 //
4 //   @@ All Rights Reserved @@
5 //  This file is part of the RDKit.
6 //  The contents are covered by the terms of the BSD license
7 //  which is included in the file license.txt, found at the root
8 //  of the RDKit source tree.
9 //
10 #include <RDGeneral/export.h>
11 #ifndef _RD_O3AALIGNMOLECULES_H_
12 #define _RD_O3AALIGNMOLECULES_H_
13 
14 #include <RDGeneral/Invariant.h>
15 #include <Geometry/Transform3D.h>
16 #include <Geometry/point.h>
17 #include <Numerics/Vector.h>
18 #include <GraphMol/ROMol.h>
19 #include <GraphMol/Conformer.h>
20 #include <GraphMol/ForceFieldHelpers/MMFF/AtomTyper.h>
21 #include <GraphMol/MolAlign/AlignMolecules.h>
22 #include <vector>
23 #include <cmath>
24 #include <boost/shared_ptr.hpp>
25 #include <boost/multi_array.hpp>
26 #include <boost/dynamic_bitset.hpp>
27 
28 namespace RDKit {
29 namespace MolAlign {
30 struct RDKIT_MOLALIGN_EXPORT O3AFuncData {
31   const Conformer *prbConf;
32   const Conformer *refConf;
33   void *prbProp;
34   void *refProp;
35   int coeff;
36   int weight;
37   bool useMMFFSim;
38 };
isDoubleZero(const double x)39 inline bool isDoubleZero(const double x) {
40   return ((x < 1.0e-10) && (x > -1.0e-10));
41 };
42 
43 class O3AConstraintVect;
44 
45 //! A class to define alignment constraints. Each constraint
46 //! is defined by a pair of atom indexes (one for the probe,
47 //! one for the reference) and a weight. Constraints can
48 //! can be added via the O3AConstraintVect class.
49 class RDKIT_MOLALIGN_EXPORT O3AConstraint {
50   friend class O3AConstraintVect;
51 
52  public:
getPrbIdx()53   double getPrbIdx() { return d_prbIdx; }
getRefIdx()54   double getRefIdx() { return d_refIdx; }
getWeight()55   double getWeight() { return d_weight; }
56 
57  private:
58   unsigned int d_idx;
59   unsigned int d_prbIdx;
60   unsigned int d_refIdx;
61   double d_weight;
62 };
63 
64 //! A class to store a vector of alignment constraints. Each constraint
65 //! is defined by an O3AConstraint object. Each time the append()
66 //! method is invoked, the vector is sorted to make lookup faster.
67 //! Hence, constraints are not necessarily stored in the same order
68 //! they were appended.
69 class RDKIT_MOLALIGN_EXPORT O3AConstraintVect {
70  public:
O3AConstraintVect()71   O3AConstraintVect()  {};
~O3AConstraintVect()72   ~O3AConstraintVect(){};
append(unsigned int prbIdx,unsigned int refIdx,double weight)73   void append(unsigned int prbIdx, unsigned int refIdx, double weight) {
74     O3AConstraint *o3aConstraint = new O3AConstraint();
75     o3aConstraint->d_idx = d_count;
76     o3aConstraint->d_prbIdx = prbIdx;
77     o3aConstraint->d_refIdx = refIdx;
78     o3aConstraint->d_weight = weight;
79     d_o3aConstraintVect.emplace_back(o3aConstraint);
80     std::sort(d_o3aConstraintVect.begin(), d_o3aConstraintVect.end(),
81               d_compareO3AConstraint);
82     ++d_count;
83   }
size()84   std::vector<boost::shared_ptr<O3AConstraint>>::size_type size() {
85     return d_o3aConstraintVect.size();
86   }
87   O3AConstraint *operator[](unsigned int i) {
88     return d_o3aConstraintVect[i].get();
89   }
90 
91  private:
92   unsigned int d_count{0};
93   std::vector<boost::shared_ptr<O3AConstraint>> d_o3aConstraintVect;
d_compareO3AConstraint(boost::shared_ptr<O3AConstraint> a,boost::shared_ptr<O3AConstraint> b)94   static bool d_compareO3AConstraint(boost::shared_ptr<O3AConstraint> a,
95                                      boost::shared_ptr<O3AConstraint> b) {
96     return (
97         (a->d_prbIdx != b->d_prbIdx)
98             ? (a->d_prbIdx < b->d_prbIdx)
99             : ((a->d_refIdx != b->d_refIdx)
100                    ? (a->d_refIdx < b->d_refIdx)
101                    : ((a->d_weight != b->d_weight) ? (a->d_weight > b->d_weight)
102                                                    : (a->d_idx < b->d_idx))));
103   };
104 };
105 
106 const int O3_DUMMY_COST = 100000;
107 const int O3_LARGE_NEGATIVE_WEIGHT = -1e9;
108 const int O3_DEFAULT_CONSTRAINT_WEIGHT = 100.0;
109 const unsigned int O3_MAX_H_BINS = 20;
110 const unsigned int O3_MAX_SDM_ITERATIONS = 100;
111 const unsigned int O3_MAX_SDM_THRESHOLD_ITER = 3;
112 const double O3_RANDOM_TRANS_COEFF = 5.0;
113 const double O3_THRESHOLD_DIFF_DISTANCE = 0.1;
114 const double O3_SDM_THRESHOLD_START = 0.7;
115 const double O3_SDM_THRESHOLD_STEP = 0.3;
116 const double O3_CHARGE_WEIGHT = 10.0;
117 const double O3_CRIPPEN_WEIGHT = 10.0;
118 const double O3_RMSD_THRESHOLD = 1.0e-04;
119 const double O3_SCORE_THRESHOLD = 0.01;
120 const double O3_SCORING_FUNCTION_ALPHA = 5.0;
121 const double O3_SCORING_FUNCTION_BETA = 0.5;
122 const double O3_CHARGE_COEFF = 5.0;
123 const double O3_CRIPPEN_COEFF = 1.0;
124 const int O3_MAX_WEIGHT_COEFF = 5;
125 enum {
126   O3_USE_MMFF_WEIGHTS = (1 << 0),
127   O3_ACCURACY_MASK = (1 << 0 | 1 << 1),
128   O3_LOCAL_ONLY = (1 << 2)
129 };
130 
131 class RDKIT_MOLALIGN_EXPORT MolHistogram {
132  public:
133   MolHistogram(const ROMol &mol, const double *dmat, bool cleanupDmat = false);
~MolHistogram()134   ~MolHistogram(){};
get(const unsigned int y,const unsigned int x)135   inline int get(const unsigned int y, const unsigned int x) const {
136     PRECONDITION(y < d_h.shape()[0], "Invalid index on MolHistogram");
137     PRECONDITION(x < d_h.shape()[1], "Invalid index on MolHistogram");
138     return d_h[y][x];
139   }
140 
141  private:
142   boost::multi_array<int, 2> d_h;
143 };
144 
145 class RDKIT_MOLALIGN_EXPORT LAP {
146  public:
LAP(unsigned int dim)147   LAP(unsigned int dim)
148       : d_rowSol(dim),
149         d_colSol(dim),
150         d_free(dim),
151         d_colList(dim),
152         d_matches(dim),
153         d_d(dim),
154         d_v(dim),
155         d_pred(dim),
156         d_cost(boost::extents[dim][dim]){};
~LAP()157   ~LAP(){};
getCost(const unsigned int i,const unsigned int j)158   int getCost(const unsigned int i, const unsigned int j) {
159     PRECONDITION(i < d_cost.shape()[0], "Invalid index on LAP.cost");
160     PRECONDITION(j < d_cost.shape()[1], "Invalid index on LAP.cost");
161     return d_cost[i][j];
162   }
getRowSol(const unsigned int i)163   int getRowSol(const unsigned int i) {
164     PRECONDITION(i < d_rowSol.size(), "Invalid index on LAP.rowSol");
165     return d_rowSol[i];
166   }
167   void computeMinCostPath(const int dim);
168   void computeCostMatrix(const ROMol &prbMol, const MolHistogram &prbHist,
169                          const ROMol &refMol, const MolHistogram &refHist,
170                          O3AConstraintVect *o3aConstraintVect,
171                          int (*costFunc)(const unsigned int, const unsigned int,
172                                          double, void *),
173                          void *data, const unsigned int n_bins = O3_MAX_H_BINS);
174 
175  private:
176   std::vector<int> d_rowSol;
177   std::vector<int> d_colSol;
178   std::vector<int> d_free;
179   std::vector<int> d_colList;
180   std::vector<int> d_matches;
181   std::vector<int> d_d;
182   std::vector<int> d_v;
183   std::vector<int> d_pred;
184   boost::multi_array<int, 2> d_cost;
185 };
186 
187 class RDKIT_MOLALIGN_EXPORT SDM {
188  public:
189   // constructor
190   SDM(const Conformer *prbConf = nullptr, const Conformer *refConf = nullptr,
191       O3AConstraintVect *o3aConstraintVect = nullptr)
d_prbConf(prbConf)192       : d_prbConf(prbConf),
193         d_refConf(refConf),
194         d_o3aConstraintVect(o3aConstraintVect){};
195   // copy constructor
SDM(const SDM & other)196   SDM(const SDM &other)
197       : d_prbConf(other.d_prbConf),
198         d_refConf(other.d_refConf),
199         d_o3aConstraintVect(other.d_o3aConstraintVect),
200         d_SDMPtrVect(other.d_SDMPtrVect.size()) {
201     for (unsigned int i = 0; i < d_SDMPtrVect.size(); ++i) {
202       d_SDMPtrVect[i] = boost::shared_ptr<SDMElement>(new SDMElement());
203       memcpy(d_SDMPtrVect[i].get(), other.d_SDMPtrVect[i].get(),
204              sizeof(SDMElement));
205     }
206   };
207   // assignment operator
208   SDM &operator=(const SDM &other) {
209     if (this == &other) return *this;
210     d_prbConf = other.d_prbConf;
211     d_refConf = other.d_refConf;
212     d_o3aConstraintVect = other.d_o3aConstraintVect;
213     d_SDMPtrVect.resize(other.d_SDMPtrVect.size());
214     for (unsigned int i = 0; i < d_SDMPtrVect.size(); ++i) {
215       d_SDMPtrVect[i] = boost::shared_ptr<SDMElement>(new SDMElement());
216       memcpy(d_SDMPtrVect[i].get(), other.d_SDMPtrVect[i].get(),
217              sizeof(SDMElement));
218     }
219 
220     return *this;
221   };
222   // destructor
~SDM()223   ~SDM(){};
224   void fillFromDist(double threshold,
225                     const boost::dynamic_bitset<> &refHvyAtoms,
226                     const boost::dynamic_bitset<> &prbHvyAtoms);
227   void fillFromLAP(LAP &lap);
228   double scoreAlignment(double (*scoringFunc)(const unsigned int,
229                                               const unsigned int, void *),
230                         void *data);
231   void prepareMatchWeightsVect(RDKit::MatchVectType &matchVect,
232                                RDNumeric::DoubleVector &weights,
233                                double (*weightFunc)(const unsigned int,
234                                                     const unsigned int, void *),
235                                void *data);
size()236   unsigned int size() { return d_SDMPtrVect.size(); }
237 
238  private:
239   typedef struct SDMElement {
240     unsigned int idx[2];
241     int score;
242     int cost;
243     double sqDist;
244     O3AConstraint *o3aConstraint;
245   } SDMElement;
246   const Conformer *d_prbConf;
247   const Conformer *d_refConf;
248   O3AConstraintVect *d_o3aConstraintVect;
249   std::vector<boost::shared_ptr<SDMElement>> d_SDMPtrVect;
compareSDMScore(boost::shared_ptr<SDMElement> a,boost::shared_ptr<SDMElement> b)250   static bool compareSDMScore(boost::shared_ptr<SDMElement> a,
251                               boost::shared_ptr<SDMElement> b) {
252     return ((a->score != b->score)
253                 ? (a->score < b->score)
254                 : ((a->cost != b->cost)
255                        ? (a->cost < b->cost)
256                        : ((a->idx[0] != b->idx[0]) ? (a->idx[0] < b->idx[0])
257                                                    : (a->idx[1] < b->idx[1]))));
258   };
compareSDMDist(boost::shared_ptr<SDMElement> a,boost::shared_ptr<SDMElement> b)259   static bool compareSDMDist(boost::shared_ptr<SDMElement> a,
260                              boost::shared_ptr<SDMElement> b) {
261     double aWeight = (a->o3aConstraint ? a->o3aConstraint->getWeight() : 0.0);
262     double bWeight = (b->o3aConstraint ? b->o3aConstraint->getWeight() : 0.0);
263     return ((aWeight != bWeight)
264                 ? (aWeight > bWeight)
265                 : ((a->sqDist != b->sqDist)
266                        ? (a->sqDist < b->sqDist)
267                        : ((a->idx[0] != b->idx[0]) ? (a->idx[0] < b->idx[0])
268                                                    : (a->idx[1] < b->idx[1]))));
269   };
270 };
271 
272 class RDKIT_MOLALIGN_EXPORT O3A {
273  public:
274   //! pre-defined atom typing schemes
275   typedef enum { MMFF94 = 0, CRIPPEN } AtomTypeScheme;
276   O3A(ROMol &prbMol, const ROMol &refMol, void *prbProp, void *refProp,
277       AtomTypeScheme atomTypes = MMFF94, const int prbCid = -1,
278       const int refCid = -1, const bool reflect = false,
279       const unsigned int maxIters = 50, unsigned int options = 0,
280       const MatchVectType *constraintMap = nullptr,
281       const RDNumeric::DoubleVector *constraintWeights = nullptr,
282       LAP *extLAP = nullptr, MolHistogram *extPrbHist = nullptr,
283       MolHistogram *extRefHist = nullptr);
284   O3A(int (*costFunc)(const unsigned int, const unsigned int, double, void *),
285       double (*weightFunc)(const unsigned int, const unsigned int, void *),
286       double (*scoringFunc)(const unsigned int, const unsigned int, void *),
287       void *data, ROMol &prbMol, const ROMol &refMol, const int prbCid,
288       const int refCid, const boost::dynamic_bitset<> &prbHvyAtoms,
289       const boost::dynamic_bitset<> &refHvyAtoms, const bool reflect = false,
290       const unsigned int maxIters = 50, unsigned int options = 0,
291       O3AConstraintVect *o3aConstraintVect = nullptr,
292       ROMol *extWorkPrbMol = nullptr, LAP *extLAP = nullptr,
293       MolHistogram *extPrbHist = nullptr, MolHistogram *extRefHist = nullptr);
~O3A()294   ~O3A() {
295     if (d_o3aMatchVect) {
296       delete d_o3aMatchVect;
297     }
298     if (d_o3aWeights) {
299       delete d_o3aWeights;
300     }
301   };
302   double align();
303   double trans(RDGeom::Transform3D &trans);
score()304   double score() { return d_o3aScore; };
matches()305   const RDKit::MatchVectType *matches() { return d_o3aMatchVect; };
weights()306   const RDNumeric::DoubleVector *weights() { return d_o3aWeights; };
307 
308  private:
309   ROMol *d_prbMol;
310   const ROMol *d_refMol;
311   int d_prbCid;
312   int d_refCid;
313   bool d_reflect;
314   unsigned int d_maxIters;
315   const RDKit::MatchVectType *d_o3aMatchVect;
316   const RDNumeric::DoubleVector *d_o3aWeights;
317   double d_o3aScore;
318 };
319 
320 RDKIT_MOLALIGN_EXPORT void randomTransform(ROMol &mol, const int cid = -1,
321                                            const int seed = -1);
322 RDKIT_MOLALIGN_EXPORT const RDGeom::POINT3D_VECT *reflect(
323     const Conformer &conf);
324 RDKIT_MOLALIGN_EXPORT int o3aMMFFCostFunc(const unsigned int prbIdx,
325                                           const unsigned int refIdx,
326                                           double hSum, void *data);
327 RDKIT_MOLALIGN_EXPORT double o3aMMFFWeightFunc(const unsigned int prbIdx,
328                                                const unsigned int refIdx,
329                                                void *data);
330 RDKIT_MOLALIGN_EXPORT double o3aMMFFScoringFunc(const unsigned int prbIdx,
331                                                 const unsigned int refIdx,
332                                                 void *data);
333 RDKIT_MOLALIGN_EXPORT int o3aCrippenCostFunc(const unsigned int prbIdx,
334                                              const unsigned int refIdx,
335                                              double hSum, void *data);
336 RDKIT_MOLALIGN_EXPORT double o3aCrippenWeightFunc(const unsigned int prbIdx,
337                                                   const unsigned int refIdx,
338                                                   void *data);
339 RDKIT_MOLALIGN_EXPORT double o3aCrippenScoringFunc(const unsigned int prbIdx,
340                                                    const unsigned int refIdx,
341                                                    void *data);
342 
343 RDKIT_MOLALIGN_EXPORT void getO3AForProbeConfs(
344     ROMol &prbMol, const ROMol &refMol, void *prbProp, void *refProp,
345     std::vector<boost::shared_ptr<O3A>> &res, int numThreads = 1,
346     O3A::AtomTypeScheme atomTypes = O3A::MMFF94, const int refCid = -1,
347     const bool reflect = false, const unsigned int maxIters = 50,
348     unsigned int options = 0, const MatchVectType *constraintMap = nullptr,
349     const RDNumeric::DoubleVector *constraintWeights = nullptr);
350 }  // namespace MolAlign
351 }  // namespace RDKit
352 #endif
353