1 //
2 // Copyright (C) 2013-2014 Paolo Tosco
3 //
4 // @@ All Rights Reserved @@
5 // This file is part of the RDKit.
6 // The contents are covered by the terms of the BSD license
7 // which is included in the file license.txt, found at the root
8 // of the RDKit source tree.
9 //
10 #include <RDGeneral/export.h>
11 #ifndef _RD_O3AALIGNMOLECULES_H_
12 #define _RD_O3AALIGNMOLECULES_H_
13
14 #include <RDGeneral/Invariant.h>
15 #include <Geometry/Transform3D.h>
16 #include <Geometry/point.h>
17 #include <Numerics/Vector.h>
18 #include <GraphMol/ROMol.h>
19 #include <GraphMol/Conformer.h>
20 #include <GraphMol/ForceFieldHelpers/MMFF/AtomTyper.h>
21 #include <GraphMol/MolAlign/AlignMolecules.h>
22 #include <vector>
23 #include <cmath>
24 #include <boost/shared_ptr.hpp>
25 #include <boost/multi_array.hpp>
26 #include <boost/dynamic_bitset.hpp>
27
28 namespace RDKit {
29 namespace MolAlign {
30 struct RDKIT_MOLALIGN_EXPORT O3AFuncData {
31 const Conformer *prbConf;
32 const Conformer *refConf;
33 void *prbProp;
34 void *refProp;
35 int coeff;
36 int weight;
37 bool useMMFFSim;
38 };
isDoubleZero(const double x)39 inline bool isDoubleZero(const double x) {
40 return ((x < 1.0e-10) && (x > -1.0e-10));
41 };
42
43 class O3AConstraintVect;
44
45 //! A class to define alignment constraints. Each constraint
46 //! is defined by a pair of atom indexes (one for the probe,
47 //! one for the reference) and a weight. Constraints can
48 //! can be added via the O3AConstraintVect class.
49 class RDKIT_MOLALIGN_EXPORT O3AConstraint {
50 friend class O3AConstraintVect;
51
52 public:
getPrbIdx()53 double getPrbIdx() { return d_prbIdx; }
getRefIdx()54 double getRefIdx() { return d_refIdx; }
getWeight()55 double getWeight() { return d_weight; }
56
57 private:
58 unsigned int d_idx;
59 unsigned int d_prbIdx;
60 unsigned int d_refIdx;
61 double d_weight;
62 };
63
64 //! A class to store a vector of alignment constraints. Each constraint
65 //! is defined by an O3AConstraint object. Each time the append()
66 //! method is invoked, the vector is sorted to make lookup faster.
67 //! Hence, constraints are not necessarily stored in the same order
68 //! they were appended.
69 class RDKIT_MOLALIGN_EXPORT O3AConstraintVect {
70 public:
O3AConstraintVect()71 O3AConstraintVect() {};
~O3AConstraintVect()72 ~O3AConstraintVect(){};
append(unsigned int prbIdx,unsigned int refIdx,double weight)73 void append(unsigned int prbIdx, unsigned int refIdx, double weight) {
74 O3AConstraint *o3aConstraint = new O3AConstraint();
75 o3aConstraint->d_idx = d_count;
76 o3aConstraint->d_prbIdx = prbIdx;
77 o3aConstraint->d_refIdx = refIdx;
78 o3aConstraint->d_weight = weight;
79 d_o3aConstraintVect.emplace_back(o3aConstraint);
80 std::sort(d_o3aConstraintVect.begin(), d_o3aConstraintVect.end(),
81 d_compareO3AConstraint);
82 ++d_count;
83 }
size()84 std::vector<boost::shared_ptr<O3AConstraint>>::size_type size() {
85 return d_o3aConstraintVect.size();
86 }
87 O3AConstraint *operator[](unsigned int i) {
88 return d_o3aConstraintVect[i].get();
89 }
90
91 private:
92 unsigned int d_count{0};
93 std::vector<boost::shared_ptr<O3AConstraint>> d_o3aConstraintVect;
d_compareO3AConstraint(boost::shared_ptr<O3AConstraint> a,boost::shared_ptr<O3AConstraint> b)94 static bool d_compareO3AConstraint(boost::shared_ptr<O3AConstraint> a,
95 boost::shared_ptr<O3AConstraint> b) {
96 return (
97 (a->d_prbIdx != b->d_prbIdx)
98 ? (a->d_prbIdx < b->d_prbIdx)
99 : ((a->d_refIdx != b->d_refIdx)
100 ? (a->d_refIdx < b->d_refIdx)
101 : ((a->d_weight != b->d_weight) ? (a->d_weight > b->d_weight)
102 : (a->d_idx < b->d_idx))));
103 };
104 };
105
106 const int O3_DUMMY_COST = 100000;
107 const int O3_LARGE_NEGATIVE_WEIGHT = -1e9;
108 const int O3_DEFAULT_CONSTRAINT_WEIGHT = 100.0;
109 const unsigned int O3_MAX_H_BINS = 20;
110 const unsigned int O3_MAX_SDM_ITERATIONS = 100;
111 const unsigned int O3_MAX_SDM_THRESHOLD_ITER = 3;
112 const double O3_RANDOM_TRANS_COEFF = 5.0;
113 const double O3_THRESHOLD_DIFF_DISTANCE = 0.1;
114 const double O3_SDM_THRESHOLD_START = 0.7;
115 const double O3_SDM_THRESHOLD_STEP = 0.3;
116 const double O3_CHARGE_WEIGHT = 10.0;
117 const double O3_CRIPPEN_WEIGHT = 10.0;
118 const double O3_RMSD_THRESHOLD = 1.0e-04;
119 const double O3_SCORE_THRESHOLD = 0.01;
120 const double O3_SCORING_FUNCTION_ALPHA = 5.0;
121 const double O3_SCORING_FUNCTION_BETA = 0.5;
122 const double O3_CHARGE_COEFF = 5.0;
123 const double O3_CRIPPEN_COEFF = 1.0;
124 const int O3_MAX_WEIGHT_COEFF = 5;
125 enum {
126 O3_USE_MMFF_WEIGHTS = (1 << 0),
127 O3_ACCURACY_MASK = (1 << 0 | 1 << 1),
128 O3_LOCAL_ONLY = (1 << 2)
129 };
130
131 class RDKIT_MOLALIGN_EXPORT MolHistogram {
132 public:
133 MolHistogram(const ROMol &mol, const double *dmat, bool cleanupDmat = false);
~MolHistogram()134 ~MolHistogram(){};
get(const unsigned int y,const unsigned int x)135 inline int get(const unsigned int y, const unsigned int x) const {
136 PRECONDITION(y < d_h.shape()[0], "Invalid index on MolHistogram");
137 PRECONDITION(x < d_h.shape()[1], "Invalid index on MolHistogram");
138 return d_h[y][x];
139 }
140
141 private:
142 boost::multi_array<int, 2> d_h;
143 };
144
145 class RDKIT_MOLALIGN_EXPORT LAP {
146 public:
LAP(unsigned int dim)147 LAP(unsigned int dim)
148 : d_rowSol(dim),
149 d_colSol(dim),
150 d_free(dim),
151 d_colList(dim),
152 d_matches(dim),
153 d_d(dim),
154 d_v(dim),
155 d_pred(dim),
156 d_cost(boost::extents[dim][dim]){};
~LAP()157 ~LAP(){};
getCost(const unsigned int i,const unsigned int j)158 int getCost(const unsigned int i, const unsigned int j) {
159 PRECONDITION(i < d_cost.shape()[0], "Invalid index on LAP.cost");
160 PRECONDITION(j < d_cost.shape()[1], "Invalid index on LAP.cost");
161 return d_cost[i][j];
162 }
getRowSol(const unsigned int i)163 int getRowSol(const unsigned int i) {
164 PRECONDITION(i < d_rowSol.size(), "Invalid index on LAP.rowSol");
165 return d_rowSol[i];
166 }
167 void computeMinCostPath(const int dim);
168 void computeCostMatrix(const ROMol &prbMol, const MolHistogram &prbHist,
169 const ROMol &refMol, const MolHistogram &refHist,
170 O3AConstraintVect *o3aConstraintVect,
171 int (*costFunc)(const unsigned int, const unsigned int,
172 double, void *),
173 void *data, const unsigned int n_bins = O3_MAX_H_BINS);
174
175 private:
176 std::vector<int> d_rowSol;
177 std::vector<int> d_colSol;
178 std::vector<int> d_free;
179 std::vector<int> d_colList;
180 std::vector<int> d_matches;
181 std::vector<int> d_d;
182 std::vector<int> d_v;
183 std::vector<int> d_pred;
184 boost::multi_array<int, 2> d_cost;
185 };
186
187 class RDKIT_MOLALIGN_EXPORT SDM {
188 public:
189 // constructor
190 SDM(const Conformer *prbConf = nullptr, const Conformer *refConf = nullptr,
191 O3AConstraintVect *o3aConstraintVect = nullptr)
d_prbConf(prbConf)192 : d_prbConf(prbConf),
193 d_refConf(refConf),
194 d_o3aConstraintVect(o3aConstraintVect){};
195 // copy constructor
SDM(const SDM & other)196 SDM(const SDM &other)
197 : d_prbConf(other.d_prbConf),
198 d_refConf(other.d_refConf),
199 d_o3aConstraintVect(other.d_o3aConstraintVect),
200 d_SDMPtrVect(other.d_SDMPtrVect.size()) {
201 for (unsigned int i = 0; i < d_SDMPtrVect.size(); ++i) {
202 d_SDMPtrVect[i] = boost::shared_ptr<SDMElement>(new SDMElement());
203 memcpy(d_SDMPtrVect[i].get(), other.d_SDMPtrVect[i].get(),
204 sizeof(SDMElement));
205 }
206 };
207 // assignment operator
208 SDM &operator=(const SDM &other) {
209 if (this == &other) return *this;
210 d_prbConf = other.d_prbConf;
211 d_refConf = other.d_refConf;
212 d_o3aConstraintVect = other.d_o3aConstraintVect;
213 d_SDMPtrVect.resize(other.d_SDMPtrVect.size());
214 for (unsigned int i = 0; i < d_SDMPtrVect.size(); ++i) {
215 d_SDMPtrVect[i] = boost::shared_ptr<SDMElement>(new SDMElement());
216 memcpy(d_SDMPtrVect[i].get(), other.d_SDMPtrVect[i].get(),
217 sizeof(SDMElement));
218 }
219
220 return *this;
221 };
222 // destructor
~SDM()223 ~SDM(){};
224 void fillFromDist(double threshold,
225 const boost::dynamic_bitset<> &refHvyAtoms,
226 const boost::dynamic_bitset<> &prbHvyAtoms);
227 void fillFromLAP(LAP &lap);
228 double scoreAlignment(double (*scoringFunc)(const unsigned int,
229 const unsigned int, void *),
230 void *data);
231 void prepareMatchWeightsVect(RDKit::MatchVectType &matchVect,
232 RDNumeric::DoubleVector &weights,
233 double (*weightFunc)(const unsigned int,
234 const unsigned int, void *),
235 void *data);
size()236 unsigned int size() { return d_SDMPtrVect.size(); }
237
238 private:
239 typedef struct SDMElement {
240 unsigned int idx[2];
241 int score;
242 int cost;
243 double sqDist;
244 O3AConstraint *o3aConstraint;
245 } SDMElement;
246 const Conformer *d_prbConf;
247 const Conformer *d_refConf;
248 O3AConstraintVect *d_o3aConstraintVect;
249 std::vector<boost::shared_ptr<SDMElement>> d_SDMPtrVect;
compareSDMScore(boost::shared_ptr<SDMElement> a,boost::shared_ptr<SDMElement> b)250 static bool compareSDMScore(boost::shared_ptr<SDMElement> a,
251 boost::shared_ptr<SDMElement> b) {
252 return ((a->score != b->score)
253 ? (a->score < b->score)
254 : ((a->cost != b->cost)
255 ? (a->cost < b->cost)
256 : ((a->idx[0] != b->idx[0]) ? (a->idx[0] < b->idx[0])
257 : (a->idx[1] < b->idx[1]))));
258 };
compareSDMDist(boost::shared_ptr<SDMElement> a,boost::shared_ptr<SDMElement> b)259 static bool compareSDMDist(boost::shared_ptr<SDMElement> a,
260 boost::shared_ptr<SDMElement> b) {
261 double aWeight = (a->o3aConstraint ? a->o3aConstraint->getWeight() : 0.0);
262 double bWeight = (b->o3aConstraint ? b->o3aConstraint->getWeight() : 0.0);
263 return ((aWeight != bWeight)
264 ? (aWeight > bWeight)
265 : ((a->sqDist != b->sqDist)
266 ? (a->sqDist < b->sqDist)
267 : ((a->idx[0] != b->idx[0]) ? (a->idx[0] < b->idx[0])
268 : (a->idx[1] < b->idx[1]))));
269 };
270 };
271
272 class RDKIT_MOLALIGN_EXPORT O3A {
273 public:
274 //! pre-defined atom typing schemes
275 typedef enum { MMFF94 = 0, CRIPPEN } AtomTypeScheme;
276 O3A(ROMol &prbMol, const ROMol &refMol, void *prbProp, void *refProp,
277 AtomTypeScheme atomTypes = MMFF94, const int prbCid = -1,
278 const int refCid = -1, const bool reflect = false,
279 const unsigned int maxIters = 50, unsigned int options = 0,
280 const MatchVectType *constraintMap = nullptr,
281 const RDNumeric::DoubleVector *constraintWeights = nullptr,
282 LAP *extLAP = nullptr, MolHistogram *extPrbHist = nullptr,
283 MolHistogram *extRefHist = nullptr);
284 O3A(int (*costFunc)(const unsigned int, const unsigned int, double, void *),
285 double (*weightFunc)(const unsigned int, const unsigned int, void *),
286 double (*scoringFunc)(const unsigned int, const unsigned int, void *),
287 void *data, ROMol &prbMol, const ROMol &refMol, const int prbCid,
288 const int refCid, const boost::dynamic_bitset<> &prbHvyAtoms,
289 const boost::dynamic_bitset<> &refHvyAtoms, const bool reflect = false,
290 const unsigned int maxIters = 50, unsigned int options = 0,
291 O3AConstraintVect *o3aConstraintVect = nullptr,
292 ROMol *extWorkPrbMol = nullptr, LAP *extLAP = nullptr,
293 MolHistogram *extPrbHist = nullptr, MolHistogram *extRefHist = nullptr);
~O3A()294 ~O3A() {
295 if (d_o3aMatchVect) {
296 delete d_o3aMatchVect;
297 }
298 if (d_o3aWeights) {
299 delete d_o3aWeights;
300 }
301 };
302 double align();
303 double trans(RDGeom::Transform3D &trans);
score()304 double score() { return d_o3aScore; };
matches()305 const RDKit::MatchVectType *matches() { return d_o3aMatchVect; };
weights()306 const RDNumeric::DoubleVector *weights() { return d_o3aWeights; };
307
308 private:
309 ROMol *d_prbMol;
310 const ROMol *d_refMol;
311 int d_prbCid;
312 int d_refCid;
313 bool d_reflect;
314 unsigned int d_maxIters;
315 const RDKit::MatchVectType *d_o3aMatchVect;
316 const RDNumeric::DoubleVector *d_o3aWeights;
317 double d_o3aScore;
318 };
319
320 RDKIT_MOLALIGN_EXPORT void randomTransform(ROMol &mol, const int cid = -1,
321 const int seed = -1);
322 RDKIT_MOLALIGN_EXPORT const RDGeom::POINT3D_VECT *reflect(
323 const Conformer &conf);
324 RDKIT_MOLALIGN_EXPORT int o3aMMFFCostFunc(const unsigned int prbIdx,
325 const unsigned int refIdx,
326 double hSum, void *data);
327 RDKIT_MOLALIGN_EXPORT double o3aMMFFWeightFunc(const unsigned int prbIdx,
328 const unsigned int refIdx,
329 void *data);
330 RDKIT_MOLALIGN_EXPORT double o3aMMFFScoringFunc(const unsigned int prbIdx,
331 const unsigned int refIdx,
332 void *data);
333 RDKIT_MOLALIGN_EXPORT int o3aCrippenCostFunc(const unsigned int prbIdx,
334 const unsigned int refIdx,
335 double hSum, void *data);
336 RDKIT_MOLALIGN_EXPORT double o3aCrippenWeightFunc(const unsigned int prbIdx,
337 const unsigned int refIdx,
338 void *data);
339 RDKIT_MOLALIGN_EXPORT double o3aCrippenScoringFunc(const unsigned int prbIdx,
340 const unsigned int refIdx,
341 void *data);
342
343 RDKIT_MOLALIGN_EXPORT void getO3AForProbeConfs(
344 ROMol &prbMol, const ROMol &refMol, void *prbProp, void *refProp,
345 std::vector<boost::shared_ptr<O3A>> &res, int numThreads = 1,
346 O3A::AtomTypeScheme atomTypes = O3A::MMFF94, const int refCid = -1,
347 const bool reflect = false, const unsigned int maxIters = 50,
348 unsigned int options = 0, const MatchVectType *constraintMap = nullptr,
349 const RDNumeric::DoubleVector *constraintWeights = nullptr);
350 } // namespace MolAlign
351 } // namespace RDKit
352 #endif
353