1 /**********************************************************************
2 conformersearch.h - Conformer searching using genetic algorithm.
3 
4 Copyright (C) 2010 Tim Vandermeersch
5 Some portions Copyright (C) 2012 Materials Design, Inc.
6 Some portions Copyright (C) 2016 Torsten Sachse
7 
8 This file is part of the Open Babel project.
9 For more information, see <http://openbabel.org/>
10 
11 This program is free software; you can redistribute it and/or modify
12 it under the terms of the GNU General Public License as published by
13 the Free Software Foundation version 2 of the License.
14 
15 This program is distributed in the hope that it will be useful,
16 but WITHOUT ANY WARRANTY; without even the implied warranty of
17 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
18 GNU General Public License for more details.
19 ***********************************************************************/
20 
21 #ifndef OB_CONFORMERSEARCH_H
22 #define OB_CONFORMERSEARCH_H
23 
24 #include <openbabel/mol.h>
25 #include <openbabel/rotor.h>
26 #include <openbabel/rotamer.h>
27 
28 namespace OpenBabel {
29 
30   typedef std::vector<int> RotorKey;
31   typedef std::vector<RotorKey> RotorKeys;
32   typedef std::map<std::vector<int>,double> mapRotorEnergy;
33 
34   ///@addtogroup conformer Conformer Searching
35   ///@{
36 
37   //////////////////////////////////////////////////////////
38   //
39   //  OBConformerFilter(s)
40   //
41   //////////////////////////////////////////////////////////
42 
43   /**
44    * @class OBConformerFilter conformersearch.h <openbabel/conformersearch.h>
45    * @brief Interface used by OBConformerSearch for filtering conformers.
46    *
47    * The OBConformerFilter class defines an interface used by the OBConformerSearch
48    * class. A conformer filter is used as a first selection method when generating
49    * conformers. A typical example is a steric filter to ensure atoms are never
50    * closer than some specified distance.
51    *
52    * @see OBConformerSearch OBConformerFilters
53    * @since 2.3
54    */
55   class OBAPI OBConformerFilter
56   {
57     public:
58       /**
59        * Check if a conformer is good (i.e. passes the filter). The molecule, rotor
60        * key and coords are passed as parameters to this function and can be used
61        * for checking the confoormer.
62        * @return True if the conformer passes the filter.
63        */
64       virtual bool IsGood(const OBMol &mol, const RotorKey &key, double *coords) = 0;
65       virtual ~OBConformerFilter() = 0;
66   };
67 
68   /**
69    * @class OBConformerFilters conformersearch.h <openbabel/conformersearch.h>
70    * @brief Class for combining OBConformerFilter objects.
71    *
72    * The OBConformerFilters class makes it easy to combine OBConformerFilter
73    * objects. A list of OBConformerFilter objects is specified when constructing
74    * this class. The IsGood implementation simply checks all the specified
75    * filters and returns false if any of the filters returns false.
76    *
77    * @see OBConformerSearch OBConformerFilter
78    * @since 2.3
79    */
80   class OBAPI OBConformerFilters : public OBConformerFilter
81   {
82     public:
83       /**
84        * Constructor specifiying the filters to combine.
85        */
OBConformerFilters(const std::vector<OBConformerFilter * > & filters)86       OBConformerFilters(const std::vector<OBConformerFilter*> &filters) : m_filters(filters)
87       {
88       }
89       /**
90        * IsGood reimplementation.
91        * @return True if all filters pass.
92        */
IsGood(const OBMol & mol,const RotorKey & key,double * coords)93       bool IsGood(const OBMol &mol, const RotorKey &key, double *coords)
94       {
95         for (unsigned int i = 0; i < m_filters.size(); ++i)
96           if (!m_filters[i]->IsGood(mol, key, coords))
97             return false;
98         return true;
99       }
100     protected:
101       std::vector<OBConformerFilter*> m_filters;
102   };
103 
104   /**
105    * @class OBStericConformerFilter conformersearch.h <openbabel/conformersearch.h>
106    * @brief A steric conformer filter class.
107    *
108    * The OBStericConformerFilter class is the default filter used by OBConformerSearch.
109    * The filter removes all molecules which have at least 2 atoms closer together than
110    * the specified distance.
111    * @since 2.3
112    */
113   class OBAPI OBStericConformerFilter : public OBConformerFilter
114   {
115     public:
116       OBStericConformerFilter ();
117       OBStericConformerFilter (double cutoff, double vdw_factor = 0.5, bool check_hydrogens = true);
118       bool IsGood(const OBMol &mol, const RotorKey &key, double *coords);
119     private:
120       double m_cutoff; //!< Internal cutoff (used as a squared distance)
121       double m_vdw_factor;		//!< Factor applied to Van der Waals distance check
122       bool m_check_hydrogens;		//!< Check hydrOgens if true
123   };
124 
125   //////////////////////////////////////////////////////////
126   //
127   //  OBConformerScore(s)
128   //
129   //////////////////////////////////////////////////////////
130 
131   /**
132    * @class OBConformerScore conformersearch.h <openbabel/conformersearch.h>
133    * @brief Interface used by OBConformerSearch for scoring conformers.
134    *
135    * The OBConformerScore class defines an interface to assign scores to conformers.
136    * This class is used by OBConformerSearch to make the class flexible. A higher
137    * score means the conformer with index is more favourable. A typical example is
138    * the force field energy to find the lowest energy conformer (note: the energy is
139    * not directly usable as score since lower values are better). Another example
140    * is to use some measure of diversity (e.g. RMSD) to generate a diverse set of
141    * conformers.
142    * @since 2.3
143    */
144   class OBAPI OBConformerScore
145   {
146     public:
147       /**
148        * Conformer scores can be preferably high or low.
149        */
150       enum Preferred { HighScore, LowScore };
151       /**
152        * Preferred order for subclass scoring function.
153        */
154       virtual Preferred GetPreferred() = 0;
155       /**
156        * Convergence criteria used.
157        */
158       enum Convergence { Highest, Lowest, Sum, Average };
159       /**
160        * Convergence criteria for subclass scoring function.
161        */
162       virtual Convergence GetConvergence() = 0;
163       /**
164        * Score an individual conformer specified by index.
165        */
166       virtual double Score(OBMol &mol, unsigned int index, const RotorKeys &keys,
167           const std::vector<double*> &conformers) = 0;
168       virtual ~OBConformerScore() = 0;
169   };
170 
171   /**
172    * @class OBRMSDConformerScore conformersearch.h <openbabel/conformersearch.h>
173    * @brief A RMSD conformer scoring class.
174    *
175    * Score conformers by the RMSD between the conformer with specified index and
176    * the closest conformer. This results in a diverse set of conformers.
177    * @since 2.3
178    */
179   class OBAPI OBRMSDConformerScore : public OBConformerScore
180   {
181     public:
GetPreferred()182       Preferred GetPreferred() { return HighScore; }
GetConvergence()183       Convergence GetConvergence() { return Average; }
184       double Score(OBMol &mol, unsigned int index, const RotorKeys &keys,
185           const std::vector<double*> &conformers);
186   };
187 
188   /**
189    * @class OBEnergyConformerScore conformersearch.h <openbabel/conformersearch.h>
190    * @brief A lowest energy conformer scoring class.
191    * @since 2.3
192    */
193   class OBAPI OBEnergyConformerScore : public OBConformerScore
194   {
195     public:
OBEnergyConformerScore()196       OBEnergyConformerScore () {
197 	energy_map.clear ();
198 	energy_ncompute = 0;
199 	energy_nrequest = 0;
200       }
GetNbEnergyCompute()201       long unsigned int GetNbEnergyCompute () {return energy_ncompute;}
GetNbEnergyRequest()202       long unsigned int GetNbEnergyRequest () {return energy_nrequest;}
GetPreferred()203       Preferred GetPreferred() { return LowScore; }
GetConvergence()204       Convergence GetConvergence() { return Lowest; }
205       double Score(OBMol &mol, unsigned int index, const RotorKeys &keys,
206           const std::vector<double*> &conformers);
207     private:
208       mapRotorEnergy energy_map;
209       long unsigned int energy_ncompute;
210       long unsigned int energy_nrequest;
211   };
212 
213   /**
214    * @class OBMinimizingEnergyConformerScore conformersearch.h <openbabel/conformersearch.h>
215    * @brief A lowest energy conformer scoring class (after minimization)
216    * @since 2.4
217    */
218   class OBAPI OBMinimizingEnergyConformerScore : public OBConformerScore
219   {
220     public:
OBMinimizingEnergyConformerScore()221       OBMinimizingEnergyConformerScore () {
222 	energy_map.clear ();
223 	energy_ncompute = 0;
224 	energy_nrequest = 0;
225       }
GetNbEnergyCompute()226       long unsigned int GetNbEnergyCompute () {return energy_ncompute;}
GetNbEnergyRequest()227       long unsigned int GetNbEnergyRequest () {return energy_nrequest;}
GetPreferred()228       Preferred GetPreferred() { return LowScore; }
GetConvergence()229       Convergence GetConvergence() { return Lowest; }
230       double Score(OBMol &mol, unsigned int index, const RotorKeys &keys,
231           const std::vector<double*> &conformers);
232     private:
233       mapRotorEnergy energy_map;
234       long unsigned int energy_ncompute;
235       long unsigned int energy_nrequest;
236   };
237 
238   /**
239    * @class OBMinimizingRMSDConformerScore conformersearch.h <openbabel/conformersearch.h>
240    * @brief An RMSD conformer scoring class, after a short minimization
241    *
242    * This scores conformers by the RMSD between the conformer and the closest, to produce a
243    * diverse set of conformers, but after minimization. This ensures each conformer is
244    * "reasonable" and avoids steric clashes.
245    * @since 2.4
246    */
247   class OBAPI OBMinimizingRMSDConformerScore : public OBConformerScore
248   {
249     public:
GetPreferred()250       Preferred GetPreferred() { return HighScore; }
GetConvergence()251       Convergence GetConvergence() { return Average; }
252       double Score(OBMol &mol, unsigned int index, const RotorKeys &keys,
253           const std::vector<double*> &conformers);
254   };
255 
256   //////////////////////////////////////////////////////////
257   //
258   //  OBConformerSearch
259   //
260   //////////////////////////////////////////////////////////
261 
262   /**
263    * @class OBConformerSearch conformersearch.h <openbabel/conformersearch.h>
264    * @brief Conformer searching using genetic algorithm.
265    * See @ref ConformerSearching
266    * @since 2.3
267    */
268   class OBAPI OBConformerSearch
269   {
270     public:
271       OBConformerSearch();
272       ~OBConformerSearch();
273       /**
274        * Setup this instance with the specified molecule.
275        *
276        * @param mol The molecule with initial conformer.
277        * @param numConformers The number of conformers that should be generated. This
278        *        is also the number of conformers selected for each generation.
279        * @param numChildren When a new generation is generated, for each of the
280        *        numConformers conformers, numChildren children are created.
281        * @param mutability The mutability determines how frequent a permutation occurs
282        *        when generating the next generation.
283        * @param convergence The number of identical generations before considering
284        *        the process converged.
285        */
286       bool Setup(const OBMol &mol, int numConformers = 30, int numChildren = 5,
287           int mutability = 5, int convergence = 25);
288       /**
289        * Set the number of conformers.
290        */
SetNumConformers(int numConformers)291       void SetNumConformers(int numConformers) { m_numConformers = numConformers; }
292       /**
293        * Set the number of children generated for each parent.
294        */
SetNumChildren(int numChildren)295       void SetNumChildren(int numChildren) { m_numChildren = numChildren; }
296       /**
297        * Set the mutability.
298        */
SetMutability(int mutability)299       void SetMutability(int mutability) { m_mutability = mutability; }
300       /**
301        * Set the convergence (i.e. number of generations that did not change the score
302        * before considering the iteration converged).
303        */
SetConvergence(int convergence)304       void SetConvergence(int convergence) { m_convergence = convergence; }
305       /**
306        * Set the bonds to be fixed.
307        */
SetFixedBonds(const OBBitVec & fixedBonds)308       void SetFixedBonds(const OBBitVec &fixedBonds) { m_fixedBonds = fixedBonds; }
309 
310 
311       /**
312        * Set the filter method used to check if a newly generated is acceptable. Typical
313        * examples are a steric filter or electrostatic energy filter. The filters make a binary
314        * selection and one group will be scored in the next step. A number of filters
315        * are provided by default but implementing a custom filter is easy.
316        *
317        * There is also a OBConformerFilters class to make it easy to use multiple filters.
318        */
SetFilter(OBConformerFilter * filter)319       void SetFilter(OBConformerFilter *filter)
320       {
321         delete m_filter;
322         m_filter = filter;
323       }
324       /**
325        * All acceptable conformers are scored to select the fittest conformers from
326        * the population. A higher score means the conformer is desired in the final
327        * set or next generation. Typical scoring would be force field energy to find
328        * the lowest energy conformer. Another example is the highest RMSD between
329        * conformer i and the closest conformer to create a diverse set of conformers.
330        * Diversity could also be expressed in other ways and implementing a scoring
331        * class is easy.
332        */
SetScore(OBConformerScore * score)333       void SetScore(OBConformerScore *score)
334       {
335         delete m_score;
336         m_score = score;
337       }
338 
339       /**
340       * Set whether or not you want rotors to be printed prior to the conformer search.
341       */
PrintRotors(bool printrotors)342       void PrintRotors(bool printrotors) { m_printrotors = printrotors; }
343 
344       /**
345        * Perform conformer search using a genetic algorithm.
346        */
347       void Search();
348 
GetRotorKeys()349       const RotorKeys& GetRotorKeys() const
350       {
351         return m_rotorKeys;
352       }
353 
354       void GetConformers(OBMol &mol);
355 
356       /* @brief Set an output stream for logging. If NULL pointer is provided, logging is disabled. */
SetLogStream(std::ostream * sptr)357       void SetLogStream  (std::ostream *sptr) {m_logstream = sptr;}
358 
359       /*************************************************/
360       /* Methods related to fitness sharing parameters */
361 
362       /* @brief Set the use of fitness sharing ON (default) or OFF*/
363       void SetSharing (bool value = true)  {use_sharing = value;}
364 
365       /* @brief Get the targeted number of niches, for dynamic niche sharing */
GetNbNiches()366       int GetNbNiches () {return nb_niches;}
367 
368       /* @brief Set the targeted number of niches, for dynamic niche sharing */
SetNbNiches(int value)369       void SetNbNiches (int value) {nb_niches = value;}
370 
371       /* @brief Get niches radius, for dynamic niche sharing.*/
GetNicheRadius()372       double GetNicheRadius () {return niche_radius;}
373 
374       /* @brief Set niches radius, for dynamic niche sharing.*/
SetNicheRadius(double value)375       void SetNicheRadius (double value) {niche_radius = value;}
376 
377       /* @brief Get the alpha sharing parameter */
GetAlphaSharing()378       double GetAlphaSharing () {return alpha_share;}
379 
380       /* @brief Set the alpha sharing parameter */
SetAlphaSharing(double value)381       void SetAlphaSharing (double value) {alpha_share = value;}
382 
383       /* @brief Get the sigma sharing parameter */
GetSigmaSharing()384       double GetSigmaSharing () {return sigma_share;}
385 
386       /* @brief Set the sigma sharing parameter */
SetSigmaSharing(double value)387       void SetSigmaSharing (double value) {sigma_share = value;}
388 
389       /* @brief Get the (uniform) crossover probability */
GetCrossoverProbability()390       double GetCrossoverProbability () {return p_crossover;}
391 
392       /* @brief Set the (uniform) crossover probability */
SetCrossoverProbability(double value)393       void SetCrossoverProbability (double value) {p_crossover = value;}
394 
395       /* @brief Get the niche mating probability, for dynamic niche sharing */
GetNicheMating()396       double GetNicheMating () {return niche_mating;}
397 
398       /* @brief Set the (uniform) crossover probability */
SetNicheMating(double value)399       void SetNicheMating (double value) {niche_mating = value;}
400 
401       /* @brief Set the local optimization rate */
SetLocalOptRate(int value)402       void SetLocalOptRate (int value) {local_opt_rate = value;}
403 
404       /* @brief Get the local optimization rate*/
SetLocalOptRate()405       int  SetLocalOptRate() {return local_opt_rate;}
406 
407     private:
408       /**
409        * Create the next generation.
410        */
411       void NextGeneration();
412       /**
413        * Select the fittest numConformers from the parents and their children.
414        */
415       double MakeSelection();
416       /**
417        * Check if a conformer key is unique.
418        */
419       bool IsUniqueKey(const RotorKeys &keys, const RotorKey &key) const;
420       /**
421        * Check the specified rotor key using the set filter.
422        */
423       bool IsGood(const RotorKey &key);
424 
425       //! @brief Genetic similarity measure, i.e. "distance" between two rotor keys.
426       int key_distance (const RotorKey &key1, const RotorKey &key2);
427       //! @brief Make a local search on the best individual
428       int local_opt ();
429       //! @brief Produces one or two offsprings
430       int reproduce (RotorKey &key1, RotorKey &key2);
431       //! @brief Score, sort the current population, compute shared fitness values (and niches)
432       int score_population ();
433       //! @brief Compute shared fitness values for a given population
434       int share_fitness ();
435       //! @brief Perform one generation with fitness sharing
436       double sharing_generation ();
437 
438       unsigned int m_numConformers; //!< The desired number of conformers. This is also the population size.
439       int m_numChildren; //!< The number of children generated each generation
440       int m_mutability; //!< The mutability for generating the next generation
441       int m_convergence; //!< Number of generations that remain unchanged before quiting
442 
443       std::vector<double> vscores;                    //!< Current population score vector
444       std::vector<double> vshared_fitnes;             //!< Current population shared fitness vector
445       std::vector<std::vector <int> > dynamic_niches; //!< The dynamically found niches
446       std::vector<int> niche_map;                     //!< Procide the sharing niche index, given the key inddex
447 
448       void *d; // Opaque pointer - currently for storing OBRandom* which may be removed in future
449       bool use_sharing;		//!< Whether to use sharing or not.
450       double alpha_share;	//!< The alpha parameter in sharing function
451       double sigma_share;		//!< The sigma parameter in sharing function
452       int nb_niches;		//!< The number of dynamic niches to be found
453       double niche_radius;		//!< A pre-determined niche radius, for dynamic niche sharing.
454       double p_crossover;	//!< Crossover probability
455       double niche_mating;	//!< Probability of forcing the second parent in the first parent
456       int local_opt_rate;       //!< Perform a random local optimization every local_opt_rate generations. Disabled if set to
457       OBBitVec      m_fixedBonds; //!< Bonds that are fixed
458       OBMol         m_mol; //!< The molecule with starting coordinates
459       OBRotorList   m_rotorList; //!< The OBRotorList for the molecule
460       RotorKeys     m_rotorKeys; //!< The current population
461       bool          m_printrotors; //!< Wheter or not to print all rotors that are found instead of performing the conformer search
462 
463       OBConformerFilter *m_filter;
464       OBConformerScore  *m_score;
465 
466       std::ostream *m_logstream;	//!< A pointer to a log stream (NULL means no loogging)
467   };
468 
469   /**
470    * @page ConformerSearching
471    *
472    * @section introduction Introduction
473    * All conformer searching methods in OpenBabel are based on the concept of rotor
474    * keys. A rotor key is simply an array of values specifying the rotations around
475    * rotatable bonds. The number of possible values for a rotatable bond depends on
476    * the bond type. The classes implementing the rotor key concept are OBRotor,
477    * OBRotorList and OBRotamer.
478    *
479    * Previous OpenBabel releases contained only methods for finding stable (low
480    * energy) conformers by using the force fields. The 2.3 release introduces a new
481    * flexible class (OBConformerSearch) implementing a genetic algorithm. The scoring
482    * or ranking of conformers is done by a separate class derived from the abstract
483    * OBConformerScore class. Reimplementing this class allows for all sorts of scoring
484    * functions (e.g. RMSD, torson, energy, ... based).
485    *
486    * @section stable Finding stable conformers
487    *
488    * @subsection systematic Systematic rotor search
489    * This is the simplest of all conformer searching methods and only suitable when
490    * there are only a few rotatable bonds. Since the number of conformers grows
491    * exponentially with the number of bonds, enumerating all conformers for reasonably
492    * sized molecules quickly results in unacceptable performance. A systematic rotor
493    * search can be done with the OBForceField::SystematicRotorSearch method.
494    *
495    * @subsection random Random rotor search
496    * The random rotor search generates random rotor keys and evaluates the energy.
497    * The lowest energy conformer is kept. All rotor keys are random and no attempt
498    * is made to improve good scoring keys. In most cases the weighted rotor search
499    * is a better choice. A random rotor search can be performed with the
500    * OBForceField::RandomRotorSearch method.
501    *
502    * @subsection weighted Weighted rotor search
503    * The weighted rotor search uses the score (energy) from a generated and tries
504    * to optimize good scoring keys. This allows the global minimum energy conformer
505    * to be identified much faster compared to the random rotor search. However,
506    * finding the global minimum can be time consuming for large molecules and there
507    * is no guarantee that the found minimum is the global minimum. A weighted rotor
508    * search is performed with the OBForceField::WeightedRotorSearch method.
509    *
510    * @subsection genetic Genetic algorithm
511    * The OBConformerSearch class introduced in OpenBabel 2.3 implements a genetic
512    * algorithm for finding conformers. The class is configurable and with the
513    * right scoring function (OBEnergyConformerScore), stable conformers can be
514    * found. Read the OBConformerSearch section for more details on how the genetic
515    * algorithm works.
516    *
517    * @section diverse Finding a diverse set of conformers
518    * A divers set of conformers can be generated using the OBConformerSearch class
519    * with a scoring function like OBRMSDConformerScore. A diverse set of conformers
520    * is often useful when screening bioactive molecules since the interaction with
521    * the target can stabilize a higher conformational energy. See the next section
522    * for details.
523    *
524    * @section OBConformerSearch
525    * The genetic algorithm starts by generating the initial population of rotor keys.
526    * The initial population contains up to numConformers, duplicate keys are ignored.
527    * OBConformerFilter objects are used to filter conformers before including them.
528    * A typical filter is a steric filter to ignore all conformers with atoms to close
529    * together.
530    *
531    * For each generation, numChildren children are created by permuting the parent
532    * rotor keys. The mutability setting determines how frequent a permutation is made
533    * (e.g. 5 means 1/5 bonds are permuted, 10 means 1/10).
534    * Again, duplicated and filtered molecules are ignored. The population now contains
535    * up to numConformer * (1 + numChildren). All these rotor keys are scored using the
536    * specified OBConformerScore class. Next, all keys are ordered by their score and
537    * the best numConformers conformers are selected as parents for the next
538    * generation.
539    *
540    * New generations are generated until the specified number of generations (i.e.
541    * convergence) don't improve the score.
542    *
543    * @subsection OBConformerFilter
544    * Filters are used to exclude certain generated conformers. For example, the
545    * OBStericConformerFilter filters out all conformers with atoms to close together.
546    * Custom filters can be used to consider additional properties (e.g. electrostatic
547    * energy, distance between pharmacophore groups, ...). Filters can easily be
548    * combined using the OBConformerFilters class.
549    *
550    * @subsection OBConformerScore
551    * OBConformerScore derived classes compute the score for conformers. The scoring
552    * function is probably the most important part of the algorithm since it selects
553    * which conformers will be used as parents for the next generation. Derived
554    * classes also specify if the scores are descending or ascending and what
555    * convergence criteria should be used (i.e. lowest, highest, sum or average).
556    * There is no easy way to combine scoring functions since scores are not always
557    * additive. However, a custom scoring function could be used that combines several
558    * other scoring functions with weights for each score.
559    *
560    * The scoring function determines what kind of conformers are selected. For
561    * example, the OBEnergyConformerScore class selects stable, low energy conformers.
562    * A simple variation to this would be to only use the non-bonded energy. The
563    * result for these two scoring functions should be similar.
564    *
565    * The OBRMSDConformerScore scoring function can be used to generate a diverse set
566    * of conformers. A diverse set is often useful when screening bioactive molecules
567    * since the interaction with the target can stabilize a higher conformational
568    * energy. To calculate the RMSD between conformers, Kabsch alignment (OBAlign)
569    * is used. The alignment takes symmetry into account.
570    *
571    * @image html energyconformerscore.png
572    * @image html rmsdconformerscore.png "30 Conformers for methotrexate. top: OBEnergyConformerScore bottom: OBRMSDConformerScore"
573    */
574 
575   ///@}
576 };
577 
578 #endif
579 
580 /// @file conformersearch.h
581 /// @brief Conformer searching using genetic algorithm
582