1 /********************************************************************** 2 conformersearch.h - Conformer searching using genetic algorithm. 3 4 Copyright (C) 2010 Tim Vandermeersch 5 Some portions Copyright (C) 2012 Materials Design, Inc. 6 Some portions Copyright (C) 2016 Torsten Sachse 7 8 This file is part of the Open Babel project. 9 For more information, see <http://openbabel.org/> 10 11 This program is free software; you can redistribute it and/or modify 12 it under the terms of the GNU General Public License as published by 13 the Free Software Foundation version 2 of the License. 14 15 This program is distributed in the hope that it will be useful, 16 but WITHOUT ANY WARRANTY; without even the implied warranty of 17 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 18 GNU General Public License for more details. 19 ***********************************************************************/ 20 21 #ifndef OB_CONFORMERSEARCH_H 22 #define OB_CONFORMERSEARCH_H 23 24 #include <openbabel/mol.h> 25 #include <openbabel/rotor.h> 26 #include <openbabel/rotamer.h> 27 28 namespace OpenBabel { 29 30 typedef std::vector<int> RotorKey; 31 typedef std::vector<RotorKey> RotorKeys; 32 typedef std::map<std::vector<int>,double> mapRotorEnergy; 33 34 ///@addtogroup conformer Conformer Searching 35 ///@{ 36 37 ////////////////////////////////////////////////////////// 38 // 39 // OBConformerFilter(s) 40 // 41 ////////////////////////////////////////////////////////// 42 43 /** 44 * @class OBConformerFilter conformersearch.h <openbabel/conformersearch.h> 45 * @brief Interface used by OBConformerSearch for filtering conformers. 46 * 47 * The OBConformerFilter class defines an interface used by the OBConformerSearch 48 * class. A conformer filter is used as a first selection method when generating 49 * conformers. A typical example is a steric filter to ensure atoms are never 50 * closer than some specified distance. 51 * 52 * @see OBConformerSearch OBConformerFilters 53 * @since 2.3 54 */ 55 class OBAPI OBConformerFilter 56 { 57 public: 58 /** 59 * Check if a conformer is good (i.e. passes the filter). The molecule, rotor 60 * key and coords are passed as parameters to this function and can be used 61 * for checking the confoormer. 62 * @return True if the conformer passes the filter. 63 */ 64 virtual bool IsGood(const OBMol &mol, const RotorKey &key, double *coords) = 0; 65 virtual ~OBConformerFilter() = 0; 66 }; 67 68 /** 69 * @class OBConformerFilters conformersearch.h <openbabel/conformersearch.h> 70 * @brief Class for combining OBConformerFilter objects. 71 * 72 * The OBConformerFilters class makes it easy to combine OBConformerFilter 73 * objects. A list of OBConformerFilter objects is specified when constructing 74 * this class. The IsGood implementation simply checks all the specified 75 * filters and returns false if any of the filters returns false. 76 * 77 * @see OBConformerSearch OBConformerFilter 78 * @since 2.3 79 */ 80 class OBAPI OBConformerFilters : public OBConformerFilter 81 { 82 public: 83 /** 84 * Constructor specifiying the filters to combine. 85 */ OBConformerFilters(const std::vector<OBConformerFilter * > & filters)86 OBConformerFilters(const std::vector<OBConformerFilter*> &filters) : m_filters(filters) 87 { 88 } 89 /** 90 * IsGood reimplementation. 91 * @return True if all filters pass. 92 */ IsGood(const OBMol & mol,const RotorKey & key,double * coords)93 bool IsGood(const OBMol &mol, const RotorKey &key, double *coords) 94 { 95 for (unsigned int i = 0; i < m_filters.size(); ++i) 96 if (!m_filters[i]->IsGood(mol, key, coords)) 97 return false; 98 return true; 99 } 100 protected: 101 std::vector<OBConformerFilter*> m_filters; 102 }; 103 104 /** 105 * @class OBStericConformerFilter conformersearch.h <openbabel/conformersearch.h> 106 * @brief A steric conformer filter class. 107 * 108 * The OBStericConformerFilter class is the default filter used by OBConformerSearch. 109 * The filter removes all molecules which have at least 2 atoms closer together than 110 * the specified distance. 111 * @since 2.3 112 */ 113 class OBAPI OBStericConformerFilter : public OBConformerFilter 114 { 115 public: 116 OBStericConformerFilter (); 117 OBStericConformerFilter (double cutoff, double vdw_factor = 0.5, bool check_hydrogens = true); 118 bool IsGood(const OBMol &mol, const RotorKey &key, double *coords); 119 private: 120 double m_cutoff; //!< Internal cutoff (used as a squared distance) 121 double m_vdw_factor; //!< Factor applied to Van der Waals distance check 122 bool m_check_hydrogens; //!< Check hydrOgens if true 123 }; 124 125 ////////////////////////////////////////////////////////// 126 // 127 // OBConformerScore(s) 128 // 129 ////////////////////////////////////////////////////////// 130 131 /** 132 * @class OBConformerScore conformersearch.h <openbabel/conformersearch.h> 133 * @brief Interface used by OBConformerSearch for scoring conformers. 134 * 135 * The OBConformerScore class defines an interface to assign scores to conformers. 136 * This class is used by OBConformerSearch to make the class flexible. A higher 137 * score means the conformer with index is more favourable. A typical example is 138 * the force field energy to find the lowest energy conformer (note: the energy is 139 * not directly usable as score since lower values are better). Another example 140 * is to use some measure of diversity (e.g. RMSD) to generate a diverse set of 141 * conformers. 142 * @since 2.3 143 */ 144 class OBAPI OBConformerScore 145 { 146 public: 147 /** 148 * Conformer scores can be preferably high or low. 149 */ 150 enum Preferred { HighScore, LowScore }; 151 /** 152 * Preferred order for subclass scoring function. 153 */ 154 virtual Preferred GetPreferred() = 0; 155 /** 156 * Convergence criteria used. 157 */ 158 enum Convergence { Highest, Lowest, Sum, Average }; 159 /** 160 * Convergence criteria for subclass scoring function. 161 */ 162 virtual Convergence GetConvergence() = 0; 163 /** 164 * Score an individual conformer specified by index. 165 */ 166 virtual double Score(OBMol &mol, unsigned int index, const RotorKeys &keys, 167 const std::vector<double*> &conformers) = 0; 168 virtual ~OBConformerScore() = 0; 169 }; 170 171 /** 172 * @class OBRMSDConformerScore conformersearch.h <openbabel/conformersearch.h> 173 * @brief A RMSD conformer scoring class. 174 * 175 * Score conformers by the RMSD between the conformer with specified index and 176 * the closest conformer. This results in a diverse set of conformers. 177 * @since 2.3 178 */ 179 class OBAPI OBRMSDConformerScore : public OBConformerScore 180 { 181 public: GetPreferred()182 Preferred GetPreferred() { return HighScore; } GetConvergence()183 Convergence GetConvergence() { return Average; } 184 double Score(OBMol &mol, unsigned int index, const RotorKeys &keys, 185 const std::vector<double*> &conformers); 186 }; 187 188 /** 189 * @class OBEnergyConformerScore conformersearch.h <openbabel/conformersearch.h> 190 * @brief A lowest energy conformer scoring class. 191 * @since 2.3 192 */ 193 class OBAPI OBEnergyConformerScore : public OBConformerScore 194 { 195 public: OBEnergyConformerScore()196 OBEnergyConformerScore () { 197 energy_map.clear (); 198 energy_ncompute = 0; 199 energy_nrequest = 0; 200 } GetNbEnergyCompute()201 long unsigned int GetNbEnergyCompute () {return energy_ncompute;} GetNbEnergyRequest()202 long unsigned int GetNbEnergyRequest () {return energy_nrequest;} GetPreferred()203 Preferred GetPreferred() { return LowScore; } GetConvergence()204 Convergence GetConvergence() { return Lowest; } 205 double Score(OBMol &mol, unsigned int index, const RotorKeys &keys, 206 const std::vector<double*> &conformers); 207 private: 208 mapRotorEnergy energy_map; 209 long unsigned int energy_ncompute; 210 long unsigned int energy_nrequest; 211 }; 212 213 /** 214 * @class OBMinimizingEnergyConformerScore conformersearch.h <openbabel/conformersearch.h> 215 * @brief A lowest energy conformer scoring class (after minimization) 216 * @since 2.4 217 */ 218 class OBAPI OBMinimizingEnergyConformerScore : public OBConformerScore 219 { 220 public: OBMinimizingEnergyConformerScore()221 OBMinimizingEnergyConformerScore () { 222 energy_map.clear (); 223 energy_ncompute = 0; 224 energy_nrequest = 0; 225 } GetNbEnergyCompute()226 long unsigned int GetNbEnergyCompute () {return energy_ncompute;} GetNbEnergyRequest()227 long unsigned int GetNbEnergyRequest () {return energy_nrequest;} GetPreferred()228 Preferred GetPreferred() { return LowScore; } GetConvergence()229 Convergence GetConvergence() { return Lowest; } 230 double Score(OBMol &mol, unsigned int index, const RotorKeys &keys, 231 const std::vector<double*> &conformers); 232 private: 233 mapRotorEnergy energy_map; 234 long unsigned int energy_ncompute; 235 long unsigned int energy_nrequest; 236 }; 237 238 /** 239 * @class OBMinimizingRMSDConformerScore conformersearch.h <openbabel/conformersearch.h> 240 * @brief An RMSD conformer scoring class, after a short minimization 241 * 242 * This scores conformers by the RMSD between the conformer and the closest, to produce a 243 * diverse set of conformers, but after minimization. This ensures each conformer is 244 * "reasonable" and avoids steric clashes. 245 * @since 2.4 246 */ 247 class OBAPI OBMinimizingRMSDConformerScore : public OBConformerScore 248 { 249 public: GetPreferred()250 Preferred GetPreferred() { return HighScore; } GetConvergence()251 Convergence GetConvergence() { return Average; } 252 double Score(OBMol &mol, unsigned int index, const RotorKeys &keys, 253 const std::vector<double*> &conformers); 254 }; 255 256 ////////////////////////////////////////////////////////// 257 // 258 // OBConformerSearch 259 // 260 ////////////////////////////////////////////////////////// 261 262 /** 263 * @class OBConformerSearch conformersearch.h <openbabel/conformersearch.h> 264 * @brief Conformer searching using genetic algorithm. 265 * See @ref ConformerSearching 266 * @since 2.3 267 */ 268 class OBAPI OBConformerSearch 269 { 270 public: 271 OBConformerSearch(); 272 ~OBConformerSearch(); 273 /** 274 * Setup this instance with the specified molecule. 275 * 276 * @param mol The molecule with initial conformer. 277 * @param numConformers The number of conformers that should be generated. This 278 * is also the number of conformers selected for each generation. 279 * @param numChildren When a new generation is generated, for each of the 280 * numConformers conformers, numChildren children are created. 281 * @param mutability The mutability determines how frequent a permutation occurs 282 * when generating the next generation. 283 * @param convergence The number of identical generations before considering 284 * the process converged. 285 */ 286 bool Setup(const OBMol &mol, int numConformers = 30, int numChildren = 5, 287 int mutability = 5, int convergence = 25); 288 /** 289 * Set the number of conformers. 290 */ SetNumConformers(int numConformers)291 void SetNumConformers(int numConformers) { m_numConformers = numConformers; } 292 /** 293 * Set the number of children generated for each parent. 294 */ SetNumChildren(int numChildren)295 void SetNumChildren(int numChildren) { m_numChildren = numChildren; } 296 /** 297 * Set the mutability. 298 */ SetMutability(int mutability)299 void SetMutability(int mutability) { m_mutability = mutability; } 300 /** 301 * Set the convergence (i.e. number of generations that did not change the score 302 * before considering the iteration converged). 303 */ SetConvergence(int convergence)304 void SetConvergence(int convergence) { m_convergence = convergence; } 305 /** 306 * Set the bonds to be fixed. 307 */ SetFixedBonds(const OBBitVec & fixedBonds)308 void SetFixedBonds(const OBBitVec &fixedBonds) { m_fixedBonds = fixedBonds; } 309 310 311 /** 312 * Set the filter method used to check if a newly generated is acceptable. Typical 313 * examples are a steric filter or electrostatic energy filter. The filters make a binary 314 * selection and one group will be scored in the next step. A number of filters 315 * are provided by default but implementing a custom filter is easy. 316 * 317 * There is also a OBConformerFilters class to make it easy to use multiple filters. 318 */ SetFilter(OBConformerFilter * filter)319 void SetFilter(OBConformerFilter *filter) 320 { 321 delete m_filter; 322 m_filter = filter; 323 } 324 /** 325 * All acceptable conformers are scored to select the fittest conformers from 326 * the population. A higher score means the conformer is desired in the final 327 * set or next generation. Typical scoring would be force field energy to find 328 * the lowest energy conformer. Another example is the highest RMSD between 329 * conformer i and the closest conformer to create a diverse set of conformers. 330 * Diversity could also be expressed in other ways and implementing a scoring 331 * class is easy. 332 */ SetScore(OBConformerScore * score)333 void SetScore(OBConformerScore *score) 334 { 335 delete m_score; 336 m_score = score; 337 } 338 339 /** 340 * Set whether or not you want rotors to be printed prior to the conformer search. 341 */ PrintRotors(bool printrotors)342 void PrintRotors(bool printrotors) { m_printrotors = printrotors; } 343 344 /** 345 * Perform conformer search using a genetic algorithm. 346 */ 347 void Search(); 348 GetRotorKeys()349 const RotorKeys& GetRotorKeys() const 350 { 351 return m_rotorKeys; 352 } 353 354 void GetConformers(OBMol &mol); 355 356 /* @brief Set an output stream for logging. If NULL pointer is provided, logging is disabled. */ SetLogStream(std::ostream * sptr)357 void SetLogStream (std::ostream *sptr) {m_logstream = sptr;} 358 359 /*************************************************/ 360 /* Methods related to fitness sharing parameters */ 361 362 /* @brief Set the use of fitness sharing ON (default) or OFF*/ 363 void SetSharing (bool value = true) {use_sharing = value;} 364 365 /* @brief Get the targeted number of niches, for dynamic niche sharing */ GetNbNiches()366 int GetNbNiches () {return nb_niches;} 367 368 /* @brief Set the targeted number of niches, for dynamic niche sharing */ SetNbNiches(int value)369 void SetNbNiches (int value) {nb_niches = value;} 370 371 /* @brief Get niches radius, for dynamic niche sharing.*/ GetNicheRadius()372 double GetNicheRadius () {return niche_radius;} 373 374 /* @brief Set niches radius, for dynamic niche sharing.*/ SetNicheRadius(double value)375 void SetNicheRadius (double value) {niche_radius = value;} 376 377 /* @brief Get the alpha sharing parameter */ GetAlphaSharing()378 double GetAlphaSharing () {return alpha_share;} 379 380 /* @brief Set the alpha sharing parameter */ SetAlphaSharing(double value)381 void SetAlphaSharing (double value) {alpha_share = value;} 382 383 /* @brief Get the sigma sharing parameter */ GetSigmaSharing()384 double GetSigmaSharing () {return sigma_share;} 385 386 /* @brief Set the sigma sharing parameter */ SetSigmaSharing(double value)387 void SetSigmaSharing (double value) {sigma_share = value;} 388 389 /* @brief Get the (uniform) crossover probability */ GetCrossoverProbability()390 double GetCrossoverProbability () {return p_crossover;} 391 392 /* @brief Set the (uniform) crossover probability */ SetCrossoverProbability(double value)393 void SetCrossoverProbability (double value) {p_crossover = value;} 394 395 /* @brief Get the niche mating probability, for dynamic niche sharing */ GetNicheMating()396 double GetNicheMating () {return niche_mating;} 397 398 /* @brief Set the (uniform) crossover probability */ SetNicheMating(double value)399 void SetNicheMating (double value) {niche_mating = value;} 400 401 /* @brief Set the local optimization rate */ SetLocalOptRate(int value)402 void SetLocalOptRate (int value) {local_opt_rate = value;} 403 404 /* @brief Get the local optimization rate*/ SetLocalOptRate()405 int SetLocalOptRate() {return local_opt_rate;} 406 407 private: 408 /** 409 * Create the next generation. 410 */ 411 void NextGeneration(); 412 /** 413 * Select the fittest numConformers from the parents and their children. 414 */ 415 double MakeSelection(); 416 /** 417 * Check if a conformer key is unique. 418 */ 419 bool IsUniqueKey(const RotorKeys &keys, const RotorKey &key) const; 420 /** 421 * Check the specified rotor key using the set filter. 422 */ 423 bool IsGood(const RotorKey &key); 424 425 //! @brief Genetic similarity measure, i.e. "distance" between two rotor keys. 426 int key_distance (const RotorKey &key1, const RotorKey &key2); 427 //! @brief Make a local search on the best individual 428 int local_opt (); 429 //! @brief Produces one or two offsprings 430 int reproduce (RotorKey &key1, RotorKey &key2); 431 //! @brief Score, sort the current population, compute shared fitness values (and niches) 432 int score_population (); 433 //! @brief Compute shared fitness values for a given population 434 int share_fitness (); 435 //! @brief Perform one generation with fitness sharing 436 double sharing_generation (); 437 438 unsigned int m_numConformers; //!< The desired number of conformers. This is also the population size. 439 int m_numChildren; //!< The number of children generated each generation 440 int m_mutability; //!< The mutability for generating the next generation 441 int m_convergence; //!< Number of generations that remain unchanged before quiting 442 443 std::vector<double> vscores; //!< Current population score vector 444 std::vector<double> vshared_fitnes; //!< Current population shared fitness vector 445 std::vector<std::vector <int> > dynamic_niches; //!< The dynamically found niches 446 std::vector<int> niche_map; //!< Procide the sharing niche index, given the key inddex 447 448 void *d; // Opaque pointer - currently for storing OBRandom* which may be removed in future 449 bool use_sharing; //!< Whether to use sharing or not. 450 double alpha_share; //!< The alpha parameter in sharing function 451 double sigma_share; //!< The sigma parameter in sharing function 452 int nb_niches; //!< The number of dynamic niches to be found 453 double niche_radius; //!< A pre-determined niche radius, for dynamic niche sharing. 454 double p_crossover; //!< Crossover probability 455 double niche_mating; //!< Probability of forcing the second parent in the first parent 456 int local_opt_rate; //!< Perform a random local optimization every local_opt_rate generations. Disabled if set to 457 OBBitVec m_fixedBonds; //!< Bonds that are fixed 458 OBMol m_mol; //!< The molecule with starting coordinates 459 OBRotorList m_rotorList; //!< The OBRotorList for the molecule 460 RotorKeys m_rotorKeys; //!< The current population 461 bool m_printrotors; //!< Wheter or not to print all rotors that are found instead of performing the conformer search 462 463 OBConformerFilter *m_filter; 464 OBConformerScore *m_score; 465 466 std::ostream *m_logstream; //!< A pointer to a log stream (NULL means no loogging) 467 }; 468 469 /** 470 * @page ConformerSearching 471 * 472 * @section introduction Introduction 473 * All conformer searching methods in OpenBabel are based on the concept of rotor 474 * keys. A rotor key is simply an array of values specifying the rotations around 475 * rotatable bonds. The number of possible values for a rotatable bond depends on 476 * the bond type. The classes implementing the rotor key concept are OBRotor, 477 * OBRotorList and OBRotamer. 478 * 479 * Previous OpenBabel releases contained only methods for finding stable (low 480 * energy) conformers by using the force fields. The 2.3 release introduces a new 481 * flexible class (OBConformerSearch) implementing a genetic algorithm. The scoring 482 * or ranking of conformers is done by a separate class derived from the abstract 483 * OBConformerScore class. Reimplementing this class allows for all sorts of scoring 484 * functions (e.g. RMSD, torson, energy, ... based). 485 * 486 * @section stable Finding stable conformers 487 * 488 * @subsection systematic Systematic rotor search 489 * This is the simplest of all conformer searching methods and only suitable when 490 * there are only a few rotatable bonds. Since the number of conformers grows 491 * exponentially with the number of bonds, enumerating all conformers for reasonably 492 * sized molecules quickly results in unacceptable performance. A systematic rotor 493 * search can be done with the OBForceField::SystematicRotorSearch method. 494 * 495 * @subsection random Random rotor search 496 * The random rotor search generates random rotor keys and evaluates the energy. 497 * The lowest energy conformer is kept. All rotor keys are random and no attempt 498 * is made to improve good scoring keys. In most cases the weighted rotor search 499 * is a better choice. A random rotor search can be performed with the 500 * OBForceField::RandomRotorSearch method. 501 * 502 * @subsection weighted Weighted rotor search 503 * The weighted rotor search uses the score (energy) from a generated and tries 504 * to optimize good scoring keys. This allows the global minimum energy conformer 505 * to be identified much faster compared to the random rotor search. However, 506 * finding the global minimum can be time consuming for large molecules and there 507 * is no guarantee that the found minimum is the global minimum. A weighted rotor 508 * search is performed with the OBForceField::WeightedRotorSearch method. 509 * 510 * @subsection genetic Genetic algorithm 511 * The OBConformerSearch class introduced in OpenBabel 2.3 implements a genetic 512 * algorithm for finding conformers. The class is configurable and with the 513 * right scoring function (OBEnergyConformerScore), stable conformers can be 514 * found. Read the OBConformerSearch section for more details on how the genetic 515 * algorithm works. 516 * 517 * @section diverse Finding a diverse set of conformers 518 * A divers set of conformers can be generated using the OBConformerSearch class 519 * with a scoring function like OBRMSDConformerScore. A diverse set of conformers 520 * is often useful when screening bioactive molecules since the interaction with 521 * the target can stabilize a higher conformational energy. See the next section 522 * for details. 523 * 524 * @section OBConformerSearch 525 * The genetic algorithm starts by generating the initial population of rotor keys. 526 * The initial population contains up to numConformers, duplicate keys are ignored. 527 * OBConformerFilter objects are used to filter conformers before including them. 528 * A typical filter is a steric filter to ignore all conformers with atoms to close 529 * together. 530 * 531 * For each generation, numChildren children are created by permuting the parent 532 * rotor keys. The mutability setting determines how frequent a permutation is made 533 * (e.g. 5 means 1/5 bonds are permuted, 10 means 1/10). 534 * Again, duplicated and filtered molecules are ignored. The population now contains 535 * up to numConformer * (1 + numChildren). All these rotor keys are scored using the 536 * specified OBConformerScore class. Next, all keys are ordered by their score and 537 * the best numConformers conformers are selected as parents for the next 538 * generation. 539 * 540 * New generations are generated until the specified number of generations (i.e. 541 * convergence) don't improve the score. 542 * 543 * @subsection OBConformerFilter 544 * Filters are used to exclude certain generated conformers. For example, the 545 * OBStericConformerFilter filters out all conformers with atoms to close together. 546 * Custom filters can be used to consider additional properties (e.g. electrostatic 547 * energy, distance between pharmacophore groups, ...). Filters can easily be 548 * combined using the OBConformerFilters class. 549 * 550 * @subsection OBConformerScore 551 * OBConformerScore derived classes compute the score for conformers. The scoring 552 * function is probably the most important part of the algorithm since it selects 553 * which conformers will be used as parents for the next generation. Derived 554 * classes also specify if the scores are descending or ascending and what 555 * convergence criteria should be used (i.e. lowest, highest, sum or average). 556 * There is no easy way to combine scoring functions since scores are not always 557 * additive. However, a custom scoring function could be used that combines several 558 * other scoring functions with weights for each score. 559 * 560 * The scoring function determines what kind of conformers are selected. For 561 * example, the OBEnergyConformerScore class selects stable, low energy conformers. 562 * A simple variation to this would be to only use the non-bonded energy. The 563 * result for these two scoring functions should be similar. 564 * 565 * The OBRMSDConformerScore scoring function can be used to generate a diverse set 566 * of conformers. A diverse set is often useful when screening bioactive molecules 567 * since the interaction with the target can stabilize a higher conformational 568 * energy. To calculate the RMSD between conformers, Kabsch alignment (OBAlign) 569 * is used. The alignment takes symmetry into account. 570 * 571 * @image html energyconformerscore.png 572 * @image html rmsdconformerscore.png "30 Conformers for methotrexate. top: OBEnergyConformerScore bottom: OBRMSDConformerScore" 573 */ 574 575 ///@} 576 }; 577 578 #endif 579 580 /// @file conformersearch.h 581 /// @brief Conformer searching using genetic algorithm 582